In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import Dense, LeakyReLU, BatchNormalization
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import set_random_seed

# Seed the Python, NumPy and TensorFlow random generators before anything
# stochastic runs, so that noise sampling, batch selection and weight
# initialisation start from the same state on every Restart & Run All.
SEED = 42
set_random_seed(SEED)

# Load the screen-time log; the columns shown below are
# Date, App, Usage (minutes), Notifications and Times Opened.
data = pd.read_csv('screentime_analysis.csv')

data.head()




Unnamed: 0,Date,App,Usage (minutes),Notifications,Times Opened
0,2024-08-07,Instagram,81,24,57
1,2024-08-08,Instagram,90,30,53
2,2024-08-26,Instagram,112,33,17
3,2024-08-22,Instagram,82,11,38
4,2024-08-12,Instagram,59,47,16


In [2]:
# keep only the numeric feature columns; 'Date' and 'App' are not fed to the GAN
data_gan = data.drop(['Date', 'App'], axis=1)

# fit a MinMaxScaler on the features so each column can be mapped into [0, 1]
# (the same fitted scaler is reused later to map generated samples back)
scaler = MinMaxScaler()
scaler.fit(data_gan)

# apply the fitted scaling
normalized_data = scaler.transform(data_gan)

# wrap the scaled array in a DataFrame that reuses the original column names
normalized_df = pd.DataFrame(data=normalized_data, columns=data_gan.columns)

normalized_df.head()

Unnamed: 0,Usage (minutes),Notifications,Times Opened
0,0.677966,0.163265,0.571429
1,0.754237,0.204082,0.530612
2,0.940678,0.22449,0.163265
3,0.686441,0.07483,0.377551
4,0.491525,0.319728,0.153061


In [3]:
latent_dim = 100  # latent space dimension (size of the random noise vector fed to the generator)

def build_generator(latent_dim, n_features=3):
    """Build the (uncompiled) generator network.

    The network is a stack of three Dense -> LeakyReLU -> BatchNormalization
    stages of width 128, 256 and 512, followed by a sigmoid output layer.
    The sigmoid keeps every generated value in (0, 1), the same range the
    training features are scaled to.

    Parameters
    ----------
    latent_dim : int
        Length of the random noise vector the generator takes as input.
    n_features : int, default 3
        Number of values produced per sample (one per feature column).

    Returns
    -------
    Sequential
        The generator model. It is not compiled here; it is trained only
        through the combined GAN model.
    """
    model = Sequential()
    for stage, units in enumerate((128, 256, 512)):
        if stage == 0:
            # only the first layer declares the input size
            model.add(Dense(units, input_dim=latent_dim))
        else:
            model.add(Dense(units))
        model.add(LeakyReLU(alpha=0.01))
        model.add(BatchNormalization(momentum=0.8))

    # output layer: one sigmoid unit per generated feature
    model.add(Dense(n_features, activation='sigmoid'))
    return model

# instantiate the generator for the configured latent size and show its layers
generator = build_generator(latent_dim=latent_dim)
generator.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 128)               12928     
                                                                 
 leaky_re_lu (LeakyReLU)     (None, 128)               0         
                                                                 
 batch_normalization (Batch  (None, 128)               512       
 Normalization)                                                  
                                                                 
 dense_1 (Dense)             (None, 256)               33024     
                                                                 
 leaky_re_lu_1 (LeakyReLU)   (None, 256)               0         
                                                                 
 batch_normalization_1 (Bat  (None, 256)               1024      
 chNormalization)                                      

In [4]:
# draw standard-normal noise vectors for 1000 samples
noise = np.random.normal(loc=0, scale=1, size=(1000, latent_dim))

# push the noise through the generator; no training has happened yet at this
# point, so this only checks the output shape and value range
generated_data = generator.predict(noise)

# peek at the first 5 generated rows
generated_data[:5]



array([[0.4947375 , 0.5840184 , 0.46266168],
       [0.445957  , 0.5836687 , 0.47440705],
       [0.36027688, 0.57498914, 0.48283097],
       [0.5392939 , 0.52347606, 0.45847267],
       [0.41121292, 0.545952  , 0.5159546 ]], dtype=float32)

In [5]:
def build_discriminator(n_features=3):
    """Build and compile the discriminator network.

    The network is three Dense -> LeakyReLU stages of width 512, 256 and 128
    followed by a single sigmoid unit that scores a sample as real (towards 1)
    or fake (towards 0).

    Parameters
    ----------
    n_features : int, default 3
        Number of values per input sample; must match the generator's output
        width and the number of feature columns in the real data.

    Returns
    -------
    Sequential
        The discriminator, compiled with binary cross-entropy loss, an Adam
        optimizer with default settings, and an accuracy metric.
    """
    model = Sequential([
        Dense(512, input_shape=(n_features,)),
        LeakyReLU(alpha=0.01),
        Dense(256),
        LeakyReLU(alpha=0.01),
        Dense(128),
        LeakyReLU(alpha=0.01),
        Dense(1, activation='sigmoid')  # output: 1 neuron for real/fake classification
    ])
    model.compile(loss='binary_crossentropy', optimizer=Adam(), metrics=['accuracy'])
    return model

# build the compiled discriminator (default arguments) and print its layer summary
discriminator = build_discriminator()
discriminator.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_4 (Dense)             (None, 512)               2048      
                                                                 
 leaky_re_lu_3 (LeakyReLU)   (None, 512)               0         
                                                                 
 dense_5 (Dense)             (None, 256)               131328    
                                                                 
 leaky_re_lu_4 (LeakyReLU)   (None, 256)               0         
                                                                 
 dense_6 (Dense)             (None, 128)               32896     
                                                                 
 leaky_re_lu_5 (LeakyReLU)   (None, 128)               0         
                                                                 
 dense_7 (Dense)             (None, 1)                

In [6]:
def build_gan(generator, discriminator):
    """Stack the generator and discriminator into one compiled model.

    The combined model takes noise as input, passes it through ``generator``
    and then through ``discriminator``. Note that this sets
    ``discriminator.trainable = False`` on the object passed in, which is a
    side effect visible to the caller.

    Returns the stacked model, compiled with binary cross-entropy loss and an
    Adam optimizer with default settings.
    """
    # freeze the discriminator so that updates made through the stacked model
    # are meant to reach only the generator
    discriminator.trainable = False

    stacked = Sequential()
    stacked.add(generator)
    stacked.add(discriminator)
    stacked.compile(optimizer=Adam(), loss='binary_crossentropy')
    return stacked

# wire the generator and discriminator together and show the combined model
gan = build_gan(generator=generator, discriminator=discriminator)
gan.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 sequential (Sequential)     (None, 3)                 182659    
                                                                 
 sequential_1 (Sequential)   (None, 1)                 166401    
                                                                 
Total params: 349060 (1.33 MB)
Trainable params: 180867 (706.51 KB)
Non-trainable params: 168193 (657.00 KB)
_________________________________________________________________


In [7]:
def train_gan(gan, generator, discriminator, data, epochs=10000, batch_size=128, latent_dim=100, log_every=1000):
    """Alternate discriminator and generator updates for ``epochs`` iterations.

    Each iteration:
      1. samples ``batch_size`` real rows from ``data`` (with replacement),
      2. generates an equally sized fake batch from fresh noise,
      3. updates the discriminator on the real batch (label 1) and on the
         fake batch (label 0),
      4. updates the generator through ``gan`` on a new noise batch labelled 1,
         i.e. the generator is pushed towards samples the discriminator
         scores as real.

    Parameters
    ----------
    gan : model exposing ``train_on_batch(x, y)``
        The stacked generator + discriminator model.
    generator : model exposing ``predict(x, verbose=...)``
    discriminator : model exposing ``train_on_batch(x, y)``
    data : array-like of shape (n_samples, n_features)
        Real training samples; converted with ``np.asarray`` so a NumPy array,
        a DataFrame or a nested list are all accepted.
    epochs : int
        Number of training iterations (one batch each).
    batch_size : int
        Rows per real / fake / noise batch.
    latent_dim : int
        Length of each noise vector.
    log_every : int, default 1000
        Print progress every this many iterations; 0 or None disables logging.

    Raises
    ------
    ValueError
        If ``data`` contains no rows.
    """
    real_pool = np.asarray(data)
    if real_pool.size == 0:
        raise ValueError("data must contain at least one row")

    # the label arrays never change, so build them once instead of per iteration
    real_labels = np.ones((batch_size, 1))   # real data has label 1
    fake_labels = np.zeros((batch_size, 1))  # fake data has label 0
    valid_labels = np.ones((batch_size, 1))  # generator target: "looks real"

    for epoch in range(epochs):
        # select a random batch of real data
        idx = np.random.randint(0, real_pool.shape[0], batch_size)
        real_data = real_pool[idx]

        # generate a batch of fake data; verbose=0 keeps predict() from
        # printing a progress bar on every single iteration
        noise = np.random.normal(0, 1, (batch_size, latent_dim))
        fake_data = generator.predict(noise, verbose=0)

        # train the discriminator
        d_loss_real = discriminator.train_on_batch(real_data, real_labels)
        d_loss_fake = discriminator.train_on_batch(fake_data, fake_labels)

        # train the generator via the GAN
        noise = np.random.normal(0, 1, (batch_size, latent_dim))
        g_loss = gan.train_on_batch(noise, valid_labels)

        # periodic progress report; the discriminator value is the mean of its
        # real-batch and fake-batch results
        if log_every and epoch % log_every == 0:
            print(f"Epoch {epoch}: D Loss: {0.5 * np.add(d_loss_real, d_loss_fake)}, G Loss: {g_loss}")

# run the adversarial training loop on the scaled feature matrix
train_gan(
    gan,
    generator,
    discriminator,
    normalized_data,
    epochs=10000,
    batch_size=128,
    latent_dim=latent_dim,
)



Epoch 0: D Loss: [0.73932844 0.5       ], G Loss: 0.6152759194374084
Epoch 1000: D Loss: [0.70120889 0.47265625], G Loss: 0.6147332191467285
Epoch 2000: D Loss: [0.6901966 0.546875 ], G Loss: 0.8295761346817017
Epoch 3000: D Loss: [0.75303242 0.53125   ], G Loss: 0.7044615745544434
Epoch 4000: D Loss: [0.66337347 0.5625    ], G Loss: 0.7936248779296875
Epoch 5000: D Loss: [0.65831041 0.546875  ], G Loss: 0.8053476810455322
Epoch 6000: D Loss: [0.6342881 0.59375  ], G Loss: 0.7897089719772339
Epoch 7000: D Loss: [0.63774562 0.66796875], G Loss: 0.8318478465080261
Epoch 8000: D Loss: [0.65822387 0.60546875], G Loss: 0.7873701453208923
Epoch 9000: D Loss: [0.67952418 0.5625    ], G Loss: 0.7330707907676697


In [8]:
# sample fresh noise and let the trained generator produce 1000 synthetic rows
noise = np.random.normal(loc=0, scale=1, size=(1000, latent_dim))
generated_data = generator.predict(noise)

# undo the min-max scaling so the values are back in the original units
generated_data_rescaled = scaler.inverse_transform(generated_data)

# label the columns with the original feature names
generated_df = pd.DataFrame(data=generated_data_rescaled, columns=data_gan.columns)

generated_df.head()



Unnamed: 0,Usage (minutes),Notifications,Times Opened
0,33.920258,0.605492,2.314467
1,19.763815,2.579559,2.812085
2,65.78022,123.653282,72.916924
3,27.329279,1.221206,2.135092
4,67.802612,32.552025,18.257778
