<a href="https://colab.research.google.com/github/Ramanparjapati/Synthetic_data_generation/blob/master/Screentime_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Synthetic Data Generation**

In [13]:
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LeakyReLU, BatchNormalization
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import MinMaxScaler

# read data
data = pd.read_csv('/content/screentime_analysis.csv')

# drop unnecessary cloumns
data_gan = data.drop(columns = ['Date', 'App'])
# normalize between 0 to 1
scaler = MinMaxScaler()
# normalize data
normalized_data = scaler.fit_transform(data_gan)
# convert to dataframe
normalized_df = pd.DataFrame(normalized_data, columns = data_gan.columns)

latent_dim = 100 # size of random noise vactor
latent_dim = 100 # latent space dimention(size of random noise input)

# create a function for building the generator
def build_generator(latent_dim):
    model = Sequential([Dense(128, input_dim = latent_dim),
                        LeakyReLU(alpha = 0.01),
                        BatchNormalization(momentum = 0.08),
                        Dense(256),
                        LeakyReLU(alpha = 0.01),
                        BatchNormalization(momentum = 0.08),
                        Dense(512),
                        LeakyReLU(alpha = 0.01),
                        BatchNormalization(momentum = 0.08),
                        Dense(3, activation = 'sigmoid')
    ])
    return model

generator = build_generator(latent_dim)

# generate random noise of 1000 samples
noise = np.random.normal(0, 1, (1000, latent_dim))
# generate synthetic data using generator
generated_data = generator.predict(noise)
# show first 5 samples of generated data
generated_data[:5]

# create a function for building the discriminator
def build_discriminator():
    model = Sequential([Dense(512, input_shape = (3,)),
                        LeakyReLU(alpha = 0.01),
                        Dense(265),
                        LeakyReLU(alpha = 0.01),
                        Dense(128),
                        LeakyReLU(alpha = 0.01),
                        Dense(1, activation = 'sigmoid')
    ])
    # compile model
    model.compile(loss = 'binary_crossentropy', optimizer = Adam(0.0002, 0.5), metrics = ['accuracy'])
    return model

discriminator = build_discriminator()

# Freeze discriminator weights during GAN training
def build_gan(generator, discriminator):
    discriminator.trainable = False
    model = Sequential([generator, discriminator])
    model.compile(loss = 'binary_crossentropy', optimizer = Adam())
    return model

gan = build_gan(generator, discriminator)

# Train generator
def train_gan(gan, generator, discriminator, data, epochs = 10000, batch_size = 128, latent_dim = 100):
  for epoch in range(epochs):
    # generate random batch of real data
    idx = np.random.randint(0, data.shape[0], batch_size)
    real_data = data[idx]
    # generate a batch of fake data
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    fake_data = generator.predict(noise)
    # labels for real and fake data
    real_labels = np.ones((batch_size, 1))
    fake_labels = np.zeros((batch_size, 1))
    # train the discriminator
    d_loss_real = discriminator.train_on_batch(real_data, real_labels)
    d_loss_fake = discriminator.train_on_batch(fake_data, fake_labels)
    # train the generator via the GAN
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    valid_labels = np.ones((batch_size, 1))
    g_loss = gan.train_on_batch(noise, valid_labels)
    # print the progress every 1000 epochs
    if epoch % 100 == 0:
      print(f'Epoch: {epoch}, D_loss: {0.5 * np.add(d_loss_real, d_loss_fake)}, G_loss: {g_loss}')

train_gan(gan, generator, discriminator, normalized_data, epochs = 10000, batch_size = 128, latent_dim = latent_dim)

noise = np.random.normal(0, 1, (1000, latent_dim))
generated_data = generator.predict(noise)
generated_data_rescaled = scaler.inverse_transform(generated_data)
generated_df = pd.DataFrame(generated_data_rescaled, columns = data_gan.columns)
generated_df.head()

[1m 1/32[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m3s[0m 102ms/step

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 




Epoch: 0, D_loss: [0.69190407 0.75      ], G_loss: [array(0.69566417, dtype=float32), array(0.69566417, dtype=float32), array(0.5, dtype=float32)]
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 




[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5m

KeyboardInterrupt: 