In [7]:
import tensorflow as tf
from tensorflow.keras import layers
import numpy as np
import pandas as pd
import json




In [8]:
# #Load the JSON and Excel datasets
# x_df = pd.read_json('../../DataSet/assign_json_files/assignment1New.json')
# y_df = pd.read_excel('../../DataBook/assign_xlsx_files/Assignment1_Data_Analyst.xlsx')

# #Display the first few rows of each DataFrame
# print("x_df head:\n", x_df.head())
# print("y_df head:\n", y_df.head())

# Load your Excel dataset
data = pd.read_excel('../../DataBook/assign_xlsx_files/Assignment1_Data_Analyst.xlsx')  # Replace with your file path

# Identify non-numeric columns
non_numeric_columns = data.select_dtypes(include=['object']).columns

# Convert non-numeric columns to numeric values using one-hot encoding
data = pd.get_dummies(data, columns=non_numeric_columns)

# Convert the DataFrame to a NumPy array with a specific dtype
x_train = data.values.astype(np.float32)

# Check for NaNs or inf values in the data
if np.isnan(x_train).any() or np.isinf(x_train).any():
    raise ValueError("Data contains NaNs or infinite values")

# Verify the shape of x_train
print(f"Loaded dataset shape: {x_train.shape}")
if x_train.shape[0] == 0:
    raise ValueError("Dataset is empty. Please check the dataset and ensure it contains valid data.")

num_features = x_train.shape[1]

Loaded dataset shape: (54, 55)


In [9]:
# Define the Generator
def build_generator():
    model = tf.keras.Sequential([
        layers.Dense(128, activation='relu', input_dim=100),
        layers.Dense(256, activation='relu'),
        layers.Dense(num_features, activation='sigmoid')
    ])
    return model

# Define the Discriminator
def build_discriminator():
    model = tf.keras.Sequential([
        layers.Dense(256, activation='relu', input_shape=(num_features,)),
        layers.Dense(128, activation='relu'),
        layers.Dense(1, activation='sigmoid')
    ])
    return model


In [10]:
# Compile the GAN
def build_gan(generator, discriminator):
    discriminator.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    discriminator.trainable = False
    gan_input = layers.Input(shape=(100,))
    gan_output = discriminator(generator(gan_input))
    gan = tf.keras.Model(gan_input, gan_output)
    gan.compile(optimizer='adam', loss='binary_crossentropy')
    return gan

In [11]:
# Hyperparameters
epochs  = 50000
batch_size = 64
latent_dim = 100

# Build and compile the models
generator = build_generator()
discriminator = build_discriminator()
gan = build_gan(generator, discriminator)


In [12]:
# Training the GAN
for epoch in range(epochs):
    # Train the Discriminator
    idx = np.random.randint(0, x_train.shape[0], batch_size)
    real_data = x_train[idx]
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    fake_data = generator.predict(noise)
    d_loss_real = discriminator.train_on_batch(real_data, np.ones((batch_size, 1)))
    d_loss_fake = discriminator.train_on_batch(fake_data, np.zeros((batch_size, 1)))
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

    # Train the Generator
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    g_loss = gan.train_on_batch(noise, np.ones((batch_size, 1)))

    # Print the progress
    if epoch % 1000 == 0:
        print(f"{epoch} [D loss: {d_loss[0]}, acc.: {100*d_loss[1]}] [G loss: {g_loss}]")

# Generate and display some samples
import matplotlib.pyplot as plt

noise = np.random.normal(0, 1, (10, latent_dim))
generated_data = generator.predict(noise)

# Display the generated data (assuming 2D data for visualization)
plt.scatter(generated_data[:, 0], generated_data[:, 1])
plt.title("Generated Data")
plt.show()


0 [D loss: 0.7178558707237244, acc.: 32.8125] [G loss: 0.7813005447387695]
1000 [D loss: 0.00013894598305341788, acc.: 100.0] [G loss: 10.104186058044434]
2000 [D loss: 1.625429240448284e-05, acc.: 100.0] [G loss: 12.384584426879883]
3000 [D loss: 0.08143277512863278, acc.: 98.4375] [G loss: 4.4134697914123535]
4000 [D loss: 8.844419767228828e-06, acc.: 100.0] [G loss: 15.023346900939941]
5000 [D loss: 1.5848096701631675e-06, acc.: 100.0] [G loss: 17.361122131347656]
6000 [D loss: 3.161163668607969e-07, acc.: 100.0] [G loss: 18.628944396972656]
7000 [D loss: 1.852514071032374e-07, acc.: 100.0] [G loss: 17.984420776367188]
8000 [D loss: 3.109945881374365e-08, acc.: 100.0] [G loss: 21.257286071777344]
9000 [D loss: 2.60469883647807e-08, acc.: 100.0] [G loss: 22.215084075927734]
10000 [D loss: 2.683809487147304e-09, acc.: 100.0] [G loss: 23.086545944213867]
11000 [D loss: 5.50146097601234e-09, acc.: 100.0] [G loss: 23.65862464904785]
12000 [D loss: 2.033500333866188e-07, acc.: 100.0] [G l