In [1]:
import pandas as pd
import torch
import torch.nn as nn

1. Data Loading

In [2]:
from google.colab import files # for using google colab

uploaded = files.upload()

for fn in uploaded.keys():
  print('User uploaded file "{name}" with length {length} bytes'.format(
      name=fn, length=len(uploaded[fn])))

df = pd.read_csv(fn) # Assuming only one file is uploaded

Saving gan_dataset.csv to gan_dataset.csv
User uploaded file "gan_dataset.csv" with length 193911 bytes


In [3]:
df

Unnamed: 0,Feature_1,Feature_2,Feature_3,Feature_4,Feature_5
0,0.218162,0.036798,0.008938,0.428720,0.646402
1,0.768704,0.674638,0.493414,1.568932,2.605922
2,0.707831,0.579577,0.467350,1.504258,2.278798
3,0.465760,0.166996,0.070853,0.884033,1.230479
4,0.463032,0.224466,0.099074,0.845267,1.449929
...,...,...,...,...,...
1995,0.195462,0.039587,0.008938,0.375024,0.571866
1996,0.755578,0.536844,0.353136,1.357644,2.134922
1997,0.166379,0.031288,0.004803,0.313366,0.477101
1998,0.757442,0.575892,0.435601,1.600803,2.121846


2. Data Preperation

In [4]:
tensor = torch.from_numpy(df.to_numpy()).float()#converts df to numpy the torch tensor
tensor

tensor([[0.2182, 0.0368, 0.0089, 0.4287, 0.6464],
        [0.7687, 0.6746, 0.4934, 1.5689, 2.6059],
        [0.7078, 0.5796, 0.4673, 1.5043, 2.2788],
        ...,
        [0.1664, 0.0313, 0.0048, 0.3134, 0.4771],
        [0.7574, 0.5759, 0.4356, 1.6008, 2.1218],
        [0.3271, 0.1289, 0.0458, 0.6929, 1.0699]])

3. Define GAN Architecture

In [5]:
# Define the Generator
class Generator(nn.Module):
    def __init__(self):
        super(Generator, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(100, 128),
            nn.ReLU(),
            nn.Linear(128, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 5)
        )

    def forward(self, z):
        return self.model(z)

# Define the Discriminator
class Discriminator(nn.Module):
    def __init__(self):
        super(Discriminator, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(5, 128),
            nn.ReLU(),
            nn.Linear(128, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.model(x)

4. Model Initialization and Loss Function

In [6]:
# Check for GPU availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize the Generator and Discriminator
generator = Generator().to(device)
discriminator = Discriminator().to(device)

# Define the Loss function
criterion = nn.BCELoss()

# Define the optimizers
optimizer_G = torch.optim.Adam(generator.parameters(), lr=0.001)
optimizer_D = torch.optim.Adam(discriminator.parameters(), lr=0.001)

5. Traning the GAN


In [7]:
# Number of epochs
num_epochs = 20

# Lists to store the losses for plotting
G_losses = []
D_losses = []

# Training loop
for epoch in range(num_epochs):
    for i in range(len(tensor)):  # Iterate through each data point
      # 1. Train the Discriminator
      # Train on real data
      real_data = tensor[i].to(device)
      real_labels = torch.ones(1).to(device)
      optimizer_D.zero_grad()
      output = discriminator(real_data)
      d_loss_real = criterion(output, real_labels)
      d_loss_real.backward()

      # Train on fake data
      z = torch.randn(1, 100).to(device) # 100 is the latent vector dimension
      fake_data = generator(z)
      fake_labels = torch.zeros(1,1).to(device)
      output = discriminator(fake_data.detach()) # detach to avoid backprop through generator
      d_loss_fake = criterion(output, fake_labels)
      d_loss_fake.backward()

      d_loss = d_loss_real + d_loss_fake
      optimizer_D.step()

      # 2. Train the Generator
      optimizer_G.zero_grad()
      output = discriminator(fake_data)
      g_loss = criterion(output,real_labels.resize_(1,1)) # Try to fool the discriminator
      g_loss.backward()
      optimizer_G.step()

    # Print losses every epoch
    print(f"Epoch [{epoch+1}/{num_epochs}], D Loss: {d_loss.item():.4f}, G Loss: {g_loss.item():.4f}")

    # Append losses to lists for plotting (optional)
    G_losses.append(g_loss.item())
    D_losses.append(d_loss.item())

Epoch [1/20], D Loss: 1.3066, G Loss: 1.2387
Epoch [2/20], D Loss: 1.3848, G Loss: 0.7382
Epoch [3/20], D Loss: 1.3689, G Loss: 0.7149
Epoch [4/20], D Loss: 1.5084, G Loss: 0.6888
Epoch [5/20], D Loss: 1.3397, G Loss: 0.7566
Epoch [6/20], D Loss: 1.4138, G Loss: 0.6756
Epoch [7/20], D Loss: 1.3838, G Loss: 0.6778
Epoch [8/20], D Loss: 1.3953, G Loss: 0.6693
Epoch [9/20], D Loss: 1.3859, G Loss: 0.7477
Epoch [10/20], D Loss: 1.3883, G Loss: 0.6951
Epoch [11/20], D Loss: 1.4044, G Loss: 0.7449
Epoch [12/20], D Loss: 1.3949, G Loss: 0.6719
Epoch [13/20], D Loss: 1.3862, G Loss: 0.6927
Epoch [14/20], D Loss: 1.3776, G Loss: 0.7217
Epoch [15/20], D Loss: 1.2518, G Loss: 0.8109
Epoch [16/20], D Loss: 1.3863, G Loss: 0.6931
Epoch [17/20], D Loss: 1.3863, G Loss: 0.6931
Epoch [18/20], D Loss: 1.3863, G Loss: 0.6931
Epoch [19/20], D Loss: 1.3863, G Loss: 0.6931
Epoch [20/20], D Loss: 1.3863, G Loss: 0.6931


6/7. Generate Good Synthetic Data

In [8]:
# Generate synthetic data
num_samples = 20
generated_data = []
for i in range(num_samples):
    z = torch.randn(1, 100).to(device)
    synthetic_sample = generator(z).cpu().detach().numpy()  # Move to CPU and detach
    generated_data.append(synthetic_sample[0])

# Convert to DataFrame and print
generated_df = pd.DataFrame(generated_data)
print(generated_df.head(20))

           0         1         2         3         4
0   0.813459  0.632325  0.420282  1.622166  2.139724
1   0.813459  0.632325  0.420282  1.622166  2.139724
2   0.910834  0.699132  0.551650  1.870253  2.605454
3   0.813459  0.632325  0.420282  1.622166  2.139724
4   0.421290  0.159877  0.006801  0.682090  1.017283
5   0.815012  0.635725  0.416218  1.628894  2.155650
6   0.580047  0.245538  0.084456  0.960134  1.446863
7   0.813459  0.632325  0.420282  1.622166  2.139724
8   0.813459  0.632325  0.420282  1.622166  2.139724
9   0.779061  0.421681  0.194766  1.518266  2.163888
10  0.813459  0.632325  0.420282  1.622166  2.139724
11  0.813459  0.632325  0.420282  1.622166  2.139724
12  0.813459  0.632325  0.420282  1.622166  2.139724
13  0.813459  0.632325  0.420282  1.622166  2.139724
14  0.617868  0.290691  0.077928  1.195184  1.667754
15  0.813854  0.578722  0.350165  1.609504  2.235190
16  0.729579  0.351812  0.119790  1.420240  1.985826
17  0.572465  0.241656  0.098389  0.962146  1.