# In [1]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import DataLoader, TensorDataset

# ==== Load and preprocess your dataset ====
df = pd.read_csv('data.csv')

# 1) KEEP ONLY NUMERIC COLUMNS
numeric_df = df.select_dtypes(include=[np.number])
if numeric_df.shape[1] == 0:
    raise ValueError("No numeric columns found in your CSV—cannot train GAN on strings!")

# 2) DROP ROWS WITH MISSING VALUES
# MinMaxScaler ignores NaN while fitting but passes it through transform
# unchanged, so a single missing cell would otherwise end up in the training
# tensor and in the discriminator's input.
rows_before = len(numeric_df)
numeric_df = numeric_df.dropna(axis=0, how="any")
if numeric_df.empty:
    raise ValueError("No rows without missing values are available for training.")
if len(numeric_df) < rows_before:
    print(f"Dropped {rows_before - len(numeric_df)} row(s) containing missing values.")

# 3) SCALE EVERY COLUMN TO [0, 1]
# The fitted scaler is kept so generated samples can be mapped back later.
scaler = MinMaxScaler()
data_np = scaler.fit_transform(numeric_df.values)
data = torch.tensor(data_np, dtype=torch.float32)

# Hyper‑params
batch_size = 64
z_dim      = 100            # size of the noise vector fed to the generator
input_dim  = data.shape[1]  # one feature per numeric column
dataloader = DataLoader(TensorDataset(data), batch_size=batch_size, shuffle=True)

# ==== Generator ====
class Generator(nn.Module):
    """Fully connected generator: noise vector in, one synthetic row out.

    The stack is Linear -> ReLU -> Linear -> ReLU -> Linear -> Sigmoid with
    hidden widths 128 and 256. The final sigmoid keeps every output in
    [0, 1].

    Args:
        z_dim: Number of features in the input noise vector.
        output_dim: Number of features in each generated row.
    """

    def __init__(self, z_dim, output_dim):
        super().__init__()
        layers = []
        in_features = z_dim
        # Hidden layers are created in order so parameter initialisation
        # and the Sequential indices (0, 2, 4 for the Linear layers) stay fixed.
        for width in (128, 256):
            layers.append(nn.Linear(in_features, width))
            layers.append(nn.ReLU(inplace=True))
            in_features = width
        layers.append(nn.Linear(in_features, output_dim))
        layers.append(nn.Sigmoid())  # match MinMax [0,1]
        self.model = nn.Sequential(*layers)

    def forward(self, z):
        """Return the generated batch for noise batch ``z``."""
        return self.model(z)

# ==== Discriminator ====
class Discriminator(nn.Module):
    """Fully connected discriminator: one data row in, one score out.

    The stack is Linear -> LeakyReLU(0.2) -> Linear -> LeakyReLU(0.2) ->
    Linear -> Sigmoid with hidden widths 256 and 128. The final sigmoid
    yields a single value in [0, 1] per row.

    Args:
        input_dim: Number of features in each input row.
    """

    def __init__(self, input_dim):
        super().__init__()
        widths = [input_dim, 256, 128]
        blocks = []
        # Pair consecutive widths to build each hidden Linear + activation.
        for fan_in, fan_out in zip(widths, widths[1:]):
            blocks += [nn.Linear(fan_in, fan_out), nn.LeakyReLU(0.2, inplace=True)]
        blocks += [nn.Linear(widths[-1], 1), nn.Sigmoid()]
        self.model = nn.Sequential(*blocks)

    def forward(self, x):
        """Return the per-row score for batch ``x``."""
        return self.model(x)

# Instantiate (generator is built first, then the discriminator)
G, D = Generator(z_dim, input_dim), Discriminator(input_dim)

# Loss & Optimizers
# Binary cross-entropy on probabilities; both networks get their own Adam
# optimizer with the same learning rate.
lr = 2e-4
criterion = nn.BCELoss()
G_optimizer, D_optimizer = (
    torch.optim.Adam(net.parameters(), lr=lr) for net in (G, D)
)

# ==== Training Loop ====
# Each mini-batch performs one discriminator update followed by one
# generator update. Every 50 epochs the losses of the last mini-batch of
# that epoch are printed.
epochs = 500
for epoch in range(1, epochs+1):
    for real_batch, in dataloader:
        # The final batch of an epoch may be smaller than batch_size, so the
        # label tensors are sized from the batch itself.
        bs = real_batch.size(0)
        real_labels = torch.ones(bs, 1)
        fake_labels = torch.zeros(bs, 1)

        # 1) Train D on real + fake
        z = torch.randn(bs, z_dim)
        # The generator is not updated in this step, so its forward pass is
        # run without recording an autograd graph.
        with torch.no_grad():
            fake_data = G(z)

        D_real = D(real_batch)
        D_fake = D(fake_data)

        D_loss = criterion(D_real, real_labels) + criterion(D_fake, fake_labels)
        D_optimizer.zero_grad()
        D_loss.backward()
        D_optimizer.step()

        # 2) Train G to fool D
        # Fresh noise is drawn, and the fakes are scored against the "real"
        # label so the generator is pushed towards outputs D accepts.
        z        = torch.randn(bs, z_dim)
        fake_out = D(G(z))
        G_loss   = criterion(fake_out, real_labels)

        G_optimizer.zero_grad()
        G_loss.backward()
        G_optimizer.step()

    if epoch % 50 == 0:
        print(f"Epoch {epoch:03d} | D_loss: {D_loss.item():.4f} | G_loss: {G_loss.item():.4f}")

# ==== Generate & Save Fake Data ====
num_samples = 1000
z = torch.randn(num_samples, z_dim)
# Pure inference: no gradients are needed, so skip autograd bookkeeping.
with torch.no_grad():
    fake_np = G(z).cpu().numpy()
# Map the generated values back through the fitted scaler to the original
# column ranges.
fake_np = scaler.inverse_transform(fake_np)

fake_df = pd.DataFrame(fake_np, columns=numeric_df.columns)
fake_df.to_csv("generated_fake_data.csv", index=False)
print("✅ saved to generated_fake_data.csv")


# Notebook output: KeyboardInterrupt