In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from torch.utils.data import DataLoader, TensorDataset

In [2]:
# Load dataset
data = pd.read_csv(r"C:\Users\asus\Desktop\Masters Project & Thesis\Master Projects\Churn Marketing project\WA_Fn-UseC_-Telco-Customer-Churn.csv")

In [3]:
# Select important features
features = ['gender', 'SeniorCitizen', 'tenure', 'MonthlyCharges', 'TotalCharges', 'Contract', 'PaymentMethod']
df = data[features].dropna()

In [4]:
# Encode categorical features
label_encoders = {}
for col in ['gender', 'Contract', 'PaymentMethod']:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

In [6]:
# Clean TotalCharges column
df['TotalCharges'] = df['TotalCharges'].replace(' ', np.nan)
df['TotalCharges'] = df['TotalCharges'].astype(float)

# Drop rows with NaN (if any)
df = df.dropna(subset=['TotalCharges'])

# Normalize numerical features
scaler = MinMaxScaler()
df[['tenure', 'MonthlyCharges', 'TotalCharges']] = scaler.fit_transform(df[['tenure', 'MonthlyCharges', 'TotalCharges']])


In [7]:
# Convert to tensor
data_tensor = torch.tensor(df.values, dtype=torch.float32)


In [8]:
# Create DataLoader
batch_size = 64
dataset = TensorDataset(data_tensor)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)


In [9]:
# 3. Define GAN Architecture
input_dim = df.shape[1]
latent_dim = 16

class Generator(nn.Module):
    def __init__(self, latent_dim, output_dim):
        super(Generator, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(latent_dim, 64),
            nn.LeakyReLU(0.2),
            nn.Linear(64, 128),
            nn.LeakyReLU(0.2),
            nn.Linear(128, output_dim),
            nn.Sigmoid()
        )
    def forward(self, z):
        return self.model(z)

class Discriminator(nn.Module):
    def __init__(self, input_dim):
        super(Discriminator, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.LeakyReLU(0.2),
            nn.Linear(128, 64),
            nn.LeakyReLU(0.2),
            nn.Linear(64, 1),
            nn.Sigmoid()
        )
    def forward(self, x):
        return self.model(x)

generator = Generator(latent_dim, input_dim)
discriminator = Discriminator(input_dim)

In [10]:
# Loss and Optimizers
criterion = nn.BCELoss()
optimizer_g = optim.Adam(generator.parameters(), lr=0.0002)
optimizer_d = optim.Adam(discriminator.parameters(), lr=0.0002)

In [11]:
# 4. Train GAN
epochs = 300
for epoch in range(epochs):
    for real_data in dataloader:
        real_data = real_data[0]
        batch_size = real_data.size(0)

        # Real and Fake labels
        real_labels = torch.ones(batch_size, 1)
        fake_labels = torch.zeros(batch_size, 1)

        # Train Discriminator
        optimizer_d.zero_grad()
        outputs = discriminator(real_data)
        loss_real = criterion(outputs, real_labels)

        z = torch.randn(batch_size, latent_dim)
        fake_data = generator(z)
        outputs = discriminator(fake_data.detach())
        loss_fake = criterion(outputs, fake_labels)

        d_loss = loss_real + loss_fake
        d_loss.backward()
        optimizer_d.step()

        # Train Generator
        optimizer_g.zero_grad()
        outputs = discriminator(fake_data)
        g_loss = criterion(outputs, real_labels)
        g_loss.backward()
        optimizer_g.step()

    if epoch % 50 == 0:
        print(f"Epoch [{epoch}/{epochs}] | D Loss: {d_loss.item():.4f} | G Loss: {g_loss.item():.4f}")


Epoch [0/300] | D Loss: 1.1183 | G Loss: 0.7629
Epoch [50/300] | D Loss: 0.2600 | G Loss: 2.4793
Epoch [100/300] | D Loss: 0.1243 | G Loss: 2.9251
Epoch [150/300] | D Loss: 0.3329 | G Loss: 2.5750
Epoch [200/300] | D Loss: 0.2703 | G Loss: 2.6475
Epoch [250/300] | D Loss: 0.3726 | G Loss: 2.8546


In [12]:
# 5. Generate Synthetic Data
z = torch.randn(1000, latent_dim)
synthetic_data = generator(z).detach().numpy()

# Convert back to DataFrame
synthetic_df = pd.DataFrame(synthetic_data, columns=df.columns)

# Inverse transform numerical features
synthetic_df[['tenure', 'MonthlyCharges', 'TotalCharges']] = scaler.inverse_transform(synthetic_df[['tenure', 'MonthlyCharges', 'TotalCharges']])

# Decode categorical features
for col in ['gender', 'Contract', 'PaymentMethod']:
    synthetic_df[col] = synthetic_df[col].round().astype(int)
    synthetic_df[col] = synthetic_df[col].apply(lambda x: label_encoders[col].inverse_transform([x])[0] if x < len(label_encoders[col].classes_) else label_encoders[col].classes_[0])


In [13]:
# Save synthetic dataset to your desired folder
synthetic_df.to_csv(r'C:\Users\asus\Desktop\Masters Project & Thesis\Master Projects\Gen Ai Analytics\synthetic_customer_data.csv', index=False)
print("✅ Synthetic data generated and saved at: C:\\Users\\asus\\Desktop\\Masters Project & Thesis\\Master Projects\\Gen Ai Analytics")


✅ Synthetic data generated and saved at: C:\Users\asus\Desktop\Masters Project & Thesis\Master Projects\Gen Ai Analytics


In [14]:
# Create models directory if not exists
import os
os.makedirs('../models', exist_ok=True)

# Save generator model
torch.save(generator.state_dict(), '../models/gan_generator.pth')

# Save discriminator model (optional)
torch.save(discriminator.state_dict(), '../models/gan_discriminator.pth')

print("✅ GAN models saved in '../models/'")


✅ GAN models saved in '../models/'


In [17]:
class Generator(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(Generator, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, output_size),
            nn.Tanh()
        )

    def forward(self, x):
        return self.model(x)




In [19]:
# Initialize generator with exact same architecture
generator = Generator()  # use same constructor as during training
generator.load_state_dict(torch.load('../models/gan_generator.pth'))
generator.eval()
print("✅ GAN Generator model loaded successfully!")



TypeError: Generator.__init__() missing 3 required positional arguments: 'input_size', 'hidden_size', and 'output_size'