In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import LabelEncoder

# Reproducibility: the DataLoader shuffle, the network weight initialisation
# and the latent noise all draw from global RNGs, so seed them once up front.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Load dataset
df = pd.read_csv("nursery.csv")

# Encode categorical features.
# Every column (features and target alike) is replaced by integer codes.
# The fitted encoder of each column is kept in `encoders` so that codes can
# be mapped back to the original category strings later on.
df_encoded = df.copy()
encoders = {}
for col in df_encoded.columns:
    le = LabelEncoder()
    df_encoded[col] = le.fit_transform(df_encoded[col])
    encoders[col] = le

# Split into feature matrix and class label.
X = df_encoded.drop(columns=["Target"])
y = df_encoded["Target"]
num_classes = y.nunique()   # number of distinct target codes
input_dim = X.shape[1]      # number of feature columns

# Convert to tensors: features as float32 (network input), labels as int64
# (required by nn.Embedding lookups).
X_tensor = torch.tensor(X.values, dtype=torch.float32)
y_tensor = torch.tensor(y.values, dtype=torch.long)
dataset = TensorDataset(X_tensor, y_tensor)
loader = DataLoader(dataset, batch_size=128, shuffle=True)

# Hyperparameters
latent_dim = 32   # size of the generator's noise vector
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
lambda_gp = 10    # weight of the gradient-penalty term in the critic loss
epochs = 50

# Generator
class Generator(nn.Module):
    """Conditional generator: maps (noise, class label) to a synthetic feature row.

    The label is looked up in an embedding table, concatenated with the noise
    vector and passed through a three-layer MLP. The last layer has no
    activation, so the generated features are unbounded real values.

    Args:
        noise_dim: Width of the noise vector ``z``. When ``None`` the
            notebook-level ``latent_dim`` is used.
        n_classes: Number of distinct class labels (also the embedding width).
            When ``None`` the notebook-level ``num_classes`` is used.
        n_features: Width of one generated row. When ``None`` the
            notebook-level ``input_dim`` is used.
    """

    def __init__(self, noise_dim=None, n_classes=None, n_features=None):
        super().__init__()
        # Fall back to the notebook globals so a bare ``Generator()`` call
        # keeps behaving exactly as before.
        noise_dim = latent_dim if noise_dim is None else noise_dim
        n_classes = num_classes if n_classes is None else n_classes
        n_features = input_dim if n_features is None else n_features

        self.label_emb = nn.Embedding(n_classes, n_classes)
        self.model = nn.Sequential(
            nn.Linear(noise_dim + n_classes, 64),
            nn.ReLU(),
            nn.Linear(64, 128),
            nn.ReLU(),
            nn.Linear(128, n_features)
        )

    def forward(self, z, labels):
        """Generate one row per (noise vector, label) pair.

        Args:
            z: Noise tensor; concatenated with the label embedding along dim 1.
            labels: Integer class indices used to index the embedding table.

        Returns:
            Output of the MLP applied to the concatenated input.
        """
        c = self.label_emb(labels)
        x = torch.cat((z, c), dim=1)
        return self.model(x)

# Critic
class Critic(nn.Module):
    """Conditional critic: scores a feature row given its class label.

    The label is looked up in an embedding table, concatenated with the
    feature row and passed through a three-layer LeakyReLU MLP that ends in a
    single unbounded output (no sigmoid).

    Args:
        n_features: Width of one feature row. When ``None`` the
            notebook-level ``input_dim`` is used.
        n_classes: Number of distinct class labels (also the embedding width).
            When ``None`` the notebook-level ``num_classes`` is used.
    """

    def __init__(self, n_features=None, n_classes=None):
        super().__init__()
        # Fall back to the notebook globals so a bare ``Critic()`` call keeps
        # behaving exactly as before.
        n_features = input_dim if n_features is None else n_features
        n_classes = num_classes if n_classes is None else n_classes

        self.label_emb = nn.Embedding(n_classes, n_classes)
        self.model = nn.Sequential(
            nn.Linear(n_features + n_classes, 128),
            nn.LeakyReLU(0.2),
            nn.Linear(128, 64),
            nn.LeakyReLU(0.2),
            nn.Linear(64, 1)
        )

    def forward(self, x, labels):
        """Score each (row, label) pair.

        Args:
            x: Feature tensor; concatenated with the label embedding along dim 1.
            labels: Integer class indices used to index the embedding table.

        Returns:
            Tensor with one score per row (last dimension of size 1).
        """
        c = self.label_emb(labels)
        d_in = torch.cat((x, c), dim=1)
        return self.model(d_in)

# Gradient penalty
def compute_gp(critic, real_samples, fake_samples, labels):
    """Compute the WGAN-GP gradient penalty on random real/fake interpolates.

    For every sample a point is drawn uniformly on the straight line between
    the real and the fake sample; the penalty is the mean squared deviation of
    the critic's gradient norm at those points from 1.

    Args:
        critic: Callable ``critic(samples, labels)`` returning one score per sample.
        real_samples: Batch of real samples, batch dimension first.
        fake_samples: Batch of generated samples, same shape as ``real_samples``.
        labels: Conditioning labels forwarded unchanged to ``critic``.

    Returns:
        Scalar tensor ``mean((||grad||_2 - 1) ** 2)``; differentiable with
        respect to the critic parameters because the gradient graph is kept.
    """
    batch_size = real_samples.size(0)
    # One mixing coefficient per sample, broadcast over every non-batch
    # dimension. It is created on the samples' own device/dtype so the
    # function does not rely on a module-level `device`.
    alpha = torch.rand(
        batch_size,
        *([1] * (real_samples.dim() - 1)),
        device=real_samples.device,
        dtype=real_samples.dtype,
    )
    interpolates = (alpha * real_samples + (1 - alpha) * fake_samples).requires_grad_(True)
    d_interpolates = critic(interpolates, labels)
    gradients = torch.autograd.grad(
        outputs=d_interpolates,
        inputs=interpolates,
        # Weight of 1 per score: yields d(score_i)/d(interpolate_i) per sample.
        grad_outputs=torch.ones_like(d_interpolates),
        # Keep the graph so the penalty itself can be back-propagated.
        create_graph=True,
        retain_graph=True,
    )[0]
    # Flatten per sample; reshape also copes with non-contiguous gradients.
    gradients = gradients.reshape(batch_size, -1)
    return ((gradients.norm(2, dim=1) - 1) ** 2).mean()

# Initialize models
generator = Generator().to(device)
critic = Critic().to(device)
optimizer_G = torch.optim.Adam(generator.parameters(), lr=1e-4, betas=(0.5, 0.9))
optimizer_C = torch.optim.Adam(critic.parameters(), lr=1e-4, betas=(0.5, 0.9))

# The generator is updated once for every `n_critic` critic updates.
n_critic = 5

# Training loop
for epoch in range(epochs):
    for i, (real_samples, labels) in enumerate(loader):
        real_samples = real_samples.to(device)
        labels = labels.to(device)

        # Train Critic
        optimizer_C.zero_grad()
        # Allocate the noise directly on the target device (no host copy).
        z = torch.randn(real_samples.size(0), latent_dim, device=device)
        # The critic step must not back-propagate into the generator, so the
        # fakes are produced without building an autograd graph at all.
        with torch.no_grad():
            fake_samples = generator(z, labels)
        real_validity = critic(real_samples, labels)
        fake_validity = critic(fake_samples, labels)
        gp = compute_gp(critic, real_samples, fake_samples, labels)
        # Wasserstein critic loss plus weighted gradient penalty.
        c_loss = -torch.mean(real_validity) + torch.mean(fake_validity) + lambda_gp * gp
        c_loss.backward()
        optimizer_C.step()

        # Train Generator every `n_critic` steps (always on the first batch,
        # so `g_loss` is defined by the time the epoch summary is printed).
        if i % n_critic == 0:
            optimizer_G.zero_grad()
            gen_samples = generator(z, labels)
            # The generator tries to maximise the critic's score on its samples.
            g_loss = -torch.mean(critic(gen_samples, labels))
            g_loss.backward()
            optimizer_G.step()

    # Losses reported are those of the last batch processed in the epoch.
    print(f"[Epoch {epoch+1}/{epochs}] Critic Loss: {c_loss.item():.4f} Generator Loss: {g_loss.item():.4f}")

# Draw 5 synthetic rows conditioned on the class "recommend".
# The class string is first mapped to the integer code the models were trained on.
label_value = encoders['Target'].transform(['recommend'])[0]
labels = torch.full((5,), label_value, dtype=torch.long).to(device)
z = torch.randn(5, latent_dim).to(device)

# Inference only: skip autograd bookkeeping and bring the result back to NumPy.
with torch.no_grad():
    generated = generator(z, labels).cpu().numpy()

# Wrap the raw generator output in a DataFrame with the feature column names.
# The values are still continuous numbers, not category strings.
generated_df = pd.DataFrame(data=generated, columns=X.columns)
print("\nGenerated nursery records for class 'recommend':")
print(generated_df)


[Epoch 1/50] Critic Loss: 1.3351 Generator Loss: -0.0850
[Epoch 2/50] Critic Loss: -2.6747 Generator Loss: -0.3967
[Epoch 3/50] Critic Loss: -3.0656 Generator Loss: -0.6185
[Epoch 4/50] Critic Loss: -3.3641 Generator Loss: -0.8219
[Epoch 5/50] Critic Loss: -3.0061 Generator Loss: -1.2652
[Epoch 6/50] Critic Loss: -2.7717 Generator Loss: -1.6896
[Epoch 7/50] Critic Loss: -2.4865 Generator Loss: -1.9120
[Epoch 8/50] Critic Loss: -2.5064 Generator Loss: -1.9676
[Epoch 9/50] Critic Loss: -2.2320 Generator Loss: -1.6823
[Epoch 10/50] Critic Loss: -2.1994 Generator Loss: -1.1902
[Epoch 11/50] Critic Loss: -1.9109 Generator Loss: -0.7154
[Epoch 12/50] Critic Loss: -2.0394 Generator Loss: -0.3772
[Epoch 13/50] Critic Loss: -2.2471 Generator Loss: -0.1546
[Epoch 14/50] Critic Loss: -2.0377 Generator Loss: -0.3199
[Epoch 15/50] Critic Loss: -1.9519 Generator Loss: 0.0023
[Epoch 16/50] Critic Loss: -1.8826 Generator Loss: -0.0470
[Epoch 17/50] Critic Loss: -2.0388 Generator Loss: -0.0342
[Epoch 1

In [7]:
# Map every generated numeric column back to its original category strings.
decoded_columns = {}
for col in generated_df.columns:
    encoder = encoders[col]
    # Valid codes for this column run from 0 to (number of classes - 1).
    last_index = len(encoder.classes_) - 1
    # Snap each continuous value to the nearest valid integer code.
    codes = generated_df[col].round().clip(0, last_index).astype(int)
    decoded_columns[col] = encoder.inverse_transform(codes)

decoded_df = pd.DataFrame(decoded_columns, index=generated_df.index)
print("\nDecoded generated records:")
print(decoded_df)


Decoded generated records:
       parents     has_nurs        form children    housing     finance  \
0  pretentious  less_proper  incomplete        1   critical      inconv   
1        usual  less_proper      foster        3  less_conv  convenient   
2  pretentious     improper   completed        2   critical      inconv   
3  pretentious       proper   completed        1   critical      inconv   
4  pretentious    very_crit  incomplete        2  less_conv      inconv   

        social     health  
0      nonprob   priority  
1  problematic  not_recom  
2      nonprob   priority  
3      nonprob  not_recom  
4  problematic  not_recom  


In [None]:
# Run the CWGAN workflow on the letter-recognition CSV.
# NOTE(review): `run_cwgan_for_numerical_features` is not defined in this part
# of the notebook; the second argument is presumably the name of the target
# column -- confirm against the function's definition.
run_cwgan_for_numerical_features("letter-recognition-2.csv", "letter")

[letter-recognition-2.csv] Epoch 1/30 | Critic Loss: -7.6260 | Generator Loss: 0.0395
[letter-recognition-2.csv] Epoch 2/30 | Critic Loss: -30.3727 | Generator Loss: -0.3738
[letter-recognition-2.csv] Epoch 3/30 | Critic Loss: -36.1018 | Generator Loss: -0.6214
[letter-recognition-2.csv] Epoch 4/30 | Critic Loss: -34.2304 | Generator Loss: -0.4963
[letter-recognition-2.csv] Epoch 5/30 | Critic Loss: -33.0588 | Generator Loss: -2.4799
[letter-recognition-2.csv] Epoch 6/30 | Critic Loss: -30.8468 | Generator Loss: -6.5201
[letter-recognition-2.csv] Epoch 7/30 | Critic Loss: -28.8966 | Generator Loss: -10.5449
[letter-recognition-2.csv] Epoch 8/30 | Critic Loss: -26.3214 | Generator Loss: -13.2584
[letter-recognition-2.csv] Epoch 9/30 | Critic Loss: -23.3543 | Generator Loss: -15.0538
[letter-recognition-2.csv] Epoch 10/30 | Critic Loss: -20.4524 | Generator Loss: -17.6229
[letter-recognition-2.csv] Epoch 11/30 | Critic Loss: -16.1049 | Generator Loss: -18.1881
[letter-recognition-2.csv] 

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
Parameters: { "use_label_encoder" } are not used.



MLPClassifier: Accuracy = 0.8250
LogisticRegression: Accuracy = 0.9050
XGBoost: Accuracy = 0.6525


{'RandomForest': 0.6375,
 'MLPClassifier': 0.825,
 'LogisticRegression': 0.905,
 'XGBoost': 0.6525}