In [15]:
# ----------------------------- #
#          IMPORTS             #
# ----------------------------- #

import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, StratifiedShuffleSplit
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score
from sklearn.utils import shuffle
from collections import defaultdict


# ----------------------------- #
#      CW-GAN PIPELINE         #
# ----------------------------- #

def run_cwgan_pipeline(csv_file, target_column, epochs=300, num_samples=2000):
    df = pd.read_csv(csv_file)

    # Encode categorical variables
    df_encoded = df.copy()
    encoders = {}
    for col in df_encoded.columns:
        le = LabelEncoder()
        df_encoded[col] = le.fit_transform(df_encoded[col])
        encoders[col] = le

    X = df_encoded.drop(columns=[target_column])
    y = df_encoded[target_column]
    num_classes = y.nunique()
    input_dim = X.shape[1]

    # Create tensor dataset
    X_tensor = torch.tensor(X.values, dtype=torch.float32)
    y_tensor = torch.tensor(y.values, dtype=torch.long)
    dataset = TensorDataset(X_tensor, y_tensor)
    loader = DataLoader(dataset, batch_size=128, shuffle=True)

    # Generator model
    class Generator(nn.Module):
        def __init__(self):
            super().__init__()
            self.label_emb = nn.Embedding(num_classes, num_classes)
            self.model = nn.Sequential(
                nn.Linear(32 + num_classes, 64),
                nn.ReLU(),
                nn.Linear(64, 128),
                nn.ReLU(),
                nn.Linear(128, input_dim)
            )
        def forward(self, z, labels):
            c = self.label_emb(labels)
            x = torch.cat((z, c), dim=1)
            return self.model(x)

    # Critic model
    class Critic(nn.Module):
        def __init__(self):
            super().__init__()
            self.label_emb = nn.Embedding(num_classes, num_classes)
            self.model = nn.Sequential(
                nn.Linear(input_dim + num_classes, 128),
                nn.LeakyReLU(0.2),
                nn.Linear(128, 64),
                nn.LeakyReLU(0.2),
                nn.Linear(64, 1)
            )
        def forward(self, x, labels):
            c = self.label_emb(labels)
            d_in = torch.cat((x, c), dim=1)
            return self.model(d_in)

    def compute_gp(critic, real_samples, fake_samples, labels):
        alpha = torch.rand(real_samples.size(0), 1).to(device)
        interpolates = (alpha * real_samples + (1 - alpha) * fake_samples).requires_grad_(True)
        d_interpolates = critic(interpolates, labels)
        fake = torch.ones_like(d_interpolates)
        gradients = torch.autograd.grad(
            outputs=d_interpolates,
            inputs=interpolates,
            grad_outputs=fake,
            create_graph=True,
            retain_graph=True,
            only_inputs=True
        )[0]
        gradients = gradients.view(gradients.size(0), -1)
        return ((gradients.norm(2, dim=1) - 1) ** 2).mean()

    # Training setup
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    generator = Generator().to(device)
    critic = Critic().to(device)
    optimizer_G = torch.optim.Adam(generator.parameters(), lr=1e-4, betas=(0.5, 0.9))
    optimizer_C = torch.optim.Adam(critic.parameters(), lr=1e-4, betas=(0.5, 0.9))

    # Train GAN
    for epoch in range(epochs):
        for i, (real_samples, labels) in enumerate(loader):
            real_samples = real_samples.to(device)
            labels = labels.to(device)

            optimizer_C.zero_grad()
            z = torch.randn(real_samples.size(0), 32).to(device)
            fake_samples = generator(z, labels)
            real_validity = critic(real_samples, labels)
            fake_validity = critic(fake_samples.detach(), labels)
            gp = compute_gp(critic, real_samples.data, fake_samples.data, labels)
            c_loss = -torch.mean(real_validity) + torch.mean(fake_validity) + 10 * gp
            c_loss.backward()
            optimizer_C.step()

            if i % 5 == 0:
                optimizer_G.zero_grad()
                gen_samples = generator(z, labels)
                g_loss = -torch.mean(critic(gen_samples, labels))
                g_loss.backward()
                optimizer_G.step()

         # Print every 10 epochs
        if (epoch + 1) % 10 == 0:
             print(f"[{csv_file}] Epoch {epoch+1}/{epochs} | Critic Loss: {c_loss.item():.4f} | Generator Loss: {g_loss.item():.4f}")
        
    # Generate synthetic data
    z = torch.randn(num_samples, 32).to(device)
    labels = torch.randint(0, num_classes, (num_samples,), dtype=torch.long).to(device)
    gen_data = generator(z, labels).detach().cpu().numpy()
    label_arr = labels.cpu().numpy()

    gen_df = pd.DataFrame(gen_data, columns=X.columns)
    decoded_df = gen_df.copy()
    for col in decoded_df.columns:
        valid_indices = np.arange(len(encoders[col].classes_))
        rounded = np.round(decoded_df[col]).clip(min(valid_indices), max(valid_indices)).astype(int)
        decoded_df[col] = encoders[col].inverse_transform(rounded)
    decoded_df[target_column] = encoders[target_column].inverse_transform(label_arr)

    # Re-encode synthetic data
    X_synth = decoded_df.drop(columns=[target_column])
    y_synth = decoded_df[target_column]
    X_encoded = pd.DataFrame()
    for col in X_synth.columns:
        X_encoded[col] = encoders[col].transform(X_synth[col])
    y_encoded = encoders[target_column].transform(y_synth)

    # Define classifiers
    models = {
        "RandomForest": RandomForestClassifier(),
        "MLPClassifier": MLPClassifier(max_iter=500),
        "LogisticRegression": LogisticRegression(max_iter=300),
        "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric="mlogloss")
    }

    # ==== 10-chunk classifier evaluation on synthetic data ====
    full_df = X_encoded.copy()
    full_df['target'] = y_encoded
    total_samples = len(full_df)
    chunk_size = total_samples // 5
    accuracy_results = defaultdict(list)

    for repeat in range(2):  # Before and after shuffle
        print(f"\n🔁 Starting round {repeat + 1} (Shuffle: {'Yes' if repeat == 1 else 'No'})")

        if repeat == 1:
            full_df = shuffle(full_df, random_state=42).reset_index(drop=True)

        for chunk in range(5):
            start = chunk * chunk_size
            end = start + chunk_size if chunk < 4 else total_samples
            chunk_df = full_df.iloc[start:end]

            X_chunk = chunk_df.drop(columns=['target'])
            y_chunk = chunk_df['target']

            X_train, X_test, y_train, y_test = train_test_split(X_chunk, y_chunk, test_size=0.2, random_state=42)

            print(f"\n🧪 Training on Chunk {chunk + 1}/5 of Round {repeat + 1} (Rows {start} to {end})")

            for name, model in models.items():
                model.fit(X_train, y_train)
                y_pred = model.predict(X_test)
                acc = accuracy_score(y_test, y_pred)
                accuracy_results[name].append(acc)
                print(f"{name} - Accuracy: {acc:.4f}")

    print("\n📊 Final Average Accuracy on SYNTHETIC data:")
    for name, scores in accuracy_results.items():
        avg_acc = np.mean(scores)
        print(f"{name}: Average Accuracy = {avg_acc:.4f}")


# ----------------------------- #
#  BASELINE ON REAL DATASET    #
# ----------------------------- #

# def evaluate_on_real_data(csv_file, target_column):
#     df = pd.read_csv(csv_file)

#     # Encode
#     df_encoded = df.copy()
#     encoders = {}
#     for col in df_encoded.columns:
#         le = LabelEncoder()
#         df_encoded[col] = le.fit_transform(df_encoded[col])
#         encoders[col] = le

#     X = df_encoded.drop(columns=[target_column])
#     y = df_encoded[target_column]

#     # Stratified split to avoid class mismatch
#     sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
#     for train_idx, test_idx in sss.split(X, y):
#         X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
#         y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

#     # Define models
#     models = {
#         "RandomForest": RandomForestClassifier(),
#         "MLPClassifier": MLPClassifier(max_iter=500),
#         "LogisticRegression": LogisticRegression(max_iter=300),
#         "XGBoost": XGBClassifier(eval_metric="mlogloss")
#     }

#     print("\n📊 Accuracy of classifiers trained on REAL data:")

#     for name, model in models.items():
#         model.fit(X_train, y_train)
#         y_pred = model.predict(X_test)
#         acc = accuracy_score(y_test, y_pred)
#         print(f"{name}: Accuracy = {acc:.4f}")

# ----------------------------- #
#           RUN IT             #
# ----------------------------- #



In [16]:
# Adjust CSV path as needed
csv_path = "nursery.csv"
target = "Target"

run_cwgan_pipeline(csv_path, target)
#evaluate_on_real_data(csv_path, target)

[nursery.csv] Epoch 10/300 | Critic Loss: -2.4461 | Generator Loss: -1.4058
[nursery.csv] Epoch 20/300 | Critic Loss: -2.0401 | Generator Loss: 0.6440
[nursery.csv] Epoch 30/300 | Critic Loss: -1.1191 | Generator Loss: 0.7584
[nursery.csv] Epoch 40/300 | Critic Loss: 0.1267 | Generator Loss: 1.6083
[nursery.csv] Epoch 50/300 | Critic Loss: -0.0140 | Generator Loss: 4.4617
[nursery.csv] Epoch 60/300 | Critic Loss: -0.1062 | Generator Loss: 1.3424
[nursery.csv] Epoch 70/300 | Critic Loss: -0.4795 | Generator Loss: 2.1275
[nursery.csv] Epoch 80/300 | Critic Loss: 0.5315 | Generator Loss: 4.0256
[nursery.csv] Epoch 90/300 | Critic Loss: -0.2322 | Generator Loss: 5.6971
[nursery.csv] Epoch 100/300 | Critic Loss: -0.2482 | Generator Loss: 6.7590
[nursery.csv] Epoch 110/300 | Critic Loss: -0.2759 | Generator Loss: 4.8370
[nursery.csv] Epoch 120/300 | Critic Loss: -0.0928 | Generator Loss: 4.7405
[nursery.csv] Epoch 130/300 | Critic Loss: -0.2252 | Generator Loss: 5.1103
[nursery.csv] Epoch 14

Parameters: { "use_label_encoder" } are not used.



MLPClassifier - Accuracy: 0.4750
LogisticRegression - Accuracy: 0.5250
XGBoost - Accuracy: 0.4500

🧪 Training on Chunk 2/5 of Round 1 (Rows 400 to 800)
RandomForest - Accuracy: 0.5250


Parameters: { "use_label_encoder" } are not used.



MLPClassifier - Accuracy: 0.4625
LogisticRegression - Accuracy: 0.4750
XGBoost - Accuracy: 0.4750

🧪 Training on Chunk 3/5 of Round 1 (Rows 800 to 1200)
RandomForest - Accuracy: 0.5000


Parameters: { "use_label_encoder" } are not used.



MLPClassifier - Accuracy: 0.5500
LogisticRegression - Accuracy: 0.5250
XGBoost - Accuracy: 0.4750

🧪 Training on Chunk 4/5 of Round 1 (Rows 1200 to 1600)
RandomForest - Accuracy: 0.4500


Parameters: { "use_label_encoder" } are not used.



MLPClassifier - Accuracy: 0.4625
LogisticRegression - Accuracy: 0.5000
XGBoost - Accuracy: 0.4500

🧪 Training on Chunk 5/5 of Round 1 (Rows 1600 to 2000)
RandomForest - Accuracy: 0.4750


Parameters: { "use_label_encoder" } are not used.



MLPClassifier - Accuracy: 0.5375
LogisticRegression - Accuracy: 0.6125
XGBoost - Accuracy: 0.5125

🔁 Starting round 2 (Shuffle: Yes)

🧪 Training on Chunk 1/5 of Round 2 (Rows 0 to 400)
RandomForest - Accuracy: 0.4750


Parameters: { "use_label_encoder" } are not used.



MLPClassifier - Accuracy: 0.5625
LogisticRegression - Accuracy: 0.5625
XGBoost - Accuracy: 0.4750

🧪 Training on Chunk 2/5 of Round 2 (Rows 400 to 800)
RandomForest - Accuracy: 0.4375


Parameters: { "use_label_encoder" } are not used.



MLPClassifier - Accuracy: 0.5125
LogisticRegression - Accuracy: 0.4875
XGBoost - Accuracy: 0.5125

🧪 Training on Chunk 3/5 of Round 2 (Rows 800 to 1200)
RandomForest - Accuracy: 0.3875


Parameters: { "use_label_encoder" } are not used.



MLPClassifier - Accuracy: 0.4000
LogisticRegression - Accuracy: 0.3625
XGBoost - Accuracy: 0.4500

🧪 Training on Chunk 4/5 of Round 2 (Rows 1200 to 1600)
RandomForest - Accuracy: 0.5625


Parameters: { "use_label_encoder" } are not used.



MLPClassifier - Accuracy: 0.4875
LogisticRegression - Accuracy: 0.4750
XGBoost - Accuracy: 0.5000

🧪 Training on Chunk 5/5 of Round 2 (Rows 1600 to 2000)
RandomForest - Accuracy: 0.5000
MLPClassifier - Accuracy: 0.5500
LogisticRegression - Accuracy: 0.5875
XGBoost - Accuracy: 0.5375

📊 Final Average Accuracy on SYNTHETIC data:
RandomForest: Average Accuracy = 0.4775
MLPClassifier: Average Accuracy = 0.5000
LogisticRegression: Average Accuracy = 0.5112
XGBoost: Average Accuracy = 0.4837


Parameters: { "use_label_encoder" } are not used.



In [17]:
run_cwgan_pipeline("letter-recognition-2.csv", "letter")
#evaluate_on_real_data("letter-recognition-2.csv", "letter")

[letter-recognition-2.csv] Epoch 10/300 | Critic Loss: -18.5876 | Generator Loss: -17.0938
[letter-recognition-2.csv] Epoch 20/300 | Critic Loss: -7.4032 | Generator Loss: -2.3815
[letter-recognition-2.csv] Epoch 30/300 | Critic Loss: -6.5988 | Generator Loss: -1.1215
[letter-recognition-2.csv] Epoch 40/300 | Critic Loss: -5.4533 | Generator Loss: -1.4681
[letter-recognition-2.csv] Epoch 50/300 | Critic Loss: -2.8873 | Generator Loss: -0.4209
[letter-recognition-2.csv] Epoch 60/300 | Critic Loss: -2.8446 | Generator Loss: 3.0356
[letter-recognition-2.csv] Epoch 70/300 | Critic Loss: -3.5078 | Generator Loss: 1.4259
[letter-recognition-2.csv] Epoch 80/300 | Critic Loss: -1.7420 | Generator Loss: -1.0205
[letter-recognition-2.csv] Epoch 90/300 | Critic Loss: -0.5450 | Generator Loss: 0.1197
[letter-recognition-2.csv] Epoch 100/300 | Critic Loss: -2.6525 | Generator Loss: 2.2407
[letter-recognition-2.csv] Epoch 110/300 | Critic Loss: -1.7055 | Generator Loss: 0.8824
[letter-recognition-2.

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
Parameters: { "use_label_encoder" } are not used.



MLPClassifier - Accuracy: 0.7000
LogisticRegression - Accuracy: 0.6875
XGBoost - Accuracy: 0.6375

🧪 Training on Chunk 2/5 of Round 1 (Rows 400 to 800)
RandomForest - Accuracy: 0.5500


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
Parameters: { "use_label_encoder" } are not used.



MLPClassifier - Accuracy: 0.6125
LogisticRegression - Accuracy: 0.5625
XGBoost - Accuracy: 0.5500

🧪 Training on Chunk 3/5 of Round 1 (Rows 800 to 1200)
RandomForest - Accuracy: 0.6125


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
Parameters: { "use_label_encoder" } are not used.



MLPClassifier - Accuracy: 0.6625
LogisticRegression - Accuracy: 0.6750
XGBoost - Accuracy: 0.5875

🧪 Training on Chunk 4/5 of Round 1 (Rows 1200 to 1600)
RandomForest - Accuracy: 0.5750


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
Parameters: { "use_label_encoder" } are not used.



MLPClassifier - Accuracy: 0.6625
LogisticRegression - Accuracy: 0.6875
XGBoost - Accuracy: 0.6000

🧪 Training on Chunk 5/5 of Round 1 (Rows 1600 to 2000)
RandomForest - Accuracy: 0.6000


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
Parameters: { "use_label_encoder" } are not used.



MLPClassifier - Accuracy: 0.5500
LogisticRegression - Accuracy: 0.6625
XGBoost - Accuracy: 0.5625

🔁 Starting round 2 (Shuffle: Yes)

🧪 Training on Chunk 1/5 of Round 2 (Rows 0 to 400)
RandomForest - Accuracy: 0.5625


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
Parameters: { "use_label_encoder" } are not used.



MLPClassifier - Accuracy: 0.6500
LogisticRegression - Accuracy: 0.6375
XGBoost - Accuracy: 0.6000

🧪 Training on Chunk 2/5 of Round 2 (Rows 400 to 800)
RandomForest - Accuracy: 0.6250


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
Parameters: { "use_label_encoder" } are not used.



MLPClassifier - Accuracy: 0.7125
LogisticRegression - Accuracy: 0.6875
XGBoost - Accuracy: 0.4875

🧪 Training on Chunk 3/5 of Round 2 (Rows 800 to 1200)
RandomForest - Accuracy: 0.6250


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
Parameters: { "use_label_encoder" } are not used.



MLPClassifier - Accuracy: 0.6250
LogisticRegression - Accuracy: 0.6125
XGBoost - Accuracy: 0.5750

🧪 Training on Chunk 4/5 of Round 2 (Rows 1200 to 1600)
RandomForest - Accuracy: 0.5250


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
Parameters: { "use_label_encoder" } are not used.



MLPClassifier - Accuracy: 0.6375
LogisticRegression - Accuracy: 0.6875
XGBoost - Accuracy: 0.4875

🧪 Training on Chunk 5/5 of Round 2 (Rows 1600 to 2000)
RandomForest - Accuracy: 0.6125
MLPClassifier - Accuracy: 0.6875
LogisticRegression - Accuracy: 0.6750
XGBoost - Accuracy: 0.5500

📊 Final Average Accuracy on SYNTHETIC data:
RandomForest: Average Accuracy = 0.5900
MLPClassifier: Average Accuracy = 0.6500
LogisticRegression: Average Accuracy = 0.6575
XGBoost: Average Accuracy = 0.5637


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
Parameters: { "use_label_encoder" } are not used.



In [19]:
csv_path = "satellite.csv"
target = "Purpose"  # <-- Change this if your target column has a different name

run_cwgan_pipeline(csv_path, target)

[satellite.csv] Epoch 10/300 | Critic Loss: -474.2088 | Generator Loss: -0.0243
[satellite.csv] Epoch 20/300 | Critic Loss: -1489.9869 | Generator Loss: -0.4603
[satellite.csv] Epoch 30/300 | Critic Loss: -3141.7512 | Generator Loss: -2.6643
[satellite.csv] Epoch 40/300 | Critic Loss: -4848.3130 | Generator Loss: -9.9609
[satellite.csv] Epoch 50/300 | Critic Loss: -8936.9209 | Generator Loss: -27.1223
[satellite.csv] Epoch 60/300 | Critic Loss: -13003.3477 | Generator Loss: -59.3526
[satellite.csv] Epoch 70/300 | Critic Loss: -17558.8945 | Generator Loss: -112.6599
[satellite.csv] Epoch 80/300 | Critic Loss: -19841.2422 | Generator Loss: -207.1697
[satellite.csv] Epoch 90/300 | Critic Loss: -28666.8379 | Generator Loss: -335.4250
[satellite.csv] Epoch 100/300 | Critic Loss: -39275.1719 | Generator Loss: -527.4099
[satellite.csv] Epoch 110/300 | Critic Loss: -44569.0781 | Generator Loss: -834.6372
[satellite.csv] Epoch 120/300 | Critic Loss: -44989.5938 | Generator Loss: -1155.3904
[sat

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
Parameters: { "use_label_encoder" } are not used.



XGBoost - Accuracy: 0.3375

🧪 Training on Chunk 2/5 of Round 1 (Rows 400 to 800)
RandomForest - Accuracy: 0.2500
MLPClassifier - Accuracy: 0.1500


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
Parameters: { "use_label_encoder" } are not used.



LogisticRegression - Accuracy: 0.3625
XGBoost - Accuracy: 0.2250

🧪 Training on Chunk 3/5 of Round 1 (Rows 800 to 1200)
RandomForest - Accuracy: 0.3625
MLPClassifier - Accuracy: 0.2125
LogisticRegression - Accuracy: 0.3125


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
Parameters: { "use_label_encoder" } are not used.



XGBoost - Accuracy: 0.3500

🧪 Training on Chunk 4/5 of Round 1 (Rows 1200 to 1600)
RandomForest - Accuracy: 0.2750


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
Parameters: { "use_label_encoder" } are not used.



MLPClassifier - Accuracy: 0.3000
LogisticRegression - Accuracy: 0.3750
XGBoost - Accuracy: 0.3000

🧪 Training on Chunk 5/5 of Round 1 (Rows 1600 to 2000)
RandomForest - Accuracy: 0.3625


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
Parameters: { "use_label_encoder" } are not used.



MLPClassifier - Accuracy: 0.4125
LogisticRegression - Accuracy: 0.3500
XGBoost - Accuracy: 0.3000

🔁 Starting round 2 (Shuffle: Yes)

🧪 Training on Chunk 1/5 of Round 2 (Rows 0 to 400)
RandomForest - Accuracy: 0.2750


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
Parameters: { "use_label_encoder" } are not used.



MLPClassifier - Accuracy: 0.2125
LogisticRegression - Accuracy: 0.3375
XGBoost - Accuracy: 0.2750

🧪 Training on Chunk 2/5 of Round 2 (Rows 400 to 800)
RandomForest - Accuracy: 0.3250


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
Parameters: { "use_label_encoder" } are not used.



MLPClassifier - Accuracy: 0.3125
LogisticRegression - Accuracy: 0.3625
XGBoost - Accuracy: 0.2500

🧪 Training on Chunk 3/5 of Round 2 (Rows 800 to 1200)
RandomForest - Accuracy: 0.3625


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
Parameters: { "use_label_encoder" } are not used.



MLPClassifier - Accuracy: 0.3000
LogisticRegression - Accuracy: 0.4000
XGBoost - Accuracy: 0.3750

🧪 Training on Chunk 4/5 of Round 2 (Rows 1200 to 1600)
RandomForest - Accuracy: 0.2500
MLPClassifier - Accuracy: 0.1750
LogisticRegression - Accuracy: 0.3625
XGBoost - Accuracy: 0.3125

🧪 Training on Chunk 5/5 of Round 2 (Rows 1600 to 2000)


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
Parameters: { "use_label_encoder" } are not used.



RandomForest - Accuracy: 0.2750
MLPClassifier - Accuracy: 0.2250
LogisticRegression - Accuracy: 0.4000
XGBoost - Accuracy: 0.2500

📊 Final Average Accuracy on SYNTHETIC data:
RandomForest: Average Accuracy = 0.3062
MLPClassifier: Average Accuracy = 0.2487
LogisticRegression: Average Accuracy = 0.3612
XGBoost: Average Accuracy = 0.2975


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
Parameters: { "use_label_encoder" } are not used.



In [20]:
csv_path = "Bank_Personal_Loan.csv"
target = "Personal Loan"  # <-- Confirm exact spelling

run_cwgan_pipeline(csv_path, target)

[Bank_Personal_Loan.csv] Epoch 10/300 | Critic Loss: -5447.5850 | Generator Loss: -2.0383
[Bank_Personal_Loan.csv] Epoch 20/300 | Critic Loss: -20116.5215 | Generator Loss: -19.5606
[Bank_Personal_Loan.csv] Epoch 30/300 | Critic Loss: -43128.8359 | Generator Loss: -70.6840
[Bank_Personal_Loan.csv] Epoch 40/300 | Critic Loss: -77634.7812 | Generator Loss: -188.8972
[Bank_Personal_Loan.csv] Epoch 50/300 | Critic Loss: -102706.7734 | Generator Loss: -427.1605
[Bank_Personal_Loan.csv] Epoch 60/300 | Critic Loss: -242935.9844 | Generator Loss: -863.9886
[Bank_Personal_Loan.csv] Epoch 70/300 | Critic Loss: -124384.5781 | Generator Loss: -1562.5706
[Bank_Personal_Loan.csv] Epoch 80/300 | Critic Loss: -179174.8281 | Generator Loss: -2125.1792
[Bank_Personal_Loan.csv] Epoch 90/300 | Critic Loss: -237937.1875 | Generator Loss: -2707.3740
[Bank_Personal_Loan.csv] Epoch 100/300 | Critic Loss: -178342.6094 | Generator Loss: -3252.7529
[Bank_Personal_Loan.csv] Epoch 110/300 | Critic Loss: -19687.375

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



LogisticRegression - Accuracy: 0.7125
XGBoost - Accuracy: 0.6750

🧪 Training on Chunk 3/5 of Round 1 (Rows 800 to 1200)
RandomForest - Accuracy: 0.6000
MLPClassifier - Accuracy: 0.5000
LogisticRegression - Accuracy: 0.6500
XGBoost - Accuracy: 0.5125

🧪 Training on Chunk 4/5 of Round 1 (Rows 1200 to 1600)
RandomForest - Accuracy: 0.5625


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



MLPClassifier - Accuracy: 0.5500
LogisticRegression - Accuracy: 0.6250
XGBoost - Accuracy: 0.5750

🧪 Training on Chunk 5/5 of Round 1 (Rows 1600 to 2000)
RandomForest - Accuracy: 0.6000
MLPClassifier - Accuracy: 0.5875
LogisticRegression - Accuracy: 0.6500
XGBoost - Accuracy: 0.6500

🔁 Starting round 2 (Shuffle: Yes)

🧪 Training on Chunk 1/5 of Round 2 (Rows 0 to 400)


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



RandomForest - Accuracy: 0.6000
MLPClassifier - Accuracy: 0.4625
LogisticRegression - Accuracy: 0.5375
XGBoost - Accuracy: 0.6250

🧪 Training on Chunk 2/5 of Round 2 (Rows 400 to 800)
RandomForest - Accuracy: 0.6000
MLPClassifier - Accuracy: 0.5500
LogisticRegression - Accuracy: 0.6250
XGBoost - Accuracy: 0.6000

🧪 Training on Chunk 3/5 of Round 2 (Rows 800 to 1200)


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



RandomForest - Accuracy: 0.5500
MLPClassifier - Accuracy: 0.5375
LogisticRegression - Accuracy: 0.5875
XGBoost - Accuracy: 0.4750

🧪 Training on Chunk 4/5 of Round 2 (Rows 1200 to 1600)
RandomForest - Accuracy: 0.5250
MLPClassifier - Accuracy: 0.5250
LogisticRegression - Accuracy: 0.6375
XGBoost - Accuracy: 0.5125

🧪 Training on Chunk 5/5 of Round 2 (Rows 1600 to 2000)
RandomForest - Accuracy: 0.6875
MLPClassifier - Accuracy: 0.5875
LogisticRegression - Accuracy: 0.6250
XGBoost - Accuracy: 0.5875

📊 Final Average Accuracy on SYNTHETIC data:
RandomForest: Average Accuracy = 0.6038
MLPClassifier: Average Accuracy = 0.5513
LogisticRegression: Average Accuracy = 0.6275
XGBoost: Average Accuracy = 0.5813


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



In [22]:
csv_path = "car.csv"
target = "Class"  # Or possibly "Acceptability" – depends on exact column names

run_cwgan_pipeline(csv_path, target)

[car.csv] Epoch 10/300 | Critic Loss: 1.5182 | Generator Loss: -0.1121
[car.csv] Epoch 20/300 | Critic Loss: -1.5539 | Generator Loss: -0.3556
[car.csv] Epoch 30/300 | Critic Loss: -2.4864 | Generator Loss: -0.3204
[car.csv] Epoch 40/300 | Critic Loss: -2.6487 | Generator Loss: -0.4396
[car.csv] Epoch 50/300 | Critic Loss: -2.4681 | Generator Loss: -0.7271
[car.csv] Epoch 60/300 | Critic Loss: -2.2503 | Generator Loss: -0.6793
[car.csv] Epoch 70/300 | Critic Loss: -2.0241 | Generator Loss: -0.0542
[car.csv] Epoch 80/300 | Critic Loss: -1.8537 | Generator Loss: 0.3844
[car.csv] Epoch 90/300 | Critic Loss: -1.9097 | Generator Loss: 0.4858
[car.csv] Epoch 100/300 | Critic Loss: -1.6346 | Generator Loss: 0.6216
[car.csv] Epoch 110/300 | Critic Loss: -1.6748 | Generator Loss: 0.6661
[car.csv] Epoch 120/300 | Critic Loss: -1.7652 | Generator Loss: 0.8484
[car.csv] Epoch 130/300 | Critic Loss: -1.5449 | Generator Loss: 0.6940
[car.csv] Epoch 140/300 | Critic Loss: -1.4842 | Generator Loss: 0.

Parameters: { "use_label_encoder" } are not used.



XGBoost - Accuracy: 0.3125

🧪 Training on Chunk 2/5 of Round 1 (Rows 400 to 800)
RandomForest - Accuracy: 0.4375


Parameters: { "use_label_encoder" } are not used.



MLPClassifier - Accuracy: 0.3625
LogisticRegression - Accuracy: 0.3250
XGBoost - Accuracy: 0.3875

🧪 Training on Chunk 3/5 of Round 1 (Rows 800 to 1200)
RandomForest - Accuracy: 0.2625


Parameters: { "use_label_encoder" } are not used.



MLPClassifier - Accuracy: 0.3375
LogisticRegression - Accuracy: 0.3500
XGBoost - Accuracy: 0.2750

🧪 Training on Chunk 4/5 of Round 1 (Rows 1200 to 1600)
RandomForest - Accuracy: 0.3000


Parameters: { "use_label_encoder" } are not used.



MLPClassifier - Accuracy: 0.3250
LogisticRegression - Accuracy: 0.3125
XGBoost - Accuracy: 0.2625

🧪 Training on Chunk 5/5 of Round 1 (Rows 1600 to 2000)
RandomForest - Accuracy: 0.3500


Parameters: { "use_label_encoder" } are not used.



MLPClassifier - Accuracy: 0.3750
LogisticRegression - Accuracy: 0.4250
XGBoost - Accuracy: 0.3000

🔁 Starting round 2 (Shuffle: Yes)

🧪 Training on Chunk 1/5 of Round 2 (Rows 0 to 400)
RandomForest - Accuracy: 0.3375


Parameters: { "use_label_encoder" } are not used.



MLPClassifier - Accuracy: 0.3625
LogisticRegression - Accuracy: 0.3250
XGBoost - Accuracy: 0.3125

🧪 Training on Chunk 2/5 of Round 2 (Rows 400 to 800)
RandomForest - Accuracy: 0.3375


Parameters: { "use_label_encoder" } are not used.



MLPClassifier - Accuracy: 0.3500
LogisticRegression - Accuracy: 0.3250
XGBoost - Accuracy: 0.3625

🧪 Training on Chunk 3/5 of Round 2 (Rows 800 to 1200)
RandomForest - Accuracy: 0.3625


Parameters: { "use_label_encoder" } are not used.



MLPClassifier - Accuracy: 0.3875
LogisticRegression - Accuracy: 0.3750
XGBoost - Accuracy: 0.3000

🧪 Training on Chunk 4/5 of Round 2 (Rows 1200 to 1600)
RandomForest - Accuracy: 0.2875


Parameters: { "use_label_encoder" } are not used.



MLPClassifier - Accuracy: 0.3375
LogisticRegression - Accuracy: 0.4625
XGBoost - Accuracy: 0.2500

🧪 Training on Chunk 5/5 of Round 2 (Rows 1600 to 2000)
RandomForest - Accuracy: 0.3750
MLPClassifier - Accuracy: 0.4125
LogisticRegression - Accuracy: 0.5250
XGBoost - Accuracy: 0.3250

📊 Final Average Accuracy on SYNTHETIC data:
RandomForest: Average Accuracy = 0.3425
MLPClassifier: Average Accuracy = 0.3550
LogisticRegression: Average Accuracy = 0.3787
XGBoost: Average Accuracy = 0.3088


Parameters: { "use_label_encoder" } are not used.



In [24]:
csv_path = "games.csv"
target = "winner"  # Best choice for this dataset

run_cwgan_pipeline(csv_path, target)

[games.csv] Epoch 10/300 | Critic Loss: -1135931.1250 | Generator Loss: -871.4750
[games.csv] Epoch 20/300 | Critic Loss: -5138733.5000 | Generator Loss: -20126.5742
[games.csv] Epoch 30/300 | Critic Loss: -7885935.0000 | Generator Loss: -102314.1484
[games.csv] Epoch 40/300 | Critic Loss: -7948184.5000 | Generator Loss: -205883.0156
[games.csv] Epoch 50/300 | Critic Loss: -7926738.5000 | Generator Loss: -370425.0312
[games.csv] Epoch 60/300 | Critic Loss: -7225608.5000 | Generator Loss: -607534.6250
[games.csv] Epoch 70/300 | Critic Loss: -6899740.5000 | Generator Loss: -902128.0625
[games.csv] Epoch 80/300 | Critic Loss: -6264729.0000 | Generator Loss: -1211364.7500
[games.csv] Epoch 90/300 | Critic Loss: -6226793.5000 | Generator Loss: -1767550.5000
[games.csv] Epoch 100/300 | Critic Loss: -5446475.5000 | Generator Loss: -2187105.5000
[games.csv] Epoch 110/300 | Critic Loss: -4845916.0000 | Generator Loss: -2678300.2500
[games.csv] Epoch 120/300 | Critic Loss: -3995768.2500 | Genera

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
Parameters: { "use_label_encoder" } are not used.

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
Parameters: { "use_label_encoder" } are not used.



RandomForest - Accuracy: 0.8125
MLPClassifier - Accuracy: 0.3125
LogisticRegression - Accuracy: 0.7750
XGBoost - Accuracy: 0.7875

🧪 Training on Chunk 3/5 of Round 1 (Rows 800 to 1200)
RandomForest - Accuracy: 0.7750
MLPClassifier - Accuracy: 0.2875
LogisticRegression - Accuracy: 0.7375
XGBoost - Accuracy: 0.7625

🧪 Training on Chunk 4/5 of Round 1 (Rows 1200 to 1600)


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
Parameters: { "use_label_encoder" } are not used.

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
Parameters: { "use_label_encoder" } are not used.



RandomForest - Accuracy: 0.8625
MLPClassifier - Accuracy: 0.3625
LogisticRegression - Accuracy: 0.6625
XGBoost - Accuracy: 0.8125

🧪 Training on Chunk 5/5 of Round 1 (Rows 1600 to 2000)
RandomForest - Accuracy: 0.7375
MLPClassifier - Accuracy: 0.3750
LogisticRegression - Accuracy: 0.6875
XGBoost - Accuracy: 0.7250

🔁 Starting round 2 (Shuffle: Yes)

🧪 Training on Chunk 1/5 of Round 2 (Rows 0 to 400)


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
Parameters: { "use_label_encoder" } are not used.

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
Parameters: { "use_label_encoder" } are not used.



RandomForest - Accuracy: 0.7375
MLPClassifier - Accuracy: 0.3375
LogisticRegression - Accuracy: 0.6250
XGBoost - Accuracy: 0.7500

🧪 Training on Chunk 2/5 of Round 2 (Rows 400 to 800)
RandomForest - Accuracy: 0.7000
MLPClassifier - Accuracy: 0.2750
LogisticRegression - Accuracy: 0.6375
XGBoost - Accuracy: 0.7250

🧪 Training on Chunk 3/5 of Round 2 (Rows 800 to 1200)


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
Parameters: { "use_label_encoder" } are not used.

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
Parameters: { "use_label_encoder" } are not used.



RandomForest - Accuracy: 0.8500
MLPClassifier - Accuracy: 0.4875
LogisticRegression - Accuracy: 0.7125
XGBoost - Accuracy: 0.7750

🧪 Training on Chunk 4/5 of Round 2 (Rows 1200 to 1600)
RandomForest - Accuracy: 0.8625
MLPClassifier - Accuracy: 0.3625
LogisticRegression - Accuracy: 0.7250
XGBoost - Accuracy: 0.8500

🧪 Training on Chunk 5/5 of Round 2 (Rows 1600 to 2000)
RandomForest - Accuracy: 0.8125
MLPClassifier - Accuracy: 0.3625
LogisticRegression - Accuracy: 0.6625
XGBoost - Accuracy: 0.7875

📊 Final Average Accuracy on SYNTHETIC data:
RandomForest: Average Accuracy = 0.7900
MLPClassifier: Average Accuracy = 0.3562
LogisticRegression: Average Accuracy = 0.6975
XGBoost: Average Accuracy = 0.7787


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
Parameters: { "use_label_encoder" } are not used.

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
Parameters: { "use_label_encoder" } are not used.

