In [None]:
# train_medgan.ipynb (extended with classifier evaluation)

from medgan_model import Medgan
from data_loader import load_shuttle_data
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
import pandas as pd

In [None]:
# --- Load and preprocess real shuttle data ---
# The CSV is read without a header row, so columns are labelled 0..n-1.
# Every column except the last goes to X_real; the last column goes to y_real.
df = pd.read_csv("datasets/shuttle.csv", header=None)
X_real = df[df.columns[:-1]]
y_real = df[df.columns[-1]]

In [None]:
# Encode target for XGBClassifier
# Fit the encoder on the raw labels first, then map the same labels to their
# integer codes. `le` stays in scope so the mapping can be reused later.
le = LabelEncoder().fit(y_real)
y_encoded = le.transform(y_real)

In [None]:
# Normalize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_real)

In [None]:
# Train-test split
X_train_real, X_test_real, y_train_real, y_test_real = train_test_split(X_scaled, y_encoded, test_size=0.2, random_state=42)

# --- Load data without labels for GAN training ---
X_train, X_test = load_shuttle_data("datasets/shuttle.csv")
input_dim = X_train.shape[1]

In [None]:
# --- Initialize and Train MedGAN ---
# Seed NumPy and TensorFlow before the model is built so that the shuffling
# and noise sampling below (and any TensorFlow-side randomness such as weight
# initialisation) are repeatable under Restart Kernel -> Run All.
# NOTE(review): some GPU ops can remain non-deterministic even with seeds set.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

medgan = Medgan(input_dim=input_dim, ae_loss_type='bce')
# NOTE(review): this optimizer is never passed to `medgan` in this cell --
# confirm whether Medgan manages its own optimizers or this should be wired in.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0002)
batch_size = 64
n_epochs = 10

for epoch in range(n_epochs):
    # In-place shuffle: the row order of X_train is changed on every epoch.
    np.random.shuffle(X_train)
    for i in range(0, len(X_train), batch_size):
        # The final batch of an epoch may hold fewer than batch_size rows,
        # so the noise is sized from the actual batch.
        batch = X_train[i:i + batch_size]
        noise = np.random.normal(size=(batch.shape[0], medgan.random_dim))
        medgan.train_step(batch, noise)
    print(f"Epoch {epoch+1}/{n_epochs} completed")

In [None]:
# --- Generate Synthetic Data ---
# Request one synthetic row per row of the real training split.
synthetic_data = medgan.generate_data(num_samples=X_train_real.shape[0])
# Apply the already-fitted scaler (transform only; the scaler is not re-fitted).
# NOTE(review): `scaler` was fitted on the raw CSV features, while the GAN was
# trained on the output of load_shuttle_data -- confirm both live in the same
# feature space before relying on synthetic_data_scaled.
synthetic_data_scaled = scaler.transform(synthetic_data)  # Scale similarly


In [None]:
# --- Classifier Evaluation ---
# Baseline: every classifier is fitted on the real training split and scored
# on the real test split. Precision/recall/F1 are macro-averaged, and
# zero_division=0 scores a class with no predictions as 0 instead of warning.
models = {
    "Random Forest": RandomForestClassifier(random_state=42),
    "MLP Classifier": MLPClassifier(max_iter=300, random_state=42),
    "XGB Classifier": XGBClassifier(use_label_encoder=False, eval_metric='mlogloss', random_state=42),
    "Logistic Regression": LogisticRegression(max_iter=300, random_state=42)
}

# Metrics that share the same macro-averaging arguments, in report order.
macro_metrics = (
    ("Precision", precision_score),
    ("Recall", recall_score),
    ("F1 Score", f1_score),
)

results = {}
for name, model in models.items():
    model.fit(X_train_real, y_train_real)
    y_pred = model.predict(X_test_real)

    scores = {"Accuracy": accuracy_score(y_test_real, y_pred)}
    for metric_label, metric_fn in macro_metrics:
        scores[metric_label] = metric_fn(y_test_real, y_pred, average='macro', zero_division=0)
    results[name] = scores

# One row per classifier, one column per metric.
results_df = pd.DataFrame(results).T
print("\nClassifier Evaluation on Real Data:", results_df, sep="\n")
