# Train MedGAN with 5-Fold Cross Validation and 7 Runs

In [None]:

import numpy as np
import tensorflow as tf
from sklearn.model_selection import KFold
from data_loader import load_shuttle_data
from medgan_model import Medgan


In [None]:

# Read the Shuttle CSV through the project loader and unpack the pair it
# returns into X_train / X_test.
csv_path = "path/to/your/shuttle.csv"  # <-- Replace with your dataset path
loaded_splits = load_shuttle_data(csv_path)
X_train, X_test = loaded_splits
print("Data loaded successfully.")


In [None]:

# Experiment configuration.
k_folds, n_runs = 5, 7  # cross-validation folds per run, independent repetitions
epochs = 50             # training iterations per fold
batch_size = 128
learning_rate = 1e-3    # passed to the Adam optimizer in the training cell

# Echo the key settings so the notebook output records what was run.
print(
    "Parameters set:",
    f"  Folds: {k_folds}",
    f"  Runs: {n_runs}",
    f"  Epochs per fold: {epochs}",
    sep="\n",
)


In [None]:

# Repeated k-fold cross-validation of MedGAN.
#
# For each of `n_runs` repetitions the training data is split into `k_folds`
# folds; a fresh model is trained on each training split for `epochs`
# iterations and then scored on the held-out split. The three losses
# (autoencoder, discriminator, generator) are averaged over the folds of a
# run and stored in the all_run_* lists, one entry per run.
#
# NOTE(review): `batch_size` is defined with the other parameters but is not
# used here -- every epoch is a single full-batch update on the fold's
# training split. Confirm whether mini-batching was intended.
all_run_ae_losses = []
all_run_d_losses = []
all_run_g_losses = []

for run in range(n_runs):
    print(f"\n===== Starting Run {run+1} of {n_runs} =====")

    # Seeding the shuffle with the run index gives every run its own,
    # reproducible fold assignment.
    kf = KFold(n_splits=k_folds, shuffle=True, random_state=run)

    fold_ae_losses = []
    fold_d_losses = []
    fold_g_losses = []

    for fold, (train_idx, val_idx) in enumerate(kf.split(X_train)):
        print(f"  - Fold {fold+1} of {k_folds}")

        # Split data into this fold's training and held-out parts.
        X_tr, X_val = X_train[train_idx], X_train[val_idx]

        # A new model per fold, so nothing learned on one fold carries over.
        model = Medgan(input_dim=X_train.shape[1])

        # One optimizer per sub-network. Current Keras optimizers bind to the
        # first variable list they are applied to and raise when later handed
        # different variables; separate instances also keep Adam's moment
        # estimates and step counters independent for each sub-network.
        ae_optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
        d_optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
        g_optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

        for epoch in range(epochs):
            # Fresh noise for the generator, one row per training sample.
            random_data = np.random.normal(size=(X_tr.shape[0], model.random_dim))

            # The tape must be persistent because three separate gradients
            # are taken from the same forward pass.
            with tf.GradientTape(persistent=True) as tape:
                ae_loss, d_loss, g_loss = model.train_step(X_tr, random_data)

            ae_vars = model.encoder.trainable_variables + model.decoder.trainable_variables
            d_vars = model.discriminator.trainable_variables
            g_vars = model.generator.trainable_variables

            # Take all gradients first, then drop the persistent tape so the
            # resources it holds are released before the next epoch.
            ae_grads = tape.gradient(ae_loss, ae_vars)
            d_grads = tape.gradient(d_loss, d_vars)
            g_grads = tape.gradient(g_loss, g_vars)
            del tape

            ae_optimizer.apply_gradients(zip(ae_grads, ae_vars))
            d_optimizer.apply_gradients(zip(d_grads, d_vars))
            g_optimizer.apply_gradients(zip(g_grads, g_vars))

        # Validate model: compute the three losses on the held-out split.
        # No gradients are applied here; this assumes train_step itself only
        # computes losses and does not update weights -- TODO confirm.
        random_data_val = np.random.normal(size=(X_val.shape[0], model.random_dim))
        ae_loss_val, d_loss_val, g_loss_val = model.train_step(X_val, random_data_val)

        fold_ae_losses.append(ae_loss_val.numpy())
        fold_d_losses.append(d_loss_val.numpy())
        fold_g_losses.append(g_loss_val.numpy())

    # After all folds, average the validation losses for this run.
    run_ae_loss = np.mean(fold_ae_losses)
    run_d_loss = np.mean(fold_d_losses)
    run_g_loss = np.mean(fold_g_losses)

    all_run_ae_losses.append(run_ae_loss)
    all_run_d_losses.append(run_d_loss)
    all_run_g_losses.append(run_g_loss)


In [None]:

# Final average losses across all runs: each all_run_* list holds one
# fold-averaged validation loss per run, so this is a mean of per-run means.
final_ae_loss = np.mean(all_run_ae_losses)
final_d_loss = np.mean(all_run_d_losses)
final_g_loss = np.mean(all_run_g_losses)

# The banner reports the configured run/fold counts rather than hard-coded
# numbers, so it stays correct when the parameters cell is edited.
print(f"""
=======================================
Final Results after {n_runs} Runs and {k_folds}-Fold CV
=======================================""")
print(f"Autoencoder Loss (AE): {final_ae_loss:.4f}")
print(f"Discriminator Loss (D): {final_d_loss:.4f}")
print(f"Generator Loss (G): {final_g_loss:.4f}")
