# Final 7-Metric Evaluation (CSV-based Pipeline)
This notebook takes a **CSV file** as input and evaluates the synthetic data using:
- Logistic Regression (LR)
- Random Forest (RF)
- MLP
- XGBoost (XGBT)
- Jensen-Shannon Divergence (JSD)
- Wasserstein Distance (WD)
- TSTR (Train on Synthetic, Test on Real)

**Setup:**
- 3 runs of 2-fold cross-validation
- Uses 50% of data rows for evaluation

In [1]:

import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score
from scipy.spatial.distance import jensenshannon
from scipy.stats import wasserstein_distance
import warnings
warnings.filterwarnings("ignore")


In [3]:

# Load CSV file instead of .npy
# Replace 'your_synthetic.csv' with your actual file name
df = pd.read_csv('synthetic_credit.csv')

# Convert to numpy
synthetic_data = df.values.astype(np.float32)

# Use 50% of the data
half_size = synthetic_data.shape[0] // 2
synthetic_data = synthetic_data[:half_size]
synthetic_labels = np.random.randint(0, 2, size=half_size)

# Dummy real data for JSD, WD, and TSTR
np.random.seed(42)
real_data = np.random.rand(*synthetic_data.shape)
real_labels = np.random.randint(0, 2, size=real_data.shape[0])


In [4]:

def evaluate_distributions(real, synth):
    jsd = [jensenshannon(real[:, i], synth[:, i]) for i in range(real.shape[1])]
    wd = [wasserstein_distance(real[:, i], synth[:, i]) for i in range(real.shape[1])]
    return np.mean(jsd), np.mean(wd)


In [5]:

def evaluate_tstr(real_X, real_y, synth_X, synth_y):
    model = MLPClassifier(max_iter=1000)
    skf = StratifiedKFold(n_splits=2, shuffle=True, random_state=42)
    scores = []
    for train_idx, test_idx in skf.split(real_X, real_y):
        model.fit(synth_X, synth_y)
        preds = model.predict(real_X[test_idx])
        scores.append(accuracy_score(real_y[test_idx], preds))
    return np.mean(scores)


In [6]:

def evaluate_all_metrics(X, y, seed=42):
    models = {
        "LR": LogisticRegression(max_iter=500),
        "MLP": MLPClassifier(max_iter=500),
        "RF": RandomForestClassifier(),
        "XGBT": XGBClassifier(use_label_encoder=False, eval_metric='logloss')
    }

    skf = StratifiedKFold(n_splits=2, shuffle=True, random_state=seed)
    metrics_summary = {name: {"ACC": [], "F1": [], "Precision": [], "Recall": [], "AUC": []} for name in models}

    for repeat in range(3):
        for model_name, model in models.items():
            for train_idx, test_idx in skf.split(X, y):
                model.fit(X[train_idx], y[train_idx])
                preds = model.predict(X[test_idx])
                probas = model.predict_proba(X[test_idx])[:, 1]

                metrics_summary[model_name]["ACC"].append(accuracy_score(y[test_idx], preds))
                metrics_summary[model_name]["F1"].append(f1_score(y[test_idx], preds))
                metrics_summary[model_name]["Precision"].append(precision_score(y[test_idx], preds))
                metrics_summary[model_name]["Recall"].append(recall_score(y[test_idx], preds))
                metrics_summary[model_name]["AUC"].append(roc_auc_score(y[test_idx], probas))

    results = []
    for model_name in models:
        results.append({
            "Model": model_name,
            "ACC": np.mean(metrics_summary[model_name]["ACC"]),
            "F1": np.mean(metrics_summary[model_name]["F1"]),
            "Precision": np.mean(metrics_summary[model_name]["Precision"]),
            "Recall": np.mean(metrics_summary[model_name]["Recall"]),
            "AUC": np.mean(metrics_summary[model_name]["AUC"])
        })

    jsd, wd = evaluate_distributions(real_data, X)
    tstr = evaluate_tstr(real_data, real_labels, X, y)

    results.append({"Model": "JSD", "ACC": None, "F1": None, "Precision": None, "Recall": None, "AUC": jsd})
    results.append({"Model": "WD", "ACC": None, "F1": None, "Precision": None, "Recall": None, "AUC": wd})
    results.append({"Model": "TSTR", "ACC": None, "F1": None, "Precision": None, "Recall": None, "AUC": tstr})

    return pd.DataFrame(results)


In [7]:
evaluation_df = evaluate_all_metrics(synthetic_data, synthetic_labels)
evaluation_df


Unnamed: 0,Model,ACC,F1,Precision,Recall,AUC
0,LR,0.5054,0.515184,0.506375,0.524342,0.498174
1,MLP,0.516267,0.51617,0.517732,0.515297,0.518479
2,RF,0.507733,0.501504,0.509181,0.49428,0.510626
3,XGBT,0.5018,0.505819,0.502996,0.508779,0.505236
4,JSD,,,,,0.336118
5,WD,,,,,0.446122
6,TSTR,,,,,0.4914
