In [1]:

import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score, roc_auc_score
from scipy.spatial.distance import jensenshannon
from scipy.stats import wasserstein_distance
import warnings
warnings.filterwarnings("ignore")


  from numpy.core.umath_tests import inner1d


In [2]:

# Load synthetic data
synthetic_data = np.load("synthetic_connect4.npy")

# Simulate real data of same shape and balanced labels
np.random.seed(42)
real_data = np.random.rand(*synthetic_data.shape)
real_labels = np.random.randint(0, 2, real_data.shape[0])
real_features = real_data[:, :-1]

synthetic_features = synthetic_data[:, :-1]
synthetic_labels = np.array([0, 1] * (len(synthetic_data) // 2))[:len(synthetic_data)]

print("✅ Data loaded. Real shape:", real_features.shape, "Synthetic shape:", synthetic_features.shape)


✅ Data loaded. Real shape: (10000, 125) Synthetic shape: (10000, 125)


In [3]:

def evaluate_models(X, y, label):
    results = []
    models = {
        'Logistic Regression': LogisticRegression(max_iter=1000),
        'Random Forest': RandomForestClassifier(),
        'MLP': MLPClassifier(max_iter=1000),
        'XGBoost': XGBClassifier(use_label_encoder=False, eval_metric='logloss')
    }
    skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
    for name, model in models.items():
        accs, f1s, precs, recs, aucs = [], [], [], [], []
        for train_idx, test_idx in skf.split(X, y):
            model.fit(X[train_idx], y[train_idx])
            preds = model.predict(X[test_idx])
            probas = model.predict_proba(X[test_idx])[:, 1]
            accs.append(accuracy_score(y[test_idx], preds))
            f1s.append(f1_score(y[test_idx], preds))
            precs.append(precision_score(y[test_idx], preds))
            recs.append(recall_score(y[test_idx], preds))
            aucs.append(roc_auc_score(y[test_idx], probas))
        results.append({
            "Model": "{} ({})".format(name, label),
            "Accuracy": np.mean(accs),
            "F1": np.mean(f1s),
            "Precision": np.mean(precs),
            "Recall": np.mean(recs),
            "ROC AUC": np.mean(aucs)
        })
    return pd.DataFrame(results)


In [4]:

def evaluate_distributions(real, synth):
    jsd = [jensenshannon(real[:, i], synth[:, i]) for i in range(real.shape[1])]
    wd = [wasserstein_distance(real[:, i], synth[:, i]) for i in range(real.shape[1])]
    return np.mean(jsd), np.mean(wd)

def evaluate_tstr(real_X, real_y, synth_X, synth_y):
    model = MLPClassifier(max_iter=1000)
    skf = StratifiedKFold(n_splits=2, shuffle=True, random_state=42)
    scores = []
    for train_idx, test_idx in skf.split(real_X, real_y):
        model.fit(synth_X, synth_y)
        preds = model.predict(real_X[test_idx])
        scores.append(accuracy_score(real_y[test_idx], preds))
    return np.mean(scores)


In [5]:

real_metrics = evaluate_models(real_features, real_labels, "Real")
synthetic_metrics = evaluate_models(synthetic_features, synthetic_labels, "Synthetic")
all_metrics = pd.concat([real_metrics, synthetic_metrics], ignore_index=True)

jsd_score, wd_score = evaluate_distributions(real_features, synthetic_features)
tstr_score = evaluate_tstr(real_features, real_labels, synthetic_features, synthetic_labels)

extra_metrics = pd.DataFrame({
    "Metric": ["JSD", "Wasserstein", "TSTR"],
    "Score": [jsd_score, wd_score, tstr_score]
})

# Save to CSV
all_metrics.to_csv("model_metrics_summary.csv", index=False)
extra_metrics.to_csv("extra_metrics_summary.csv", index=False)

print("✅ Evaluation complete.")
all_metrics, extra_metrics


Parameters: { use_label_encoder } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { use_label_encoder } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { use_label_encoder } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { use_label_encoder } might not be used.

  This may not be accurate due to some parameters are only used in 

(   Accuracy        F1                            Model  Precision   ROC AUC  \
 0  0.494001  0.474386                   XGBoost (Real)   0.485333  0.487827   
 1  0.494199  0.408344             Random Forest (Real)   0.481251  0.491911   
 2  0.509800  0.491824                       MLP (Real)   0.502712  0.510488   
 3  0.487400  0.455736       Logistic Regression (Real)   0.477379  0.480730   
 4  0.506799  0.505451              XGBoost (Synthetic)   0.507023  0.511781   
 5  0.505600  0.435207        Random Forest (Synthetic)   0.507556  0.506917   
 6  0.506098  0.508505                  MLP (Synthetic)   0.504445  0.507361   
 7  0.504100  0.508012  Logistic Regression (Synthetic)   0.503928  0.502042   
 
      Recall  
 0  0.463945  
 1  0.354662  
 2  0.483445  
 3  0.436116  
 4  0.504000  
 5  0.381198  
 6  0.530768  
 7  0.512199  ,         Metric     Score
 0          JSD  0.529804
 1  Wasserstein  0.399256
 2         TSTR  0.506600)