# 03 — Evaluation

Evaluate and compare generative models using statistical tests, stylized facts, TSTR (Train on Synthetic, Test on Real), and privacy metrics.

In [None]:
import sys
# Put the relative path '../src' first on the module search path so it takes
# precedence over installed packages; presumably this is where the `synfin`
# package lives (relative to the notebook's working directory — confirm).
sys.path.insert(0, '../src')

import numpy as np
import matplotlib.pyplot as plt

# Project evaluation helpers: KS test and MMD, stylized-fact checks, the TSTR
# benchmark, and the aggregate metrics entry point.
from synfin.evaluation.statistical_tests import ks_test, mmd_rbf
from synfin.evaluation.stylized_facts import check_all_stylized_facts
from synfin.evaluation.tstr import tstr_benchmark
from synfin.evaluation.metrics import compute_all_metrics

# Only reached if every import above succeeded.
print('Evaluation tools loaded!')

In [None]:
# Demo inputs: two seeded standard-normal tensors of shape (300, 30, 8).
# The "synthetic" tensor is drawn second and stretched by 5% so the two
# distributions differ slightly.
# NOTE(review): axes are presumably (windows, time steps, features) — confirm.
np.random.seed(42)
real = np.random.randn(300, 30, 8)
real = real.astype(np.float32)
synthetic = np.random.randn(300, 30, 8)
synthetic = synthetic.astype(np.float32) * 1.05

# One label per column of the last axis (8 entries).
feature_names = [
    'Open',
    'High',
    'Low',
    'Close',
    'Volume',
    'LogReturn',
    'LogVolume',
    'DollarVolume',
]

# Collapse the first two axes so each row is one 8-feature observation,
# then hand both tables to the project's KS-test helper.
real_flat = real.reshape((-1, 8))
synth_flat = synthetic.reshape((-1, 8))
ks_results = ks_test(real_flat, synth_flat, feature_names)

# Report each feature; a p-value above 0.05 is labelled PASS, otherwise FAIL.
print('KS Test Results:')
for feat, res in ks_results.items():
    if res['p_value'] > 0.05:
        status = 'PASS'
    else:
        status = 'FAIL'
    print(f'  {feat:20s}: stat={res["statistic"]:.3f}, p={res["p_value"]:.3f} [{status}]')

In [None]:
# MMD between the flattened real and synthetic tables
# (RBF kernel, judging by the helper's name — confirm in synfin).
mmd = mmd_rbf(real_flat, synth_flat)
print(f'MMD: {mmd:.4f}')

# Run the aggregate evaluation on the un-flattened tensors, with the TSTR
# step switched on, and print the headline score from the returned report.
report = compute_all_metrics(
    real,
    synthetic,
    feature_names=feature_names,
    run_tstr=True,
)
print(f'\nOverall Realism Score: {report["realism_score"]:.3f}')