In [9]:
# Cell 1: Setup and Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import display, Markdown
import warnings
warnings.filterwarnings('ignore')

# Time series analysis
from statsmodels.tsa.seasonal import STL
from statsmodels.tsa.stattools import acf, pacf, adfuller, kpss
from statsmodels.stats.diagnostic import acorr_ljungbox
from arch import arch_model
from pygam import LinearGAM, s

# Statistical tests
from scipy import stats
from scipy.signal import periodogram, find_peaks
from scipy.stats import ks_2samp, anderson, jarque_bera, entropy, wasserstein_distance

# Machine learning
from sklearn.ensemble import RandomForestClassifier, IsolationForest
from sklearn.model_selection import train_test_split, TimeSeriesSplit
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score, mean_absolute_error, mean_squared_error
from sklearn.linear_model import Ridge, Lasso
from sklearn.neural_network import MLPRegressor
from sklearn.feature_selection import mutual_info_regression

# Advanced time series
try:
    import nolds  # for Hurst exponent
except ImportError:
    print("Warning: nolds not available. Some non-linear metrics will be skipped.")
    nolds = None

try:
    from pyinform import mutual_info, transfer_entropy
    PYINFORM_AVAILABLE = True
except (ImportError, OSError):
    print("Warning: pyinform not available. Using alternative mutual information calculation.")
    PYINFORM_AVAILABLE = False

# Visualization
from matplotlib.gridspec import GridSpec
import matplotlib.patches as mpatches

# Set style
plt.style.use('seaborn-v0_8-darkgrid')
np.random.seed(42)



In [10]:
# Cell 2: Enhanced Data Loading and Preprocessing
class TimeSeriesDataHandler:
    """Hold a univariate series and expose a chronological 80/20 train/test split.

    The series comes from one of three places, checked in this order:

    1. ``data_path`` - a CSV file; the value column is looked up by a list of
       known names, falling back to the first numeric column.
    2. ``synthetic_data`` - any array-like supplied by the caller.
    3. Neither - an example series built by ``_generate_example_data``.

    If reading the CSV fails for any reason the error is printed and the
    example series is used instead (deliberate best-effort behaviour).

    Attributes set by ``__init__``:
        df: the loaded DataFrame (only when a CSV was read).
        y: the full series as a float ndarray.
        n: number of observations in ``y``.
        train_end: index of the first test observation (``int(0.8 * n)``).
        y_train, y_test: the two chronological slices of ``y``.
        time: ``np.arange(n)`` integer time index.
    """

    def __init__(self, data_path=None, synthetic_data=None):
        if data_path:
            try:
                self.y = self._load_csv(data_path)
            except Exception as e:
                # Best-effort: keep the notebook running on example data.
                print(f"Error loading data from {data_path}: {e}")
                print("Generating synthetic example data instead...")
                self.y = self._generate_example_data()

        elif synthetic_data is not None:
            # Coerce to an ndarray so downstream slicing and .min()/.max()
            # behave the same as for the CSV and example paths.
            self.y = np.asarray(synthetic_data, dtype=float)
        else:
            # Generate example data if no path provided
            self.y = self._generate_example_data()

        self.n = len(self.y)
        self.train_end = int(0.8 * self.n)
        self.y_train = self.y[:self.train_end]
        self.y_test = self.y[self.train_end:]
        self.time = np.arange(self.n)

    def _load_csv(self, data_path):
        """Read ``data_path`` and return the value column as a NaN-free float array.

        Side effect: stores the DataFrame on ``self.df`` (indexed by the parsed
        date column when one is found and parses cleanly).

        Raises:
            ValueError: if no usable column exists or it holds no numeric values.
        """
        self.df = pd.read_csv(data_path)

        # Handle different possible column names for temperature / dates
        temp_columns = ['T (degC)', 'temperature', 'temp', 'Temperature', 'TEMP']
        date_columns = ['date', 'Date', 'DATE', 'time', 'Time', 'TIME', 'datetime']

        temp_col = next((col for col in temp_columns if col in self.df.columns), None)
        if temp_col is None:
            print(f"Available columns: {list(self.df.columns)}")
            # Use the first numeric column
            numeric_cols = self.df.select_dtypes(include=[np.number]).columns
            if len(numeric_cols) == 0:
                raise ValueError("No numeric columns found in the CSV file")
            temp_col = numeric_cols[0]
            print(f"Using column '{temp_col}' as temperature data")

        date_col = next((col for col in date_columns if col in self.df.columns), None)
        if date_col:
            try:
                parsed_dates = pd.to_datetime(self.df[date_col])
            except (ValueError, TypeError, OverflowError):
                # Unparseable dates are not fatal: keep the default index.
                print(f"Could not parse dates from column '{date_col}'")
            else:
                self.df[date_col] = parsed_dates
                self.df = self.df.set_index(date_col)

        # errors='coerce' turns stray non-numeric cells into NaN instead of
        # making np.isnan fail on an object-dtype column.
        values = pd.to_numeric(self.df[temp_col], errors='coerce').to_numpy(dtype=float)
        values = values[~np.isnan(values)]
        if len(values) == 0:
            raise ValueError(f"Column '{temp_col}' contains no numeric values")
        return values

    def _generate_example_data(self, n=2000):
        """Generate example temperature-like data of length ``n``.

        Uses NumPy's global random state for the noise.
        """
        t = np.arange(n)
        trend = 15 + 0.001 * t  # Slow warming trend
        season = 10 * np.sin(2 * np.pi * t / 365.25)  # Annual cycle
        # NOTE(review): with integer t and period 1 this is sin(2*pi*k), i.e.
        # numerically ~0, so the "daily cycle" contributes nothing. Left as-is
        # so the fallback series is unchanged; a real daily cycle needs
        # sub-daily sampling.
        daily = 3 * np.sin(2 * np.pi * t / 1)  # Daily cycle
        noise = np.random.normal(0, 2, n)

        # AR(1) noise with coefficient 0.7 driven by the Gaussian noise above
        ar_noise = np.zeros(n)
        for i in range(1, n):
            ar_noise[i] = 0.7 * ar_noise[i-1] + noise[i]

        return trend + season + daily + ar_noise

    def get_train_test_split(self):
        """Return ``(y_train, y_test, time_train, time_test)``."""
        return self.y_train, self.y_test, self.time[:self.train_end], self.time[self.train_end:]

# Initialize data handler with real data path
# (relative path: resolved against the kernel's working directory; if the file
# cannot be read, TimeSeriesDataHandler prints the error and falls back to its
# generated example series)
data_path = './data/weather/temp.csv'
data_handler = TimeSeriesDataHandler(data_path=data_path)
# Chronological split: these four names are notebook-level state used by later cells.
y_train, y_test, time_train, time_test = data_handler.get_train_test_split()

# Quick sanity summary of what was loaded
print(f"Data shape: {data_handler.n} observations")
print(f"Training: {len(y_train)}, Testing: {len(y_test)}")
print(f"Temperature range: [{y_train.min():.1f}, {y_train.max():.1f}] °C")

Data shape: 52696 observations
Training: 42156, Testing: 10540
Temperature range: [-6.4, 34.8] °C


In [None]:
# Cell 3: Enhanced Synthetic Data Generator
class AdvancedSyntheticGenerator:
    """Fit trend + seasonality + residual models on a series and sample new ones.

    ``fit`` decomposes ``y_train`` (STL, or a moving-average fallback), then fits:

    * a cubic polynomial to the trend component,
    * Fourier/Ridge models to the seasonal component, one per detected period,
    * a GARCH(1,1) model to the residual component (optional: if fitting
      fails, a constant-volatility model is used),
    * an empirical pool of "extreme" residuals (|resid| above its 95th percentile),
    * a high/low volatility regime flag per residual.

    ``generate`` recombines these into a synthetic series. Randomness comes
    from NumPy's global random state.

    Args:
        y_train: 1-D array of training observations.
        time_train: 1-D array of time indices aligned with ``y_train``.
    """

    def __init__(self, y_train, time_train):
        self.y_train = y_train
        self.time_train = time_train
        self.n_train = len(y_train)
        self.models = {}

    def fit(self):
        """Fit all component models"""
        print("Fitting synthetic data generator...")

        # 1. STL Decomposition
        print("  - STL decomposition...")
        # Candidate periods are tried in order; the first one that fits is used.
        periods_to_try = [365, 144, 24, 12, 7]
        best_period = None

        for period in periods_to_try:
            if period < len(self.y_train) / 2:
                try:
                    self.stl = STL(self.y_train, period=period, robust=True)
                    self.stl_result = self.stl.fit()
                except Exception:
                    # Try the next candidate; KeyboardInterrupt still propagates.
                    continue
                best_period = period
                print(f"    Using period={period}")
                break

        if best_period is None:
            # Fallback to simple decomposition
            print("    STL failed, using simple decomposition")
            window = min(365, len(self.y_train) // 4)
            self.stl_result = self._simple_decomposition(window)

        # 2. Trend modeling with polynomial or spline
        print("  - Trend model...")
        self._fit_trend()

        # 3. Multiple seasonality with Fourier terms
        print("  - Multi-scale seasonality...")
        self._fit_seasonality()

        # 4. Residual modeling - GARCH + Jump diffusion
        print("  - GARCH model for residuals...")
        residuals = self.stl_result.resid

        # Remove NaN values for GARCH fitting
        clean_residuals = residuals[~np.isnan(residuals)]
        self.residual_std = np.std(clean_residuals)  # Always store this

        try:
            self.garch = arch_model(clean_residuals, vol='Garch', p=1, q=1, dist='normal')
            self.garch_result = self.garch.fit(disp='off')
        except Exception:
            print("    GARCH fitting failed, using simple volatility model")
            self.garch_result = None

        # 5. Extreme value model
        print("  - Extreme value model...")
        self.threshold = np.percentile(np.abs(clean_residuals), 95)
        self.extremes = clean_residuals[np.abs(clean_residuals) > self.threshold]
        self.extreme_rate = len(self.extremes) / len(clean_residuals)

        # 6. Regime detection
        print("  - Regime detection...")
        self._detect_regimes()

        print("Generator fitting complete!")

    def _simple_decomposition(self, window):
        """Moving-average decomposition used when STL fails.

        Returns a namedtuple with ``trend``, ``seasonal`` and ``resid`` arrays
        (the same attribute names ``fit`` reads from an STL result). The trend
        is a centred rolling mean, so its edges are NaN.
        """
        from collections import namedtuple

        # Work in float so seasonal means are not truncated for integer input.
        values = np.asarray(self.y_train, dtype=float)
        window = max(int(window), 1)  # rolling() rejects a zero-length window

        # Trend: moving average
        trend = pd.Series(values).rolling(window=window, center=True).mean().values

        # Detrend
        detrended = values - trend
        detrended_clean = detrended[~np.isnan(detrended)]

        # Seasonal: average by position in cycle
        seasonal = np.zeros(len(values))
        if len(detrended_clean) > window:
            for i in range(window):
                indices = np.arange(i, len(values), window)
                if len(indices) > 0:
                    seasonal[indices] = np.nanmean(detrended[indices])

        # Residual
        resid = values - trend - seasonal

        STLResult = namedtuple('STLResult', ['trend', 'seasonal', 'resid'])
        return STLResult(trend=trend, seasonal=seasonal, resid=resid)

    def _fit_trend(self):
        """Fit trend using polynomial regression"""
        # Remove NaN values
        mask = ~np.isnan(self.stl_result.trend)
        x = self.time_train[mask]
        y = self.stl_result.trend[mask]

        # Fit polynomial (degree 3 for flexibility); coefficients are in
        # np.polyval order (highest power first).
        self.trend_poly = np.polyfit(x, y, deg=3)

    @staticmethod
    def _fourier_features(t, period, n_harmonics):
        """Return the (len(t), 2*n_harmonics) matrix of sin/cos pairs for ``period``."""
        columns = []
        for k in range(1, n_harmonics + 1):
            angle = 2 * np.pi * k * t / period
            columns.append(np.sin(angle))
            columns.append(np.cos(angle))
        return np.column_stack(columns)

    def _fit_seasonality(self):
        """Fit Fourier/Ridge models to the seasonal component.

        Up to three periods are taken from the strongest periodogram peaks
        (default candidates are used when none qualify). The models are fitted
        one after another, each to what the previous ones left unexplained, so
        that their predictions can simply be summed in ``generate``.

        Sets ``self.seasonal_periods`` (the periods actually fitted) and
        ``self.seasonal_models`` (``{period: (ridge_model, n_harmonics)}``).
        """
        seasonal_all = np.asarray(self.stl_result.seasonal, dtype=float)
        valid = ~np.isnan(seasonal_all)
        seasonal_clean = seasonal_all[valid]
        time_clean = np.asarray(self.time_train)[valid]

        # Detect periods using periodogram
        candidate_periods = []
        frequencies, power = periodogram(seasonal_clean)
        peaks, peak_props = find_peaks(power, height=np.percentile(power, 90))

        if len(peaks) > 0:
            # find_peaks returns peaks in index (frequency) order; rank them by
            # height so the *strongest* ones are used.
            strongest_first = peaks[np.argsort(peak_props['peak_heights'])[::-1]]
            for peak in strongest_first:
                if frequencies[peak] <= 0:
                    continue
                # round, not int(): 1/f can land just below a whole number.
                period = int(round(1.0 / frequencies[peak]))
                if 2 <= period <= len(self.y_train) // 2 and period not in candidate_periods:
                    candidate_periods.append(period)
                if len(candidate_periods) == 3:
                    break

        # Default periods if none detected
        if len(candidate_periods) == 0:
            candidate_periods = [365, 7, 1]

        # Fit Fourier series for each period (sequentially, on the remainder)
        self.seasonal_models = {}
        remainder = seasonal_clean.copy()

        for period in candidate_periods:
            n_harmonics = min(3, period // 2)
            if n_harmonics < 1:
                # A period below 2 samples has no representable harmonic.
                continue

            X_fourier = self._fourier_features(time_clean, period, n_harmonics)

            # Fit using Ridge regression
            model = Ridge(alpha=0.1)
            model.fit(X_fourier, remainder)
            remainder = remainder - model.predict(X_fourier)
            self.seasonal_models[period] = (model, n_harmonics)

        # Keep the public list in sync with the models that were really fitted.
        self.seasonal_periods = list(self.seasonal_models)

    def _detect_regimes(self):
        """Flag residuals whose 30-step rolling std is above its median (1) or not (0)."""
        residuals = self.stl_result.resid[~np.isnan(self.stl_result.resid)]
        rolling_std = pd.Series(residuals).rolling(30).std()
        self.volatility_regimes = (rolling_std > rolling_std.median()).astype(int)

    def _simulate_garch(self, n_samples):
        """Draw ``n_samples`` residuals from the fitted GARCH(1,1) recursion.

        sigma_t^2 = omega + alpha * eps_{t-1}^2 + beta * sigma_{t-1}^2, started
        from the last fitted conditional volatility. The variance is floored at
        1e-6 before the square root.
        """
        params = self.garch_result.params
        omega = float(params['omega'])
        alpha = float(params['alpha[1]'])
        beta = float(params['beta[1]'])

        sigma = float(np.asarray(self.garch_result.conditional_volatility)[-1])
        residuals = np.zeros(n_samples)

        for i in range(n_samples):
            if i > 0:
                variance = omega + alpha * residuals[i-1]**2 + beta * sigma**2
                sigma = np.sqrt(max(variance, 1e-6))  # Ensure positive
            residuals[i] = np.random.normal(0, sigma)

        return residuals

    def generate(self, n_samples, include_extremes=True):
        """Generate a synthetic series of length ``n_samples``.

        Args:
            n_samples: number of points; time runs over ``np.arange(n_samples)``.
            include_extremes: when True, a fraction ``extreme_rate`` of the
                residuals is overwritten with values resampled from the
                extreme residuals seen in training.

        Returns:
            1-D float array ``trend + seasonal + residuals``.
        """
        time_full = np.arange(n_samples)

        # 1. Generate trend
        trend = np.polyval(self.trend_poly, time_full)

        # 2. Generate multi-scale seasonality (sum of the per-period models)
        seasonal = np.zeros(n_samples)
        for period, (model, n_harmonics) in self.seasonal_models.items():
            X_fourier = self._fourier_features(time_full, period, n_harmonics)
            seasonal += model.predict(X_fourier)

        # 3. Generate residuals
        if self.garch_result is not None:
            try:
                residuals = self._simulate_garch(n_samples)
            except Exception:
                # Fallback to simple volatility clustering
                residuals = np.random.normal(0, self.residual_std, n_samples)
                # Add some volatility clustering
                for i in range(1, n_samples):
                    if np.abs(residuals[i-1]) > 2 * self.residual_std:
                        residuals[i] *= 1.5  # Higher volatility after extreme values
        else:
            # Simple random residuals
            residuals = np.random.normal(0, self.residual_std, n_samples)

        # 4. Add extreme events
        if include_extremes and len(self.extremes) > 0:
            n_extremes = int(n_samples * self.extreme_rate)
            if n_extremes > 0:
                extreme_indices = np.random.choice(n_samples, n_extremes, replace=False)
                extreme_values = np.random.choice(self.extremes, n_extremes, replace=True)
                residuals[extreme_indices] = extreme_values

        # 5. Combine components
        synthetic = trend + seasonal + residuals

        return synthetic

    def get_components(self):
        """Return fitted components for visualization"""
        return {
            'trend': self.stl_result.trend,
            'seasonal': self.stl_result.seasonal,
            'residual': self.stl_result.resid
        }

# Fit the generator
# (uses the notebook-level y_train / time_train produced by the data-loading cell)
generator = AdvancedSyntheticGenerator(y_train, time_train)
generator.fit()

# Generate synthetic data
# A series as long as the full real series is generated; the slice from
# train_end onwards lines up index-for-index with the real test segment.
y_synth_full = generator.generate(len(data_handler.y))
y_synth_test = y_synth_full[data_handler.train_end:]

Fitting synthetic data generator...
  - STL decomposition...
    Using period=365
  - Trend model...
  - Multi-scale seasonality...
  - GARCH model for residuals...
  - Extreme value model...
  - Regime detection...
Generator fitting complete!


In [None]:
# Cell 4: Comprehensive Evaluation Framework
class ComprehensiveEvaluator:
    """Compare a real series with a synthetic one across eight metric families.

    Each ``_evaluate_*`` method writes its metrics into ``self.results`` (a
    flat dict keyed by metric name); ``evaluate_all`` runs them in order and
    returns that dict.

    Args:
        y_real: 1-D array of real observations.
        y_synth: 1-D array of synthetic observations. Several metrics
            (spectral KL, weighted ACF) assume it has the same length as
            ``y_real``.
        y_real_train: optional training series; stored but not used by the
            metrics in this class.
    """

    def __init__(self, y_real, y_synth, y_real_train=None):
        self.y_real = y_real
        self.y_synth = y_synth
        self.y_real_train = y_real_train
        self.results = {}

    def evaluate_all(self):
        """Run all evaluation metrics"""
        print("Running comprehensive evaluation...")

        # 1. Distributional tests
        self._evaluate_distributions()

        # 2. Temporal structure tests
        self._evaluate_temporal_structure()

        # 3. Spectral analysis
        self._evaluate_spectral_properties()

        # 4. Extreme value analysis
        self._evaluate_extremes()

        # 5. Non-linear dynamics
        self._evaluate_nonlinear_dynamics()

        # 6. Predictive utility
        self._evaluate_predictive_utility()

        # 7. Domain-specific tests
        self._evaluate_domain_constraints()

        # 8. Machine learning discriminability
        self._evaluate_ml_discriminability()

        return self.results

    def _evaluate_distributions(self):
        """Compare the marginal distributions (moments, KS, Anderson-Darling, Wasserstein, JB)."""
        print("  1. Distributional tests...")

        # Basic statistics
        self.results['mean_diff'] = np.abs(np.mean(self.y_real) - np.mean(self.y_synth))
        self.results['std_diff'] = np.abs(np.std(self.y_real) - np.std(self.y_synth))
        self.results['skew_diff'] = np.abs(stats.skew(self.y_real) - stats.skew(self.y_synth))
        self.results['kurt_diff'] = np.abs(stats.kurtosis(self.y_real) - stats.kurtosis(self.y_synth))

        # Statistical tests
        ks_stat, ks_pval = ks_2samp(self.y_real, self.y_synth)
        self.results['ks_statistic'] = ks_stat
        self.results['ks_pvalue'] = ks_pval

        # Anderson-Darling test
        ad_result = stats.anderson_ksamp([self.y_real, self.y_synth])
        self.results['anderson_statistic'] = ad_result.statistic
        # Newer SciPy exposes the p-value as `pvalue`; older releases only
        # have `significance_level`.
        ad_pvalue = getattr(ad_result, 'pvalue', None)
        if ad_pvalue is None:
            ad_pvalue = ad_result.significance_level
        self.results['anderson_pvalue'] = ad_pvalue

        # Wasserstein distance
        self.results['wasserstein_distance'] = wasserstein_distance(self.y_real, self.y_synth)

        # Jarque-Bera normality test
        jb_real = jarque_bera(self.y_real)
        jb_synth = jarque_bera(self.y_synth)
        self.results['jb_real'] = jb_real.statistic
        self.results['jb_synth'] = jb_synth.statistic

    def _evaluate_temporal_structure(self):
        """Compare autocorrelation structure and stationarity test p-values."""
        print("  2. Temporal structure tests...")

        # ACF comparison
        max_lag = min(50, len(self.y_real) // 4)
        acf_real = acf(self.y_real, nlags=max_lag, fft=True)
        acf_synth = acf(self.y_synth, nlags=max_lag, fft=True)

        # Weighted ACF difference (weight 1/(lag+1): short lags count more)
        weights = 1 / (np.arange(max_lag + 1) + 1)
        self.results['weighted_acf_l2'] = np.sum(weights * (acf_real - acf_synth)**2)

        # Ljung-Box test for autocorrelation
        lb_real = acorr_ljungbox(self.y_real, lags=20, return_df=True)
        lb_synth = acorr_ljungbox(self.y_synth, lags=20, return_df=True)
        self.results['ljungbox_real_pval'] = lb_real['lb_pvalue'].min()
        self.results['ljungbox_synth_pval'] = lb_synth['lb_pvalue'].min()

        # Stationarity tests
        adf_real = adfuller(self.y_real)
        adf_synth = adfuller(self.y_synth)
        self.results['adf_real_pval'] = adf_real[1]
        self.results['adf_synth_pval'] = adf_synth[1]

    def _evaluate_spectral_properties(self):
        """Compare power spectra: KL divergence and dominant-peak frequency."""
        print("  3. Spectral analysis...")

        # Power spectral density
        freq_real, psd_real = periodogram(self.y_real)
        freq_synth, psd_synth = periodogram(self.y_synth)

        # Normalize PSDs
        psd_real_norm = psd_real / psd_real.sum()
        psd_synth_norm = psd_synth / psd_synth.sum()

        # KL divergence in frequency domain
        self.results['spectral_kl'] = entropy(psd_real_norm, psd_synth_norm)

        # Peak frequency comparison
        peaks_real, props_real = find_peaks(psd_real, height=np.percentile(psd_real, 90))
        peaks_synth, props_synth = find_peaks(psd_synth, height=np.percentile(psd_synth, 90))

        if len(peaks_real) > 0 and len(peaks_synth) > 0:
            # find_peaks orders peaks by index, so pick the tallest explicitly
            # rather than the lowest-frequency one.
            top_real = peaks_real[np.argmax(props_real['peak_heights'])]
            top_synth = peaks_synth[np.argmax(props_synth['peak_heights'])]
            self.results['peak_freq_diff'] = np.abs(freq_real[top_real] - freq_synth[top_synth])
        else:
            self.results['peak_freq_diff'] = np.nan

    def _evaluate_extremes(self):
        """Compare tail exceedance rates and the spacing of extreme values.

        Thresholds are taken from the *real* series and applied to both, so
        the rate difference measures how often the synthetic series visits the
        real tails. (Measuring each series against its own percentile would
        give roughly the same rate for both by construction.)
        """
        print("  4. Extreme value analysis...")

        # Define thresholds
        thresholds = [90, 95, 99]

        for thresh in thresholds:
            # Upper tail
            upper_thresh_real = np.percentile(self.y_real, thresh)
            exceed_rate_real = np.mean(self.y_real > upper_thresh_real)
            exceed_rate_synth = np.mean(self.y_synth > upper_thresh_real)

            self.results[f'exceed_rate_{thresh}_diff'] = np.abs(exceed_rate_real - exceed_rate_synth)

            # Lower tail
            lower_thresh_real = np.percentile(self.y_real, 100 - thresh)
            lower_rate_real = np.mean(self.y_real < lower_thresh_real)
            lower_rate_synth = np.mean(self.y_synth < lower_thresh_real)

            self.results[f'lower_exceed_rate_{thresh}_diff'] = np.abs(lower_rate_real - lower_rate_synth)

        # Extreme value clustering: mean gap between points more than 2 std from the mean
        extreme_indices_real = np.where(np.abs(self.y_real - np.mean(self.y_real)) > 2 * np.std(self.y_real))[0]
        extreme_indices_synth = np.where(np.abs(self.y_synth - np.mean(self.y_synth)) > 2 * np.std(self.y_synth))[0]

        if len(extreme_indices_real) > 1 and len(extreme_indices_synth) > 1:
            cluster_real = np.mean(np.diff(extreme_indices_real))
            cluster_synth = np.mean(np.diff(extreme_indices_synth))
            self.results['extreme_clustering_diff'] = np.abs(cluster_real - cluster_synth)
        else:
            self.results['extreme_clustering_diff'] = np.nan

    @staticmethod
    def _lagged_bin_codes(series, lag, n_bins):
        """Bin ``series`` into ``n_bins`` equal-width bins and return (codes[:-lag], codes[lag:])."""
        codes = pd.cut(np.asarray(series, dtype=float), bins=n_bins, labels=False)
        return codes[:-lag], codes[lag:]

    def _evaluate_nonlinear_dynamics(self):
        """Compare Hurst exponent, sample entropy and lag-1 mutual information."""
        print("  5. Non-linear dynamics...")

        # Hurst exponent (long-range dependence)
        if nolds is not None:
            try:
                H_real = nolds.hurst_rs(self.y_real)
                H_synth = nolds.hurst_rs(self.y_synth)
                self.results['hurst_diff'] = np.abs(H_real - H_synth)
            except Exception:
                self.results['hurst_diff'] = np.nan
        else:
            self.results['hurst_diff'] = np.nan

        # Sample entropy (complexity)
        if nolds is not None:
            try:
                samp_ent_real = nolds.sampen(self.y_real[:500])  # Use subset for speed
                samp_ent_synth = nolds.sampen(self.y_synth[:500])
                self.results['sample_entropy_diff'] = np.abs(samp_ent_real - samp_ent_synth)
            except Exception:
                self.results['sample_entropy_diff'] = np.nan
        else:
            self.results['sample_entropy_diff'] = np.nan

        # Mutual information (non-linear correlation) between y[t] and y[t+lag].
        # Both estimators work on discrete states, so each series is binned
        # once (same bin edges for both lagged copies) before either is called.
        lag = 1
        n_bins = 10
        real_now, real_next = self._lagged_bin_codes(self.y_real, lag, n_bins)
        synth_now, synth_next = self._lagged_bin_codes(self.y_synth, lag, n_bins)

        if PYINFORM_AVAILABLE:
            mi_real = mutual_info.mutual_info(real_now, real_next)
            mi_synth = mutual_info.mutual_info(synth_now, synth_next)
        else:
            # Discrete mutual information from the joint histogram of bin codes.
            # NOTE: the two backends may report in different units; only the
            # difference within one run is stored.
            from sklearn.metrics import mutual_info_score

            mi_real = mutual_info_score(real_now, real_next)
            mi_synth = mutual_info_score(synth_now, synth_next)

        self.results['mutual_info_diff'] = np.abs(mi_real - mi_synth)

    def _evaluate_predictive_utility(self):
        """Train-on-synthetic / test-on-real with a Ridge regressor and an RF classifier.

        Models are fitted on the first half of the synthetic windows and
        scored on the second half of the real windows.
        """
        print("  6. Predictive utility tests...")

        # Prepare data for time series prediction
        window = 24  # Look-back window

        def prepare_ts_data(data):
            X, y = [], []
            for i in range(window, len(data)):
                X.append(data[i-window:i])
                y.append(data[i])
            return np.array(X), np.array(y)

        # TSTR: Train on Synthetic, Test on Real
        X_synth, y_synth = prepare_ts_data(self.y_synth)
        X_real, y_real = prepare_ts_data(self.y_real)

        # Simple models
        models = {
            'ridge': Ridge(alpha=1.0),
            'rf': RandomForestClassifier(n_estimators=50, max_depth=5, random_state=42)
        }

        for name, model in models.items():
            if name == 'ridge':
                # Regression task
                model.fit(X_synth[:len(X_synth)//2], y_synth[:len(y_synth)//2])
                pred = model.predict(X_real[len(X_real)//2:])
                self.results[f'tstr_{name}_mse'] = mean_squared_error(y_real[len(y_real)//2:], pred)
            else:
                # Classification task (above/below median)
                y_synth_class = (y_synth > np.median(y_synth)).astype(int)
                y_real_class = (y_real > np.median(y_real)).astype(int)

                model.fit(X_synth[:len(X_synth)//2], y_synth_class[:len(y_synth_class)//2])
                pred_proba = model.predict_proba(X_real[len(X_real)//2:])[:, 1]
                self.results[f'tstr_{name}_auc'] = roc_auc_score(y_real_class[len(y_real_class)//2:], pred_proba)

    def _evaluate_domain_constraints(self):
        """Record value ranges, physically implausible counts and step-size statistics."""
        print("  7. Domain constraint tests...")

        # Physical plausibility (temperature)
        self.results['min_temp_real'] = self.y_real.min()
        self.results['min_temp_synth'] = self.y_synth.min()
        self.results['max_temp_real'] = self.y_real.max()
        self.results['max_temp_synth'] = self.y_synth.max()

        # Check for unrealistic values
        self.results['synth_below_absolute_zero'] = np.sum(self.y_synth < -273.15)
        self.results['synth_above_boiling'] = np.sum(self.y_synth > 100)

        # Gradient constraints (rate of change)
        grad_real = np.abs(np.diff(self.y_real))
        grad_synth = np.abs(np.diff(self.y_synth))

        self.results['max_gradient_real'] = grad_real.max()
        self.results['max_gradient_synth'] = grad_synth.max()
        self.results['gradient_percentile_95_diff'] = np.abs(
            np.percentile(grad_real, 95) - np.percentile(grad_synth, 95)
        )

    def _evaluate_ml_discriminability(self):
        """Train a random forest to tell real windows from synthetic ones.

        Stores the held-out ROC AUC (0.5 means indistinguishable on these
        features) and the three most important window features.
        """
        print("  8. ML discriminability tests...")

        # Create windows
        window_size = 50
        stride = 25

        def create_windows(data, label):
            windows = []
            labels = []
            # +1 so the final full-length window is included.
            for i in range(0, len(data) - window_size + 1, stride):
                windows.append(data[i:i+window_size])
                labels.append(label)
            return windows, labels

        # Create dataset (label 0 = real, 1 = synthetic)
        real_windows, real_labels = create_windows(self.y_real, 0)
        synth_windows, synth_labels = create_windows(self.y_synth, 1)

        X = np.vstack([real_windows, synth_windows])
        y = np.hstack([real_labels, synth_labels])

        # Extract features
        features = []
        for window in X:
            feat = [
                np.mean(window),
                np.std(window),
                stats.skew(window),
                stats.kurtosis(window),
                np.percentile(window, 25),
                np.percentile(window, 75),
                len(find_peaks(window)[0])
            ]
            features.append(feat)

        X_features = np.array(features)

        # Train classifier (local names chosen so they do not read like the
        # notebook-level y_train / y_test series)
        feat_train, feat_test, label_train, label_test = train_test_split(
            X_features, y, test_size=0.3, random_state=42, stratify=y
        )

        rf = RandomForestClassifier(n_estimators=100, random_state=42)
        rf.fit(feat_train, label_train)

        # Evaluate
        pred_proba = rf.predict_proba(feat_test)[:, 1]
        self.results['discriminator_auc'] = roc_auc_score(label_test, pred_proba)

        # Feature importance
        importances = rf.feature_importances_
        feature_names = ['mean', 'std', 'skew', 'kurt', 'q25', 'q75', 'n_peaks']
        self.results['top_discriminative_features'] = sorted(
            zip(feature_names, importances), key=lambda x: x[1], reverse=True
        )[:3]

# Run evaluation
# Real test segment vs. the synthetic segment covering the same index range;
# y_train is passed as the optional training reference.
evaluator = ComprehensiveEvaluator(y_test, y_synth_test, y_train)
results = evaluator.evaluate_all()

In [None]:
# Cell 5: Bootstrap Confidence Intervals
class BootstrapEvaluator:
    """Bootstrap confidence intervals for real-vs-synthetic distance metrics.

    Each bootstrap round draws a fresh synthetic series from ``generator`` and
    resamples the real test segment with replacement, so the interval reflects
    both generator randomness and sampling variability of the real data.

    Args:
        generator: object with ``generate(n_samples)`` returning a 1-D array.
        data_handler: object exposing ``y``, ``y_test`` and ``train_end``.
        n_bootstrap: number of bootstrap rounds per metric.
    """

    def __init__(self, generator, data_handler, n_bootstrap=100):
        self.generator = generator
        self.data_handler = data_handler
        self.n_bootstrap = n_bootstrap

    def bootstrap_metrics(self, metric_func, **kwargs):
        """Bootstrap confidence intervals for any metric.

        Args:
            metric_func: callable ``(y_real, y_synth, **kwargs) -> float``.
            **kwargs: forwarded unchanged to ``metric_func``.

        Returns:
            dict with ``mean``, ``std``, ``ci_lower`` (2.5th percentile) and
            ``ci_upper`` (97.5th percentile) of the metric over all rounds.
        """
        # Take the real test segment from the handler this evaluator was built
        # with, not from a notebook-level variable.
        y_real_test = np.asarray(self.data_handler.y_test)
        n_total = len(self.data_handler.y)
        train_end = self.data_handler.train_end

        values = []
        for _ in range(self.n_bootstrap):
            # Generate new synthetic data
            y_synth = self.generator.generate(n_total)
            y_synth_test = y_synth[train_end:]

            # Resample real data
            indices = np.random.choice(len(y_real_test), len(y_real_test), replace=True)
            y_real_boot = y_real_test[indices]

            # Calculate metric
            values.append(metric_func(y_real_boot, y_synth_test, **kwargs))

        values = np.array(values)
        return {
            'mean': np.mean(values),
            'std': np.std(values),
            'ci_lower': np.percentile(values, 2.5),
            'ci_upper': np.percentile(values, 97.5)
        }

    def run_bootstrap_evaluation(self):
        """Run bootstrap for key metrics and return ``{metric_name: summary_dict}``."""
        print("Running bootstrap evaluation (this may take a while)...")

        # Define metrics
        metrics = {
            'ks_statistic': lambda r, s: ks_2samp(r, s)[0],
            'wasserstein': lambda r, s: wasserstein_distance(r, s),
            'mean_diff': lambda r, s: np.abs(np.mean(r) - np.mean(s)),
            'std_diff': lambda r, s: np.abs(np.std(r) - np.std(s))
        }

        bootstrap_results = {}
        for name, func in metrics.items():
            print(f"  Bootstrapping {name}...")
            bootstrap_results[name] = self.bootstrap_metrics(func)

        return bootstrap_results

# Run bootstrap evaluation
# n_bootstrap=50 overrides the class default of 100; every round calls
# generator.generate() again for each metric.
bootstrap_eval = BootstrapEvaluator(generator, data_handler, n_bootstrap=50)
bootstrap_results = bootstrap_eval.run_bootstrap_evaluation()

In [None]:
# Cell 6: Visualization Suite
class ComprehensiveVisualizer:
    """Multi-panel figure comparing a real series with a synthetic one.

    Args:
        y_real: Real observations. Indexed with boolean masks, so a
            NumPy-style array is expected.
        y_synth: Synthetic observations; may differ in length from ``y_real``.
        results: Dict of precomputed metrics. Read with ``.get`` so missing
            entries render as ``nan``.
        bootstrap_results: Optional dict mapping metric name to a dict with
            ``'mean'``, ``'ci_lower'`` and ``'ci_upper'``.
    """

    def __init__(self, y_real, y_synth, results, bootstrap_results=None):
        self.y_real = y_real
        self.y_synth = y_synth
        self.results = results
        self.bootstrap_results = bootstrap_results

    def plot_all(self):
        """Build the full 8-row figure and return it (the caller shows it)."""
        fig = plt.figure(figsize=(20, 24))
        gs = GridSpec(8, 3, figure=fig, hspace=0.3, wspace=0.3)

        # 1. Time series comparison
        ax1 = fig.add_subplot(gs[0, :])
        self._plot_time_series(ax1)

        # 2. Distribution comparison
        ax2 = fig.add_subplot(gs[1, 0])
        self._plot_distributions(ax2)

        # 3. Q-Q plot
        ax3 = fig.add_subplot(gs[1, 1])
        self._plot_qq(ax3)

        # 4. ACF comparison
        ax4 = fig.add_subplot(gs[1, 2])
        self._plot_acf(ax4)

        # 5. Spectral analysis
        ax5 = fig.add_subplot(gs[2, :2])
        self._plot_spectral(ax5)

        # 6. Return distribution
        ax6 = fig.add_subplot(gs[2, 2])
        self._plot_returns(ax6)

        # 7. Extreme value analysis
        ax7 = fig.add_subplot(gs[3, :])
        self._plot_extremes(ax7)

        # 8. Rolling statistics
        ax8 = fig.add_subplot(gs[4, :])
        self._plot_rolling_stats(ax8)

        # 9. Metric summary
        ax9 = fig.add_subplot(gs[5:7, :])
        self._plot_metric_summary(ax9)

        # 10. Bootstrap results (the last row stays empty without them)
        if self.bootstrap_results:
            ax10 = fig.add_subplot(gs[7, :])
            self._plot_bootstrap_results(ax10)

        plt.suptitle('Comprehensive Synthetic vs Real Data Evaluation', fontsize=16, y=0.995)
        return fig

    def _plot_time_series(self, ax):
        """Overlay the real and synthetic series against their sample index."""
        # Each series gets its own x-axis so unequal lengths do not raise.
        t_real = np.arange(len(self.y_real))
        t_synth = np.arange(len(self.y_synth))
        ax.plot(t_real, self.y_real, 'b-', alpha=0.7, linewidth=0.5, label='Real')
        ax.plot(t_synth, self.y_synth, 'r-', alpha=0.7, linewidth=0.5, label='Synthetic')
        ax.set_xlabel('Time')
        ax.set_ylabel('Value')
        ax.set_title('Time Series Comparison')
        ax.legend()
        ax.grid(True, alpha=0.3)

    def _plot_distributions(self, ax):
        """Overlay density histograms and annotate the stored KS statistic."""
        ax.hist(self.y_real, bins=50, alpha=0.5, density=True, label='Real', color='blue')
        ax.hist(self.y_synth, bins=50, alpha=0.5, density=True, label='Synthetic', color='red')
        ax.set_xlabel('Value')
        ax.set_ylabel('Density')
        ax.set_title('Distribution Comparison')
        ax.legend()

        # Add KS statistic
        ks_stat = self.results.get('ks_statistic', np.nan)
        ax.text(0.05, 0.95, f'KS stat: {ks_stat:.3f}', transform=ax.transAxes,
                verticalalignment='top', bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))

    def _plot_qq(self, ax):
        """Draw normal Q-Q plots of both series on the same axes."""
        stats.probplot(self.y_real, dist="norm", plot=ax)
        ax.get_lines()[0].set_color('blue')
        ax.get_lines()[0].set_label('Real')

        # The first probplot call added two lines (points + fit), so the
        # synthetic points are the third line on the axes.
        stats.probplot(self.y_synth, dist="norm", plot=ax)
        ax.get_lines()[2].set_color('red')
        ax.get_lines()[2].set_label('Synthetic')

        ax.set_title('Q-Q Plot Comparison')
        ax.legend()
        ax.grid(True, alpha=0.3)

    def _plot_acf(self, ax):
        """Plot the autocorrelation of both series for lags 0..30."""
        lags = range(31)
        acf_real = acf(self.y_real, nlags=30, fft=True)
        acf_synth = acf(self.y_synth, nlags=30, fft=True)

        ax.plot(lags, acf_real, 'b-', marker='o', markersize=4, label='Real')
        ax.plot(lags, acf_synth, 'r-', marker='s', markersize=4, label='Synthetic')
        ax.axhline(y=0, color='k', linestyle='-', linewidth=0.5)
        ax.set_xlabel('Lag')
        ax.set_ylabel('ACF')
        ax.set_title('Autocorrelation Function Comparison')
        ax.legend()
        ax.grid(True, alpha=0.3)

    def _plot_spectral(self, ax):
        """Plot both periodograms on a log scale and annotate the spectral KL."""
        freq_real, psd_real = periodogram(self.y_real)
        freq_synth, psd_synth = periodogram(self.y_synth)

        ax.semilogy(freq_real, psd_real, 'b-', alpha=0.7, label='Real')
        ax.semilogy(freq_synth, psd_synth, 'r-', alpha=0.7, label='Synthetic')
        ax.set_xlabel('Frequency')
        ax.set_ylabel('Power Spectral Density')
        ax.set_title('Spectral Analysis')
        ax.legend()
        ax.grid(True, alpha=0.3)

        # Add spectral KL
        spectral_kl = self.results.get('spectral_kl', np.nan)
        ax.text(0.95, 0.95, f'Spectral KL: {spectral_kl:.3f}', transform=ax.transAxes,
                horizontalalignment='right', verticalalignment='top',
                bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))

    def _plot_returns(self, ax):
        """Overlay density histograms of the first differences of both series."""
        returns_real = np.diff(self.y_real)
        returns_synth = np.diff(self.y_synth)

        ax.hist(returns_real, bins=50, alpha=0.5, density=True, label='Real', color='blue')
        ax.hist(returns_synth, bins=50, alpha=0.5, density=True, label='Synthetic', color='red')
        ax.set_xlabel('Returns')
        ax.set_ylabel('Density')
        ax.set_title('Return Distribution')
        ax.legend()

    def _plot_extremes(self, ax):
        """Highlight points whose absolute deviation from the series mean
        exceeds that series' own 95th percentile."""
        t_real = np.arange(len(self.y_real))
        t_synth = np.arange(len(self.y_synth))

        dev_real = np.abs(self.y_real - np.mean(self.y_real))
        dev_synth = np.abs(self.y_synth - np.mean(self.y_synth))

        # Define threshold (per series)
        threshold_real = np.percentile(dev_real, 95)
        threshold_synth = np.percentile(dev_synth, 95)

        # Plot with extremes highlighted
        ax.plot(t_real, self.y_real, 'b-', alpha=0.3, linewidth=0.5, label='Real')
        ax.plot(t_synth, self.y_synth, 'r-', alpha=0.3, linewidth=0.5, label='Synthetic')

        # Highlight extremes
        extreme_real = dev_real > threshold_real
        extreme_synth = dev_synth > threshold_synth

        ax.scatter(t_real[extreme_real], self.y_real[extreme_real], c='blue', s=20, alpha=0.8, label='Real extremes')
        ax.scatter(t_synth[extreme_synth], self.y_synth[extreme_synth], c='red', s=20, alpha=0.8, label='Synthetic extremes')

        ax.set_xlabel('Time')
        ax.set_ylabel('Value')
        ax.set_title('Extreme Value Analysis')
        ax.legend()
        ax.grid(True, alpha=0.3)

    def _plot_rolling_stats(self, ax):
        """Plot the rolling mean with a +/- one rolling-std band for both series."""
        window = 50

        # Calculate rolling statistics
        real_series = pd.Series(self.y_real)
        synth_series = pd.Series(self.y_synth)

        roll_mean_real = real_series.rolling(window).mean()
        roll_std_real = real_series.rolling(window).std()
        roll_mean_synth = synth_series.rolling(window).mean()
        roll_std_synth = synth_series.rolling(window).std()

        # Separate x-axes so the two series may have different lengths.
        t_real = np.arange(len(self.y_real))
        t_synth = np.arange(len(self.y_synth))

        # Plot mean ± std
        ax.plot(t_real, roll_mean_real, 'b-', label='Real mean')
        ax.fill_between(t_real, roll_mean_real - roll_std_real, roll_mean_real + roll_std_real,
                        alpha=0.2, color='blue')

        ax.plot(t_synth, roll_mean_synth, 'r-', label='Synthetic mean')
        ax.fill_between(t_synth, roll_mean_synth - roll_std_synth, roll_mean_synth + roll_std_synth,
                        alpha=0.2, color='red')

        ax.set_xlabel('Time')
        ax.set_ylabel('Value')
        ax.set_title(f'Rolling Statistics (window={window})')
        ax.legend()
        ax.grid(True, alpha=0.3)

    def _plot_metric_summary(self, ax):
        """Render the stored metrics as a table grouped under section headers."""
        ax.axis('tight')
        ax.axis('off')

        def fmt(key):
            # Missing metrics show up as 'nan' instead of raising.
            return f"{self.results.get(key, np.nan):.4f}"

        section_titles = ['Distributional Tests', 'Temporal Structure',
                          'ML Discriminability', 'Extreme Values', 'Non-linear Dynamics']
        spacer = ['', '', '']

        # Prepare data for table
        metrics_data = [
            # Distributional metrics
            ['Distributional Tests', '', ''],
            ['KS Statistic', fmt('ks_statistic'), f"p={fmt('ks_pvalue')}"],
            ['Wasserstein Distance', fmt('wasserstein_distance'), ''],
            ['Mean Difference', fmt('mean_diff'), ''],
            ['Std Difference', fmt('std_diff'), ''],
            # Temporal metrics
            list(spacer),
            ['Temporal Structure', '', ''],
            ['Weighted ACF L2', fmt('weighted_acf_l2'), ''],
            ['Spectral KL', fmt('spectral_kl'), ''],
            # ML metrics
            list(spacer),
            ['ML Discriminability', '', ''],
            ['Discriminator AUC', fmt('discriminator_auc'), '0.5 = indistinguishable'],
            # Extreme value metrics
            list(spacer),
            ['Extreme Values', '', ''],
            ['95% Exceed Rate Diff', fmt('exceed_rate_95_diff'), ''],
            # Non-linear dynamics
            list(spacer),
            ['Non-linear Dynamics', '', ''],
            ['Hurst Exponent Diff', fmt('hurst_diff'), ''],
        ]

        # Create table
        table = ax.table(cellText=metrics_data,
                         colLabels=['Metric', 'Value', 'Notes'],
                         cellLoc='left',
                         loc='center',
                         colWidths=[0.4, 0.3, 0.3])

        table.auto_set_font_size(False)
        table.set_fontsize(9)
        table.scale(1.2, 1.5)

        # Style the section-header rows (table row 0 is the column header,
        # hence the +1 offset).
        for i, row in enumerate(metrics_data):
            if row[0] not in section_titles:
                continue
            for j in range(3):
                cell = table[(i + 1, j)]
                cell.set_text_props(weight='bold')
                cell.set_facecolor('#E8E8E8')

        ax.set_title('Comprehensive Metric Summary', pad=20, fontsize=12, weight='bold')

    def _plot_bootstrap_results(self, ax):
        """Plot each bootstrapped metric as a mean marker on its 95% CI segment."""
        if not self.bootstrap_results:
            return

        # The axes must stay visible: the tick labels carry the metric names
        # and the x-axis carries the values, so ax.axis('off') is not called.

        # Prepare data
        metrics = list(self.bootstrap_results.keys())
        means = [self.bootstrap_results[m]['mean'] for m in metrics]
        ci_lower = [self.bootstrap_results[m]['ci_lower'] for m in metrics]
        ci_upper = [self.bootstrap_results[m]['ci_upper'] for m in metrics]

        y_pos = np.arange(len(metrics))

        # Plot confidence intervals
        for i, (m, lower, upper) in enumerate(zip(means, ci_lower, ci_upper)):
            ax.plot([lower, upper], [i, i], 'b-', linewidth=2)
            ax.plot(m, i, 'ro', markersize=8)

        ax.set_yticks(y_pos)
        ax.set_yticklabels(metrics)
        ax.set_xlabel('Value')
        ax.set_title('Bootstrap Confidence Intervals (95%)', fontsize=12, weight='bold')
        ax.grid(True, axis='x', alpha=0.3)

        # Pad by a fraction of the span rather than scaling the bounds, which
        # would clip intervals whose limits are negative or zero.
        lo, hi = min(ci_lower), max(ci_upper)
        if np.isfinite(lo) and np.isfinite(hi):
            span = hi - lo
            pad = 0.05 * span if span > 0 else 0.05 * max(abs(lo), 1.0)
            ax.set_xlim(lo - pad, hi + pad)

# Create visualizations
# `y_test`, `y_synth_test` and `results` are not defined in this cell and must
# already exist in the kernel; `bootstrap_results` comes from the bootstrap run.
visualizer = ComprehensiveVisualizer(y_test, y_synth_test, results, bootstrap_results)
# plot_all() only builds and returns the figure; layout and display happen here.
fig = visualizer.plot_all()
plt.tight_layout()
plt.show()

In [None]:
# Cell 7: Advanced Model Comparison
class ModelComparisonFramework:
    """Fit several generators on the same training data and score each one
    against held-out real data.

    Args:
        y_train: Training series passed to every registered model.
        time_train: Time index passed to every registered model.
    """

    def __init__(self, y_train, time_train):
        self.y_train = y_train
        self.time_train = time_train
        self.models = {}

    def add_model(self, name, generator_class):
        """Register a model under ``name``.

        ``generator_class`` is instantiated immediately as
        ``generator_class(y_train, time_train)``; the resulting object must
        provide ``fit()`` and ``generate(n_samples)``.
        """
        self.models[name] = generator_class(self.y_train, self.time_train)

    def compare_all(self, y_test, n_samples=1000):
        """Fit, generate and evaluate every registered model.

        Args:
            y_test: Real held-out series each synthetic series is compared to.
            n_samples: Total length requested from ``generate``. Only the part
                after the first ``len(y_train)`` points is evaluated, so this
                must be larger than the training length.

        Returns:
            DataFrame with one row per model and the columns
            ``ks_statistic``, ``wasserstein_distance`` and ``weighted_acf_l2``.

        Raises:
            ValueError: If ``n_samples`` leaves no synthetic points after the
                training segment.
        """
        n_train = len(self.y_train)
        # Fail fast, before any expensive fit: otherwise the slice below is
        # empty and the evaluator fails with an unrelated-looking error.
        if n_samples <= n_train:
            raise ValueError(
                f"n_samples ({n_samples}) must exceed the training length "
                f"({n_train}) so that synthetic test data remains to evaluate"
            )

        results = {}

        for name, model in self.models.items():
            print(f"\nEvaluating {name}...")
            model.fit()
            y_synth = model.generate(n_samples)
            y_synth_test = y_synth[n_train:]

            # Quick evaluation: only the two groups whose metrics are reported
            evaluator = ComprehensiveEvaluator(y_test, y_synth_test)
            evaluator._evaluate_distributions()
            evaluator._evaluate_temporal_structure()

            results[name] = {
                'ks_statistic': evaluator.results['ks_statistic'],
                'wasserstein_distance': evaluator.results['wasserstein_distance'],
                'weighted_acf_l2': evaluator.results['weighted_acf_l2']
            }

        # Transpose so that models are rows and metrics are columns.
        return pd.DataFrame(results).T

# Example: Compare with simpler baseline
class SimpleARModel:
    """Autoregressive baseline with the same ``fit`` / ``generate`` interface
    as the other generators used in the comparison.

    Args:
        y_train: Training series.
        time_train: Time index; stored but not used by this model.
        lags: Autoregressive order passed to ``AutoReg`` (default 5).
    """

    def __init__(self, y_train, time_train, lags=5):
        self.y_train = y_train
        self.time_train = time_train
        self.lags = lags

    def fit(self):
        """Fit the autoregressive model on the training series."""
        from statsmodels.tsa.ar_model import AutoReg
        self.model = AutoReg(self.y_train, lags=self.lags)
        self.fitted = self.model.fit()

    def generate(self, n_samples):
        """Return a series of length ``n_samples``: the training data followed
        by an out-of-sample forecast for the remaining points.

        If ``n_samples`` does not exceed the training length, no forecast is
        requested and the first ``n_samples`` training points are returned.

        Raises:
            ValueError: If ``n_samples`` is negative.
        """
        if n_samples < 0:
            raise ValueError(f"n_samples must be non-negative, got {n_samples}")

        history = np.asarray(self.y_train)
        horizon = n_samples - len(history)
        if horizon <= 0:
            # Nothing to forecast; a non-positive horizon is not a valid
            # number of forecast steps.
            return history[:n_samples].copy()

        forecast = np.asarray(self.fitted.forecast(horizon))
        return np.concatenate([history, forecast])

# Run comparison
# `y_train`, `time_train`, `y_test` and `AdvancedSyntheticGenerator` must
# already exist in the kernel.
comparison = ModelComparisonFramework(y_train, time_train)
comparison.add_model('Advanced (STL+GAM+GARCH)', AdvancedSyntheticGenerator)
comparison.add_model('Simple AR(5)', SimpleARModel)

# Request exactly train + test points: compare_all evaluates only what follows
# the training segment, so this yields one synthetic value per real test value
# regardless of the dataset size (the default of 1000 is unrelated to it).
comparison_results = comparison.compare_all(y_test, n_samples=len(y_train) + len(y_test))
print("\nModel Comparison Results:")
print(comparison_results)

In [None]:
# Cell 8: Final Report Generation
class SyntheticDataReport:
    """Assemble evaluation metrics into a Markdown report.

    Args:
        results: Dict of metrics; every entry is read with ``.get`` so missing
            ones are reported as ``nan``.
        bootstrap_results: Dict mapping metric name to a dict with ``'mean'``,
            ``'ci_lower'`` and ``'ci_upper'`` (may be empty or ``None``).
        comparison_results: Optional DataFrame of per-model metrics.
    """

    _NOT_AVAILABLE = "N/A (metric not computed)"

    def __init__(self, results, bootstrap_results, comparison_results=None):
        self.results = results
        self.bootstrap_results = bootstrap_results
        self.comparison_results = comparison_results

    @classmethod
    def _distribution_verdict(cls, ks_pval):
        """Verdict for the KS p-value; a missing value is not a failure."""
        if pd.isna(ks_pval):
            return cls._NOT_AVAILABLE
        return "PASS ✓" if ks_pval > 0.05 else "FAIL ✗"

    @classmethod
    def _discriminator_verdict(cls, disc_auc):
        """Verdict for the discriminator AUC based on its distance from 0.5."""
        if pd.isna(disc_auc):
            return cls._NOT_AVAILABLE
        if 0.45 <= disc_auc <= 0.55:
            return "EXCELLENT ✓"
        if 0.4 <= disc_auc <= 0.6:
            return "GOOD ✓"
        return "POOR ✗"

    def generate_report(self):
        """Build the report and return it as a single Markdown string."""
        report = []

        report.append("# Synthetic Data Generation Evaluation Report\n")
        report.append(f"Generated on: {pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S')}\n")

        # Executive Summary
        report.append("## Executive Summary\n")

        # Key metrics
        ks_stat = self.results.get('ks_statistic', np.nan)
        ks_pval = self.results.get('ks_pvalue', np.nan)
        disc_auc = self.results.get('discriminator_auc', np.nan)

        # NaN compares False against every threshold, which previously turned
        # a missing metric into a FAIL/POOR verdict.
        dist_verdict = self._distribution_verdict(ks_pval)
        disc_verdict = self._discriminator_verdict(disc_auc)

        report.append(f"- **Distribution Match**: {dist_verdict} (KS p-value: {ks_pval:.3f})\n")
        report.append(f"- **ML Discriminability**: {disc_verdict} (AUC: {disc_auc:.3f})\n")

        # Detailed Results
        report.append("\n## Detailed Results\n")

        # 1. Distributional Properties
        report.append("\n### 1. Distributional Properties\n")
        report.append(f"- KS Statistic: {ks_stat:.4f} (p-value: {ks_pval:.4f})\n")
        report.append(f"- Wasserstein Distance: {self.results.get('wasserstein_distance', np.nan):.4f}\n")
        report.append(f"- Mean Difference: {self.results.get('mean_diff', np.nan):.4f}\n")
        report.append(f"- Std Difference: {self.results.get('std_diff', np.nan):.4f}\n")

        # 2. Temporal Structure
        report.append("\n### 2. Temporal Structure\n")
        report.append(f"- Weighted ACF L2: {self.results.get('weighted_acf_l2', np.nan):.4f}\n")
        report.append(f"- ADF test p-values - Real: {self.results.get('adf_real_pval', np.nan):.4f}, "
                      f"Synthetic: {self.results.get('adf_synth_pval', np.nan):.4f}\n")

        # 3. Extreme Values
        report.append("\n### 3. Extreme Value Analysis\n")
        report.append(f"- 95% Exceedance Rate Diff: {self.results.get('exceed_rate_95_diff', np.nan):.4f}\n")
        report.append(f"- Extreme Clustering Diff: {self.results.get('extreme_clustering_diff', np.nan):.4f}\n")

        # 4. Non-linear Dynamics
        report.append("\n### 4. Non-linear Dynamics\n")
        report.append(f"- Hurst Exponent Diff: {self.results.get('hurst_diff', np.nan):.4f}\n")
        report.append(f"- Sample Entropy Diff: {self.results.get('sample_entropy_diff', np.nan):.4f}\n")

        # 5. Domain Constraints
        report.append("\n### 5. Domain Constraints\n")
        report.append(f"- Temperature Range - Real: [{self.results.get('min_temp_real', np.nan):.1f}, "
                      f"{self.results.get('max_temp_real', np.nan):.1f}]°C\n")
        report.append(f"- Temperature Range - Synthetic: [{self.results.get('min_temp_synth', np.nan):.1f}, "
                      f"{self.results.get('max_temp_synth', np.nan):.1f}]°C\n")
        report.append(f"- Violations: {self.results.get('synth_below_absolute_zero', 0)} below absolute zero, "
                      f"{self.results.get('synth_above_boiling', 0)} above boiling\n")

        # Bootstrap Results
        if self.bootstrap_results:
            report.append("\n### 6. Bootstrap Confidence Intervals\n")
            for metric, values in self.bootstrap_results.items():
                report.append(f"- {metric}: {values['mean']:.4f} "
                              f"(95% CI: [{values['ci_lower']:.4f}, {values['ci_upper']:.4f}])\n")

        # Model Comparison
        if self.comparison_results is not None:
            report.append("\n### 7. Model Comparison\n")
            # Fence the fixed-width table so Markdown keeps its column
            # alignment instead of reflowing it into one paragraph.
            report.append("```")
            report.append(self.comparison_results.to_string())
            report.append("```\n")

        # Recommendations
        report.append("\n## Recommendations\n")

        if ks_pval < 0.05:
            report.append("- ⚠️ Distribution match is poor. Consider:\n")
            report.append("  - Adding mixture components\n")
            report.append("  - Using kernel density estimation\n")
            report.append("  - Incorporating quantile matching\n")

        if disc_auc > 0.6:
            report.append("- ⚠️ Synthetic data is too easily distinguishable. Consider:\n")
            report.append("  - Adding more complex temporal dependencies\n")
            report.append("  - Improving extreme value modeling\n")
            report.append("  - Using adversarial training\n")

        if self.results.get('exceed_rate_95_diff', 0) > 0.01:
            report.append("- ⚠️ Extreme value behavior differs significantly. Consider:\n")
            report.append("  - Using EVT (Extreme Value Theory) models\n")
            report.append("  - Separate modeling of tail behavior\n")

        return '\n'.join(report)

# Generate report
report_generator = SyntheticDataReport(results, bootstrap_results, comparison_results)
report = report_generator.generate_report()

# Display report
display(Markdown(report))

# Save report
# The report contains non-ASCII characters (✓, ✗, ⚠️, °), so the encoding is
# set explicitly; the platform default (e.g. cp1252) can fail to encode them.
with open('synthetic_data_evaluation_report.md', 'w', encoding='utf-8') as f:
    f.write(report)
print("\nReport saved to 'synthetic_data_evaluation_report.md'")

In [None]:
# Cell 9: Interactive Dashboard (Optional)
try:
    import ipywidgets as widgets
    from IPython.display import display
except ImportError:
    print("ipywidgets not available. Skipping interactive dashboard.")
else:
    # Only the imports are guarded: an ImportError raised while building or
    # using the dashboard is a real error and should not be reported as
    # "ipywidgets not available".

    class InteractiveDashboard:
        """Widget panel that regenerates synthetic data on demand and shows a
        quick comparison against the real test data.

        Args:
            generator: Object providing ``generate(n, include_extremes=...)``.
            data_handler: Object providing ``train_end`` and ``y_test``.
            evaluator: Stored on the instance; not used by the dashboard itself.
        """

        # Dropdown label -> key in the evaluator's results dict
        _METRIC_KEYS = {
            'KS Statistic': 'ks_statistic',
            'Wasserstein Distance': 'wasserstein_distance',
            'ACF L2': 'weighted_acf_l2',
        }

        def __init__(self, generator, data_handler, evaluator):
            self.generator = generator
            self.data_handler = data_handler
            self.evaluator = evaluator

        def create_dashboard(self):
            """Create interactive dashboard for parameter tuning.

            Returns:
                A ``VBox`` holding the controls and the output area.
            """

            # Controls
            n_samples_slider = widgets.IntSlider(
                value=1000, min=100, max=5000, step=100,
                description='N Samples:'
            )

            include_extremes = widgets.Checkbox(
                value=True,
                description='Include Extremes'
            )

            metric_dropdown = widgets.Dropdown(
                options=list(self._METRIC_KEYS),
                value='KS Statistic',
                description='Metric:'
            )

            generate_button = widgets.Button(
                description='Generate & Evaluate',
                button_style='primary'
            )

            output = widgets.Output()

            def on_generate_click(b):
                with output:
                    output.clear_output()

                    # Generate
                    y_synth = self.generator.generate(
                        n_samples_slider.value,
                        include_extremes=include_extremes.value
                    )

                    # Only the part after the training segment is evaluated;
                    # small slider values can leave nothing to compare.
                    y_synth_test = y_synth[self.data_handler.train_end:]
                    if len(y_synth_test) == 0:
                        print(
                            f"N Samples ({n_samples_slider.value}) must exceed the "
                            f"training length ({self.data_handler.train_end}) to "
                            f"leave synthetic test data to evaluate."
                        )
                        return

                    # Evaluate
                    quick_eval = ComprehensiveEvaluator(
                        self.data_handler.y_test,
                        y_synth_test
                    )
                    quick_eval._evaluate_distributions()
                    metric_key = self._METRIC_KEYS[metric_dropdown.value]
                    if metric_key not in quick_eval.results:
                        # NOTE(review): the model comparison code runs
                        # _evaluate_temporal_structure() before reading
                        # 'weighted_acf_l2', so it is presumably what fills
                        # that key -- confirm against ComprehensiveEvaluator.
                        quick_eval._evaluate_temporal_structure()

                    # Display
                    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))

                    # Time series
                    ax1.plot(self.data_handler.y_test[:200], 'b-', alpha=0.7, label='Real')
                    ax1.plot(y_synth_test[:200], 'r-', alpha=0.7, label='Synthetic')
                    ax1.set_title('Time Series Sample')
                    ax1.legend()

                    # Distribution
                    ax2.hist(self.data_handler.y_test, bins=50, alpha=0.5, density=True, label='Real')
                    ax2.hist(y_synth_test, bins=50, alpha=0.5, density=True, label='Synthetic')
                    ax2.set_title('Distribution')
                    ax2.legend()

                    plt.tight_layout()
                    plt.show()

                    # Show metric
                    value = quick_eval.results[metric_key]
                    print(f"\n{metric_dropdown.value}: {value:.4f}")

            generate_button.on_click(on_generate_click)

            # Layout
            controls = widgets.VBox([
                widgets.HTML("<h3>Synthetic Data Generator Dashboard</h3>"),
                n_samples_slider,
                include_extremes,
                metric_dropdown,
                generate_button
            ])

            return widgets.VBox([controls, output])

    # Create dashboard
    dashboard = InteractiveDashboard(generator, data_handler, evaluator)
    display(dashboard.create_dashboard())

In [None]:
# Cell 10: Save and Load Models
import pickle
import json

class ModelPersistence:
    """Static helpers to persist a fitted generator (pickle) and the
    evaluation results (JSON)."""

    @staticmethod
    def save_generator(generator, filepath):
        """Pickle ``generator`` to ``filepath``."""
        with open(filepath, 'wb') as f:
            pickle.dump(generator, f)
        print(f"Generator saved to {filepath}")

    @staticmethod
    def load_generator(filepath):
        """Unpickle and return the generator stored at ``filepath``.

        SECURITY: ``pickle.load`` can execute arbitrary code contained in the
        file. Only load files that this notebook (or another trusted source)
        produced.
        """
        with open(filepath, 'rb') as f:
            generator = pickle.load(f)
        print(f"Generator loaded from {filepath}")
        return generator

    @staticmethod
    def _to_jsonable(obj):
        """Recursively convert NumPy scalars/arrays and containers into types
        that ``json.dump`` accepts; anything else is returned unchanged."""
        convert = ModelPersistence._to_jsonable
        # np.bool_ is neither np.integer nor np.floating and is rejected by
        # the json encoder, so it needs its own branch.
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return convert(obj.tolist())
        if isinstance(obj, dict):
            # Keys may be NumPy scalars too (e.g. integer lags).
            return {
                (k.item() if isinstance(k, np.generic) else k): convert(v)
                for k, v in obj.items()
            }
        if isinstance(obj, (list, tuple, set)):
            return [convert(v) for v in obj]
        return obj

    @staticmethod
    def save_results(results, bootstrap_results, filepath):
        """Write ``results`` and ``bootstrap_results`` plus a timestamp to
        ``filepath`` as JSON.

        NumPy scalars and arrays (including booleans and values nested in
        tuples) are converted to native Python types first.
        """
        save_dict = ModelPersistence._to_jsonable({
            'results': results,
            'bootstrap_results': bootstrap_results,
            'timestamp': pd.Timestamp.now().isoformat()
        })

        with open(filepath, 'w', encoding='utf-8') as f:
            json.dump(save_dict, f, indent=2)
        print(f"Results saved to {filepath}")

# Save models and results
# The generator is pickled and the metrics are written as JSON; `generator`,
# `results` and `bootstrap_results` must already exist in the kernel.
ModelPersistence.save_generator(generator, 'synthetic_generator.pkl')
ModelPersistence.save_results(results, bootstrap_results, 'evaluation_results.json')

# Summary of output files. The Markdown report listed last is written by the
# report cell, not by the two calls above.
print("\n✅ Comprehensive evaluation complete!")
print("Files saved:")
print("- synthetic_generator.pkl")
print("- evaluation_results.json")
print("- synthetic_data_evaluation_report.md")