In [None]:
%run "./00_setup_and_config"

# Generate Calibrated Synthetic Predictions - All Commodities

**Enhanced with comprehensive validation output saved to a pickle file:**
- Point accuracy: Median prediction has target MAPE (aligned with forecast_agent)
- Distribution calibration: Prediction intervals properly calibrated
- Includes 100% accurate scenario (perfect foresight for testing)
- **NEW**: All validation details are saved to a single pickle file (`validation_results_full.pkl`) for detailed review

**Accuracy levels:**
- 100% accurate: MAPE = 0%, MAE = 0 (all predictions exactly match actuals)
- 90% accurate: MAPE = 10%
- 80% accurate: MAPE = 20%
- 70% accurate: MAPE = 30%
- 60% accurate: MAPE = 40%

In [None]:
import pandas as pd
import numpy as np
import os
import gc
import time
import pickle
from datetime import datetime
from builtins import min as builtin_min, max as builtin_max

In [None]:
# Configuration: tunable constants for the synthetic prediction run.
SYNTHETIC_START_DATE = '2022-01-01'   # first date for which predictions are generated
ACCURACY_LEVELS = [1.00, 0.90, 0.80, 0.70, 0.60]  # 100%, 90%, 80%, 70%, 60%
VALIDATION_OUTPUT_FILE = 'validation_results_full.pkl'  # pickle holding all validation details

# Echo the configuration once so the run log records what was used.
_accuracy_labels = [f'{level:.0%}' for level in ACCURACY_LEVELS]
_config_banner = [
    "Synthetic prediction configuration:",
    f"  Synthetic start date: {SYNTHETIC_START_DATE}",
    f"  Accuracy levels: {_accuracy_labels}",
    f"  Validation output: {VALIDATION_OUTPUT_FILE}",
    "\nAccuracy definition (aligned with forecast_agent):",
    "  - Point forecast: Median has target MAPE",
    "  - Distribution: Calibrated prediction intervals",
    "  - Validation: MAE, MAPE, Directional Accuracy, CRPS (saved to pickle)",
    "  - 100% accurate: Perfect foresight (MAPE = 0%, MAE = 0)",
]
print("\n".join(_config_banner))

## Load Market Data

In [None]:
# Pull the full market price history into pandas and normalise the date column.
MARKET_TABLE = "commodity.bronze.market"
print(f"\nLoading price data from {MARKET_TABLE}...")

market_df = spark.table(MARKET_TABLE).toPandas()
market_df['date'] = pd.to_datetime(market_df['date'])

print("✓ Loaded market price data (FULL HISTORY)")

# Row count per commodity, ordered by commodity name.
commodity_counts = market_df.groupby('commodity').size()
print("Available commodities:")
for name, n_rows in commodity_counts.items():
    print(f"  - {name}: {n_rows} rows")

date_column = market_df['date']
print(f"\nDate range: {date_column.min()} to {date_column.max()}")

## Calibrated Prediction Generation

In [None]:
def generate_calibrated_predictions(prices_df, model_version, target_accuracy=0.90, 
                                    n_runs=2000, n_horizons=14, chunk_size=20, seed=None):
    """
    Generate calibrated synthetic predictions.

    For every forecast origin (each row of ``prices_df`` that still has
    ``n_horizons`` later rows) an ensemble of ``n_runs`` price paths is built
    around the actual future prices, which are taken by ROW offset (the next
    ``n_horizons`` rows), not by calendar offset.

    Noise model (log space), used when ``target_accuracy`` is not exactly 1.0:
      * one "centre" error per (origin, horizon), shared by all runs. This
        shifts the ensemble median away from the actual, so the median's mean
        absolute percentage error is approximately ``1 - target_accuracy``.
      * one independent error per (run, horizon) with the same sigma. Because
        the centre error and the ensemble spread share a sigma, the actual
        behaves like a draw from the predictive distribution, so an 80%
        interval contains the actual roughly 80% of the time.
      * a small per-run multiplicative level bias (std 0.02) shared across
        horizons within a run.
    Without the shared centre error the ensemble median collapses onto the
    actual regardless of ``target_accuracy``.

    When ``target_accuracy == 1.0`` every run equals the actual future prices.

    Args:
        prices_df: DataFrame with 'date' and 'price' columns, in the row order
            that defines "next day" (rows are read positionally).
        model_version: Label written to the 'model_version' column.
        target_accuracy: 1 - target MAPE of the median forecast.
        n_runs: Number of ensemble paths per forecast origin.
        n_horizons: Number of rows ahead to predict.
        chunk_size: Number of forecast origins assembled per intermediate frame.
        seed: Optional int. When given, noise comes from a dedicated
            ``np.random.RandomState(seed)``; otherwise NumPy's global random
            state is used.

    Returns:
        DataFrame with columns timestamp, run_id, day_ahead, predicted_price,
        model_version; rows ordered by origin, then run_id, then day_ahead.
        Empty (same columns) when there are not more than ``n_horizons`` rows.
    """
    output_columns = ['timestamp', 'run_id', 'day_ahead', 'predicted_price', 'model_version']
    n_dates = len(prices_df) - n_horizons
    target_mape = 1.0 - target_accuracy
    
    print(f"    Target MAPE: {target_mape:.1%}")
    print(f"    Calibration: 80% interval should contain actual ~80% of time")
    
    # Not enough history for even one full-horizon forecast: return an empty
    # frame instead of letting pd.concat fail on an empty list.
    if n_dates <= 0:
        return pd.DataFrame(columns=output_columns)
    
    rng = np.random if seed is None else np.random.RandomState(seed)
    
    # Positional views, so the function does not depend on index labels.
    dates = prices_df['date'].to_numpy()
    price_values = prices_df['price'].to_numpy()
    
    # For a half-normal, E|e| = sigma * sqrt(2/pi); invert to hit the target MAPE.
    sigma_lognormal = target_mape * np.sqrt(np.pi / 2)
    
    # Per-origin id layout: run-major, horizon-minor (matches matrix.ravel()).
    cells_per_date = n_runs * n_horizons
    run_id_block = np.repeat(np.arange(1, n_runs + 1, dtype=np.int64), n_horizons)
    day_ahead_block = np.tile(np.arange(1, n_horizons + 1, dtype=np.int64), n_runs)
    
    all_chunks = []
    
    for chunk_start in range(0, n_dates, chunk_size):
        chunk_end = builtin_min(chunk_start + chunk_size, n_dates)
        chunk_prices = []
        
        for i in range(chunk_start, chunk_end):
            future_prices = price_values[i + 1:i + 1 + n_horizons]
            
            if target_accuracy == 1.0:
                # Perfect foresight: every run reproduces the actuals exactly.
                predicted_prices_matrix = np.tile(future_prices, (n_runs, 1))
            else:
                center_errors = rng.normal(0, sigma_lognormal, (1, n_horizons))
                run_errors = rng.normal(0, sigma_lognormal, (n_runs, n_horizons))
                run_biases = rng.normal(1.0, 0.02, (n_runs, 1))
                predicted_prices_matrix = (
                    future_prices * np.exp(center_errors + run_errors) * run_biases
                )
            
            chunk_prices.append(predicted_prices_matrix.ravel())
        
        n_chunk_dates = chunk_end - chunk_start
        chunk_df = pd.DataFrame({
            'timestamp': np.repeat(dates[chunk_start:chunk_end], cells_per_date),
            'run_id': np.tile(run_id_block, n_chunk_dates),
            'day_ahead': np.tile(day_ahead_block, n_chunk_dates),
            'predicted_price': np.concatenate(chunk_prices),
            'model_version': model_version
        })
        all_chunks.append(chunk_df)
        del chunk_prices
        
        if chunk_end % 100 == 0 or chunk_end == n_dates:
            print(f"    Progress: {chunk_end}/{n_dates} dates...")
    
    final_df = pd.concat(all_chunks, ignore_index=True)
    del all_chunks
    gc.collect()
    
    return final_df

## Validation Functions (forecast_agent-aligned)

In [None]:
def calculate_crps(actuals: np.ndarray, forecast_paths: np.ndarray) -> list:
    """
    Sample-based CRPS per horizon step.

    For each horizon ``t`` with a non-NaN actual, computes
    ``E|X - y| - 0.5 * E|X - X'|`` where ``X, X'`` are drawn from the ensemble
    column ``forecast_paths[:, t]`` and ``y = actuals[t]``. The pairwise term
    is evaluated from the sorted samples:
    ``0.5 * E|X - X'| = sum_i (2i - n - 1) * x_(i) / n**2``.

    The earlier implementation computed that same sum and then multiplied it
    by 0.5 again, so it subtracted only a quarter of the pairwise spread and
    overstated CRPS (e.g. samples {0, 2} with actual 1 gave 0.75, not 0.5).
    NOTE(review): if forecast_agent still uses the halved form, its CRPS values
    will be larger than the ones returned here; confirm before comparing.

    Args:
        actuals: 1-D array of realised values, indexed by horizon step.
        forecast_paths: 2-D array of shape (n_paths, horizon).

    Returns:
        List with one CRPS value per horizon step whose actual is not NaN
        (NaN steps are skipped, so the list may be shorter than ``horizon``).
    """
    n_paths, horizon = forecast_paths.shape
    
    # Weights on the sorted samples that yield 0.5 * E|X - X'|.
    ranks = np.arange(1, n_paths + 1)
    half_spread_weights = (2 * ranks - n_paths - 1) / (n_paths ** 2)
    
    crps_values = []
    
    for t in range(horizon):
        actual = actuals[t]
        if np.isnan(actual):
            continue
        
        sorted_samples = np.sort(forecast_paths[:, t])
        
        mean_abs_error = np.mean(np.abs(sorted_samples - actual))
        half_spread = np.sum(half_spread_weights * sorted_samples)
        
        crps_values.append(mean_abs_error - half_spread)
    
    return crps_values  # Return list for detailed analysis


def calculate_directional_accuracy(actuals: pd.Series, forecasts: pd.Series) -> dict:
    """
    Directional accuracy of a forecast path (aligned with forecast_agent).

    Returns a dict with two percentages (0-100):
      * 'directional_accuracy': share of consecutive steps where the sign of
        the forecast change equals the sign of the actual change.
      * 'directional_accuracy_from_day0': share of later points where
        "above the first value" agrees between forecast and actual.
    An empty dict is returned when ``actuals`` has fewer than two points.
    """
    n_points = len(actuals)
    if n_points <= 1:
        return {}
    
    # Step-to-step direction: compare the signs of consecutive differences.
    actual_moves = np.sign(actuals.diff().dropna())
    forecast_moves = np.sign(forecasts.diff().dropna())
    n_matching_moves = (actual_moves == forecast_moves).sum()
    metrics = {
        'directional_accuracy': float(n_matching_moves / len(actual_moves) * 100)
    }
    
    # Direction relative to the first point of each series.
    base_actual = actuals.iloc[0]
    base_forecast = forecasts.iloc[0]
    n_matching_from_base = sum(
        1
        for pos in range(1, n_points)
        if (actuals.iloc[pos] > base_actual) == (forecasts.iloc[pos] > base_forecast)
    )
    metrics['directional_accuracy_from_day0'] = float(
        n_matching_from_base / (n_points - 1) * 100
    )
    
    return metrics

In [None]:
def validate_predictions(predictions_df, prices_df, commodity, model_version, target_accuracy,
                         n_horizons=14, crps_sample_size=50):
    """
    Comprehensive validation with ALL details saved for review.
    Returns complete validation data structure.

    The median across runs is the point forecast for each
    (timestamp, day_ahead). Each forecast is compared with the price found
    ``day_ahead`` ROWS after the forecast origin in ``prices_df``. This is the
    same positional indexing generate_calibrated_predictions uses to pick
    future prices, so predictions and actuals stay aligned when the price
    history has calendar gaps (e.g. weekends). With gap-free daily data this
    is identical to a calendar-day lookup.

    Args:
        predictions_df: Long frame with columns timestamp, run_id, day_ahead,
            predicted_price.
        prices_df: Frame with 'date' and 'price' columns, in the same row
            order that was used to generate the predictions.
        commodity: Commodity label, copied into the result.
        model_version: Model label, copied into the result.
        target_accuracy: 1 - target MAPE.
        n_horizons: Unused; kept so existing callers keep working.
        crps_sample_size: Maximum number of forecast origins sampled (without
            replacement, from NumPy's global random state) for CRPS.

    Returns:
        dict of summary metrics and detail DataFrames, or None when no
        forecast could be matched to an actual price.
    """
    print(f"\n  Validating predictions (forecast_agent-aligned metrics)...")
    
    # Compute medians
    medians = predictions_df.groupby(['timestamp', 'day_ahead'])['predicted_price'].median().reset_index()
    medians.columns = ['timestamp', 'day_ahead', 'median_pred']
    
    prices_df = prices_df.copy()
    prices_df['date'] = pd.to_datetime(prices_df['date'])
    prices_df = prices_df.reset_index(drop=True)
    price_values = prices_df['price'].to_numpy()
    
    # Map each date to its row position (first occurrence wins on duplicates).
    date_to_row = pd.Series(np.arange(len(prices_df)), index=prices_df['date'])
    date_to_row = date_to_row[~date_to_row.index.duplicated(keep='first')]
    
    # Actual for (origin, k) is the price k rows after the origin row.
    origin_row = medians['timestamp'].map(date_to_row)
    target_row = origin_row + medians['day_ahead'].astype(int)
    has_actual = origin_row.notna() & (target_row >= 0) & (target_row < len(prices_df))
    
    if not has_actual.any():
        print(f"    ⚠️  No matching actuals found")
        return None
    
    results_df = medians.loc[has_actual, ['timestamp', 'day_ahead', 'median_pred']].reset_index(drop=True)
    results_df['day_ahead'] = results_df['day_ahead'].astype(int)
    results_df['actual'] = price_values[target_row[has_actual].astype(int).to_numpy()]
    abs_error = (results_df['median_pred'] - results_df['actual']).abs()
    results_df['ape'] = abs_error / results_df['actual']
    results_df['ae'] = abs_error
    
    target_mape = 1.0 - target_accuracy
    
    # Overall metrics
    overall_mae = results_df['ae'].mean()
    overall_mape = results_df['ape'].mean()
    median_ape = results_df['ape'].median()
    pct90_ape = results_df['ape'].quantile(0.9)
    
    print(f"\n    Overall: MAE=${overall_mae:.2f}, MAPE={overall_mape:.1%} (target: {target_mape:.1%})")
    
    # Per-horizon metrics
    per_horizon = results_df.groupby('day_ahead').agg({
        'ae': ['mean', 'std', 'min', 'max'],
        'ape': ['mean', 'std', 'min', 'max'],
        'timestamp': 'count'
    })
    per_horizon.columns = ['mae_mean', 'mae_std', 'mae_min', 'mae_max', 
                          'mape_mean', 'mape_std', 'mape_min', 'mape_max', 'n_samples']
    
    print(f"\n    Per-Horizon Summary:")
    for horizon in sorted(per_horizon.index):
        h_mape = per_horizon.loc[horizon, 'mape_mean']
        # Flag horizons whose MAPE exceeds the target by more than 15%.
        status = '✓' if h_mape <= target_mape * 1.15 else '⚠️'
        print(f"      Day {horizon:2d}: MAPE={h_mape:5.1%} {status}")
    
    # Directional accuracy by timestamp (one pass over the groups).
    timestamps = results_df['timestamp'].unique()
    directional_by_timestamp = []
    
    for ts, ts_data in results_df.groupby('timestamp', sort=False):
        if len(ts_data) >= 2:
            ts_data = ts_data.sort_values('day_ahead')
            actuals_series = pd.Series(ts_data['actual'].values)
            forecasts_series = pd.Series(ts_data['median_pred'].values)
            
            dir_metrics = calculate_directional_accuracy(actuals_series, forecasts_series)
            dir_metrics['timestamp'] = ts
            directional_by_timestamp.append(dir_metrics)
    
    directional_df = pd.DataFrame(directional_by_timestamp)
    
    avg_dir = None
    avg_dir_day0 = None
    if len(directional_df) > 0:
        avg_dir = float(directional_df['directional_accuracy'].mean())
        avg_dir_day0 = float(directional_df['directional_accuracy_from_day0'].mean())
        print(f"\n    Directional: {avg_dir:.1f}% (day-to-day), {avg_dir_day0:.1f}% (from day 0)")
    
    # CRPS by timestamp (sample)
    # builtin_min is used because the file imports it to guard against a
    # shadowed `min` in the notebook namespace.
    crps_by_timestamp = []
    sample_timestamps = np.random.choice(
        timestamps, size=builtin_min(crps_sample_size, len(timestamps)), replace=False
    )
    
    for ts in sample_timestamps:
        ts_predictions = predictions_df[predictions_df['timestamp'] == ts]
        forecast_matrix = ts_predictions.pivot_table(
            index='run_id', columns='day_ahead', values='predicted_price'
        ).values
        
        ts_actuals = results_df[results_df['timestamp'] == ts].sort_values('day_ahead')['actual'].values
        
        # Only score origins where every forecast horizon has an actual.
        if len(ts_actuals) == forecast_matrix.shape[1]:
            crps_values = calculate_crps(ts_actuals, forecast_matrix)
            if crps_values:
                crps_by_timestamp.append({
                    'timestamp': ts,
                    'crps_mean': np.mean(crps_values),
                    'crps_values': crps_values
                })
    
    crps_df = pd.DataFrame(crps_by_timestamp)
    avg_crps = None
    if len(crps_df) > 0:
        avg_crps = float(crps_df['crps_mean'].mean())
        print(f"    CRPS: ${avg_crps:.2f}")
    
    # Coverage
    intervals = predictions_df.groupby(['timestamp', 'day_ahead'])['predicted_price'].agg(
        p10=lambda x: x.quantile(0.1),
        p90=lambda x: x.quantile(0.9),
        p05=lambda x: x.quantile(0.05),
        p95=lambda x: x.quantile(0.95)
    ).reset_index()
    
    validation = results_df.merge(intervals, on=['timestamp', 'day_ahead'])
    coverage_80 = ((validation['actual'] >= validation['p10']) & 
                   (validation['actual'] <= validation['p90'])).mean()
    coverage_90 = ((validation['actual'] >= validation['p05']) & 
                   (validation['actual'] <= validation['p95'])).mean()
    
    print(f"    Coverage: 80%={coverage_80:.1%}, 90%={coverage_90:.1%}")
    print(f"  ✓ Validation complete")
    
    # Return COMPLETE validation data
    return {
        'commodity': commodity,
        'model_version': model_version,
        'target_accuracy': target_accuracy,
        'target_mape': target_mape,
        'generation_timestamp': datetime.now(),
        
        # Overall metrics
        'overall_mae': float(overall_mae),
        'overall_mape': float(overall_mape),
        'median_ape': float(median_ape),
        'pct90_ape': float(pct90_ape),
        
        # Detailed data
        'results_df': results_df,  # Full predictions vs actuals
        'per_horizon_metrics': per_horizon,  # Per-horizon statistics
        'directional_by_timestamp': directional_df,  # Directional accuracy per forecast
        'crps_by_timestamp': crps_df,  # CRPS per forecast
        'intervals_df': validation,  # Prediction intervals with coverage
        
        # Summary stats
        'avg_directional_accuracy': avg_dir,
        'avg_directional_accuracy_day0': avg_dir_day0,
        'avg_crps': avg_crps,
        'coverage_80': float(coverage_80),
        'coverage_90': float(coverage_90),
        'n_samples': len(results_df)
    }

## Process All Commodities

In [None]:
def process_single_commodity(commodity_name, prices_raw_pd, analysis_config, output_schema, 
                            accuracy_levels, synthetic_start_date):
    """
    Process single commodity and return validation data.

    Steps: filter ``prices_raw_pd`` to the commodity (case-insensitive), use
    'close' as the price, keep rows on/after ``synthetic_start_date``, then
    for each accuracy level generate predictions, validate them, and finally
    write all levels combined to ``{output_schema}.predictions_{commodity}``
    (overwriting the table).

    Args:
        commodity_name: Commodity to process.
        prices_raw_pd: Frame with 'commodity', 'date' and 'close' columns.
        analysis_config: Mapping with 'prediction_runs' and 'forecast_horizon'.
        output_schema: Schema prefix for the output table name.
        accuracy_levels: Iterable of target accuracies (e.g. 0.9 for 90%).
        synthetic_start_date: Earliest date to generate predictions for.

    Returns:
        dict with 'commodity', 'table' and 'validation_data' (list of the
        non-empty validation results, one per accuracy level).

    Raises:
        ValueError: when the filtered history does not have more rows than the
            forecast horizon, so no prediction could be generated.
    """
    print(f"\n{'='*80}")
    print(f"PROCESSING: {commodity_name.upper()}")
    print(f"{'='*80}")
    
    # Prepare prices
    print(f"\nPreparing price data...")
    prices_full = prices_raw_pd[prices_raw_pd['commodity'].str.lower() == commodity_name.lower()].copy()
    prices_full['date'] = pd.to_datetime(prices_full['date'])
    prices_full['price'] = prices_full['close']
    prices_full = prices_full[['date', 'price']].sort_values('date').reset_index(drop=True)
    
    print(f"✓ Full history: {len(prices_full)} days")
    
    prices = prices_full[prices_full['date'] >= synthetic_start_date].copy().reset_index(drop=True)
    print(f"✓ Filtered to {len(prices)} days")
    
    n_runs = analysis_config['prediction_runs']
    horizon = analysis_config['forecast_horizon']
    
    # Fail early with an actionable message instead of a downstream error
    # from concatenating / saving an empty result.
    if len(prices) <= horizon:
        raise ValueError(
            f"{commodity_name}: only {len(prices)} price rows on/after "
            f"{synthetic_start_date}; need more than {horizon} to generate predictions"
        )
    
    # Generate predictions
    all_predictions = []
    validation_data = []
    
    for accuracy in accuracy_levels:
        # round() before int(): plain truncation turns e.g. 0.57 * 100
        # (56.99999999999999) into 56.
        model_version = f"synthetic_acc{int(round(accuracy * 100))}"
        print(f"\n  {model_version}: {accuracy:.0%} accurate")
        
        start_time = time.time()
        predictions_df = generate_calibrated_predictions(
            prices, model_version, accuracy,
            n_runs,
            horizon, 20
        )
        
        elapsed = time.time() - start_time
        print(f"    ✓ Generated {len(predictions_df):,} rows in {elapsed:.1f}s")
        
        # Validate
        val_data = validate_predictions(
            predictions_df, prices, commodity_name, model_version, 
            accuracy, horizon
        )
        
        if val_data:
            validation_data.append(val_data)
        
        all_predictions.append(predictions_df)
        del predictions_df
        gc.collect()
    
    # Combine and save
    print(f"\nCombining all accuracy levels...")
    combined = pd.concat(all_predictions, ignore_index=True)
    print(f"✓ Combined: {len(combined):,} rows")
    
    del all_predictions
    gc.collect()
    
    # Save to Delta
    predictions_table = f"{output_schema}.predictions_{commodity_name.lower()}"
    print(f"\nSaving to {predictions_table}...")
    spark.createDataFrame(combined).write.mode("overwrite").option("overwriteSchema", "true").saveAsTable(predictions_table)
    print(f"✓ Saved {spark.table(predictions_table).count():,} rows")
    
    del combined
    gc.collect()
    
    print(f"\n✓ {commodity_name.upper()} COMPLETE")
    
    return {
        'commodity': commodity_name,
        'table': predictions_table,
        'validation_data': validation_data
    }

In [None]:
# Process all commodities
# Each commodity runs independently: a failure is reported with its traceback
# and the loop moves on to the next commodity.
import traceback

all_results = []            # one {'commodity', 'table'} entry per successful commodity
all_validation_data = {}    # commodity name -> list of validation results

for commodity_name in COMMODITY_CONFIGS.keys():
    try:
        result = process_single_commodity(
            commodity_name,
            market_df,
            ANALYSIS_CONFIG,
            OUTPUT_SCHEMA,
            ACCURACY_LEVELS,
            SYNTHETIC_START_DATE,
        )
        
        all_results.append({field: result[field] for field in ('commodity', 'table')})
        
        # Store validation data by commodity
        all_validation_data[commodity_name] = result['validation_data']
        
    except Exception as exc:
        print(f"\n❌ Error processing {commodity_name.upper()}: {exc}")
        traceback.print_exc()

## Save Complete Validation Data

In [None]:
# Save ALL validation details to pickle
_rule = '=' * 80
print(f"\n{_rule}")
print("SAVING VALIDATION DATA")
print(f"{_rule}")

# Run configuration stored next to the results so the pickle is self-describing.
_run_config = {
    'synthetic_start_date': SYNTHETIC_START_DATE,
    'accuracy_levels': ACCURACY_LEVELS,
    'prediction_runs': ANALYSIS_CONFIG['prediction_runs'],
    'forecast_horizon': ANALYSIS_CONFIG['forecast_horizon']
}

validation_output = {
    'generation_timestamp': datetime.now(),
    'config': _run_config,
    'commodities': all_validation_data,
    'summary': all_results
}

with open(VALIDATION_OUTPUT_FILE, 'wb') as pickle_file:
    pickle.dump(validation_output, pickle_file)

_size_mb = os.path.getsize(VALIDATION_OUTPUT_FILE) / (1024*1024)
print(f"\n✓ Saved complete validation data to: {VALIDATION_OUTPUT_FILE}")
print(f"  File size: {_size_mb:.1f} MB")
print("\n".join([
    "\n  Contains:",
    "    - Full predictions vs actuals DataFrames",
    "    - Per-horizon metrics (mean, std, min, max)",
    "    - Directional accuracy by timestamp",
    "    - CRPS values by timestamp",
    "    - Prediction intervals with coverage",
    "    - All summary statistics",
]))

## Summary

In [None]:
# Final run summary: list the tables written, or warn when nothing succeeded.
_banner = "=" * 80
print("\n" + _banner)
print("GENERATION COMPLETE")
print(_banner)

if not all_results:
    print("\n⚠️  No commodities processed")
else:
    print(f"\n✓ Processed {len(all_results)} commodities")
    print("\n✓ Prediction tables:")
    for entry in all_results:
        print(f"  - {entry['table']}")
    
    print(f"\n✓ Validation data saved to: {VALIDATION_OUTPUT_FILE}")
    print("  Ready for detailed review and debugging")