In [None]:
import pandas as pd
import numpy as np

# Load data

In [None]:
# Load 48-hour validation data.
# All three CSVs use ';' as the field separator and ',' as the decimal mark.
actual_48h = pd.read_csv('data/fva_data/validation_48h_actual.csv', sep=';', decimal=',')
actual_48h['measured_at'] = pd.to_datetime(actual_48h['measured_at'])

baseline_48h = pd.read_csv('data/fva_data/validation_48h_baseline.csv', sep=';', decimal=',')
baseline_48h['measured_at'] = pd.to_datetime(baseline_48h['measured_at'])

my_predictions_48h = pd.read_csv('data/fva_data/preds_2024-09-01-00_48h.csv', sep=';', decimal=',')

# Ensure column alignment: keep the timestamp plus the same group columns, in the
# same order, as the actuals. `.copy()` turns the selection into an independent
# frame so the column assignments below do not trigger SettingWithCopyWarning.
group_cols = [col for col in actual_48h.columns if col != 'measured_at']
my_predictions_48h = my_predictions_48h[['measured_at'] + group_cols].copy()

# Parse the prediction timestamps the same way as the actual/baseline frames so
# all three 'measured_at' columns share one dtype.
my_predictions_48h['measured_at'] = pd.to_datetime(my_predictions_48h['measured_at'])

# Convert all group columns to numeric. errors='coerce' turns any value that
# cannot be parsed into NaN instead of raising, so downstream code must mask NaNs.
for col in group_cols:
    actual_48h[col] = pd.to_numeric(actual_48h[col], errors='coerce')
    baseline_48h[col] = pd.to_numeric(baseline_48h[col], errors='coerce')
    my_predictions_48h[col] = pd.to_numeric(my_predictions_48h[col], errors='coerce')

print("48-hour data loaded successfully!")
print(f"Actual shape: {actual_48h.shape}")
print(f"Predictions shape: {my_predictions_48h.shape}")
print(f"\nData types check:")
print(f"Actual dtypes: {actual_48h[group_cols[0]].dtype}")
print(f"Predictions dtypes: {my_predictions_48h[group_cols[0]].dtype}")

In [None]:
# Load 12-month validation data.
# All three CSVs use ';' as the field separator and ',' as the decimal mark.
actual_12m = pd.read_csv('data/fva_data/validation_12m_actual.csv', sep=';', decimal=',')
actual_12m['measured_at'] = pd.to_datetime(actual_12m['measured_at'])

baseline_12m = pd.read_csv('data/fva_data/validation_12m_baseline.csv', sep=';', decimal=',')
baseline_12m['measured_at'] = pd.to_datetime(baseline_12m['measured_at'])

my_predictions_12m = pd.read_csv('data/fva_data/noisy_validation_12m_actual.csv', sep=';', decimal=',')

# Ensure column alignment: keep the timestamp plus the same group columns, in the
# same order, as the actuals. `.copy()` turns the selection into an independent
# frame so the column assignments below do not trigger SettingWithCopyWarning.
group_cols = [col for col in actual_12m.columns if col != 'measured_at']
my_predictions_12m = my_predictions_12m[['measured_at'] + group_cols].copy()

# Parse the prediction timestamps the same way as the actual/baseline frames so
# all three 'measured_at' columns share one dtype.
my_predictions_12m['measured_at'] = pd.to_datetime(my_predictions_12m['measured_at'])

# Convert all group columns to numeric. errors='coerce' turns any value that
# cannot be parsed into NaN instead of raising, so downstream code must mask NaNs.
for col in group_cols:
    actual_12m[col] = pd.to_numeric(actual_12m[col], errors='coerce')
    baseline_12m[col] = pd.to_numeric(baseline_12m[col], errors='coerce')
    my_predictions_12m[col] = pd.to_numeric(my_predictions_12m[col], errors='coerce')

print("12-month data loaded successfully!")
print(f"Actual shape: {actual_12m.shape}")
print(f"Predictions shape: {my_predictions_12m.shape}")
print(f"\nData types check:")
print(f"Actual dtypes: {actual_12m[group_cols[0]].dtype}")
print(f"Predictions dtypes: {my_predictions_12m[group_cols[0]].dtype}")

# Evaluate Models

In [None]:
# NOTE: calculate_mape is defined again in a later cell of this notebook; when the
# notebook runs top to bottom, that later definition replaces this one. Keep the
# two copies in sync (or remove one of them).
def calculate_mape(actual, predicted):
    """Calculate Mean Absolute Percentage Error (MAPE), in percent.

    Parameters
    ----------
    actual : array-like
        Observed values. Lists, tuples, NumPy arrays and pandas Series are
        accepted; the input is converted to a float array and compared
        positionally with ``predicted``.
    predicted : array-like
        Forecast values, same length as ``actual``.

    Returns
    -------
    float
        Mean of ``|actual - predicted| / |actual| * 100`` over the positions
        where ``actual`` is non-zero. NaN when no position has a non-zero
        actual. NaN values among the retained pairs are not removed here and
        make the result NaN, so callers should mask them first.
    """
    # Convert up front so element-wise comparison and boolean indexing work for
    # any array-like input, and so pandas index alignment cannot reorder pairs.
    actual = np.asarray(actual, dtype=float)
    predicted = np.asarray(predicted, dtype=float)

    # Avoid division by zero: positions with a zero actual are excluded.
    mask = actual != 0
    if not mask.any():
        # Nothing to average; return NaN directly instead of letting np.mean
        # emit a "Mean of empty slice" RuntimeWarning.
        return np.nan

    ape = np.abs((actual[mask] - predicted[mask]) / actual[mask]) * 100
    return np.mean(ape)

# NOTE: calculate_fva is defined again in a later cell of this notebook; when the
# notebook runs top to bottom, that later definition replaces this one. Keep the
# two copies in sync (or remove one of them).
def calculate_fva(actual_df, baseline_df, prediction_df):
    """
    Calculate Forecast Value Added (FVA) percentage.
    FVA% = 100 × (Error_baseline – Error_model) / Error_baseline

    The error is the MAPE averaged over all group columns, i.e. every column
    of ``actual_df`` except 'measured_at'. Rows are compared positionally
    (no join on 'measured_at'), so the three frames must share row order.

    Parameters
    ----------
    actual_df, baseline_df, prediction_df : pandas.DataFrame
        Frames holding the same numeric group columns as ``actual_df``.

    Returns
    -------
    tuple
        ``(fva, avg_mape_baseline, avg_mape_model)``. All three are NaN when
        no group column yields a usable MAPE; ``fva`` alone is NaN when the
        average baseline MAPE is zero, because the ratio is then undefined.
    """
    group_cols = [col for col in actual_df.columns if col != 'measured_at']

    all_mape_baseline = []
    all_mape_model = []

    for col in group_cols:
        actual = actual_df[col].values
        baseline = baseline_df[col].values
        predicted = prediction_df[col].values

        # Keep only rows where all three series have a value, so baseline and
        # model are scored on exactly the same observations.
        valid_mask = ~(np.isnan(actual) | np.isnan(baseline) | np.isnan(predicted))
        if not valid_mask.any():
            continue

        mape_baseline = calculate_mape(actual[valid_mask], baseline[valid_mask])
        mape_model = calculate_mape(actual[valid_mask], predicted[valid_mask])

        # calculate_mape yields NaN when every remaining actual is zero; skip
        # such a group instead of letting it turn the overall average into NaN.
        if np.isnan(mape_baseline) or np.isnan(mape_model):
            continue

        all_mape_baseline.append(mape_baseline)
        all_mape_model.append(mape_model)

    if not all_mape_baseline:
        # No group could be evaluated: report NaN explicitly rather than
        # averaging empty lists.
        return np.nan, np.nan, np.nan

    avg_mape_baseline = np.mean(all_mape_baseline)
    avg_mape_model = np.mean(all_mape_model)

    if avg_mape_baseline == 0:
        # A perfect baseline leaves no error to improve on; FVA is undefined.
        fva = np.nan
    else:
        fva = 100 * (avg_mape_baseline - avg_mape_model) / avg_mape_baseline

    return fva, avg_mape_baseline, avg_mape_model

In [None]:
# Calculate baseline MAPE for 48-hour forecast
group_cols_48h = [col for col in actual_48h.columns if col != 'measured_at']
mapes_48h = []
for col in group_cols_48h:
    actual_values = actual_48h[col].values
    baseline_values = baseline_48h[col].values

    # Drop rows where either series is NaN (the numeric conversion at load time
    # coerces unparseable values to NaN). Without this mask a single NaN makes
    # the group's MAPE, and therefore the summary below, NaN.
    valid_mask = ~(np.isnan(actual_values) | np.isnan(baseline_values))
    if valid_mask.sum() > 0:
        mape = calculate_mape(actual_values[valid_mask], baseline_values[valid_mask])
        mapes_48h.append(mape)

print(f"48-Hour Baseline MAPE: {np.mean(mapes_48h):.2f}%")
print(f"MAPE Range: {np.min(mapes_48h):.2f}% - {np.max(mapes_48h):.2f}%")
print(f"MAPE Std Dev: {np.std(mapes_48h):.2f}%")

# %%
# Baseline MAPE per group column for the 12-month forecast
group_cols_12m = [name for name in actual_12m.columns if name != 'measured_at']
mapes_12m = []
for col in group_cols_12m:
    actual_vals = actual_12m[col].values
    baseline_vals = baseline_12m[col].values

    # A row is usable only when both the actual and the baseline value are present.
    valid_mask = ~np.isnan(actual_vals) & ~np.isnan(baseline_vals)
    if not valid_mask.any():
        # No usable rows for this group: it contributes nothing to the summary.
        continue

    mape = calculate_mape(actual_vals[valid_mask], baseline_vals[valid_mask])
    mapes_12m.append(mape)

# Summary statistics across the per-group MAPE values
print(f"12-Month Baseline MAPE: {np.mean(mapes_12m):.2f}%")
print(f"MAPE Range: {np.min(mapes_12m):.2f}% - {np.max(mapes_12m):.2f}%")
print(f"MAPE Std Dev: {np.std(mapes_12m):.2f}%")

In [None]:
# NOTE: this cell re-defines calculate_mape, which an earlier cell of this
# notebook already defines. When the notebook runs top to bottom, this later
# definition is the one the cells below use. Keep the two copies in sync (or
# remove one of them).
def calculate_mape(actual, predicted):
    """Calculate Mean Absolute Percentage Error (MAPE), in percent.

    Parameters
    ----------
    actual : array-like
        Observed values. Lists, tuples, NumPy arrays and pandas Series are
        accepted; the input is converted to a float array and compared
        positionally with ``predicted``.
    predicted : array-like
        Forecast values, same length as ``actual``.

    Returns
    -------
    float
        Mean of ``|actual - predicted| / |actual| * 100`` over the positions
        where ``actual`` is non-zero. NaN when no position has a non-zero
        actual. NaN values among the retained pairs are not removed here and
        make the result NaN, so callers should mask them first.
    """
    # Convert up front so element-wise comparison and boolean indexing work for
    # any array-like input, and so pandas index alignment cannot reorder pairs.
    actual = np.asarray(actual, dtype=float)
    predicted = np.asarray(predicted, dtype=float)

    # Avoid division by zero: positions with a zero actual are excluded.
    mask = actual != 0
    if not mask.any():
        # Nothing to average; return NaN directly instead of letting np.mean
        # emit a "Mean of empty slice" RuntimeWarning.
        return np.nan

    ape = np.abs((actual[mask] - predicted[mask]) / actual[mask]) * 100
    return np.mean(ape)

# NOTE: this cell re-defines calculate_fva, which an earlier cell of this
# notebook already defines. When the notebook runs top to bottom, this later
# definition is the one the FVA cells below use. Keep the two copies in sync
# (or remove one of them).
def calculate_fva(actual_df, baseline_df, prediction_df):
    """
    Calculate Forecast Value Added (FVA) percentage.
    FVA% = 100 × (Error_baseline – Error_model) / Error_baseline

    The error is the MAPE averaged over all group columns, i.e. every column
    of ``actual_df`` except 'measured_at'. Rows are compared positionally
    (no join on 'measured_at'), so the three frames must share row order.

    Parameters
    ----------
    actual_df, baseline_df, prediction_df : pandas.DataFrame
        Frames holding the same numeric group columns as ``actual_df``.

    Returns
    -------
    tuple
        ``(fva, avg_mape_baseline, avg_mape_model)``. All three are NaN when
        no group column yields a usable MAPE; ``fva`` alone is NaN when the
        average baseline MAPE is zero, because the ratio is then undefined.
    """
    group_cols = [col for col in actual_df.columns if col != 'measured_at']

    all_mape_baseline = []
    all_mape_model = []

    for col in group_cols:
        actual = actual_df[col].values
        baseline = baseline_df[col].values
        predicted = prediction_df[col].values

        # Keep only rows where all three series have a value, so baseline and
        # model are scored on exactly the same observations.
        valid_mask = ~(np.isnan(actual) | np.isnan(baseline) | np.isnan(predicted))
        if not valid_mask.any():
            continue

        mape_baseline = calculate_mape(actual[valid_mask], baseline[valid_mask])
        mape_model = calculate_mape(actual[valid_mask], predicted[valid_mask])

        # calculate_mape yields NaN when every remaining actual is zero; skip
        # such a group instead of letting it turn the overall average into NaN.
        if np.isnan(mape_baseline) or np.isnan(mape_model):
            continue

        all_mape_baseline.append(mape_baseline)
        all_mape_model.append(mape_model)

    if not all_mape_baseline:
        # No group could be evaluated: report NaN explicitly rather than
        # averaging empty lists.
        return np.nan, np.nan, np.nan

    avg_mape_baseline = np.mean(all_mape_baseline)
    avg_mape_model = np.mean(all_mape_model)

    if avg_mape_baseline == 0:
        # A perfect baseline leaves no error to improve on; FVA is undefined.
        fva = np.nan
    else:
        fva = 100 * (avg_mape_baseline - avg_mape_model) / avg_mape_baseline

    return fva, avg_mape_baseline, avg_mape_model

# Calculate FVAs

In [None]:
# Calculate FVA for 48-hour forecast
fva_48h, mape_baseline_48h, mape_model_48h = calculate_fva(
    actual_48h,
    baseline_48h,
    my_predictions_48h
)

print("="*60)
print("48-HOUR FORECAST EVALUATION")
print("="*60)
print(f"Baseline MAPE: {mape_baseline_48h:.2f}%")
print(f"Your Model MAPE: {mape_model_48h:.2f}%")
print(f"FVA: {fva_48h:.2f}%")

if np.isnan(fva_48h):
    # NaN compares False against 0, so without this branch an undefined FVA
    # would be reported as "nan% worse than baseline".
    print("! FVA is undefined (NaN) - the model cannot be compared against the baseline")
elif fva_48h > 0:
    print(f"✓ Your model is {fva_48h:.2f}% better than baseline!")
else:
    print(f"✗ Your model is {abs(fva_48h):.2f}% worse than baseline")

In [None]:
# Calculate FVA for 12-month forecast
fva_12m, mape_baseline_12m, mape_model_12m = calculate_fva(
    actual_12m,
    baseline_12m,
    my_predictions_12m
)

print("="*60)
print("12-MONTH FORECAST EVALUATION")
print("="*60)
print(f"Baseline MAPE: {mape_baseline_12m:.2f}%")
print(f"Your Model MAPE: {mape_model_12m:.2f}%")
print(f"FVA: {fva_12m:.2f}%")

if np.isnan(fva_12m):
    # NaN compares False against 0, so without this branch an undefined FVA
    # would be reported as "nan% worse than baseline".
    print("! FVA is undefined (NaN) - the model cannot be compared against the baseline")
elif fva_12m > 0:
    print(f"✓ Your model is {fva_12m:.2f}% better than baseline!")
else:
    print(f"✗ Your model is {abs(fva_12m):.2f}% worse than baseline")