# Prediction vs Reality Analysis Dashboard

This notebook analyzes the performance of market predictions against actual outcomes. We'll examine:
1. Overall hit rates and trends
2. Performance by scenario type
3. Rolling window analysis
4. Statistical significance testing

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from datetime import datetime, timedelta

# Read the prediction log, convert the `date` column to datetimes, and order
# the rows chronologically (the row labels assigned by read_csv are kept).
df = (
    pd.read_csv('../data/predictions.csv')
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .sort_values('date')
)

## Overall Performance Metrics

Let's first look at the overall hit rate and performance by scenario type.

In [None]:
# Per-scenario summary: how many outcomes were recorded ('count' ignores
# missing values) and what fraction of them were hits.
hits_by_scenario = df.groupby('scenario')['hit']
scenario_hits = hits_by_scenario.agg(['count', 'mean']).rename(
    columns={'count': 'Total Predictions', 'mean': 'Hit Rate'}
)

# Share of all predictions that were hits; reused by later cells as a baseline.
overall_hit_rate = df['hit'].mean()

print(f"Overall Hit Rate: {overall_hit_rate:.2%}\n")
print("Performance by Scenario:")
print(scenario_hits)

## Rolling Window Analysis

Let's analyze how hit rates have changed over time using rolling windows over the 5, 10, and 20 most recent predictions.

In [None]:
# Calculate rolling hit rates.
# Each window is a number of consecutive rows of the date-sorted frame.
# NOTE(review): the "d"/"-day" labels below are only accurate if there is
# exactly one prediction per calendar day -- confirm against the data.
windows = [5, 10, 20]
rolling_metrics = pd.DataFrame(index=df['date'])

for window in windows:
    # Assign positionally via .to_numpy(). `rolling_metrics` is indexed by
    # date while `df['hit']` still carries df's own row labels; assigning the
    # Series directly would align on those labels, match nothing, and fill the
    # whole column with NaN (an empty plot).
    rolling_metrics[f'{window}d_hit_rate'] = (
        df['hit'].rolling(window).mean().to_numpy()
    )

# Plot rolling hit rates. The first (window - 1) points of each line are NaN
# because a full window is not yet available there.
fig, ax = plt.subplots(figsize=(12, 6))
for window in windows:
    ax.plot(rolling_metrics.index,
            rolling_metrics[f'{window}d_hit_rate'],
            label=f'{window}-day Rolling Hit Rate')

# Horizontal reference line at the full-sample hit rate
ax.axhline(y=overall_hit_rate, color='r', linestyle='--', label='Overall Hit Rate')
ax.set_title('Rolling Hit Rates Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Hit Rate')
ax.legend()
ax.grid(True)
plt.show()

## Statistical Analysis

Let's use a two-sided binomial test to determine whether our hit rates are significantly different from random chance (50%), both overall and for each scenario.

In [None]:
# Significance threshold and the hit rate expected from random guessing
ALPHA = 0.05
CHANCE_HIT_RATE = 0.5


def run_hit_rate_test(hits, p_null=CHANCE_HIT_RATE):
    """Run a binomial test of hit/miss outcomes against `p_null`.

    Parameters
    ----------
    hits : pandas.Series
        Outcomes whose sum is the number of hits (presumably 0/1 or boolean
        values -- confirm against the CSV schema). Missing values are ignored.
    p_null : float
        Hit probability under the null hypothesis.

    Returns
    -------
    tuple
        ``(n_trials, n_successes, test_result)``, where ``test_result`` is the
        object returned by ``stats.binomtest``.

    Raises
    ------
    ValueError
        If `hits` contains no non-missing outcomes.
    """
    # Drop missing outcomes so the trial count covers the same rows that
    # .sum() (which skips NaN) adds up.
    outcomes = hits.dropna()
    n_trials = len(outcomes)
    if n_trials == 0:
        raise ValueError("binomial test needs at least one recorded outcome")
    # A column containing NaN is read as float, so its sum is a float;
    # binomtest requires an integer success count.
    n_successes = int(outcomes.sum())
    return n_trials, n_successes, stats.binomtest(n_successes, n_trials, p=p_null)


# Perform binomial test for overall performance
n_trials, n_successes, binom_test = run_hit_rate_test(df['hit'])

print("Binomial Test Results:")
print(f"Number of trials: {n_trials}")
print(f"Number of successes: {n_successes}")
print(f"P-value: {binom_test.pvalue:.4f}")
print(f"Is significantly different from random chance? {binom_test.pvalue < ALPHA}")

# Test for each scenario.
# groupby(sort=False) visits scenarios in order of first appearance and skips
# rows whose scenario label is missing, which would otherwise form an empty
# subset with zero trials.
# NOTE: every scenario is tested at the same uncorrected threshold, so with
# many scenarios some may look significant by chance alone.
print("\nResults by Scenario:")
for scenario, scenario_data in df.groupby('scenario', sort=False):
    if not scenario_data['hit'].notna().any():
        print(f"\n{scenario}: no recorded outcomes, skipping")
        continue

    n_scenario, n_scenario_hits, scenario_test = run_hit_rate_test(scenario_data['hit'])

    print(f"\n{scenario}:")
    print(f"Trials: {n_scenario}")
    print(f"Successes: {n_scenario_hits}")
    print(f"Hit Rate: {n_scenario_hits/n_scenario:.2%}")
    print(f"P-value: {scenario_test.pvalue:.4f}")
    print(f"Significant? {scenario_test.pvalue < ALPHA}")