# 📓 Project Glitch: Model Analysis & EDA

This notebook demonstrates the data analysis and model evaluation process for Project Glitch.

## 1. Setup & Data Loading

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from train_glitch import calculate_rolling_stats, create_targets

%matplotlib inline
sns.set_style('darkgrid')

# Load Data
try:
    df = pd.read_csv('master_data.csv')
    print(f"Loaded {len(df)} matches")
    display(df.head())
except FileNotFoundError:
    print("‚ùå master_data.csv not found!")

## 2. Feature Engineering
We calculate rolling statistics (last 5 games) to capture current form.

In [None]:
# Rolling statistics over the previous 5 games for each row of `df`.
df_features = calculate_rolling_stats(df, n_games=5)

# Attach the prediction targets, then discard every row that still contains a
# NaN in any column (presumably the first few games of a season, which lack a
# full rolling window -- TODO confirm against train_glitch).
df_final = create_targets(df_features).dropna()

print(f"Matches after preprocessing: {len(df_final)}")

## 3. Exploratory Data Analysis (EDA)
Let's look at correlations between features and targets.

In [None]:
# Model input columns used both here and by the model-evaluation cell.
features = [
    'HomeTeam_Form',
    'AwayTeam_Form',
    'Home_Avg_Goals',
    'Away_Avg_Goals',
    'Home_Avg_Conceded',
    'Away_Avg_Conceded',
    'Home_BTTS_Rate',
    'Away_BTTS_Rate',
]

# Target columns included in the correlation view.
targets = ['Target_Win', 'Target_Goals', 'Target_BTTS']

# Pairwise correlations across all feature and target columns.
corr_matrix = df_final[features + targets].corr()

# Draw the annotated heatmap on an explicitly created axes.
fig, ax = plt.subplots(figsize=(12, 10))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt='.2f', ax=ax)
ax.set_title('Feature Correlation Matrix')
plt.show()

## 4. Model Evaluation
Training the Random Forest models and checking classification reports.

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Feature matrix and the match-result target.
X = df_final[features]
y = df_final['Target_Win']

# shuffle=False keeps row order, so the last 20% of rows form the test set.
# NOTE(review): this is only a chronological hold-out if df_final is sorted by
# match date -- confirm against the data-loading step.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Fixed random_state keeps the forest reproducible across runs.
model = RandomForestClassifier(n_estimators=200, max_depth=10, random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print("Match Result Model Report:")
# Pass the label set learned during training explicitly. Without `labels`,
# classification_report derives the classes from y_test/y_pred and raises a
# ValueError when fewer than three of them appear in the hold-out slice.
# NOTE(review): target_names are matched to the sorted class labels; this
# assumes Target_Win sorts as Home, Draw, Away -- confirm in create_targets.
print(classification_report(
    y_test,
    y_pred,
    labels=model.classes_,
    target_names=['Home', 'Draw', 'Away'],
))