# 🧠 EPL Model Training with Calibration
This notebook trains calibrated classifiers for Over 1.5 goals, Over 2.5 goals, BTTS (both teams to score), and Match Result, plus an uncalibrated random-forest regressor for total corners.

In [None]:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.calibration import CalibratedClassifierCV
import joblib


In [None]:

# Read the match table from the Excel workbook.
data = pd.read_excel("epl_team_form_features_updated.xlsx")

# Derive the training targets from the score and corner columns.
# FTHG/FTAG are presumably full-time home/away goals -- confirm against the sheet.
# NOTE(review): a missing FTHG/FTAG makes the comparisons below evaluate to
# False, so such rows would be labelled 0 -- confirm the sheet has no gaps.
total_goals = data['FTHG'] + data['FTAG']
data['Over_1.5'] = total_goals.ge(2).astype(int)
data['Over_2.5'] = total_goals.ge(3).astype(int)
data['BTTS_Label'] = (data['FTHG'].gt(0) & data['FTAG'].gt(0)).astype(int)
data['Total_Corners'] = data['HC'].add(data['AC'])
# The result column is copied unchanged; it is label-encoded later.
data['Result'] = data['FTR']


In [None]:

# Select features
# Model inputs are every column whose name starts with "Home_" or "Away_";
# missing values in those columns are replaced with 0.
feature_cols = [name for name in data.columns if name.startswith(("Home_", "Away_"))]
X = data[feature_cols].fillna(0)

# Targets: the three 0/1 labels and the numeric corner total.
y_15, y_25, y_btts = data['Over_1.5'], data['Over_2.5'], data['BTTS_Label']
y_corners = data['Total_Corners']

# Encode the result labels as integers; le_result retains the label mapping.
le_result = LabelEncoder()
y_result = le_result.fit_transform(data['Result'])


In [None]:

# Train/Test split
# A single call partitions the features and all five targets with the same
# shuffled row indices, so every y_* stays aligned with X_train / X_test.
(
    X_train_raw, X_test_raw,
    y_train_15, y_test_15,
    y_train_25, y_test_25,
    y_train_btts, y_test_btts,
    y_train_corners, y_test_corners,
    y_train_result, y_test_result,
) = train_test_split(
    X, y_15, y_25, y_btts, y_corners, y_result,
    test_size=0.2, random_state=42,
)

# Standardize features
# The scaler is fitted on the training rows only: fitting it on the full
# dataset would let test-set means/variances leak into the training features
# and into the scaler that is later saved for inference.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full scaled matrix, kept so any cell that still reads X_scaled keeps working.
X_scaled = scaler.transform(X)


In [None]:

# Train calibrated models
def _calibrated_fit(estimator, targets):
    """Wrap *estimator* in CalibratedClassifierCV and fit it on X_train against *targets*."""
    return CalibratedClassifierCV(estimator).fit(X_train, targets)


# Over 1.5 / Over 2.5: logistic regression with max_iter raised to 200.
lr_15 = _calibrated_fit(LogisticRegression(max_iter=200), y_train_15)
lr_25 = _calibrated_fit(LogisticRegression(max_iter=200), y_train_25)

# BTTS and match result: 100-tree random forests with a fixed seed.
rf_btts = _calibrated_fit(
    RandomForestClassifier(n_estimators=100, random_state=42), y_train_btts
)
rf_result = _calibrated_fit(
    RandomForestClassifier(n_estimators=100, random_state=42), y_train_result
)

# Total corners is a numeric target, so a plain regressor is used (no calibration wrapper).
rf_corners = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train_corners)


In [None]:

# Save models
# Each fitted artifact is written with joblib under the file name it is keyed by.
artifacts = {
    'lr_model_over_1_5_calibrated.joblib': lr_15,
    'lr_model_over_2_5_calibrated.joblib': lr_25,
    'btts_model_calibrated.joblib': rf_btts,
    'win_model_calibrated.joblib': rf_result,
    'corner_model.joblib': rf_corners,
    'scaler_model.joblib': scaler,
    # The result model was trained on integer codes produced by le_result;
    # persisting the encoder lets consumers map predicted classes back to
    # the original result labels via inverse_transform.
    'result_label_encoder.joblib': le_result,
}
for path, fitted in artifacts.items():
    joblib.dump(fitted, path)

print("✅ All models trained and saved successfully.")
