<a href="https://colab.research.google.com/github/RahulSwaroop/football-match-prediction/blob/main/Untitled8.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install necessary packages
# !pip install xgboost scikit-learn pandas joblib --quiet

# --- IMPORTS ---
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score
import joblib

# --- LOAD DATA ---
# Both CSV files are read with all of their columns; the printed column lists
# serve as a quick schema check.
# low_memory=False makes pandas infer each column's dtype from the complete
# file instead of chunk by chunk, so mixed-type columns no longer trigger the
# warning the recorded run shows for this read (presumably a DtypeWarning).
matches = pd.read_csv('/content/Matches.csv', low_memory=False)
print("Columns in Matches.csv:", matches.columns)
elo = pd.read_csv('/content/EloRatings.csv')
print("Columns in EloRatings.csv:", elo.columns)


# --- KEEP LATEST RATING FOR EACH TEAM ---
# Order the ratings by date and keep only the last row per club.
# NOTE(review): assumes `date` sorts chronologically as stored (ISO-formatted
# strings or real datetimes) — confirm.
elo_latest = elo.sort_values('date').drop_duplicates('club', keep='last').copy()

# --- CREATE DICTIONARY FOR FAST LOOKUP ---
# club name -> most recent Elo rating
elo_dict = dict(zip(elo_latest['club'], elo_latest['elo']))

# --- MAP HOME AND AWAY ELO RATINGS ---
# Team names without an entry in elo_dict map to NaN and are dropped below.
# NOTE(review): every match, whatever its date, receives the club's *latest*
# rating, so older matches are described by ratings computed after they were
# played. Matches.csv also exposes HomeElo/AwayElo columns (see the printed
# column list); presumably those are match-time ratings — confirm and consider
# using them instead.
matches['home_elo'] = matches['HomeTeam'].map(elo_dict)
matches['away_elo'] = matches['AwayTeam'].map(elo_dict)

# --- DROP ROWS WITH MISSING ELO/GOALS ---
# Rebind instead of mutating in place: same rows are kept, but the statement
# can be chained and leaves no half-modified frame behind.
matches = matches.dropna(subset=['home_elo', 'away_elo', 'FTHome', 'FTAway'])

# --- CREATE TARGET COLUMN ---
def get_result(row):
    """Classify a match outcome from the home side's point of view.

    Args:
        row: Any mapping-like object (e.g. a DataFrame row) exposing the
            full-time goal counts under the keys 'FTHome' and 'FTAway'.

    Returns:
        'Win' when the home side scored more goals, 'Loss' when it scored
        fewer, and 'Draw' otherwise. Because 'Draw' is the fall-through case,
        it is also returned when neither comparison holds (e.g. NaN goals).
    """
    home_goals = row['FTHome']
    away_goals = row['FTAway']

    # Guard clauses: decide the two decisive outcomes first.
    if home_goals > away_goals:
        return 'Win'
    if home_goals < away_goals:
        return 'Loss'
    return 'Draw'

# Label every match from the home side's perspective. The vectorised
# comparison applies the same rules as get_result() (more home goals -> 'Win',
# fewer -> 'Loss', anything else -> 'Draw') in one pass over the two columns
# instead of one Python-level function call per row.
matches['result'] = np.select(
    [matches['FTHome'] > matches['FTAway'], matches['FTHome'] < matches['FTAway']],
    ['Win', 'Loss'],
    default='Draw',
)

# --- PREPARE FEATURES AND LABELS ---
# The two mapped Elo ratings are the only model inputs; the outcome label is
# the prediction target.
X = matches[['home_elo', 'away_elo']]
y = matches['result']

# Encode the string labels as integers. The fitted encoder is kept so that
# integer predictions can be translated back to 'Win'/'Loss'/'Draw'.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# --- TRAIN-TEST SPLIT ---
# 80% train / 20% test, with a fixed seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42
)

# --- TRAIN LIGHTWEIGHT XGBOOST MODEL ---
# A deliberately small ensemble: 100 trees of depth 3 with a 0.1 learning rate,
# evaluated internally with multi-class log loss.
# `use_label_encoder` is no longer passed: the recorded run reports it as a
# parameter that is "not used", and the labels are already integer-encoded
# before fitting.
model = XGBClassifier(
    n_estimators=100,
    max_depth=3,
    learning_rate=0.1,
    eval_metric='mlogloss',
    random_state=42,  # explicit seed, matching the train/test split, for reproducible fits
)
model.fit(X_train, y_train)

# --- EVALUATE ---
# Plain accuracy on the held-out test split.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"✅ Model Trained. Accuracy: {accuracy:.2f}")

  matches = pd.read_csv('/content/Matches.csv')


Columns in Matches.csv: Index(['Division', 'MatchDate', 'MatchTime', 'HomeTeam', 'AwayTeam', 'HomeElo',
       'AwayElo', 'Form3Home', 'Form5Home', 'Form3Away', 'Form5Away', 'FTHome',
       'FTAway', 'FTResult', 'HTHome', 'HTAway', 'HTResult', 'HomeShots',
       'AwayShots', 'HomeTarget', 'AwayTarget', 'HomeFouls', 'AwayFouls',
       'HomeCorners', 'AwayCorners', 'HomeYellow', 'AwayYellow', 'HomeRed',
       'AwayRed', 'OddHome', 'OddDraw', 'OddAway', 'MaxHome', 'MaxDraw',
       'MaxAway', 'Over25', 'Under25', 'MaxOver25', 'MaxUnder25', 'HandiSize',
       'HandiHome', 'HandiAway', 'C_LTH', 'C_LTA', 'C_VHD', 'C_VAD', 'C_HTB',
       'C_PHB'],
      dtype='object')
Columns in EloRatings.csv: Index(['date', 'club', 'country', 'elo'], dtype='object')


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


✅ Model Trained. Accuracy: 0.49


In [None]:
# Install the notebook's third-party dependencies into the kernel's environment.
# NOTE(review): this cell sits after the cell that already imports these
# packages, and no versions are pinned — consider moving it to the top of the
# notebook and pinning versions for reproducibility.
%pip install xgboost scikit-learn pandas joblib



In [None]:
import joblib
import json
from google.colab import files

# --- SAVE MODEL FILES ---
# Serialise the fitted classifier and the label encoder with joblib, one file
# per object, in the order listed.
artifacts = {
    '/content/football_model.joblib': model,
    '/content/label_encoder.joblib': label_encoder,
}
for artifact_path, artifact in artifacts.items():
    joblib.dump(artifact, artifact_path)

# Write the booster's configuration (the string returned by save_config()) to
# a JSON file; optional, mostly useful for debugging or replication.
# NOTE(review): despite the file name, this appears to contain configuration
# only, not the trained model itself — confirm that this is intended.
model_json = model.get_booster().save_config()
with open('/content/xgb_model.json', 'w') as f:
    f.write(model_json)

# --- DOWNLOAD FILES ---
# Trigger a browser download for each saved file via the Colab helper.
for download_path in (*artifacts, '/content/xgb_model.json'):
    files.download(download_path)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>