In [1]:
import torch
import torch.nn as nn
import random
from mplsoccer import Pitch
import matplotlib.pyplot as plt
import json
import torch.nn.functional as F
import joblib
import pandas as pd

In [2]:
class FootballMatchPredictor(nn.Module):
    def __init__(self, input_agg_stats_size, hidden_size=64):
        super(FootballMatchPredictor, self).__init__()
        
        # Layer for aggregated statistics
        self.agg_stats_fc = nn.Sequential(
            nn.Linear(input_agg_stats_size, hidden_size),
            # nn.BatchNorm1d(hidden_size),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(hidden_size, hidden_size // 2),
            nn.ReLU()
        )
        
        combined_input_size = hidden_size // 2
        self.fc_combined = nn.Sequential(
            nn.Linear(combined_input_size, hidden_size // 4),
            nn.ReLU(),
            nn.Linear(hidden_size // 4, 2),  # Output: 2 probabilities (over/under)
        )

    def forward(self, agg_stats):

        agg_stats_out = self.agg_stats_fc(agg_stats)

        output = self.fc_combined(agg_stats_out)
        
        # Apply softmax to get probabilities
        return F.softmax(output, dim=1)
    
class FootballMatchPredictorOutcome(nn.Module):
    def __init__(self, input_agg_stats_size, hidden_size=128):
        super(FootballMatchPredictorOutcome, self).__init__()
        
        # Layer for aggregated statistics
        self.agg_stats_fc = nn.Sequential(
            nn.Linear(input_agg_stats_size, hidden_size),
            # nn.BatchNorm1d(hidden_size),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU()
        )
        
        combined_input_size = hidden_size
        self.fc_combined = nn.Sequential(
            nn.Linear(combined_input_size, hidden_size // 2),
            nn.ReLU(),
            nn.Linear(hidden_size // 2, 3),  # Output: 3 probabilities (home win/draw/away win)
        )

    def forward(self, agg_stats):

        agg_stats_out = self.agg_stats_fc(agg_stats)

        output = self.fc_combined(agg_stats_out)
        
        # Apply softmax to get probabilities
        return F.softmax(output, dim=1)

def load_model(model_path):
    model = torch.load(model_path, map_location=torch.device('cpu'))
    model.eval()
    return model

def load_scaler(scaler_path):
    scaler = joblib.load(scaler_path)
    return scaler

def load_selected_fetures(selected_features_path):
    with open(selected_features_path, "r", encoding="utf-8") as f:
        selected_features = json.load(f)
    return selected_features

def predict_goals(input_features, model):
    with torch.no_grad():
        input_tensor = torch.tensor(input_features, dtype=torch.float32)
        prediction = model(input_tensor)
        return prediction.squeeze()[0].item(), prediction.squeeze()[1].item()
    
def predict_outcome(input_features, model):
    with torch.no_grad():
        input_tensor = torch.tensor(input_features, dtype=torch.float32)
        prediction = model(input_tensor)
        return prediction.squeeze()[0].item(), prediction.squeeze()[1].item(), prediction.squeeze()[2].item()


In [8]:
matches = pd.read_csv("../prepared_data.csv")
matches[matches["overall_away_overall_rating"].isna()]

Unnamed: 0,season,date,time,round,attendance_value,referee,home_manager,away_manager,home_captain,away_captain,...,overall_home_stat_gk_diving,overall_away_stat_gk_diving,overall_home_stat_gk_handling,overall_away_stat_gk_handling,overall_home_stat_gk_kicking,overall_away_stat_gk_kicking,overall_home_stat_gk_positioning,overall_away_stat_gk_positioning,overall_home_stat_gk_reflexes,overall_away_stat_gk_reflexes
12265,2024-2025,2024-08-24,15:00,2.0,61357.0,Anthony Taylor,Ange Postecoglou,Sean Dyche,Son Heung-min,James Tarkowski,...,,,,,,,,,,
12266,2024-2025,2024-08-24,15:00,2.0,25401.0,Darren Bond,Marco Silva,Steve Cooper,Bernd Leno,Jamie Vardy,...,,,,,,,,,,
12267,2024-2025,2024-08-24,15:00,2.0,25099.0,Robert Jones,Oliver Glasner,Lopetegui,Marc Guéhi,Jarrod Bowen,...,,,,,,,,,,
12268,2024-2025,2024-08-24,15:00,2.0,53147.0,Samuel Allison,Pep Guardiola,Kieran McKenna,Kevin De Bruyne,Sam Morsy,...,,,,,,,,,,
12269,2024-2025,2024-08-24,15:00,2.0,31150.0,Samuel Barrott,Russell Martin,Nuno Espírito Santo,Jack Stephens,Morgan Gibbs-White,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12399,2024-2025,2024-12-08,14:00,15.0,26954.0,Chris Kavanagh,Marco Silva,Mikel Arteta,Antonee Robinson,Martin Ødegaard,...,,,,,,,,,,
12400,2024-2025,2024-12-08,14:00,15.0,29180.0,Michael Salisbury,Kieran McKenna,Andoni Iraola,Sam Morsy,Adam Smith,...,,,,,,,,,,
12401,2024-2025,2024-12-08,14:00,15.0,31647.0,Stuart Attwell,Ruud van Nistelrooy,Fabian Hürzeler,Jamie Vardy,Lewis Dunk,...,,,,,,,,,,
12402,2024-2025,2024-12-08,16:30,15.0,61184.0,Anthony Taylor,Ange Postecoglou,Enzo Maresca,Son Heung-min,Enzo Fernández,...,,,,,,,,,,


In [5]:
matches = pd.read_csv("../prepared_data.csv")

scaler = load_scaler("C:/Users/MateuszAndryszak/OneDrive - GuideVision, s.r.o/Dokumenty/inżynierka/fbref/models/goals_scaler_v1.pkl")
selected_features = load_selected_fetures("C:/Users/MateuszAndryszak/OneDrive - GuideVision, s.r.o/Dokumenty/inżynierka/fbref/models/goals_features_v1.json")
model = load_model("C:/Users/MateuszAndryszak/OneDrive - GuideVision, s.r.o/Dokumenty/inżynierka/fbref/models/goals_predictor_v1.pth")

scaler_home = load_scaler("C:/Users/MateuszAndryszak/OneDrive - GuideVision, s.r.o/Dokumenty/inżynierka/fbref/models/goals_scaler_home_goals_v1.pkl")
selected_features_home = load_selected_fetures("C:/Users/MateuszAndryszak/OneDrive - GuideVision, s.r.o/Dokumenty/inżynierka/fbref/models/home_goals_features_v1.json")
model_home = load_model("C:/Users/MateuszAndryszak/OneDrive - GuideVision, s.r.o/Dokumenty/inżynierka/fbref/models/goals_home_predictor_v1.pth")

scaler_away = load_scaler("C:/Users/MateuszAndryszak/OneDrive - GuideVision, s.r.o/Dokumenty/inżynierka/fbref/models/goals_scaler_away_goals_v1.pkl")
selected_features_away = load_selected_fetures("C:/Users/MateuszAndryszak/OneDrive - GuideVision, s.r.o/Dokumenty/inżynierka/fbref/models/away_goals_features_v1.json")
model_away = load_model("C:/Users/MateuszAndryszak/OneDrive - GuideVision, s.r.o/Dokumenty/inżynierka/fbref/models/goals_away_predictor_v1.pth")

scaler_outcome = load_scaler("C:/Users/MateuszAndryszak/OneDrive - GuideVision, s.r.o/Dokumenty/inżynierka/fbref/models/outcome_scaler.pkl")
selected_features_outcome = load_selected_fetures("C:/Users/MateuszAndryszak/OneDrive - GuideVision, s.r.o/Dokumenty/inżynierka/fbref/models/outcome_features.json")
model_outcome = load_model("C:/Users/MateuszAndryszak/OneDrive - GuideVision, s.r.o/Dokumenty/inżynierka/fbref/models/football_match_predictor_v1.pth")

filtered_matches = matches[[col for col in matches.columns if 'last5' in col or 'matches_since' in col or 'overall' in col or 'tiredness' in col or 'h2h' in col]]
filtered_matches = filtered_matches.drop(columns = ["home_last5_possession", "away_last5_possession"])
filtered_matches = filtered_matches[~filtered_matches.isna().any(axis=1)]
all_features = filtered_matches.iloc[38]

print(matches[["date", "home_team", "away_team"]].iloc[38])

all_features_scaled = scaler.transform([all_features])
input_features = all_features_scaled[:, [filtered_matches.columns.get_loc(col) for col in selected_features]]

all_features_scaled_home = scaler_home.transform([all_features])
input_features_home = all_features_scaled_home[:, [filtered_matches.columns.get_loc(col) for col in selected_features_home]]

all_features_scaled_away = scaler_home.transform([all_features])
input_features_away = all_features_scaled_away[:, [filtered_matches.columns.get_loc(col) for col in selected_features_away]]

all_features_scaled_outcome = scaler_outcome.transform([all_features])
input_features_outcome = all_features_scaled_outcome[:, [filtered_matches.columns.get_loc(col) for col in selected_features_outcome]]

print(predict_goals(input_features, model))
print(predict_goals(input_features_home, model_home))
print(predict_goals(input_features_away, model_away))
print(predict_outcome(input_features_outcome, model_outcome))

date                  2017-08-26
home_team    Eintracht Frankfurt
away_team              Wolfsburg
Name: 38, dtype: object
(0.6490583419799805, 0.35094162821769714)
(0.7008497714996338, 0.2991502583026886)
(0.8535451292991638, 0.14645490050315857)
(0.2488788515329361, 0.6650015711784363, 0.08611955493688583)


In [19]:
import math
from scipy.optimize import minimize_scalar

def poisson_cdf(lmbda, k):
    return sum((lmbda ** i) * math.exp(-lmbda) / math.factorial(i) for i in range(k + 1))

def solve_lambda(p_over, line):
    def objective(lmbda):
        p_leq = poisson_cdf(lmbda, line)
        return abs(1 - p_leq - p_over)

    result = minimize_scalar(objective, bounds=(0, 10), method='bounded')
    return result.x

def poisson_probability(lmbda, line, over=True):
    if over:
        return 1 - poisson_cdf(lmbda, math.floor(line))
    else:
        return poisson_cdf(lmbda, math.floor(line))

def exact_score_probability(home_lambda, away_lambda, home_goals, away_goals):
    p_home = (home_lambda ** home_goals) * math.exp(-home_lambda) / math.factorial(home_goals)
    p_away = (away_lambda ** away_goals) * math.exp(-away_lambda) / math.factorial(away_goals)
    return p_home * p_away

In [26]:
def get_probabilities(all_fetures):
    all_features_scaled = scaler.transform([all_features])
    input_features = all_features_scaled[:, [filtered_matches.columns.get_loc(col) for col in selected_features]]

    all_features_scaled_home = scaler_home.transform([all_features])
    input_features_home = all_features_scaled_home[:, [filtered_matches.columns.get_loc(col) for col in selected_features_home]]

    all_features_scaled_away = scaler_home.transform([all_features])
    input_features_away = all_features_scaled_away[:, [filtered_matches.columns.get_loc(col) for col in selected_features_away]]

    all_features_scaled_outcome = scaler_outcome.transform([all_features])
    input_features_outcome = all_features_scaled_outcome[:, [filtered_matches.columns.get_loc(col) for col in selected_features_outcome]]

    probabilities = {}

    probabilities["under25"], probabilities["over25"] = predict_goals(input_features, model)
    probabilities["home_under15"], probabilities["home_over15"] = predict_goals(input_features_home, model_home)
    probabilities["away_under15"], probabilities["away_over15"] = predict_goals(input_features_away, model_away)
    probabilities["draw"], probabilities["prob_home_win"], probabilities["prob_away_win"] = predict_outcome(input_features_outcome, model_outcome)
    probabilities["lambda_goals"] = solve_lambda(probabilities["over25"], 2)
    probabilities["lambda_home_goals"] = solve_lambda(probabilities["home_over15"], 1)
    probabilities["lambda_away_goals"] = solve_lambda(probabilities["away_over15"], 1)
    probabilities["under15"] = poisson_probability(probabilities["lambda_goals"], 1.5, over=False)
    probabilities["over15"] = poisson_probability(probabilities["lambda_goals"], 1.5, over=True)
    probabilities["under35"] = poisson_probability(probabilities["lambda_goals"], 3.5, over=False)
    probabilities["over35"] = poisson_probability(probabilities["lambda_goals"], 3.5, over=True)
    probabilities["home_under05"] = poisson_probability(probabilities["lambda_home_goals"], 0.5, over=False)
    probabilities["home_over05"] = poisson_probability(probabilities["lambda_home_goals"], 0.5, over=True)
    probabilities["away_under05"] = poisson_probability(probabilities["lambda_away_goals"], 0.5, over=False)
    probabilities["away_over05"] = poisson_probability(probabilities["lambda_away_goals"], 0.5, over=True)
    probabilities["exact_11"] = exact_score_probability(probabilities["lambda_home_goals"], probabilities["lambda_away_goals"], 1, 1)
    probabilities["exact_00"] = exact_score_probability(probabilities["lambda_home_goals"], probabilities["lambda_away_goals"], 0, 0)
    probabilities["exact_22"] = exact_score_probability(probabilities["lambda_home_goals"], probabilities["lambda_away_goals"], 2, 2)
    probabilities["exact_10"] = exact_score_probability(probabilities["lambda_home_goals"], probabilities["lambda_away_goals"], 1, 0)
    probabilities["exact_20"] = exact_score_probability(probabilities["lambda_home_goals"], probabilities["lambda_away_goals"], 2, 0)
    probabilities["exact_21"] = exact_score_probability(probabilities["lambda_home_goals"], probabilities["lambda_away_goals"], 2, 1)
    probabilities["exact_01"] = exact_score_probability(probabilities["lambda_home_goals"], probabilities["lambda_away_goals"], 0, 1)
    probabilities["exact_02"] = exact_score_probability(probabilities["lambda_home_goals"], probabilities["lambda_away_goals"], 0, 2)
    probabilities["exact_12"] = exact_score_probability(probabilities["lambda_home_goals"], probabilities["lambda_away_goals"], 1, 2)
    
    return probabilities

In [27]:
get_probabilities(all_features) 

{'under25': 0.6490583419799805,
 'over25': 0.35094162821769714,
 'home_under15': 0.7008497714996338,
 'home_over15': 0.2991502583026886,
 'away_under15': 0.8535451292991638,
 'away_over15': 0.14645490050315857,
 'draw': 0.2488788515329361,
 'prob_home_win': 0.6650015711784363,
 'prob_away_win': 0.08611955493688583,
 'lambda_goals': 2.1021232379192076,
 'lambda_home_goals': 1.095030845261993,
 'lambda_away_goals': 0.6729394906196317,
 'under15': 0.37906922251543607,
 'over15': 0.6209307774845639,
 'under35': 0.8382413500719493,
 'over35': 0.16175864992805067,
 'home_under05': 0.33452928814592814,
 'home_over05': 0.6654707118540719,
 'away_under05': 0.5102066237934733,
 'away_over05': 0.48979337620652674,
 'exact_11': 0.1257716060675479,
 'exact_00': 0.17067905866496796,
 'exact_22': 0.02316994395290922,
 'exact_10': 0.18689883387842118,
 'exact_20': 0.10232999402018417,
 'exact_21': 0.0688618940510527,
 'exact_01': 0.11485667879744177,
 'exact_02': 0.03864579746210656,
 'exact_12': 0.04