In [1]:
import torch
import torch.nn as nn
import random
from mplsoccer import Pitch
import matplotlib.pyplot as plt
import json
import torch.nn.functional as F
import joblib
import pandas as pd

In [99]:
class FootballMatchPredictor(nn.Module):
    def __init__(self, input_agg_stats_size, hidden_size=64):
        super(FootballMatchPredictor, self).__init__()
        
        # Layer for aggregated statistics
        self.agg_stats_fc = nn.Sequential(
            nn.Linear(input_agg_stats_size, hidden_size),
            # nn.BatchNorm1d(hidden_size),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(hidden_size, hidden_size // 2),
            nn.ReLU()
        )
        
        combined_input_size = hidden_size // 2
        self.fc_combined = nn.Sequential(
            nn.Linear(combined_input_size, hidden_size // 4),
            nn.ReLU(),
            nn.Linear(hidden_size // 4, 2),  # Output: 2 probabilities (over/under)
        )

    def forward(self, agg_stats):

        agg_stats_out = self.agg_stats_fc(agg_stats)

        output = self.fc_combined(agg_stats_out)
        
        # Apply softmax to get probabilities
        return F.softmax(output, dim=1)
    
class FootballMatchPredictorOutcome(nn.Module):
    def __init__(self, input_agg_stats_size, hidden_size=128):
        super(FootballMatchPredictorOutcome, self).__init__()
        
        # Layer for aggregated statistics
        self.agg_stats_fc = nn.Sequential(
            nn.Linear(input_agg_stats_size, hidden_size),
            # nn.BatchNorm1d(hidden_size),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU()
        )
        
        combined_input_size = hidden_size
        self.fc_combined = nn.Sequential(
            nn.Linear(combined_input_size, hidden_size // 2),
            nn.ReLU(),
            nn.Linear(hidden_size // 2, 3),  # Output: 3 probabilities (home win/draw/away win)
        )

    def forward(self, agg_stats):

        agg_stats_out = self.agg_stats_fc(agg_stats)

        output = self.fc_combined(agg_stats_out)
        
        # Apply softmax to get probabilities
        return F.softmax(output, dim=1)

def load_model(model_path):
    model = torch.load(model_path, map_location=torch.device('cpu'))
    model.eval()
    return model

def load_scaler(scaler_path):
    scaler = joblib.load(scaler_path)
    return scaler

def load_selected_fetures(selected_features_path):
    with open(selected_features_path, "r", encoding="utf-8") as f:
        selected_features = json.load(f)
    return selected_features

def predict_goals(input_features, model):
    with torch.no_grad():
        input_tensor = torch.tensor(input_features, dtype=torch.float32)
        prediction = model(input_tensor)
        return prediction.squeeze()[0].item(), prediction.squeeze()[1].item()
    
def predict_outcome(input_features, model):
    with torch.no_grad():
        input_tensor = torch.tensor(input_features, dtype=torch.float32)
        prediction = model(input_tensor)
        return prediction.squeeze()[0].item(), prediction.squeeze()[1].item(), prediction.squeeze()[2].item()


In [100]:
matches = pd.read_csv("data/prepared_data.csv")

scaler = load_scaler("C:/Users/MateuszAndryszak/OneDrive - GuideVision, s.r.o/Dokumenty/inżynierka/fbref/models/goals_scaler_v1.pkl")
selected_features = load_selected_fetures("C:/Users/MateuszAndryszak/OneDrive - GuideVision, s.r.o/Dokumenty/inżynierka/fbref/models/goals_features_v1.json")
model = load_model("C:/Users/MateuszAndryszak/OneDrive - GuideVision, s.r.o/Dokumenty/inżynierka/fbref/models/goals_predictor_v1.pth")

scaler_home = load_scaler("C:/Users/MateuszAndryszak/OneDrive - GuideVision, s.r.o/Dokumenty/inżynierka/fbref/models/goals_scaler_home_goals_v1.pkl")
selected_features_home = load_selected_fetures("C:/Users/MateuszAndryszak/OneDrive - GuideVision, s.r.o/Dokumenty/inżynierka/fbref/models/home_goals_features_v1.json")
model_home = load_model("C:/Users/MateuszAndryszak/OneDrive - GuideVision, s.r.o/Dokumenty/inżynierka/fbref/models/goals_home_predictor_v1.pth")

scaler_away = load_scaler("C:/Users/MateuszAndryszak/OneDrive - GuideVision, s.r.o/Dokumenty/inżynierka/fbref/models/goals_scaler_away_goals_v1.pkl")
selected_features_away = load_selected_fetures("C:/Users/MateuszAndryszak/OneDrive - GuideVision, s.r.o/Dokumenty/inżynierka/fbref/models/away_goals_features_v1.json")
model_away = load_model("C:/Users/MateuszAndryszak/OneDrive - GuideVision, s.r.o/Dokumenty/inżynierka/fbref/models/goals_away_predictor_v1.pth")

scaler_outcome = load_scaler("C:/Users/MateuszAndryszak/OneDrive - GuideVision, s.r.o/Dokumenty/inżynierka/fbref/models/outcome_scaler.pkl")
selected_features_outcome = load_selected_fetures("C:/Users/MateuszAndryszak/OneDrive - GuideVision, s.r.o/Dokumenty/inżynierka/fbref/models/outcome_features.json")
model_outcome = load_model("C:/Users/MateuszAndryszak/OneDrive - GuideVision, s.r.o/Dokumenty/inżynierka/fbref/models/football_match_predictor_v1.pth")

filtered_matches = matches[[col for col in matches.columns if 'last5' in col or 'matches_since' in col or 'overall' in col or 'tiredness' in col or 'h2h' in col]]
filtered_matches = filtered_matches.drop(columns = ["home_last5_possession", "away_last5_possession"])
filtered_matches = filtered_matches[~filtered_matches.isna().any(axis=1)]
all_features = filtered_matches.iloc[37]

print(matches[["date", "home_team", "away_team"]].iloc[37])

all_features_scaled = scaler.transform([all_features])
input_features = all_features_scaled[:, [filtered_matches.columns.get_loc(col) for col in selected_features]]

all_features_scaled_home = scaler_home.transform([all_features])
input_features_home = all_features_scaled_home[:, [filtered_matches.columns.get_loc(col) for col in selected_features_home]]

all_features_scaled_away = scaler_home.transform([all_features])
input_features_away = all_features_scaled_away[:, [filtered_matches.columns.get_loc(col) for col in selected_features_away]]

all_features_scaled_outcome = scaler_outcome.transform([all_features])
input_features_outcome = all_features_scaled_outcome[:, [filtered_matches.columns.get_loc(col) for col in selected_features_outcome]]

print(predict_goals(input_features, model))
print(predict_goals(input_features_home, model_home))
print(predict_goals(input_features_away, model_away))
print(predict_outcome(input_features_outcome, model_outcome))

date                2017-08-26
home_team    Huddersfield Town
away_team          Southampton
Name: 37, dtype: object
(0.4181433618068695, 0.5818566083908081)
(0.7869749069213867, 0.2130250781774521)
(0.4895228445529938, 0.5104771852493286)
(0.2694578468799591, 0.26148757338523865, 0.46905457973480225)
