In [8]:
import datetime
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F

In [2]:
class Loader:

    def __init__(self, files):
        self.files = files

    def get_data(self):
        dfs = []
        for file in self.files:
            df = self.load_past_matches(file)
            dfs.append(df)
        
        df_join = self.join_data(dfs[0], dfs[1])

        return df_join

    def load_past_matches(self, file):
        df = pd.read_csv(f'../data/{file}')
        df.drop('Unnamed: 0', axis=1, inplace=True)
        df['date'] = pd.to_datetime(df['date']).dt.date

        return df

    def join_data(self, df1, df2):
        df = pd.merge(df1, df2,  how='inner',
            left_on=['league', 'date','team', 'opponent', 'home'],
            right_on=['league', 'date','team', 'opponent', 'home'])
        df.sort_values(by=['date', 'league', 'team', 'opponent'], inplace=True)
              
        return df

In [3]:
FILES = ["elos_matches.csv", "goals_matches.csv"]
loader = Loader(FILES)
data = loader.get_data()

In [18]:
def filter_to_season_and_league(df, season, league):
    start_date = pd.to_datetime(f"{season}-07-30").date()
    df = df[df['date'] >= start_date]
    df = df[df['league'] == league]
    df.reset_index(inplace=True, drop=True)
    return df

In [22]:
data = filter_to_season_and_league(data, '2022', 'Serie C, Girone B')
data.drop([
    'team_goals_scored',
    'team_goals_conceded',
    'opponent_goals_scored',
    'opponent_goals_conceded'
], axis=1, inplace=True)
data.head()

Unnamed: 0,league,date,team,opponent,result,elo_team,elo_opponent,elo_diff,home,team_goals_scored_avg,...,league_home_goals_conceded,league_away_goals_conceded,league_home_goals_conceded_avg,league_away_goals_conceded_avg,team_attack_strength,team_defense_strength,opponent_attack_strength,opponent_defense_strength,team_lambda,opponent_lambda
0,"Serie C, Girone B",2022-09-04,alessandria,imolese,0.0,1401.513138,1385.984449,15.528689,0,0.842105,...,0.444444,1.111111,0.802339,1.79152,1.246356,0.998858,0.675698,1.311953,1.311953,1.209143
1,"Serie C, Girone B",2022-09-04,aquila_montevarchi,gubbio,0.0,1432.585346,1473.678249,-41.092903,0,1.105263,...,0.444444,1.111111,0.802339,1.79152,0.655977,1.028236,0.998858,1.11516,0.586926,1.840001
2,"Serie C, Girone B",2022-09-04,carrarese,cesena,1.0,1415.944287,1507.312415,-91.368128,0,1.0,...,0.444444,1.111111,0.802339,1.79152,0.655977,1.11637,1.028236,1.049563,0.552401,2.056471
3,"Serie C, Girone B",2022-09-04,cesena,carrarese,0.0,1507.312415,1415.944287,91.368128,1,1.473684,...,0.444444,1.111111,0.802339,1.79152,1.028236,1.049563,0.655977,1.11637,2.056471,0.552401
4,"Serie C, Girone B",2022-09-04,fermana,fiorenzuola,0.0,1376.394658,1426.747051,-50.352394,0,0.736842,...,0.444444,1.111111,0.802339,1.79152,0.721574,0.851967,0.499429,1.771137,1.025395,0.762286


In [24]:
data.shape

(290, 35)

In [25]:
def build_dataset(df):
    
    df = df.drop(['league', 'date', 'team', 'opponent'], axis=1)
    X = df.drop(['result'], axis=1).to_numpy()
    Y = np.array(df['result']) / 0.5
    
    X = torch.tensor(X).float()
    Y = torch.tensor(Y).long()
    
    
    return X, Y

In [26]:
X, Y = build_dataset(data)

In [27]:
PATH = "../src/model/trained_models/3_linear_layer.pt"
# model.load_state_dict(torch.load(PATH))
model = torch.load(PATH)
model.train()

Sequential(
  (0): Linear(in_features=30, out_features=100, bias=False)
  (1): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): Tanh()
  (3): Linear(in_features=100, out_features=100, bias=False)
  (4): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (5): Tanh()
  (6): Linear(in_features=100, out_features=100, bias=False)
  (7): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (8): Tanh()
  (9): Linear(in_features=100, out_features=3, bias=True)
)

In [42]:
@torch.no_grad()
def make_predictions(x):
    logits = model(x)
    preds = []
#     preds = torch.argmax(logits, dim=1)
    preds = torch.softmax(logits, dim=1)
    
    return preds

In [50]:
predictions = make_predictions(X)
data[['loss', 'draw', 'win']] = predictions.numpy()

In [51]:
data

Unnamed: 0,league,date,team,opponent,result,elo_team,elo_opponent,elo_diff,home,team_goals_scored_avg,...,league_away_goals_conceded_avg,team_attack_strength,team_defense_strength,opponent_attack_strength,opponent_defense_strength,team_lambda,opponent_lambda,loss,draw,win
0,"Serie C, Girone B",2022-09-04,alessandria,imolese,0.0,1401.513138,1385.984449,15.528689,0,0.842105,...,1.791520,1.246356,0.998858,0.675698,1.311953,1.311953,1.209143,0.621379,0.236101,0.142520
1,"Serie C, Girone B",2022-09-04,aquila_montevarchi,gubbio,0.0,1432.585346,1473.678249,-41.092903,0,1.105263,...,1.791520,0.655977,1.028236,0.998858,1.115160,0.586926,1.840001,0.819218,0.123622,0.057160
2,"Serie C, Girone B",2022-09-04,carrarese,cesena,1.0,1415.944287,1507.312415,-91.368128,0,1.000000,...,1.791520,0.655977,1.116370,1.028236,1.049563,0.552401,2.056471,0.847504,0.113006,0.039490
3,"Serie C, Girone B",2022-09-04,cesena,carrarese,0.0,1507.312415,1415.944287,91.368128,1,1.473684,...,1.791520,1.028236,1.049563,0.655977,1.116370,2.056471,0.552401,0.046071,0.124985,0.828944
4,"Serie C, Girone B",2022-09-04,fermana,fiorenzuola,0.0,1376.394658,1426.747051,-50.352394,0,0.736842,...,1.791520,0.721574,0.851967,0.499429,1.771137,1.025395,0.762286,0.724157,0.181032,0.094811
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
285,"Serie C, Girone B",2022-12-17,sassari_torres,fermana,0.0,1476.896983,1365.520797,111.376186,0,0.789474,...,0.888743,1.115824,1.065965,1.065965,0.998369,0.998369,1.009861,0.213644,0.293656,0.492700
286,"Serie C, Girone B",2022-12-23,rimini,san_donato_tavarnelle,0.0,1485.185717,1427.191531,57.994186,1,1.526316,...,0.894006,2.119379,1.188511,1.307362,1.648406,3.123295,1.376171,0.684631,0.186265,0.129105
287,"Serie C, Girone B",2022-12-23,san_donato_tavarnelle,rimini,1.0,1427.191531,1485.185717,-57.994186,0,0.842105,...,0.894006,1.307362,1.648406,2.119379,1.188511,1.376171,3.123295,0.173215,0.215140,0.611644
288,"Serie C, Girone B",2022-12-23,sassari_torres,virtus_entella,0.0,1457.246697,1535.560893,-78.314195,1,0.789474,...,0.894006,1.354047,1.069660,1.129086,1.236304,1.496579,1.069660,0.806222,0.147915,0.045863


## Joining to Future

In [54]:
future = pd.read_csv("../data/future_predictions.csv", index_col=0)
future.head()

Unnamed: 0,league,date,team,opponent,home,0,1,2
0,"Serie C, Girone B",2023-01-08,siena,reggiana,0,0.136038,0.517463,0.346498
1,"Serie C, Girone B",2023-01-08,san_donato_tavarnelle,sassari_torres,1,0.18471,0.342512,0.472778
2,"Serie C, Girone B",2023-01-08,gubbio,fermana,1,0.808809,0.105148,0.086042
3,"Serie C, Girone B",2023-01-08,imolese,lucchese,1,0.126196,0.307486,0.566317
4,"Serie C, Girone B",2023-01-08,olbia,aquila_montevarchi,1,0.665615,0.134607,0.199779


In [61]:
elos = pd.read_csv("../data/elos_matches.csv", index_col=0, parse_dates=['date'], dayfirst=False)
elos['date'] = elos['date'].dt.date
elos.head()

Unnamed: 0,league,date,team,opponent,result,elo_team,elo_opponent,elo_diff,home
0,Serie A,1997-08-31,atalanta,bologna,1.0,1500.0,1500.0,0.0,1
5,Serie A,1997-08-31,l_r_vicenza,sampdoria,0.0,1500.0,1500.0,-0.0,0
6,Serie A,1997-08-31,milan,piacenza,0.5,1500.0,1500.0,-0.0,0
7,Serie A,1997-08-31,roma,empoli,1.0,1500.0,1500.0,-0.0,0
8,Serie A,1997-08-31,fiorentina,udinese,1.0,1500.0,1500.0,-0.0,0


In [63]:
predictions_data = data[['league', 'date', 'team', 'opponent', 'result', 'home', 'loss', 'draw', 'win']]
elos_c = elos[elos['league']=='Serie C, Girone B']
combined = pd.concat([elos, data], axis=1)
combined.head()

Unnamed: 0,league,date,team,opponent,result,elo_team,elo_opponent,elo_diff,home,league.1,...,league_away_goals_conceded_avg,team_attack_strength,team_defense_strength,opponent_attack_strength,opponent_defense_strength,team_lambda,opponent_lambda,loss,draw,win
0,Serie A,1997-08-31,atalanta,bologna,1.0,1500.0,1500.0,0.0,1,"Serie C, Girone B",...,1.79152,1.246356,0.998858,0.675698,1.311953,1.311953,1.209143,0.621379,0.236101,0.14252
5,Serie A,1997-08-31,l_r_vicenza,sampdoria,0.0,1500.0,1500.0,-0.0,0,"Serie C, Girone B",...,1.79152,0.499429,1.771137,0.721574,0.851967,0.762286,1.025395,0.104415,0.194232,0.701353
6,Serie A,1997-08-31,milan,piacenza,0.5,1500.0,1500.0,-0.0,0,"Serie C, Girone B",...,1.79152,0.998858,1.11516,0.655977,1.028236,1.840001,0.586926,0.075898,0.145895,0.778206
7,Serie A,1997-08-31,roma,empoli,1.0,1500.0,1500.0,-0.0,0,"Serie C, Girone B",...,1.79152,0.675698,1.311953,1.246356,0.998858,1.209143,1.311953,0.155234,0.245187,0.599579
8,Serie A,1997-08-31,fiorentina,udinese,1.0,1500.0,1500.0,-0.0,0,"Serie C, Girone B",...,1.79152,0.721574,0.616941,1.204505,0.787172,0.455731,1.331295,0.877273,0.09044,0.032287
