In [69]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F

%matplotlib inline

In [70]:
class Loader:

    def __init__(self, files):
        self.files = files

    def get_data(self):
        dfs = []
        for file in self.files:
            df = self.load_past_matches(file)
            dfs.append(df)
        
        df_join = self.join_data(dfs[0], dfs[1])

        return df_join

    def load_past_matches(self, file):
        df = pd.read_csv(f'../data/{file}')
        df.drop('Unnamed: 0', axis=1, inplace=True)
        df['date'] = pd.to_datetime(df['date']).dt.date

        return df

    def join_data(self, df1, df2):
        df = pd.merge(df1, df2,  how='inner',
            left_on=['league', 'date','team', 'opponent', 'home'],
            right_on=['league', 'date','team', 'opponent', 'home'])
        df.sort_values(by=['date', 'league', 'team', 'opponent'], inplace=True)
        df.reset_index(inplace=True, drop=True)
              
        return df

In [71]:
def build_dataset(df):
    df_copy = df.copy()
    df_copy.reset_index(inplace=True, drop=True)
    date = df_copy[['date']].iloc[:,0]
    df_copy.drop(['league', 'date', 'team', 'opponent'], axis=1, inplace=True)
    df_copy['date'] = date
    df_copy.sort_values(by=['date'], inplace=True)
    df_copy.drop(['date'], axis=1, inplace=True)
    
    X = df_copy.drop(['result'], axis=1).to_numpy()
    Y = np.array(df_copy['result']) / 0.5
    
    X = torch.tensor(X).float()
    Y = torch.tensor(Y).long()
    
    return X, Y

In [72]:
def add_past_to_row(df, i):
    df_past = df.copy()
    df_past.index += i
    df_past.rename(columns={c: c+f'_{i}' for c in df_past.columns if c not in ['league',
                                                                               'date',
                                                                               'team',
                                                                               'opponent']}, inplace=True)
    return df_past

In [73]:
def build_matches_dataset(df, past_matches, team):
    dfs_past = []
    df_team = df[df['team']==team]
    for i in range(1, past_matches+1):
        df_past = add_past_to_row(df_team, i)
        dfs_past.append(df_past)

    df_team_joined = df_team.copy()
    for df_past in dfs_past:
        df_team_joined = pd.concat([df_team_joined, df_past],
                                    axis=1,
                                    )
    df_team_joined = df_team_joined[past_matches:-past_matches]

    return df_team_joined

In [74]:
def build_teams_dataset(df, past_matches):
    dfs = []
    for team in df['team'].unique():
        df_team_joined = build_matches_dataset(df, past_matches, team)
        dfs.append(df_team_joined)
    dfs = pd.concat(dfs)
    dfs.insert(5, 'result_0', 0)

    return dfs

In [91]:
def build_wavenet_dataset_past_future(df, future_matches, past_matches=7):
    df_copy = df.copy()
    df_copy.sort_values(by=['team', 'date'], inplace=True)
    df_copy.reset_index(inplace=True, drop=True)
    dfs = build_teams_dataset(df_copy, past_matches)
    dfs_future = add_stats_to_future(dfs, future_matches)
    X, Y = build_dataset(dfs)
    
    return X, Y, dfs, dfs_future

#     return dfs

In [92]:
FILES = ["elos_matches.csv", "goals_matches.csv"]
loader = Loader(FILES)
data = loader.get_data()
data.shape

(83624, 39)

In [93]:
data.drop(['team_goals_scored',
           'opponent_goals_scored',
           'team_goals_conceded',
           'opponent_goals_conceded'], axis=1, inplace=True)

In [179]:
def load_future_matches():
    df = pd.read_csv('../data/future_matches.csv', parse_dates=True, dayfirst=True)
    df.drop('Unnamed: 0', axis=1, inplace=True)
    df = duplicate_to_team_and_opponent(df)
    return df


def add_stats_to_future(stats, future):
    stats = get_final_entry(stats, 'team')
    stats_opp = team_to_opponent(stats)

    df_future = pd.merge(future, stats, how='left', on='team')
    df_future = pd.merge(df_future, stats_opp, how='left', on='opponent')
    df_future['elo_diff'] = df_future['elo_team'] - df_future['elo_opponent']
    df_future['date'] = pd.to_datetime(df_future['date'], dayfirst=True)
    df_future['date'] = df_future['date'].dt.date

    df_future.sort_values(by='date', inplace=True)

    return df_future


def get_final_entry(df, team_or_opponent):
    df = df.copy()
    df = df.loc[:,~df.columns.duplicated()].copy()
    df.sort_values(by='date', inplace=True)
    df.reset_index(inplace=True, drop=True)
    df.drop_duplicates(subset=team_or_opponent, keep='last', inplace=True)
    df = df.loc[:, df.columns.str.contains(team_or_opponent) | df.columns.str.contains('league_') |
               df.columns.str.contains('elo_diff') | df.columns.str.contains('^home_\\d', regex=True) |
               df.columns.str.contains('result')]

    return df


def duplicate_to_team_and_opponent(df_matches):
    df_matches_copy = df_matches.copy()
    df_matches = df_matches.rename(columns={'pt1': 'team', 'pt2': 'opponent',
                                            })
    df_matches_copy = df_matches_copy.rename(columns={'pt2': 'team', 'pt1': 'opponent',
                                                    })
    df_matches_copy = df_matches_copy[['league', 'date', 'team', 'opponent' 
                                        ]]
    df_matches.loc[:, 'home'] = 1
    df_matches_copy.loc[:, 'home'] = 0
    df_matches = pd.concat([df_matches, df_matches_copy])
    df_matches.sort_values(by='date', inplace=True)

    return df_matches


def team_to_opponent(df):
    df_opponent = df.copy()
    df_opponent = df_opponent.loc[:, df_opponent.columns.str.contains("team")]
    df_opponent.columns = df_opponent.columns.str.replace("team", "opponent")

    return df_opponent

def build_future_dataset(df):
    df_copy = df.copy()
    df_copy.reset_index(inplace=True, drop=True)
    date = df_copy[['date']].iloc[:,0]
    df_copy.drop(['league', 'date', 'team', 'opponent'], axis=1, inplace=True)
    df_copy['date'] = date
    df_copy.sort_values(by=['date'], inplace=True)
    df_copy.drop(['date'], axis=1, inplace=True)
    
    X = df_copy.drop(['result'], axis=1).to_numpy()
    X = torch.tensor(X).float()
    
    return X

In [180]:
future_data = load_future_matches()

In [181]:
data.head()

Unnamed: 0,league,date,team,opponent,result,elo_team,elo_opponent,elo_diff,home,team_goals_scored_avg,...,league_home_goals_conceded,league_away_goals_conceded,league_home_goals_conceded_avg,league_away_goals_conceded_avg,team_attack_strength,team_defense_strength,opponent_attack_strength,opponent_defense_strength,team_lambda,opponent_lambda
0,Serie A,1997-08-31,atalanta,bologna,1.0,1500.0,1500.0,0.0,1,0.0,...,1.444444,1.777778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Serie A,1997-08-31,bari,parma,0.0,1500.0,1500.0,0.0,1,0.0,...,1.444444,1.777778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Serie A,1997-08-31,bologna,atalanta,0.0,1500.0,1500.0,-0.0,0,0.0,...,1.444444,1.777778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Serie A,1997-08-31,brescia,inter_milan,0.0,1500.0,1500.0,-0.0,0,0.0,...,1.444444,1.777778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Serie A,1997-08-31,empoli,roma,0.0,1500.0,1500.0,0.0,1,0.0,...,1.444444,1.777778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [182]:
Xall, Yall, dfs, dfs_future = build_wavenet_dataset_past_future(data, future_data, 7)

In [183]:
Xfu = build_future_dataset(dfs_future)

In [184]:
Xall.shape

torch.Size([79916, 248])

In [185]:
Xfu.shape

torch.Size([360, 248])

## Predicting Matches

In [195]:
PATH = "../src/model/trained_models/wavenet_3.pt"
model = torch.load(PATH)
model.train()

Sequential(
  (0): Conv1d(1, 32, kernel_size=(31,), stride=(31,))
  (1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): Tanh()
  (3): Conv1d(32, 64, kernel_size=(2,), stride=(2,))
  (4): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (5): Tanh()
  (6): Conv1d(64, 128, kernel_size=(2,), stride=(2,))
  (7): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (8): Tanh()
  (9): Flatten(start_dim=1, end_dim=-1)
  (10): Linear(in_features=256, out_features=3, bias=True)
)

In [199]:
@torch.no_grad()
def predict(x):
    x = x[:, None, :]
    logits = model(x)
    preds = torch.softmax(logits, dim=1)
    
    return preds

In [200]:
predictions = predict(Xfu)

In [202]:
dfs_future.reset_index(inplace=True, drop=True)
dfs_future[['loss', 'draw', 'win']] = predictions

In [203]:
dfs_future

Unnamed: 0,date,team,opponent,league,home,result,result_0,elo_team,elo_diff,team_goals_scored_avg,...,opponent_goals_scored_avg_home_7,opponent_goals_conceded_avg_home_7,opponent_goals_scored_avg_away_7,opponent_goals_conceded_avg_away_7,opponent_attack_strength_7,opponent_defense_strength_7,opponent_lambda_7,loss,draw,win
0,2023-01-08,siena,reggiana,"Serie C, Girone B",0,0.0,0,1467.818561,-56.099276,1.000000,...,2.210526,0.631579,1.368421,1.210526,1.300723,0.881414,1.026886,0.169832,0.218832,0.611336
1,2023-01-08,san_donato_tavarnelle,sassari_torres,"Serie C, Girone B",1,1.0,0,1427.191531,-30.055167,0.842105,...,1.263158,1.052632,0.947368,0.894737,0.931838,0.885545,0.784706,0.136682,0.205671,0.657647
2,2023-01-08,gubbio,fermana,"Serie C, Girone B",1,0.5,0,1495.245159,129.724362,1.315789,...,1.000000,0.736842,0.631579,1.578947,0.693853,0.684039,0.876446,0.245695,0.187073,0.567232
3,2023-01-08,imolese,lucchese,"Serie C, Girone B",1,0.5,0,1395.818528,-88.962985,0.736842,...,1.421053,0.789474,0.473684,0.789474,0.450250,0.574835,0.497645,0.270006,0.275037,0.454956
4,2023-01-08,olbia,aquila_montevarchi,"Serie C, Girone B",1,0.5,0,1424.494350,30.057318,1.105263,...,1.578947,0.842105,0.473684,1.894737,0.450250,1.379604,0.450250,0.102927,0.149466,0.747608
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
355,2023-04-23,pontedera,gubbio,"Serie C, Girone B",1,0.0,0,1489.880439,-5.364720,0.947368,...,1.526316,0.894737,1.263158,1.421053,1.234334,0.914525,2.533634,0.042093,0.071878,0.886029
356,2023-04-23,lucchese,olbia,"Serie C, Girone B",1,0.0,0,1484.781513,60.287163,1.052632,...,1.000000,0.947368,0.947368,1.105263,0.900500,0.804769,0.947895,0.110075,0.138297,0.751627
357,2023-04-23,alessandria,cesena,"Serie C, Girone B",1,0.5,0,1435.304888,-96.306229,0.736842,...,1.947368,0.842105,0.894737,1.157895,1.417926,0.800445,2.761225,0.069293,0.134581,0.796125
358,2023-04-23,cesena,alessandria,"Serie C, Girone B",0,1.0,0,1531.611118,96.306229,1.631579,...,1.000000,1.315789,0.894737,1.736842,0.733748,1.352163,0.733748,0.224211,0.261765,0.514025


In [204]:
dfs_future.to_csv("../data/predictions/wavenet_3.csv")