In [176]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F

%matplotlib inline

In [146]:
class Loader:

    def __init__(self, files):
        self.files = files

    def get_data(self):
        dfs = []
        for file in self.files:
            df = self.load_past_matches(file)
            dfs.append(df)
        
        df_join = self.join_data(dfs[0], dfs[1])

        return df_join

    def load_past_matches(self, file):
        df = pd.read_csv(f'../data/{file}')
        df.drop('Unnamed: 0', axis=1, inplace=True)
        df['date'] = pd.to_datetime(df['date']).dt.date

        return df

    def join_data(self, df1, df2):
        df = pd.merge(df1, df2,  how='inner',
            left_on=['league', 'date','team', 'opponent', 'home'],
            right_on=['league', 'date','team', 'opponent', 'home'])
        df.sort_values(by=['date', 'league', 'team', 'opponent'], inplace=True)
        df.reset_index(inplace=True, drop=True)
              
        return df

In [147]:
def build_dataset(df):
    df_copy = df.copy()
    df_copy.reset_index(inplace=True, drop=True)
    date = df_copy[['date']].iloc[:,0]
    df_copy.drop(['league', 'date', 'team', 'opponent'], axis=1, inplace=True)
    df_copy['date'] = date
    df_copy.sort_values(by=['date'], inplace=True)
    df_copy.drop(['date'], axis=1, inplace=True)
    
    X = df_copy.drop(['result'], axis=1).to_numpy()
    Y = np.array(df_copy['result']) / 0.5
    
    X = torch.tensor(X).float()
    Y = torch.tensor(Y).long()
    
    return X, Y

In [148]:
def add_past_to_row(df, i):
    df_past = df.copy()
    df_past.index += i
    df_past.rename(columns={c: c+f'_{i}' for c in df_past.columns if c not in ['league',
                                                                               'date',
                                                                               'team',
                                                                               'opponent']}, inplace=True)
    return df_past

In [149]:
def build_matches_dataset(df, past_matches, team):
    dfs_past = []
    df_team = df[df['team']==team]
    for i in range(1, past_matches+1):
        df_past = add_past_to_row(df_team, i)
        dfs_past.append(df_past)

    df_team_joined = df_team.copy()
    for df_past in dfs_past:
        df_team_joined = pd.concat([df_team_joined, df_past],
                                    axis=1,
                                    )
    df_team_joined = df_team_joined[past_matches:-past_matches]

    return df_team_joined

In [150]:
def build_teams_dataset(df, past_matches):
    dfs = []
    for team in df['team'].unique():
        df_team_joined = build_matches_dataset(df, past_matches, team)
        dfs.append(df_team_joined)
    dfs = pd.concat(dfs)
    dfs.insert(5, 'result_0', 0)

    return dfs

In [151]:
def build_wavenet_dataset_past_future(df, future_matches, past_matches=7):
    df_copy = df.copy()
    df_copy.sort_values(by=['team', 'date'], inplace=True)
    df_copy.reset_index(inplace=True, drop=True)
    dfs = build_teams_dataset(df_copy, past_matches)
    dfs_future = add_stats_to_future(dfs, future_matches)
    X, Y = build_dataset(dfs)
    
    return X, Y, dfs, dfs_future

#     return dfs

In [152]:
FILES = ["elos_matches.csv", "goals_matches.csv"]
loader = Loader(FILES)
data = loader.get_data()
data.shape

(84546, 39)

In [153]:
data.drop(['team_goals_scored',
           'opponent_goals_scored',
           'team_goals_conceded',
           'opponent_goals_conceded'], axis=1, inplace=True)

In [154]:
def load_future_matches():
    df = pd.read_csv('../data/future_matches.csv', parse_dates=True, dayfirst=True)
    df.drop('Unnamed: 0', axis=1, inplace=True)
    df = duplicate_to_team_and_opponent(df)
    return df


def add_stats_to_future(stats, future):
    stats = get_final_entry(stats, 'team')
    stats_opp = team_to_opponent(stats)

    df_future = pd.merge(future, stats, how='left', on='team')
    df_future = pd.merge(df_future, stats_opp, how='left', on='opponent')
    df_future['elo_diff'] = df_future['elo_team'] - df_future['elo_opponent']
    df_future['date'] = pd.to_datetime(df_future['date'], dayfirst=True)
    df_future['date'] = df_future['date'].dt.date

    df_future.sort_values(by='date', inplace=True)

    return df_future


def get_final_entry(df, team_or_opponent):
    df = df.copy()
    df = df.loc[:,~df.columns.duplicated()].copy()
    df.sort_values(by='date', inplace=True)
    df.reset_index(inplace=True, drop=True)
    df.drop_duplicates(subset=team_or_opponent, keep='last', inplace=True)
    df = df.loc[:, df.columns.str.contains(team_or_opponent) | df.columns.str.contains('league_') |
               df.columns.str.contains('elo_diff') | df.columns.str.contains('^home_\\d', regex=True) |
               df.columns.str.contains('result')]

    return df


def duplicate_to_team_and_opponent(df_matches):
    df_matches_copy = df_matches.copy()
    df_matches = df_matches.rename(columns={'pt1': 'team', 'pt2': 'opponent',
                                            })
    df_matches_copy = df_matches_copy.rename(columns={'pt2': 'team', 'pt1': 'opponent',
                                                    })
    df_matches_copy = df_matches_copy[['league', 'date', 'team', 'opponent' 
                                        ]]
    df_matches.loc[:, 'home'] = 1
    df_matches_copy.loc[:, 'home'] = 0
    df_matches = pd.concat([df_matches, df_matches_copy])
    df_matches.sort_values(by='date', inplace=True)

    return df_matches


def team_to_opponent(df):
    df_opponent = df.copy()
    df_opponent = df_opponent.loc[:, df_opponent.columns.str.contains("team")]
    df_opponent.columns = df_opponent.columns.str.replace("team", "opponent")

    return df_opponent

def build_future_dataset(df):
    df_copy = df.copy()
    df_copy.reset_index(inplace=True, drop=True)
    date = df_copy[['date']].iloc[:,0]
    df_copy.drop(['league', 'date', 'team', 'opponent'], axis=1, inplace=True)
    df_copy['date'] = date
    df_copy.sort_values(by=['date'], inplace=True)
    df_copy.drop(['date'], axis=1, inplace=True)
    
    X = df_copy.drop(['result'], axis=1).to_numpy()
    X = torch.tensor(X).float()
    
    return X

In [155]:
future_data = load_future_matches()
future_data.head()

Unnamed: 0,date,team,opponent,league,home
124,02.04.2023,torres,fiorenzuola,"Serie C, Girone B",0
127,02.04.2023,aquila_montevarchi,san_donato_tavarnelle,"Serie C, Girone B",1
126,02.04.2023,imolese,rimini,"Serie C, Girone B",1
125,02.04.2023,gubbio,vis_pesaro,"Serie C, Girone B",1
124,02.04.2023,fiorenzuola,torres,"Serie C, Girone B",1


In [156]:
data.head()

Unnamed: 0,league,date,team,opponent,result,elo_team,elo_opponent,elo_diff,home,team_goals_scored_avg,...,league_home_goals_conceded,league_away_goals_conceded,league_home_goals_conceded_avg,league_away_goals_conceded_avg,team_attack_strength,team_defense_strength,opponent_attack_strength,opponent_defense_strength,team_lambda,opponent_lambda
0,Serie A,1997-08-31,atalanta,bologna,1.0,1500.0,1500.0,0.0,1,0.0,...,1.444444,1.777778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Serie A,1997-08-31,bari,parma,0.0,1500.0,1500.0,0.0,1,0.0,...,1.444444,1.777778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Serie A,1997-08-31,bologna,atalanta,0.0,1500.0,1500.0,-0.0,0,0.0,...,1.444444,1.777778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Serie A,1997-08-31,brescia,inter_milan,0.0,1500.0,1500.0,-0.0,0,0.0,...,1.444444,1.777778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Serie A,1997-08-31,empoli,roma,0.0,1500.0,1500.0,0.0,1,0.0,...,1.444444,1.777778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [157]:
data['league'].unique()

array(['Serie A', 'Italian Serie A', 'Coppa Italia',
       'Serie B, Promotion Playoffs', 'Italian Coppa Italia',
       'Italian Serie B', 'Supercoppa', 'Serie B',
       'Serie B, Relegation Playoffs', 'Coppa Italia Serie C, Girone D',
       'Coppa Italia Serie C, Girone H', 'Coppa Italia Serie C, Girone A',
       'Coppa Italia Serie C, Girone B', 'Coppa Italia Serie C, Girone C',
       'Coppa Italia Serie C, Girone E', 'Coppa Italia Serie C, Girone F',
       'Coppa Italia Serie C, Girone I', 'Coppa Italia Serie C, Girone L',
       'Coppa Italia Serie C, Girone M', 'Coppa Italia Serie C, Girone G',
       'Coppa Italia Serie C, Knockout stage', 'Italian Supercoppa',
       'Serie D, Girone A', 'Serie D, Girone E', 'Serie D, Girone F',
       'Serie D, Girone H', 'Serie D, Girone B', 'Serie D, Girone C',
       'Serie D, Girone D', 'Serie D, Girone G', 'Serie D, Girone I',
       'Primavera Cup, Knockout stage', 'Serie D, Girone E, Playoffs',
       'Serie C, Girone A', 'Serie C

In [158]:
def cut_to_league_and_season(df, league, season):
    df_temp = pd.DataFrame(columns=df.columns)
    start_date = pd.to_datetime(f'{season}-08-01')
    for i in range(len(league)):
#         df_temp = pd.concat(df_temp, df[(df['league']==league[i]) & (df['date']>=start_date)])
        df_temp = df_temp.append(df[(df['league']==league[i]) & (df['date']>=start_date)])
    return df_temp

def update_teams(df, teams):
    df_copy = df.copy()
    df_copy['team'] = df_copy['team'].replace(teams)
    df_copy['opponent'] = df_copy['opponent'].replace(teams)
    return df_copy

In [159]:
teams_future = {
    'inter': 'inter_milan',
    'verona': 'hellas_verona',
}

teams = {
    'internazionale': 'inter_milan',
    'milan': 'ac_milan',
    'roma': 'as_roma',
}

# future_data = update_teams(future_data, teams_future)
data_serie_a_22 = cut_to_league_and_season(data, ['Serie A', 'Italian Serie A'], '2022')
data_serie_a_22 = update_teams(data_serie_a_22, teams)
data_serie_a_22.head()

  df_temp = df_temp.append(df[(df['league']==league[i]) & (df['date']>=start_date)])
  df_temp = df_temp.append(df[(df['league']==league[i]) & (df['date']>=start_date)])
  df_temp = df_temp.append(df[(df['league']==league[i]) & (df['date']>=start_date)])
  df_temp = df_temp.append(df[(df['league']==league[i]) & (df['date']>=start_date)])


Unnamed: 0,league,date,team,opponent,result,elo_team,elo_opponent,elo_diff,home,team_goals_scored_avg,...,league_home_goals_conceded,league_away_goals_conceded,league_home_goals_conceded_avg,league_away_goals_conceded_avg,team_attack_strength,team_defense_strength,opponent_attack_strength,opponent_defense_strength,team_lambda,opponent_lambda
81400,Serie A,2022-08-13,atalanta,sampdoria,1.0,1563.307753,1570.510089,-7.202336,0,1.210526,...,2.0,1.5,1.588596,1.459649,1.027057,0.757212,1.081731,0.894533,1.459502,1.195597
81401,Serie A,2022-08-13,inter_milan,lecce,1.0,1691.720255,1480.703529,211.016726,0,2.052632,...,2.0,1.5,1.588596,1.459649,1.225842,0.540865,1.262019,0.496963,0.96777,0.996331
81402,Serie A,2022-08-13,lecce,inter_milan,0.0,1480.703529,1691.720255,-211.016726,1,1.421053,...,2.0,1.5,1.588596,1.459649,1.262019,0.496963,1.225842,0.540865,0.996331,0.96777
81403,Serie A,2022-08-13,lecce,inter_milan,0.0,1480.703529,1691.720255,-211.016726,1,1.421053,...,2.0,1.5,1.588596,1.459649,1.298077,0.563225,1.225842,0.540865,1.024798,1.096806
81404,Serie A,2022-08-13,ac_milan,udinese,1.0,1709.75874,1609.861149,99.897591,1,1.368421,...,2.0,1.5,1.588596,1.459649,0.973558,0.39757,0.927664,1.153846,1.639676,0.585893


In [160]:
set(data_serie_a_22['team'].unique().tolist()) - set(future_data['team'].unique().tolist())

{'ac_milan',
 'america_mg',
 'as_roma',
 'atalanta',
 'athletico_paranaense',
 'atletico_go',
 'atletico_mg',
 'avai',
 'bologna',
 'botafogo',
 'ceara',
 'corinthians',
 'coritiba',
 'cremonese',
 'cuiaba',
 'empoli',
 'fiorentina',
 'flamengo',
 'fluminense',
 'fortaleza',
 'goias',
 'hellas_verona',
 'inter_milan',
 'internacional',
 'juventude',
 'juventus',
 'lazio',
 'lecce',
 'monza',
 'napoli',
 'palmeiras',
 'red_bull_bragantino',
 'salernitana',
 'sampdoria',
 'santos',
 'sao_paulo',
 'sassuolo',
 'spezia',
 'torino',
 'udinese'}

In [161]:
set(future_data['team'].unique().tolist()) - set(data_serie_a_22['team'].unique().tolist())

{'alessandria',
 'ancona',
 'aquila_montevarchi',
 'carrarese',
 'cesena',
 'fermana',
 'fiorenzuola',
 'gubbio',
 'imolese',
 'lucchese',
 'olbia',
 'pontedera',
 'recanatese',
 'reggiana',
 'rimini',
 'san_donato_tavarnelle',
 'siena',
 'torres',
 'virtus_entella',
 'vis_pesaro'}

In [162]:
future_data['team']

124                torres
127    aquila_montevarchi
126               imolese
125                gubbio
124           fiorenzuola
              ...        
27               reggiana
28                  siena
29             vis_pesaro
29                 torres
27            fiorenzuola
Name: team, Length: 320, dtype: object

In [163]:
Xall, Yall, dfs, dfs_future = build_wavenet_dataset_past_future(data, future_data, 7)

In [164]:
dfs_future = dfs_future[dfs.columns]

In [165]:
Xfu = build_future_dataset(dfs_future)

In [166]:
# for column in dfs.columns:
#     print(column)

In [167]:
# for column in dfs_future.columns:
#     print(column)

In [168]:
Xall.shape

torch.Size([80806, 248])

In [169]:
Xfu.shape

torch.Size([320, 248])

## Predicting Matches

In [170]:
PATH = "../src/model/trained_models/wavenet_3.pt"
model = torch.load(PATH)
model.eval()

Sequential(
  (0): Conv1d(1, 32, kernel_size=(31,), stride=(31,))
  (1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): Tanh()
  (3): Conv1d(32, 64, kernel_size=(2,), stride=(2,))
  (4): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (5): Tanh()
  (6): Conv1d(64, 128, kernel_size=(2,), stride=(2,))
  (7): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (8): Tanh()
  (9): Flatten(start_dim=1, end_dim=-1)
  (10): Linear(in_features=256, out_features=3, bias=True)
)

In [171]:
@torch.no_grad()
def predict(x):
    x = x[:, None, :]
    logits = model(x)
    preds = torch.softmax(logits, dim=1)
    
    return preds

In [172]:
predictions = predict(Xfu)

In [173]:
dfs_future.reset_index(inplace=True, drop=True)
predictions_df = pd.DataFrame(predictions, columns=['loss', 'draw', 'win'])
dfs_future = pd.concat([dfs_future, predictions_df], axis=1)

In [174]:
dfs_future.head()

Unnamed: 0,league,date,team,opponent,result,result_0,elo_team,elo_opponent,elo_diff,home,...,league_away_goals_conceded_avg_7,team_attack_strength_7,team_defense_strength_7,opponent_attack_strength_7,opponent_defense_strength_7,team_lambda_7,opponent_lambda_7,loss,draw,win
0,"Serie C, Girone B",2023-01-22,carrarese,virtus_entella,0.0,0,1452.000494,1542.043414,-90.04292,0,...,0.877632,0.985701,2.158921,1.069753,1.290323,0.778185,1.351267,0.103082,0.442446,0.454472
1,"Serie C, Girone B",2023-01-22,cesena,fermana,1.0,0,1547.000538,1441.141915,105.858622,1,...,0.877632,0.876179,1.1994,1.07946,0.93094,1.060638,1.136274,0.03745,0.19286,0.76969
2,"Serie C, Girone B",2023-01-22,reggiana,alessandria,1.0,0,1591.770055,1400.103499,191.666556,1,...,0.877632,2.338831,0.602373,0.766657,1.73913,2.70812,0.766657,0.291402,0.367521,0.341078
3,"Serie C, Girone B",2023-01-22,recanatese,san_donato_tavarnelle,0.5,0,1446.361823,1429.085954,17.27587,1,...,0.877632,1.314268,1.1994,1.149985,1.55922,1.175924,1.149985,0.002798,0.083341,0.91386
4,"Serie C, Girone B",2023-01-22,olbia,rimini,0.0,0,1416.167282,1475.664696,-59.497415,1,...,0.877632,1.07946,1.259507,0.985701,1.31934,1.136274,0.570669,0.695975,0.203828,0.100197


In [175]:
# dfs_future.to_csv("../data/predictions/wavenet_3.csv")

## Combine team and opponent to home and away

In [214]:
dfs_future = pd.read_csv("../data/predictions/wavenet_3.csv", index_col=0)
dfs_future['date'] = pd.to_datetime(dfs_future['date'])
dfs_future.head()

Unnamed: 0,league,date,team,opponent,result,result_0,elo_team,elo_opponent,elo_diff,home,...,league_away_goals_conceded_avg_7,team_attack_strength_7,team_defense_strength_7,opponent_attack_strength_7,opponent_defense_strength_7,team_lambda_7,opponent_lambda_7,loss,draw,win
0,"Serie C, Girone B",2023-01-22,carrarese,virtus_entella,0.0,0,1452.000494,1542.043414,-90.04292,0,...,0.877632,0.985701,2.158921,1.069753,1.290323,0.778185,1.351267,0.103082,0.442446,0.454472
1,"Serie C, Girone B",2023-01-22,cesena,fermana,1.0,0,1547.000538,1441.141915,105.858622,1,...,0.877632,0.876179,1.1994,1.07946,0.93094,1.060638,1.136274,0.03745,0.19286,0.76969
2,"Serie C, Girone B",2023-01-22,reggiana,alessandria,1.0,0,1591.770055,1400.103499,191.666556,1,...,0.877632,2.338831,0.602373,0.766657,1.73913,2.70812,0.766657,0.291402,0.367521,0.341078
3,"Serie C, Girone B",2023-01-22,recanatese,san_donato_tavarnelle,0.5,0,1446.361823,1429.085954,17.27587,1,...,0.877632,1.314268,1.1994,1.149985,1.55922,1.175924,1.149985,0.002798,0.083341,0.91386
4,"Serie C, Girone B",2023-01-22,olbia,rimini,0.0,0,1416.167282,1475.664696,-59.497415,1,...,0.877632,1.07946,1.259507,0.985701,1.31934,1.136274,0.570669,0.695975,0.203828,0.100197


In [225]:
dfs_future[(dfs_future['team']=='cesena') | (dfs_future['opponent']=='cesena')].head()

Unnamed: 0,league,date,team,opponent,result,result_0,elo_team,elo_opponent,elo_diff,home,...,league_away_goals_conceded_avg_7,team_attack_strength_7,team_defense_strength_7,opponent_attack_strength_7,opponent_defense_strength_7,team_lambda_7,opponent_lambda_7,loss,draw,win
1,"Serie C, Girone B",2023-01-22,cesena,fermana,1.0,0,1547.000538,1441.141915,105.858622,1,...,0.877632,0.876179,1.1994,1.07946,0.93094,1.060638,1.136274,0.03745,0.19286,0.76969
10,"Serie C, Girone B",2023-01-22,fermana,cesena,0.5,0,1441.141915,1547.000538,-105.858622,0,...,0.877632,1.07946,0.93094,0.876179,1.1994,1.136274,1.060638,0.00373,0.040092,0.956178
25,"Serie C, Girone B",2023-01-28,pontedera,cesena,0.5,0,1511.41452,1547.000538,-35.586017,1,...,0.877632,0.821418,1.13943,0.876179,1.1994,0.951115,1.060638,0.052965,0.304125,0.64291
36,"Serie C, Girone B",2023-01-28,cesena,pontedera,1.0,0,1547.000538,1511.41452,35.586017,0,...,0.877632,0.876179,1.1994,0.821418,1.13943,1.060638,0.951115,0.18203,0.528932,0.289038
54,"Serie C, Girone B",2023-01-31,recanatese,cesena,0.5,0,1446.361823,1547.000538,-100.638714,1,...,0.877632,1.314268,1.1994,0.876179,1.1994,1.175924,1.060638,0.045957,0.182985,0.771058


In [215]:
def transform_to_home_and_away(df):
    df['date'] = pd.to_datetime(df['date'])
    df_home = df[df['home'] == 1]
    df_away = df[df['home'] == 0]
    if 'result' in df_away.columns:
        df_away.drop('result', axis=1, inplace=True)

    df_home.rename(columns={'team': 'home_team', 'opponent': 'away_team', 'elo_team': 'elo_home', 'elo_opponent': 'elo_away',
                            'loss': 'A', 'draw': 'D', 'win': 'H'}, inplace=True)
    df_away.rename(columns={'team': 'away_team', 'opponent': 'home_team', 'elo_team': 'elo_away', 'elo_opponent': 'elo_home',
                            'loss': 'H', 'draw': 'D', 'win': 'A'}, inplace=True)

    df_combined = pd.concat([df_home, df_away])
    df_combined = df_combined.groupby(['date', 'home_team', 'away_team', 'elo_home', 'elo_away']).mean()
    df_combined.reset_index(inplace=True, drop=False)
    if 'result' in df_combined.columns:
        df_combined.drop(['result'], axis=1, inplace=True)
    df_combined['elo_diff'] = df_combined['elo_home'] - df_combined['elo_away']

    df_ftr = df_home.drop(['A', 'D', 'H', 'elo_diff', 'elo_home', 'elo_away', 'home'], axis=1)
    df_ftr['date'] = pd.to_datetime(df_ftr['date'])

    df_combined = df_combined.merge(df_ftr, on=['date', 'home_team', 'away_team'], how='outer'
                                    )

    return df_combined

In [216]:
dfs_future_test = transform_to_home_and_away(dfs_future)
dfs_future_test.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_away.drop('result', axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_home.rename(columns={'team': 'home_team', 'opponent': 'away_team', 'elo_team': 'elo_home', 'elo_opponent': 'elo_away',
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_away.rename(columns={'team': 'away_team', 'opponent': 'home_team', 'elo_team': 'elo_away', 'elo_opponent': 'elo_home',
  df_combined = df_combined.groupby(['date', 'home_team', 'away_team', 'elo_home', 'elo

Unnamed: 0,date,home_team,away_team,elo_home,elo_away,result_0_x,elo_diff,home,team_goals_scored_avg_x,team_goals_conceded_avg_x,...,league_home_goals_conceded_7_y,league_away_goals_conceded_7_y,league_home_goals_conceded_avg_7_y,league_away_goals_conceded_avg_7_y,team_attack_strength_7_y,team_defense_strength_7_y,opponent_attack_strength_7_y,opponent_defense_strength_7_y,team_lambda_7_y,opponent_lambda_7_y
0,2023-01-22,aquila_montevarchi,siena,1382.393435,1449.810106,0.0,-67.416671,0.5,0.894737,1.131579,...,0.6,0.9,0.961111,0.877632,1.43928,0.93094,1.1994,0.821418,1.515032,2.272548
1,2023-01-22,cesena,fermana,1547.000538,1441.141915,0.0,105.858622,0.5,1.447368,1.0,...,0.6,0.9,0.961111,0.877632,0.876179,1.1994,1.07946,0.93094,1.060638,1.136274
2,2023-01-22,gubbio,ancona,1501.756967,1513.115807,0.0,-11.35884,0.5,1.473684,0.947368,...,0.6,0.9,0.961111,0.877632,1.85907,1.040462,2.038981,1.423791,2.543991,1.931666
3,2023-01-22,imolese,torres,1331.036279,1462.122421,0.0,-131.086142,0.5,0.657895,1.105263,...,0.6,0.9,0.961111,0.877632,0.83958,1.204746,1.040462,1.07946,0.83958,1.423791
4,2023-01-22,lucchese,fiorenzuola,1470.250163,1437.003524,0.0,33.246638,0.5,1.078947,0.894737,...,0.6,0.9,0.961111,0.877632,0.821418,1.1994,0.95952,1.040462,0.734953,1.464531


In [217]:
dfs_future_test['home_team'] = dfs_future_test['home_team'].str.title().replace('_', ' ', regex=True)
dfs_future_test['away_team'] = dfs_future_test['away_team'].str.title().replace('_', ' ', regex=True)
dfs_future_test = dfs_future_test[['date', 'home_team', 'away_team', 'elo_home', 'elo_away', 'A', 'D', 'H']]
dfs_future_test[['team_goals_scored', 'opponent_goals_scored', 'result']] = np.nan
dfs_future_test = dfs_future_test[['date', 'home_team', 'away_team', 'elo_home', 'elo_away',
                                   'team_goals_scored', 'opponent_goals_scored', 'result', 'A', 'D', 'H']]


In [218]:
dfs_future_test

Unnamed: 0,date,home_team,away_team,elo_home,elo_away,team_goals_scored,opponent_goals_scored,result,A,D,H
0,2023-01-22,Aquila Montevarchi,Siena,1382.393435,1449.810106,,,,0.443036,0.059141,0.497822
1,2023-01-22,Cesena,Fermana,1547.000538,1441.141915,,,,0.496814,0.116476,0.386710
2,2023-01-22,Gubbio,Ancona,1501.756967,1513.115807,,,,0.610321,0.291921,0.097758
3,2023-01-22,Imolese,Torres,1331.036279,1462.122421,,,,0.430573,0.144778,0.424649
4,2023-01-22,Lucchese,Fiorenzuola,1470.250163,1437.003524,,,,0.517110,0.073113,0.409777
...,...,...,...,...,...,...,...,...,...,...,...
155,2023-04-23,Rimini,Aquila Montevarchi,1475.664696,1382.393435,,,,0.286814,0.221114,0.492072
156,2023-04-23,San Donato Tavarnelle,Fiorenzuola,1429.085954,1437.003524,,,,0.561796,0.272640,0.165563
157,2023-04-23,Siena,Virtus Entella,1449.810106,1542.043414,,,,0.312028,0.022428,0.665544
158,2023-04-23,Torres,Fermana,1462.122421,1441.141915,,,,0.426752,0.285634,0.287614


In [219]:
dfs_future_test.to_csv("../data/predictions/wavenet_3_h_a.csv")

In [220]:
def duplicate_data(df):
    df_home = df[[
#                 'league',
                'date',
                'home_team',
                'away_team',
                'elo_home',
                'elo_away',
                'A',
                'D',
                'H',
                'result',
                ]]

    df_away = df_home.copy()
    df_home.rename(columns={
                        'home_team': 'team',
                        'away_team': 'opponent',
                        'elo_home': 'elo_team',
                        'elo_away': 'elo_opponent',
                        'A': 'loss',
                        'D': 'draw',
                        'H': 'win',
                        }, inplace=True)
    df_home['home'] = 1

    df_away.rename(columns={
        'home_team': 'opponent',
        'away_team': 'team',
        'elo_home': 'elo_opponent',
        'elo_away': 'elo_team',
        'A': 'win',
        'D': 'draw',
        'H': 'loss',
    }, inplace=True)
    df_away['home'] = 0

    df_combined = pd.concat([df_home, df_away])

    df_combined.loc[df['result'] == 0, ['result']] = 'A'
    df_combined.loc[df['result'] == 0.5, ['result']] = 'D'
    df_combined.loc[df['result'] == 1, ['result']] = 'H'

    return df_combined

In [221]:
dfs_future_t_o = duplicate_data(dfs_future_test)
dfs_future_t_o

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_home.rename(columns={
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_home['home'] = 1


Unnamed: 0,date,team,opponent,elo_team,elo_opponent,loss,draw,win,result,home
0,2023-01-22,Aquila Montevarchi,Siena,1382.393435,1449.810106,0.443036,0.059141,0.497822,,1
1,2023-01-22,Cesena,Fermana,1547.000538,1441.141915,0.496814,0.116476,0.386710,,1
2,2023-01-22,Gubbio,Ancona,1501.756967,1513.115807,0.610321,0.291921,0.097758,,1
3,2023-01-22,Imolese,Torres,1331.036279,1462.122421,0.430573,0.144778,0.424649,,1
4,2023-01-22,Lucchese,Fiorenzuola,1470.250163,1437.003524,0.517110,0.073113,0.409777,,1
...,...,...,...,...,...,...,...,...,...,...
155,2023-04-23,Aquila Montevarchi,Rimini,1382.393435,1475.664696,0.492072,0.221114,0.286814,,0
156,2023-04-23,Fiorenzuola,San Donato Tavarnelle,1437.003524,1429.085954,0.165563,0.272640,0.561796,,0
157,2023-04-23,Virtus Entella,Siena,1542.043414,1449.810106,0.665544,0.022428,0.312028,,0
158,2023-04-23,Fermana,Torres,1441.141915,1462.122421,0.287614,0.285634,0.426752,,0


In [222]:
dfs_future_t_o['team'] = dfs_future_t_o['team'].str.title().replace('_', ' ', regex=True)
dfs_future_t_o['opponent'] = dfs_future_t_o['opponent'].str.title().replace('_', ' ', regex=True)
dfs_future_t_o[['rest_days', 'result', 'team_goals_scored', 'opponent_goals_scored']] = np.nan
dfs_future_t_o = dfs_future_t_o[['date', 'team', 'opponent', 'elo_team', 'elo_opponent',
                                'loss', 'draw', 'win', 'home', 'rest_days', 'result',
                                'team_goals_scored', 'opponent_goals_scored']]

In [223]:
dfs_future_t_o.to_csv("../data/predictions/wavenet_3_t_o.csv")