In [243]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F

%matplotlib inline

In [244]:
class Loader:

    def __init__(self, files):
        self.files = files

    def get_data(self):
        dfs = []
        for file in self.files:
            df = self.load_past_matches(file)
            dfs.append(df)
        
        df_join = self.join_data(dfs[0], dfs[1])

        return df_join

    def load_past_matches(self, file):
        df = pd.read_csv(f'../data/{file}')
        df.drop('Unnamed: 0', axis=1, inplace=True)
        df['date'] = pd.to_datetime(df['date']).dt.date

        return df

    def join_data(self, df1, df2):
        df = pd.merge(df1, df2,  how='inner',
            left_on=['league', 'date','team', 'opponent', 'home'],
            right_on=['league', 'date','team', 'opponent', 'home'])
        df.sort_values(by=['date', 'league', 'team', 'opponent'], inplace=True)
        df.reset_index(inplace=True, drop=True)
              
        return df

In [245]:
def build_dataset(df):
    df_copy = df.copy()
    df_copy.reset_index(inplace=True, drop=True)
    date = df_copy[['date']].iloc[:,0]
    df_copy.drop(['league', 'date', 'team', 'opponent'], axis=1, inplace=True)
    df_copy['date'] = date
    df_copy.sort_values(by=['date'], inplace=True)
    df_copy.drop(['date'], axis=1, inplace=True)
    
    X = df_copy.drop(['result'], axis=1).to_numpy()
    Y = np.array(df_copy['result']) / 0.5
    
    X = torch.tensor(X).float()
    Y = torch.tensor(Y).long()
    
    return X, Y

In [246]:
def add_past_to_row(df, i):
    df_past = df.copy()
    df_past.index += i
    df_past.rename(columns={c: c+f'_{i}' for c in df_past.columns if c not in ['league',
                                                                               'date',
                                                                               'team',
                                                                               'opponent']}, inplace=True)
    return df_past

In [247]:
def build_matches_dataset(df, past_matches, team):
    dfs_past = []
    df_team = df[df['team']==team]
    for i in range(1, past_matches+1):
        df_past = add_past_to_row(df_team, i)
        dfs_past.append(df_past)

    df_team_joined = df_team.copy()
    for df_past in dfs_past:
        df_team_joined = pd.concat([df_team_joined, df_past],
                                    axis=1,
                                    )
    df_team_joined = df_team_joined[past_matches:-past_matches]

    return df_team_joined

In [248]:
def build_teams_dataset(df, past_matches):
    dfs = []
    for team in df['team'].unique():
        df_team_joined = build_matches_dataset(df, past_matches, team)
        dfs.append(df_team_joined)
    dfs = pd.concat(dfs)
    dfs.insert(5, 'result_0', 0)

    return dfs

In [249]:
def build_wavenet_dataset_past_future(df, future_matches, past_matches=7):
    df_copy = df.copy()
    df_copy.sort_values(by=['team', 'date'], inplace=True)
    df_copy.reset_index(inplace=True, drop=True)
    dfs = build_teams_dataset(df_copy, past_matches)
    dfs_future = add_stats_to_future(dfs, future_matches)
    dfs = dfs.loc[:,~dfs.columns.duplicated()].copy()
    dfs = dfs[dfs['date']<future_date]
    X, Y = build_dataset(dfs)
    
    return X, Y, dfs, dfs_future

#     return dfs

In [250]:
FILES = ["elos_matches.csv", "goals_matches.csv"]
loader = Loader(FILES)
data = loader.get_data()
data.shape

(75212, 39)

In [251]:
data.drop(['team_goals_scored',
           'opponent_goals_scored',
           'team_goals_conceded',
           'opponent_goals_conceded'], axis=1, inplace=True)

In [252]:
def load_future_matches():
    df = pd.read_csv('../data/future_matches.csv', parse_dates=True, dayfirst=True)
    df['date'] = pd.to_datetime(df['date'], dayfirst=True)
    df.drop('Unnamed: 0', axis=1, inplace=True)
    df = duplicate_to_team_and_opponent(df)
    df.sort_values(by='date', inplace=True)
    return df


def add_stats_to_future(stats, future):
    stats = get_final_entry(stats, 'team')
    stats_opp = team_to_opponent(stats)

    df_future = pd.merge(future, stats, how='left', on='team')
    df_future = pd.merge(df_future, stats_opp, how='left', on='opponent')
    df_future['elo_diff'] = df_future['elo_team'] - df_future['elo_opponent']
    df_future['date'] = pd.to_datetime(df_future['date'], dayfirst=True)
    df_future['date'] = df_future['date'].dt.date

    df_future.sort_values(by='date', inplace=True)

    return df_future


def get_final_entry(df, team_or_opponent):
    df = df.copy()
    df = df.loc[:,~df.columns.duplicated()].copy()
    df.sort_values(by='date', inplace=True)
    df.reset_index(inplace=True, drop=True)
    df.drop_duplicates(subset=team_or_opponent, keep='last', inplace=True)
    df = df.loc[:, df.columns.str.contains(team_or_opponent) | df.columns.str.contains('league_') |
               df.columns.str.contains('elo_diff') | df.columns.str.contains('^home_\\d', regex=True) |
               df.columns.str.contains('result')]

    return df


def duplicate_to_team_and_opponent(df_matches):
    df_matches_copy = df_matches.copy()
    df_matches = df_matches.rename(columns={'pt1': 'team', 'pt2': 'opponent',
                                            })
    df_matches_copy = df_matches_copy.rename(columns={'pt2': 'team', 'pt1': 'opponent',
                                                    })
    df_matches_copy = df_matches_copy[['league', 'date', 'team', 'opponent' 
                                        ]]
    df_matches.loc[:, 'home'] = 1
    df_matches_copy.loc[:, 'home'] = 0
    df_matches = pd.concat([df_matches, df_matches_copy])
    df_matches.sort_values(by='date', inplace=True)

    return df_matches


def team_to_opponent(df):
    df_opponent = df.copy()
    df_opponent = df_opponent.loc[:, df_opponent.columns.str.contains("team")]
    df_opponent.columns = df_opponent.columns.str.replace("team", "opponent")

    return df_opponent

def build_future_dataset(df):
    df_copy = df.copy()
    df_copy.reset_index(inplace=True, drop=True)
    date = df_copy[['date']].iloc[:,0]
    df_copy.drop(['league', 'date', 'team', 'opponent'], axis=1, inplace=True)
    df_copy['date'] = date
    df_copy.sort_values(by=['date'], inplace=True)
    df_copy.drop(['date'], axis=1, inplace=True)
    
    X = df_copy.drop(['result'], axis=1).to_numpy()
    X = torch.tensor(X).float()
    
    return X

In [253]:
future_data = load_future_matches()
future_data.reset_index(inplace=True, drop=True)
future_date = future_data['date'][0]
future_data.head()

Unnamed: 0,date,team,opponent,league,home
0,2023-01-22,imolese,torres,"Serie C, Girone B",1
1,2023-01-22,aquila_montevarchi,siena,"Serie C, Girone B",1
2,2023-01-22,olbia,rimini,"Serie C, Girone B",1
3,2023-01-22,vis_pesaro,pontedera,"Serie C, Girone B",1
4,2023-01-22,lucchese,fiorenzuola,"Serie C, Girone B",1


In [254]:
data.head()

Unnamed: 0,league,date,team,opponent,result,elo_team,elo_opponent,elo_diff,home,team_goals_scored_avg,...,league_home_goals_conceded,league_away_goals_conceded,league_home_goals_conceded_avg,league_away_goals_conceded_avg,team_attack_strength,team_defense_strength,opponent_attack_strength,opponent_defense_strength,team_lambda,opponent_lambda
0,Serie A,1997-08-31,atalanta,bologna,1.0,1500.0,1500.0,0.0,1,0.0,...,1.444444,1.777778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Serie A,1997-08-31,bari,parma,0.0,1500.0,1500.0,0.0,1,0.0,...,1.444444,1.777778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Serie A,1997-08-31,bologna,atalanta,0.0,1500.0,1500.0,-0.0,0,0.0,...,1.444444,1.777778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Serie A,1997-08-31,brescia,inter_milan,0.0,1500.0,1500.0,-0.0,0,0.0,...,1.444444,1.777778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Serie A,1997-08-31,empoli,roma,0.0,1500.0,1500.0,0.0,1,0.0,...,1.444444,1.777778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [255]:
data.tail()

Unnamed: 0,league,date,team,opponent,result,elo_team,elo_opponent,elo_diff,home,team_goals_scored_avg,...,league_home_goals_conceded,league_away_goals_conceded,league_home_goals_conceded_avg,league_away_goals_conceded_avg,team_attack_strength,team_defense_strength,opponent_attack_strength,opponent_defense_strength,team_lambda,opponent_lambda
75207,Serie B,2023-01-21,sudtirol,venezia,0.5,1505.931774,1432.18273,73.749044,0,0.0,...,0.0,0.0,0.964912,1.129152,0.0,0.0,1.072066,2.018182,0.0,0.0
75208,Serie B,2023-01-21,ternana,reggina,0.5,1445.953023,1493.083841,-47.130819,0,0.947368,...,0.0,0.0,0.964912,1.129152,1.254545,1.491571,1.538182,1.090909,1.320574,2.590623
75209,Serie B,2023-01-21,venezia,sudtirol,0.5,1432.18273,1505.931774,-73.749044,1,1.263158,...,0.0,0.0,0.964912,1.129152,1.072066,2.018182,0.0,0.0,0.0,0.0
75210,Serie B,2023-01-22,brescia,frosinone,0.5,1396.400872,1534.508235,-138.107363,1,0.684211,...,0.0,0.0,0.912281,1.023889,1.336493,0.923077,1.211538,1.4393,1.969569,1.020243
75211,Serie B,2023-01-22,frosinone,brescia,0.5,1534.508235,1396.400872,138.107363,0,1.421053,...,0.0,0.0,0.912281,1.023889,1.211538,1.4393,1.336493,0.923077,1.020243,1.969569


In [256]:
data['league'].unique()

array(['Serie A', 'Italian Serie A', 'Coppa Italia',
       'Serie B, Promotion Playoffs', 'Italian Coppa Italia',
       'Italian Serie B', 'Supercoppa', 'Serie B',
       'Serie B, Relegation Playoffs', 'Coppa Italia Serie C, Girone D',
       'Coppa Italia Serie C, Girone H', 'Coppa Italia Serie C, Girone A',
       'Coppa Italia Serie C, Girone B', 'Coppa Italia Serie C, Girone C',
       'Coppa Italia Serie C, Girone E', 'Coppa Italia Serie C, Girone F',
       'Coppa Italia Serie C, Girone I', 'Coppa Italia Serie C, Girone L',
       'Coppa Italia Serie C, Girone M', 'Coppa Italia Serie C, Girone G',
       'Coppa Italia Serie C, Knockout stage', 'Italian Supercoppa',
       'Serie D, Girone A', 'Serie D, Girone E', 'Serie D, Girone F',
       'Serie D, Girone H', 'Serie D, Girone B', 'Serie D, Girone C',
       'Serie D, Girone D', 'Serie D, Girone G', 'Serie D, Girone I',
       'Primavera Cup, Knockout stage', 'Serie D, Girone E, Playoffs',
       'Serie C, Girone A', 'Serie C

In [257]:
def cut_to_league_and_season(df, league, season):
    df_temp = pd.DataFrame(columns=df.columns)
    start_date = pd.to_datetime(f'{season}-08-01')
    for i in range(len(league)):
#         df_temp = pd.concat(df_temp, df[(df['league']==league[i]) & (df['date']>=start_date)])
        df_temp = df_temp.append(df[(df['league']==league[i]) & (df['date']>=start_date)])
    return df_temp

def update_teams(df, teams):
    df_copy = df.copy()
    df_copy['team'] = df_copy['team'].replace(teams)
    df_copy['opponent'] = df_copy['opponent'].replace(teams)
    return df_copy

In [258]:
teams_future = {
    'inter': 'inter_milan',
    'verona': 'hellas_verona',
}

teams = {
    'internazionale': 'inter_milan',
    'milan': 'ac_milan',
    'roma': 'as_roma',
}

future_data = update_teams(future_data, teams_future)
data_serie_a_22 = cut_to_league_and_season(data, ['Serie C, Girone B',
#                                                   'Italian Serie B'
                                                 ], '2022')
data_serie_a_22 = update_teams(data_serie_a_22, teams)
data_serie_a_22.head()

  df_temp = df_temp.append(df[(df['league']==league[i]) & (df['date']>=start_date)])
  df_temp = df_temp.append(df[(df['league']==league[i]) & (df['date']>=start_date)])


Unnamed: 0,league,date,team,opponent,result,elo_team,elo_opponent,elo_diff,home,team_goals_scored_avg,...,league_home_goals_conceded,league_away_goals_conceded,league_home_goals_conceded_avg,league_away_goals_conceded_avg,team_attack_strength,team_defense_strength,opponent_attack_strength,opponent_defense_strength,team_lambda,opponent_lambda
72658,"Serie C, Girone B",2022-09-04,alessandria,imolese,0.0,1384.304185,1347.692379,36.611806,0,0.842105,...,0.444444,1.111111,0.802339,1.79152,1.246356,0.998858,0.675698,1.311953,1.311953,1.209143
72659,"Serie C, Girone B",2022-09-04,aquila_montevarchi,gubbio,0.0,1404.517486,1441.253106,-36.73562,0,1.105263,...,0.444444,1.111111,0.802339,1.79152,0.655977,1.028236,0.998858,1.11516,0.586926,1.840001
72660,"Serie C, Girone B",2022-09-04,carrarese,cesena,1.0,1380.131352,1470.142314,-90.010963,0,1.0,...,0.444444,1.111111,0.802339,1.79152,0.655977,1.11637,1.028236,1.049563,0.552401,2.056471
72661,"Serie C, Girone B",2022-09-04,cesena,carrarese,0.0,1470.142314,1380.131352,90.010963,1,1.473684,...,0.444444,1.111111,0.802339,1.79152,1.028236,1.049563,0.655977,1.11637,2.056471,0.552401
72662,"Serie C, Girone B",2022-09-04,fermana,fiorenzuola,0.0,1339.66911,1397.985896,-58.316786,0,0.736842,...,0.444444,1.111111,0.802339,1.79152,0.721574,0.851967,0.499429,1.771137,1.025395,0.762286


In [259]:
set(data_serie_a_22['team'].unique().tolist()) - set(future_data['team'].unique().tolist())

set()

In [260]:
set(future_data['team'].unique().tolist()) - set(data_serie_a_22['team'].unique().tolist())

set()

In [261]:
future_data['team']

0                 imolese
1      aquila_montevarchi
2                   olbia
3              vis_pesaro
4                lucchese
              ...        
315           fiorenzuola
316        virtus_entella
317               fermana
318            vis_pesaro
319             carrarese
Name: team, Length: 320, dtype: object

In [262]:
Xall, Yall, dfs, dfs_future = build_wavenet_dataset_past_future(data, future_data, 7)

  dfs = dfs[dfs['date']<future_date]


In [263]:
dfs.tail()

Unnamed: 0,league,date,team,opponent,result,result_0,elo_team,elo_opponent,elo_diff,home,...,league_home_goals_conceded_7,league_away_goals_conceded_7,league_home_goals_conceded_avg_7,league_away_goals_conceded_avg_7,team_attack_strength_7,team_defense_strength_7,opponent_attack_strength_7,opponent_defense_strength_7,team_lambda_7,opponent_lambda_7
75207,"Serie D, Girone E",2015-02-15,voluntas_spoleto,sangiovannese,0.5,0,1484.477775,1461.126851,23.350924,1.0,...,0.5,1.625,1.126358,1.536759,0.0,0.0,0.856211,0.607454,0.0,0.0
75208,"Serie D, Girone E",2015-02-22,voluntas_spoleto,follonica_gavorrano,0.0,0,1483.47115,1464.373885,19.097264,0.0,...,1.666667,1.555556,1.100042,1.46439,1.221992,1.052592,1.052592,1.006347,1.800831,1.21879
75209,"Serie D, Girone E",2015-03-01,voluntas_spoleto,colligiana,1.0,0,1467.647484,1485.542335,-17.894851,1.0,...,2.0,0.0,1.19434,1.408104,0.0,0.0,0.971818,1.189823,0.0,0.0
75210,"Serie D, Girone E",2015-09-20,voluntas_spoleto,città_di_castello,1.0,0,1483.419384,1496.651919,-13.232535,1.0,...,0.666667,1.0,1.209378,1.30284,0.0,0.0,0.888746,1.087989,0.0,0.0
75211,"Serie D, Girone E",2016-02-07,voluntas_spoleto,jolly_montemurlo,1.0,0,1504.180742,1493.401177,10.779565,0.0,...,0.333333,2.111111,1.191834,1.30284,1.373517,0.971524,0.0,0.0,0.0,0.0


In [264]:
dfs_future = dfs_future[dfs.columns]

In [265]:
Xfu = build_future_dataset(dfs_future)

In [266]:
# for column in dfs.columns:
#     print(column)

In [267]:
# for column in dfs_future.columns:
#     print(column)

In [268]:
Xall.shape

torch.Size([71476, 248])

In [269]:
Xfu.shape

torch.Size([320, 248])

## Predicting Matches

In [270]:
PATH = "../src/model/trained_models/wavenet_4.pt"
model = torch.load(PATH)
model.train()

Sequential(
  (0): Conv1d(1, 32, kernel_size=(31,), stride=(31,))
  (1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): Tanh()
  (3): Conv1d(32, 64, kernel_size=(2,), stride=(2,))
  (4): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (5): Tanh()
  (6): Conv1d(64, 128, kernel_size=(2,), stride=(2,))
  (7): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (8): Tanh()
  (9): Flatten(start_dim=1, end_dim=-1)
  (10): Linear(in_features=256, out_features=3, bias=True)
)

In [271]:
@torch.no_grad()
def predict(x):
    x = x[:, None, :]
    logits = model(x)
    preds = torch.softmax(logits, dim=1)
    
    return preds

In [272]:
predictions = predict(Xfu)

In [273]:
dfs_future.reset_index(inplace=True, drop=True)
predictions_df = pd.DataFrame(predictions, columns=['loss', 'draw', 'win'])
dfs_future = pd.concat([dfs_future, predictions_df], axis=1)

In [274]:
dfs_future.head()

Unnamed: 0,league,date,team,opponent,result,result_0,elo_team,elo_opponent,elo_diff,home,...,league_away_goals_conceded_avg_7,team_attack_strength_7,team_defense_strength_7,opponent_attack_strength_7,opponent_defense_strength_7,team_lambda_7,opponent_lambda_7,loss,draw,win
0,"Serie C, Girone B",2023-01-22,imolese,torres,0.0,0,1295.913845,1429.372779,-133.458934,1,...,0.877632,0.83958,1.204746,1.040462,1.07946,0.83958,1.423791,0.543503,0.331015,0.125482
1,"Serie C, Girone B",2023-01-22,carrarese,virtus_entella,0.0,0,1416.431434,1509.830147,-93.398712,0,...,0.877632,0.985701,2.158921,1.069753,1.290323,0.778185,1.351267,0.485059,0.312025,0.202915
2,"Serie C, Girone B",2023-01-22,fiorenzuola,lucchese,0.0,0,1402.935299,1434.501377,-31.566078,0,...,0.877632,0.95952,1.040462,0.821418,1.1994,1.464531,0.734953,0.380048,0.410374,0.209578
3,"Serie C, Girone B",2023-01-22,ancona,gubbio,1.0,0,1461.610253,1466.81632,-5.206067,0,...,0.877632,2.038981,1.423791,1.85907,1.040462,1.931666,2.543991,0.085118,0.355527,0.559355
4,"Serie C, Girone B",2023-01-22,rimini,olbia,0.0,0,1434.276215,1380.726773,53.549442,0,...,0.877632,0.985701,1.31934,1.07946,1.259507,0.570669,1.136274,0.326042,0.294224,0.379734


In [275]:
dfs_future.to_csv("../data/predictions/wavenet_4_c_20220120.csv")

## Combine team and opponent to home and away

In [276]:
dfs_future = pd.read_csv("../data/predictions/wavenet_4_c_20220120.csv", index_col=0)
dfs_future['date'] = pd.to_datetime(dfs_future['date'])
dfs_future.head()

Unnamed: 0,league,date,team,opponent,result,result_0,elo_team,elo_opponent,elo_diff,home,...,league_away_goals_conceded_avg_7,team_attack_strength_7,team_defense_strength_7,opponent_attack_strength_7,opponent_defense_strength_7,team_lambda_7,opponent_lambda_7,loss,draw,win
0,"Serie C, Girone B",2023-01-22,imolese,torres,0.0,0,1295.913845,1429.372779,-133.458934,1,...,0.877632,0.83958,1.204746,1.040462,1.07946,0.83958,1.423791,0.543503,0.331015,0.125482
1,"Serie C, Girone B",2023-01-22,carrarese,virtus_entella,0.0,0,1416.431434,1509.830147,-93.398712,0,...,0.877632,0.985701,2.158921,1.069753,1.290323,0.778185,1.351267,0.485059,0.312025,0.202915
2,"Serie C, Girone B",2023-01-22,fiorenzuola,lucchese,0.0,0,1402.935299,1434.501377,-31.566078,0,...,0.877632,0.95952,1.040462,0.821418,1.1994,1.464531,0.734953,0.380048,0.410374,0.209578
3,"Serie C, Girone B",2023-01-22,ancona,gubbio,1.0,0,1461.610253,1466.81632,-5.206067,0,...,0.877632,2.038981,1.423791,1.85907,1.040462,1.931666,2.543991,0.085118,0.355527,0.559355
4,"Serie C, Girone B",2023-01-22,rimini,olbia,0.0,0,1434.276215,1380.726773,53.549442,0,...,0.877632,0.985701,1.31934,1.07946,1.259507,0.570669,1.136274,0.326042,0.294224,0.379734


In [277]:
# dfs_future[(dfs_future['team']=='cesena') | (dfs_future['opponent']=='cesena')].head()

In [278]:
def transform_to_home_and_away(df):
    df['date'] = pd.to_datetime(df['date'])
    df_home = df[df['home'] == 1]
    df_away = df[df['home'] == 0]
    if 'result' in df_away.columns:
        df_away.drop('result', axis=1, inplace=True)

    df_home.rename(columns={'team': 'home_team', 'opponent': 'away_team', 'elo_team': 'elo_home', 'elo_opponent': 'elo_away',
                            'loss': 'A', 'draw': 'D', 'win': 'H'}, inplace=True)
    df_away.rename(columns={'team': 'away_team', 'opponent': 'home_team', 'elo_team': 'elo_away', 'elo_opponent': 'elo_home',
                            'loss': 'H', 'draw': 'D', 'win': 'A'}, inplace=True)

    df_combined = pd.concat([df_home, df_away])
    df_combined = df_combined.groupby(['date', 'home_team', 'away_team', 'elo_home', 'elo_away']).mean()
    df_combined.reset_index(inplace=True, drop=False)
    if 'result' in df_combined.columns:
        df_combined.drop(['result'], axis=1, inplace=True)
    df_combined['elo_diff'] = df_combined['elo_home'] - df_combined['elo_away']

    df_ftr = df_home.drop(['A', 'D', 'H', 'elo_diff', 'elo_home', 'elo_away', 'home'], axis=1)
    df_ftr['date'] = pd.to_datetime(df_ftr['date'])

    df_combined = df_combined.merge(df_ftr, on=['date', 'home_team', 'away_team'], how='outer'
                                    )

    return df_combined

In [279]:
dfs_future_test = transform_to_home_and_away(dfs_future)
dfs_future_test.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_away.drop('result', axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_home.rename(columns={'team': 'home_team', 'opponent': 'away_team', 'elo_team': 'elo_home', 'elo_opponent': 'elo_away',
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_away.rename(columns={'team': 'away_team', 'opponent': 'home_team', 'elo_team': 'elo_away', 'elo_opponent': 'elo_home',
  df_combined = df_combined.groupby(['date', 'home_team', 'away_team', 'elo_home', 'elo

Unnamed: 0,date,home_team,away_team,elo_home,elo_away,result_0_x,elo_diff,home,team_goals_scored_avg_x,team_goals_conceded_avg_x,...,league_home_goals_conceded_7_y,league_away_goals_conceded_7_y,league_home_goals_conceded_avg_7_y,league_away_goals_conceded_avg_7_y,team_attack_strength_7_y,team_defense_strength_7_y,opponent_attack_strength_7_y,opponent_defense_strength_7_y,team_lambda_7_y,opponent_lambda_7_y
0,2023-01-22,aquila_montevarchi,siena,1350.280621,1413.9076,0.0,-63.626979,0.5,0.894737,1.131579,...,0.6,0.9,0.961111,0.877632,1.43928,0.93094,1.1994,0.821418,1.515032,2.272548
1,2023-01-22,cesena,fermana,1508.87421,1405.569408,0.0,103.304802,0.5,1.447368,1.0,...,0.6,0.9,0.961111,0.877632,0.876179,1.1994,1.07946,0.93094,1.060638,1.136274
2,2023-01-22,gubbio,ancona,1466.81632,1461.610253,0.0,5.206067,0.5,1.473684,0.947368,...,0.6,0.9,0.961111,0.877632,1.85907,1.040462,2.038981,1.423791,2.543991,1.931666
3,2023-01-22,imolese,torres,1295.913845,1429.372779,0.0,-133.458934,0.5,0.657895,1.105263,...,0.6,0.9,0.961111,0.877632,0.83958,1.204746,1.040462,1.07946,0.83958,1.423791
4,2023-01-22,lucchese,fiorenzuola,1434.501377,1402.935299,0.0,31.566078,0.5,1.078947,0.894737,...,0.6,0.9,0.961111,0.877632,0.821418,1.1994,0.95952,1.040462,0.734953,1.464531


In [280]:
dfs_future_test['home_team'] = dfs_future_test['home_team'].str.title().replace('_', ' ', regex=True)
dfs_future_test['away_team'] = dfs_future_test['away_team'].str.title().replace('_', ' ', regex=True)
dfs_future_test = dfs_future_test[['date', 'home_team', 'away_team', 'elo_home', 'elo_away', 'A', 'D', 'H']]
dfs_future_test[['team_goals_scored', 'opponent_goals_scored', 'result']] = np.nan
dfs_future_test = dfs_future_test[['date', 'home_team', 'away_team', 'elo_home', 'elo_away',
                                   'team_goals_scored', 'opponent_goals_scored', 'result', 'A', 'D', 'H']]


In [281]:
dfs_future_test

Unnamed: 0,date,home_team,away_team,elo_home,elo_away,team_goals_scored,opponent_goals_scored,result,A,D,H
0,2023-01-22,Aquila Montevarchi,Siena,1350.280621,1413.907600,,,,0.485936,0.355788,0.158275
1,2023-01-22,Cesena,Fermana,1508.874210,1405.569408,,,,0.556973,0.299605,0.143422
2,2023-01-22,Gubbio,Ancona,1466.816320,1461.610253,,,,0.471752,0.338211,0.190037
3,2023-01-22,Imolese,Torres,1295.913845,1429.372779,,,,0.481260,0.343438,0.175301
4,2023-01-22,Lucchese,Fiorenzuola,1434.501377,1402.935299,,,,0.107229,0.213790,0.678980
...,...,...,...,...,...,...,...,...,...,...,...
155,2023-04-23,Rimini,Aquila Montevarchi,1434.276215,1350.280621,,,,0.471780,0.324403,0.203818
156,2023-04-23,San Donato Tavarnelle,Fiorenzuola,1394.294372,1402.935299,,,,0.083233,0.267346,0.649421
157,2023-04-23,Siena,Virtus Entella,1413.907600,1509.830147,,,,0.529213,0.282572,0.188216
158,2023-04-23,Torres,Fermana,1429.372779,1405.569408,,,,0.377867,0.302483,0.319650


In [282]:
dfs_future_test.to_csv("../data/predictions/wavenet_4_h_a_c_20220120.csv")

In [220]:
def duplicate_data(df):
    df_home = df[[
#                 'league',
                'date',
                'home_team',
                'away_team',
                'elo_home',
                'elo_away',
                'A',
                'D',
                'H',
                'result',
                ]]

    df_away = df_home.copy()
    df_home.rename(columns={
                        'home_team': 'team',
                        'away_team': 'opponent',
                        'elo_home': 'elo_team',
                        'elo_away': 'elo_opponent',
                        'A': 'loss',
                        'D': 'draw',
                        'H': 'win',
                        }, inplace=True)
    df_home['home'] = 1

    df_away.rename(columns={
        'home_team': 'opponent',
        'away_team': 'team',
        'elo_home': 'elo_opponent',
        'elo_away': 'elo_team',
        'A': 'win',
        'D': 'draw',
        'H': 'loss',
    }, inplace=True)
    df_away['home'] = 0

    df_combined = pd.concat([df_home, df_away])

    df_combined.loc[df['result'] == 0, ['result']] = 'A'
    df_combined.loc[df['result'] == 0.5, ['result']] = 'D'
    df_combined.loc[df['result'] == 1, ['result']] = 'H'

    return df_combined

In [221]:
dfs_future_t_o = duplicate_data(dfs_future_test)
dfs_future_t_o

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_home.rename(columns={
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_home['home'] = 1


Unnamed: 0,date,team,opponent,elo_team,elo_opponent,loss,draw,win,result,home
0,2023-01-22,Aquila Montevarchi,Siena,1382.393435,1449.810106,0.443036,0.059141,0.497822,,1
1,2023-01-22,Cesena,Fermana,1547.000538,1441.141915,0.496814,0.116476,0.386710,,1
2,2023-01-22,Gubbio,Ancona,1501.756967,1513.115807,0.610321,0.291921,0.097758,,1
3,2023-01-22,Imolese,Torres,1331.036279,1462.122421,0.430573,0.144778,0.424649,,1
4,2023-01-22,Lucchese,Fiorenzuola,1470.250163,1437.003524,0.517110,0.073113,0.409777,,1
...,...,...,...,...,...,...,...,...,...,...
155,2023-04-23,Aquila Montevarchi,Rimini,1382.393435,1475.664696,0.492072,0.221114,0.286814,,0
156,2023-04-23,Fiorenzuola,San Donato Tavarnelle,1437.003524,1429.085954,0.165563,0.272640,0.561796,,0
157,2023-04-23,Virtus Entella,Siena,1542.043414,1449.810106,0.665544,0.022428,0.312028,,0
158,2023-04-23,Fermana,Torres,1441.141915,1462.122421,0.287614,0.285634,0.426752,,0


In [222]:
dfs_future_t_o['team'] = dfs_future_t_o['team'].str.title().replace('_', ' ', regex=True)
dfs_future_t_o['opponent'] = dfs_future_t_o['opponent'].str.title().replace('_', ' ', regex=True)
dfs_future_t_o[['rest_days', 'result', 'team_goals_scored', 'opponent_goals_scored']] = np.nan
dfs_future_t_o = dfs_future_t_o[['date', 'team', 'opponent', 'elo_team', 'elo_opponent',
                                'loss', 'draw', 'win', 'home', 'rest_days', 'result',
                                'team_goals_scored', 'opponent_goals_scored']]

In [223]:
dfs_future_t_o.to_csv("../data/predictions/wavenet_3_t_o.csv")