In [157]:
import code
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F

%matplotlib inline

In [158]:
print(torch.backends.mps.is_available())
print(torch.backends.mps.is_built())
torch.device("mps")

True
True


device(type='mps')

In [159]:
class Loader:

    def __init__(self, files):
        self.files = files

    def get_data(self):
        dfs = []
        for file in self.files:
            df = self.load_past_matches(file)
            dfs.append(df)
        
        df_join = self.join_data(dfs[0], dfs[1])

        return df_join

    def load_past_matches(self, file):
        df = pd.read_csv(f'../../data/{file}')
        df.drop('Unnamed: 0', axis=1, inplace=True)
        df['date'] = pd.to_datetime(df['date']).dt.date

        return df

    def join_data(self, df1, df2):
        df = pd.merge(df1, df2,  how='inner',
            left_on=['league', 'date','team', 'opponent', 'home'],
            right_on=['league', 'date','team', 'opponent', 'home'])
        df.sort_values(by=['date', 'league', 'team', 'opponent'], inplace=True)
        df.reset_index(inplace=True, drop=True)
              
        return df

In [160]:
class Wavenet:

    def __init__(self, df, future=None, past_matches=7, future_date=None):
        self.df = df
        self.future = future
        self.X = None
        self.Y = None
        self.dfs = None
        self.dfs_future = None
        self.past_matches = past_matches
        self.future_date = future_date
        self.index_columns = ['league', 'date', 'team', 'opponent', 'result']

    def set_up_data(self, df):
        df.drop(['team_goals_scored',
            'opponent_goals_scored',
            'team_goals_conceded',
            'opponent_goals_conceded'], axis=1, inplace=True)

    def build_dataset(self, df):
        df_copy = df.copy()
        df_copy.reset_index(inplace=True, drop=True)
        date = df_copy[['date']].iloc[:,0]
        df_copy.drop(['league', 'date', 'team', 'opponent'], axis=1, inplace=True)
        df_copy['date'] = date
        df_copy.sort_values(by=['date'], inplace=True)
        df_copy = df_copy[df_copy['date']<self.future_date]
        df_copy.drop(['date'], axis=1, inplace=True)
        
        self.X = df_copy.drop(['result'], axis=1).to_numpy()
        self.Y = np.array(df_copy['result']) / 0.5
        
        self.X = torch.tensor(self.X).float()
        self.Y = torch.tensor(self.Y).long()

    def add_past_to_row(self, df, i):
        df_past = df.copy()
        df_past.index += i
        df_past.rename(columns={c: c+f'_{i}' for c in df_past.columns if c not in ['league',
                                                                                'date',
                                                                                'team',
                                                                                'opponent']}, inplace=True)
        return df_past

    def build_matches_dataset(self, df, past_matches, team):
        dfs_past = []
        df_team = df[df['team']==team]
        for i in range(1, past_matches+1):
            df_past = self.add_past_to_row(df_team, i)
            dfs_past.append(df_past)

        df_team_joined = df_team.copy()
        for df_past in dfs_past:
            df_team_joined = pd.concat([df_team_joined, df_past],
                                        axis=1,
                                        )
        df_team_joined = df_team_joined[past_matches:-past_matches]

        return df_team_joined

    def build_teams_dataset(self, df, past_matches):
        dfs = []
        for team in df['team'].unique():
            df_team_joined = self.build_matches_dataset(df, past_matches, team)
            dfs.append(df_team_joined)
        dfs = pd.concat(dfs)
        dfs.insert(5, 'result_0', 0)
        dfs = self.add_opponent_past_matches(dfs)
        dfs = self.ordering_columns(dfs)

        return dfs
    
    def add_opponent_past_matches(self, df):
        df = df.loc[:,~df.columns.duplicated()].copy()
        df_copy = df.copy()
        keep_same = {'league', 'date', 'team', 'opponent', 'result'}
        df_copy.columns = ['{}{}'.format(c, '' if c in keep_same else '_y') for c in df_copy.columns]
        df_copy['result'] = 1 - df_copy['result']
        df_copy.rename(columns={'team': 'opponent', 'opponent': 'team'}, inplace=True)
        df_combined = pd.merge(df, df_copy, how='left',
                              left_on=['league', 'date', 'team', 'opponent', 'result'],
                              right_on=['league', 'date', 'team', 'opponent', 'result'])
        return df_combined

    def ordering_columns(self, df):
        index_columns = self.index_columns
        template_columns = ['result', 'elo_team', 'elo_opponent', 'elo_diff', 'home', 'team_goals_scored_avg',
                           'team_goals_conceded_avg', 'team_goals_scored_avg_home',
                           'team_goals_conceded_avg_home', 'team_goals_scored_avg_away',
                           'team_goals_conceded_avg_away', 'opponent_goals_scored_avg',
                           'opponent_goals_conceded_avg', 'opponent_goals_scored_avg_home',
                           'opponent_goals_conceded_avg_home', 'opponent_goals_scored_avg_away',
                           'opponent_goals_conceded_avg_away', 'league_home_goals_scored',
                           'league_away_goals_scored', 'league_home_goals_scored_avg',
                           'league_away_goals_scored_avg', 'league_home_goals_conceded',
                           'league_away_goals_conceded', 'league_home_goals_conceded_avg',
                           'league_away_goals_conceded_avg', 'team_attack_strength',
                           'team_defense_strength', 'opponent_attack_strength',
                           'opponent_defense_strength', 'team_lambda', 'opponent_lambda']
        template_columns_y = [s + f'_y' for s in template_columns]
        columns = [[index_columns + ['result_0'] + template_columns[1:] + ['result_0_y'] + template_columns_y[1:]]]
        for i in range(1, self.past_matches+1):
            team_cols = [s + f'_{i}' for s in template_columns]
            opp_cols = [s + f'_{i}_y' for s in template_columns]
            columns.append([team_cols, opp_cols])

        columns = [subitem for sublist in columns for item in sublist for subitem in item]
        df = df[columns]
        df.dropna(inplace=True)
        df.reset_index(inplace=True, drop=True)
        return df
    
    def get_final_entry(self, df, team_or_opponent):
        df = df.copy()
        df = df.loc[:,~df.columns.duplicated()].copy()
        df.sort_values(by='date', inplace=True)
        df.reset_index(inplace=True, drop=True)
        df.drop_duplicates(subset=team_or_opponent, keep='last', inplace=True)
        df = df.loc[:, ~df.columns.str.contains('_y')]
        df = df.drop(['home'], axis=1)
        return df
    
    def team_to_opponent(self, df):
        df_opponent = df.copy()     
        keep_same = {'league', 'date', 'team', 'opponent', 'result'}
        df_opponent.columns = ['{}{}'.format(c, '' if c in keep_same else '_y') for c in df_opponent.columns]
        df_opponent['result'] = 1 - df_opponent['result']
        df_opponent = self.drop_common_columns(df_opponent, 'opponent')
        return df_opponent
    
    def drop_common_columns(self, df, team_or_opp):
        columns_to_drop = [item for item in self.index_columns if item not in [team_or_opp]]
        df.drop(columns=columns_to_drop, axis=1, inplace=True)
        return df
    
    def add_stats_to_future(self, stats, future):
        stats = self.get_final_entry(stats, 'team')
        stats_opp = self.team_to_opponent(stats)
        stats = self.drop_common_columns(stats, 'team')

        df_future = pd.merge(future, stats, how='left', on='team')
        df_future = pd.merge(df_future, stats_opp, how='left', on='opponent')
        df_future['home_y'] = 1 - df_future['home']
        df_future['date'] = pd.to_datetime(df_future['date'], dayfirst=True)
        df_future['date'] = df_future['date'].dt.date
        df_future.sort_values(by='date', inplace=True)
#         df_future.drop_duplicates(subset=['team', 'opponent'], inplace=True)

        return df_future
    
    def remove_duplicate_columns(self, df):
        df = df.loc[:,~df.columns.duplicated()].copy()
        return df

    def build_wavenet_dataset(self):
        df_copy = self.df.copy()
        df_copy.sort_values(by=['team', 'date'], inplace=True)
        df_copy.reset_index(inplace=True, drop=True)
        self.dfs = self.build_teams_dataset(df_copy, self.past_matches)
        self.build_dataset(self.dfs)
        
    def order_date(self, df):
        df = df.sort_values(by=['team', 'date'])
        df = df.reset_index(drop=True)
        return df

    def build_wavenet_dataset_past_future(self):
        df_copy = self.df.copy()
        self.set_up_data(df_copy)
        df_copy.sort_values(by=['team', 'date'], inplace=True)
        df_copy.reset_index(inplace=True, drop=True)
        self.dfs = self.build_teams_dataset(df_copy, self.past_matches)
        self.dfs_future = self.add_stats_to_future(self.dfs, self.future)
        self.dfs_future = self.dfs_future[self.dfs.drop(['result'], axis=1).columns]
        self.dfs_future = self.order_date(self.dfs_future)
        self.dfs = self.dfs.loc[:,~self.dfs.columns.duplicated()].copy()
#         self.dfs = self.dfs[self.dfs['date']<future_date]
#         self.build_dataset(self.dfs)
#         self.dfs = self.remove_duplicate_columns(self.dfs)

In [161]:
def load_future_matches():
    df = pd.read_csv('../../data/future_matches.csv', parse_dates=True, dayfirst=True)
    df['date'] = pd.to_datetime(df['date'], dayfirst=True)
    df.drop('Unnamed: 0', axis=1, inplace=True)
    df = duplicate_to_team_and_opponent(df)
    df.reset_index(inplace=True, drop=True)
    return df

def duplicate_to_team_and_opponent(df_matches):
    df_matches_copy = df_matches.copy()
    df_matches = df_matches.rename(columns={'pt1': 'team', 'pt2': 'opponent',
                                            })
    df_matches_copy = df_matches_copy.rename(columns={'pt2': 'team', 'pt1': 'opponent',
                                                    })
    df_matches_copy = df_matches_copy[['league', 'date', 'team', 'opponent' 
                                        ]]
    df_matches.loc[:, 'home'] = 1
    df_matches_copy.loc[:, 'home'] = 0
    df_matches = pd.concat([df_matches, df_matches_copy])
    df_matches.sort_values(by='date', inplace=True)

    return df_matches


def build_future_dataset(df):
    df_copy = df.copy()
    df_copy.reset_index(inplace=True, drop=True)
    date = df_copy[['date']].iloc[:,0]
    df_copy.drop(['league', 'date', 'team', 'opponent'], axis=1, inplace=True)
    df_copy['date'] = date
    df_copy.sort_values(by=['date'], inplace=True)
    df_copy.drop(['date'], axis=1, inplace=True)

    X = df_copy.to_numpy()
    X = torch.tensor(X).float()
    
    return X

In [162]:
def add_stats_to_future(stats, future):
    columns = stats.drop(['result'], axis=1).columns
    stats = get_final_entry(stats, 'team')
    stats_opp = team_to_opponent(stats)

    df_future = pd.merge(future, stats, how='left', on='team')
    df_future = pd.merge(df_future, stats_opp, how='left', on='opponent')
    df_future['elo_diff'] = df_future['elo_team'] - df_future['elo_opponent']
    df_future['date'] = pd.to_datetime(df_future['date'], dayfirst=True)
    df_future['date'] = df_future['date'].dt.date

    df_future.sort_values(by='date', inplace=True)
    df_future = df_future[columns]

    return df_future


def get_final_entry(df, team_or_opponent):
    df.sort_values(by='date', inplace=True)
    df.reset_index(inplace=True, drop=True)
    df.drop_duplicates(subset=team_or_opponent, keep='last', inplace=True)
    df = df.loc[:, df.columns.str.contains(team_or_opponent) | df.columns.str.contains('league_')]

    return df


def duplicate_to_team_and_opponent(df_matches):
    df_matches_copy = df_matches.copy()
    df_matches = df_matches.rename(columns={'pt1': 'team', 'pt2': 'opponent',
                                            })
    df_matches_copy = df_matches_copy.rename(columns={'pt2': 'team', 'pt1': 'opponent',
                                                    })
    df_matches_copy = df_matches_copy[['league', 'date', 'team', 'opponent' 
                                        ]]
    df_matches.loc[:, 'home'] = 1
    df_matches_copy.loc[:, 'home'] = 0
    df_matches = pd.concat([df_matches, df_matches_copy])
    df_matches.sort_values(by='date', inplace=True)

    return df_matches


def team_to_opponent(df):
    df_opponent = df.copy()
    df_opponent = df_opponent.loc[:, df_opponent.columns.str.contains("team")]
    df_opponent.columns = df_opponent.columns.str.replace("team", "opponent")

    return df_opponent


In [163]:
# future_data_combined.to_csv("../../data/predictions/future_test.csv")

In [164]:
FILES = ["elos_matches.csv", "goals_matches.csv"]
loader = Loader(FILES)
future = load_future_matches()
future_date = future['date'][0]
data = loader.get_data()
wavenet = Wavenet(data, future, 7, future_date)
wavenet.build_wavenet_dataset_past_future()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.dropna(inplace=True)


In [165]:
future_data = add_stats_to_future(data, future)
future_data.drop(['team_goals_conceded',
                 'opponent_goals_conceded',
                 'opponent_goals_scored',
                 'team_goals_scored'], axis=1, inplace=True)
current_date = pd.to_datetime("2023-01-25")
next_match = wavenet.dfs[wavenet.dfs['date']>=current_date]
next_match.reset_index(inplace=True, drop=True)
next_match_team = next_match.loc[:, next_match.columns.str.contains('\d$', regex=True) | 
                                    next_match.columns.str.contains('^team$', regex=True)]
next_match_opp = next_match.loc[:, next_match.columns.str.contains('_y$', regex=True) | 
                                   next_match.columns.str.contains('^opponent$', regex=True)]
future_data_combined = pd.merge(future_data, next_match_team, how='left',
                               left_on='team',
                               right_on='team')
future_data_combined = pd.merge(future_data_combined, next_match_opp, how='left',
                               left_on='opponent',
                               right_on='opponent')
columns_list = wavenet.dfs.drop(['result'], axis=1).columns
future_data_combined = future_data_combined[columns_list]

  next_match = wavenet.dfs[wavenet.dfs['date']>=current_date]


In [166]:
next_match[next_match['league']=='Serie B'].head()

Unnamed: 0,league,date,team,opponent,result,result_0,elo_team,elo_opponent,elo_diff,home,...,league_home_goals_conceded_7_y,league_away_goals_conceded_7_y,league_home_goals_conceded_avg_7_y,league_away_goals_conceded_avg_7_y,team_attack_strength_7_y,team_defense_strength_7_y,opponent_attack_strength_7_y,opponent_defense_strength_7_y,team_lambda_7_y,opponent_lambda_7_y
2,Serie B,2023-01-28,ascoli,palermo,0.5,0,1437.370195,1499.116958,-61.746763,1.0,...,1.333333,0.777778,1.20026,1.489259,1.183954,1.024883,0.989542,0.833153,1.183954,1.510353
4,Serie B,2023-01-28,bari,perugia,0.5,0,1505.870627,1387.19759,118.673038,1.0,...,1.0,0.818182,1.1253,1.405793,0.599025,0.982195,0.841881,1.048294,0.882774,0.9305
5,Serie B,2023-02-04,bari,spal,0.5,0,1500.937473,1459.49833,41.439143,0.0,...,1.0,0.818182,1.1253,1.405793,0.841881,1.048294,0.599025,0.982195,0.9305,0.882774
6,Serie B,2023-01-29,benevento,frosinone,0.5,0,1434.694179,1543.904633,-109.210455,0.0,...,1.333333,0.777778,1.20026,1.489259,0.877003,1.130905,0.812838,0.657752,0.692371,1.36899
8,Serie B,2023-01-28,brescia,como,0.5,0,1387.921137,1405.722188,-17.801051,1.0,...,1.333333,0.777778,1.20026,1.489259,0.920853,1.413631,0.848179,0.657752,0.726989,1.78564


In [167]:
future_data_combined.head()

Unnamed: 0,league,date,team,opponent,result_0,elo_team,elo_opponent,elo_diff,home,team_goals_scored_avg,...,league_home_goals_conceded_7_y,league_away_goals_conceded_7_y,league_home_goals_conceded_avg_7_y,league_away_goals_conceded_avg_7_y,team_attack_strength_7_y,team_defense_strength_7_y,opponent_attack_strength_7_y,opponent_defense_strength_7_y,team_lambda_7_y,opponent_lambda_7_y
0,Serie B,2023-01-28,ascoli,palermo,0,1437.370195,1499.116958,-61.746763,1,1.736842,...,1.333333,0.777778,1.20026,1.489259,1.183954,1.024883,0.989542,0.833153,1.183954,1.510353
1,Serie B,2023-01-28,sudtirol,reggina,0,1516.926552,1506.178725,10.747827,1,0.0,...,1.333333,0.777778,1.20026,1.489259,0.526202,0.848179,1.166246,0.613902,0.387728,1.473153
2,Serie B,2023-01-28,ternana,modena,0,1433.942081,1505.468205,-71.526124,1,0.947368,...,1.555556,0.555556,1.025619,1.15667,1.683599,0.923704,1.282923,0.910054,1.77221,1.215401
3,Serie B,2023-01-28,ternana,modena,0,1433.942081,1505.468205,-71.526124,1,0.947368,...,0.5,1.1,1.072668,1.29096,1.275717,0.896925,1.18231,1.177585,1.611432,1.368991
4,Serie B,2023-01-28,genoa,pisa,0,1530.944075,1473.50643,57.437645,1,1.052632,...,1.333333,0.777778,1.20026,1.489259,0.964704,1.060223,1.095564,1.052404,1.218573,1.729838


In [168]:
future_data_combined[future_data_combined.isnull().any(axis=1)]

Unnamed: 0,league,date,team,opponent,result_0,elo_team,elo_opponent,elo_diff,home,team_goals_scored_avg,...,league_home_goals_conceded_7_y,league_away_goals_conceded_7_y,league_home_goals_conceded_avg_7_y,league_away_goals_conceded_avg_7_y,team_attack_strength_7_y,team_defense_strength_7_y,opponent_attack_strength_7_y,opponent_defense_strength_7_y,team_lambda_7_y,opponent_lambda_7_y


In [169]:
Xfu = build_future_dataset(future_data_combined)

In [170]:
wavenet.dfs_future.shape

(576, 500)

In [171]:
wavenet.dfs.shape

(69982, 501)

In [172]:
Xfu.shape

torch.Size([408, 496])

In [173]:
# wavenet.dfs_future.to_csv("../../data/predictions/wavenet_7_test.csv")

In [174]:
# dfs_test = wavenet.dfs[(wavenet.dfs['team']=='alessandria') | (wavenet.dfs['team']=='reggiana')]
# dfs_test[dfs_test['date']>=pd.to_datetime('2022-09-01')].to_csv("../../data/predictions/past_test.csv")

## Model Predictions

In [175]:
@torch.no_grad()
def get_predictions(x, df):
    x = x[:, None, :]
    logits = model(x)
    preds = []
    preds = torch.softmax(logits, dim=1)
    print(preds)
    df[['loss', 'draw', 'win']] = pd.DataFrame(preds.numpy())
    
    return df

In [176]:
PATH = "../../src/model/trained_models/wavenet_9.pt"
model = torch.load(PATH)
model.train().to("cpu")

Sequential(
  (0): Conv1d(1, 32, kernel_size=(62,), stride=(62,))
  (1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): Tanh()
  (3): Conv1d(32, 64, kernel_size=(2,), stride=(2,))
  (4): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (5): Tanh()
  (6): Conv1d(64, 128, kernel_size=(2,), stride=(2,))
  (7): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (8): Tanh()
  (9): Flatten(start_dim=1, end_dim=-1)
  (10): Linear(in_features=256, out_features=3, bias=True)
)

In [177]:
Xfu.shape

torch.Size([408, 496])

In [178]:
dfs_preds = future_data_combined.copy()
dfs_preds = dfs_preds[['date', 'team', 'opponent',
                       'elo_team', 'elo_opponent', 'elo_diff', 'home',
                       ]]
dfs_preds.sort_values('date', inplace=True)
dfs_preds.reset_index(inplace=True, drop=True)
dfs_preds = get_predictions(Xfu, dfs_preds)

tensor([[0.2477, 0.4640, 0.2883],
        [0.2788, 0.4296, 0.2916],
        [0.2050, 0.5120, 0.2830],
        ...,
        [0.4344, 0.3612, 0.2045],
        [0.1365, 0.4381, 0.4255],
        [0.4122, 0.4221, 0.1657]])


In [179]:
dfs_preds_cut = dfs_preds.copy()
dfs_preds_cut['prediction'] = dfs_preds_cut[['loss', 'draw', 'win']].idxmax(axis=1)
dfs_preds_cut['prediction'] = dfs_preds_cut['prediction'].replace({'win': 1, 'draw': 0.5, 'loss': 0})

In [180]:
dfs_preds_cut['prediction'].value_counts()

0.5    199
1.0    105
0.0    104
Name: prediction, dtype: int64

In [181]:
# dfs_preds_cut[dfs_preds_cut['date']<=pd.to_datetime('2023-01-29')]

In [182]:
dfs_preds_cut.head()

Unnamed: 0,date,team,opponent,elo_team,elo_opponent,elo_diff,home,loss,draw,win,prediction
0,2023-01-28,ascoli,palermo,1437.370195,1499.116958,-61.746763,1,0.247666,0.463989,0.288345,0.5
1,2023-01-28,palermo,ascoli,1499.116958,1437.370195,61.746763,0,0.278778,0.42961,0.291612,0.5
2,2023-01-28,perugia,bari,1387.19759,1500.937473,-113.739884,0,0.205012,0.51203,0.282959,0.5
3,2023-01-28,perugia,bari,1387.19759,1500.937473,-113.739884,0,0.481254,0.394237,0.124508,0.0
4,2023-01-28,como,brescia,1405.722188,1387.921137,17.801051,0,0.222684,0.511749,0.265566,0.5


In [183]:
def transform_to_home_and_away(df):
    df['date'] = pd.to_datetime(df['date'])
    df_home = df[df['home'] == 1]
    df_away = df[df['home'] == 0]
    if 'result' in df_away.columns:
        df_away.drop('result', axis=1, inplace=True)

    df_home.rename(columns={'team': 'home_team', 'opponent': 'away_team', 'elo_team': 'elo_home', 'elo_opponent': 'elo_away',
                            'loss': 'A', 'draw': 'D', 'win': 'H'}, inplace=True)
    df_away.rename(columns={'team': 'away_team', 'opponent': 'home_team', 'elo_team': 'elo_away', 'elo_opponent': 'elo_home',
                            'loss': 'H', 'draw': 'D', 'win': 'A'}, inplace=True)

    df_combined = pd.concat([df_home, df_away])
    df_combined = df_combined.groupby(['date', 'home_team', 'away_team', 'elo_home', 'elo_away']).mean()
    df_combined.reset_index(inplace=True, drop=False)
    if 'result' in df_combined.columns:
        df_combined.drop(['result'], axis=1, inplace=True)
    df_combined['elo_diff'] = df_combined['elo_home'] - df_combined['elo_away']

    if 'team_goals_scored' not in df_home.columns:
        df_ftr = df_home.drop(['A', 'D', 'H', 'elo_diff', 'elo_home', 'elo_away', 'home'], axis=1)
        df_ftr['date'] = pd.to_datetime(df_ftr['date'])
    else:
        df_ftr = df_home.drop(['loss', 'draw', 'win', 'rest_days', 'team_goals_scored', 'opponent_goals_scored', 'elo_home', 'elo_away', 'home'], axis=1)
        df_ftr['date'] = pd.to_datetime(df_ftr['date'])

    df_combined = df_combined.merge(df_ftr, on=['date', 'home_team', 'away_team'], how='outer'
                                    )

    return df_combined

In [184]:
dfs_preds_h_a = transform_to_home_and_away(dfs_preds_cut)
dfs_preds_h_a = dfs_preds_h_a.loc[:, ~dfs_preds_h_a.columns.str.contains('_x')]
dfs_preds_h_a = dfs_preds_h_a.loc[:, ~dfs_preds_h_a.columns.str.contains('_y')]

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_home.rename(columns={'team': 'home_team', 'opponent': 'away_team', 'elo_team': 'elo_home', 'elo_opponent': 'elo_away',
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_away.rename(columns={'team': 'away_team', 'opponent': 'home_team', 'elo_team': 'elo_away', 'elo_opponent': 'elo_home',


In [185]:
dfs_preds_h_a.head()

Unnamed: 0,date,home_team,away_team,elo_home,elo_away,elo_diff,home,A,D,H
0,2023-01-28,ascoli,palermo,1437.370195,1499.116958,-61.746763,0.5,0.269639,0.446799,0.283562
1,2023-01-28,bari,perugia,1500.937473,1387.19759,113.739884,0.5,0.2178,0.443006,0.339195
2,2023-01-28,bari,perugia,1500.937473,1387.19759,113.739884,0.5,0.2178,0.443006,0.339195
3,2023-01-28,brescia,como,1387.921137,1405.722188,-17.801051,0.5,0.219084,0.545525,0.235391
4,2023-01-28,cosenza,parma,1366.841649,1452.260303,-85.418654,0.5,0.265747,0.369322,0.364931


In [186]:
dfs_preds_h_a['prediction'] = dfs_preds_h_a[['A', 'D', 'H']].idxmax(axis=1)
dfs_preds_h_a['prediction'] = dfs_preds_h_a['prediction'].replace({'H': 1, 'D': 0.5, 'A': 0})

In [189]:
dfs_preds_h_a.drop_duplicates(subset=['home_team', 'away_team', 'date'], inplace=True)
dfs_preds_h_a['prediction'].value_counts()

0.5    81
1.0    64
0.0    24
Name: prediction, dtype: int64

In [190]:
dfs_preds_h_a.to_csv("../../data/predictions/wavenet_9_h_a_b_20230127.csv")