In [122]:
# Importation des bibliothèques
import numpy as np
import pandas as pd

# Importation de l'API
from nba_api.stats.endpoints import leaguegamefinder

In [123]:
# Récupération du jeu de données
path = 'nba odds 2021-22.xlsx'
odds_df = pd.read_excel(path, usecols = ['Date', 'Team', 'ML'])
odds_df.tail()

Unnamed: 0,Date,Team,ML
2641,610,Boston,-165
2642,613,Boston,145
2643,613,Golden State,-165
2644,616,Golden State,155
2645,616,Boston,-175


In [124]:
# Suppression des espaces dans les noms des équipes
odds_df.Team = odds_df.Team.str.replace(' ', '')
odds_df.tail()

Unnamed: 0,Date,Team,ML
2641,610,Boston,-165
2642,613,Boston,145
2643,613,GoldenState,-165
2644,616,GoldenState,155
2645,616,Boston,-175


In [125]:
# Création d'un dictionnaire pour remplacer les équipes par leurs noms officiels
team_name = {'Detroit': 'DetroitPistons', 'Washington': 'WashingtonWizards',
            'Dallas': 'DallasMavericks', 'Phoenix': 'PhoenixSuns',
            'NewOrleans': 'NewOrleansPelicans', 'LAClippers': 'LAClippers',
            'OklahomaCity': 'OklahomaCityThunder', 'GoldenState': 'GoldenStateWarriors',
            'Philadelphia': 'Philadelphia76ers', 'Indiana': 'IndianaPacers',
            'Miami': 'MiamiHeat', 'Toronto': 'TorontoRaptors',
            'Orlando': 'OrlandoMagic', 'NewYork': 'NewYorkKnicks',
            'Boston': 'BostonCeltics', 'Chicago': 'ChicagoBulls',
            'SanAntonio': 'SanAntonioSpurs', 'Portland': 'PortlandTrailBlazers',
            'Denver': 'DenverNuggets', 'Memphis': 'MemphisGrizzlies',
            'Brooklyn': 'BrooklynNets', 'Houston': 'HoustonRockets',
            'Utah': 'UtahJazz', 'Minnesota':'MinnesotaTimberwolves',
            'LALakers': 'LosAngelesLakers', 'Atlanta': 'AtlantaHawks',
            'Charlotte': 'CharlotteHornets', 'Cleveland': 'ClevelandCavaliers',
            'Sacramento': 'SacramentoKings', 'Milwaukee': 'MilwaukeeBucks'}
odds_df.replace({'Team': team_name}, inplace = True)

In [126]:
def format(date):
    'Fonction pour changer le format de l\'heure de notre dataframe'
    'Elle prend notre dataframe en entrée'
    'Elle nous retourne une date qui respecte le format AAAAMMJJ'
    
    year = path.split(' ')[2].split('.')[0].split('-')[0]
    
    if odds_df['Date'][0] <= date:
        return year + str(date)
    else:
        return str(int(year) + 1) + '0' + str(date)

In [127]:
# Formattage des données pour la création de notre identifiant
odds_df['Date'] = list(map(format, odds_df['Date']))
odds_df['ID'] = list(map(str, odds_df['Date'])) + odds_df['Team']
odds_df.tail()

Unnamed: 0,Date,Team,ML,ID
2641,20220610,BostonCeltics,-165,20220610BostonCeltics
2642,20220613,BostonCeltics,145,20220613BostonCeltics
2643,20220613,GoldenStateWarriors,-165,20220613GoldenStateWarriors
2644,20220616,GoldenStateWarriors,155,20220616GoldenStateWarriors
2645,20220616,BostonCeltics,-175,20220616BostonCeltics


In [128]:
def change_odds(x):
    'Fonction pour changer le format des cotes de notre dataframe'
    'Elle prend une cote américaine en entrée'
    'Elle nous retourne une cote qui correspond à la cote française'

    return round(1 + x / 100 , 2) if x > 0 else round(1 + 100 / np.abs(x),2)

In [129]:
# Modification du format des cotes
odds_df['ML'] = odds_df['ML'].astype(int)
odds_df['ODDS'] = odds_df['ML'].apply(lambda x : change_odds(x))
odds_df.head()

Unnamed: 0,Date,Team,ML,ID,ODDS
0,20211019,BrooklynNets,105,20211019BrooklynNets,2.05
1,20211019,MilwaukeeBucks,-125,20211019MilwaukeeBucks,1.8
2,20211019,GoldenStateWarriors,140,20211019GoldenStateWarriors,2.4
3,20211019,LosAngelesLakers,-160,20211019LosAngelesLakers,1.62
4,20211020,IndianaPacers,-125,20211020IndianaPacers,1.8


In [130]:
# Récupération du deuxième jeu de données
score_df = leaguegamefinder.LeagueGameFinder().get_data_frames()[0]
score_df.drop(['SEASON_ID', 'TEAM_ABBREVIATION', 'MIN'], axis = 1, inplace = True)
score_df.head()

Unnamed: 0,TEAM_ID,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,PTS,FGM,FGA,FG_PCT,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
0,1612709920,Raptors 905,2022200244,2023-02-08,RAP vs. MNE,L,117,46,91,0.505,...,0.667,8,24,32,29,6,5,9,19,-9.0
1,1612709909,Delaware Blue Coats,2022200243,2023-02-08,DEL @ CLC,W,122,45,86,0.523,...,0.909,15,27,42,26,14,4,25,16,13.6
2,1612709889,Oklahoma City Blue,2022200245,2023-02-08,OKL @ FWN,,62,24,42,0.571,...,0.571,4,17,21,15,4,2,12,11,-5.0
3,1612709910,Fort Wayne Mad Ants,2022200245,2023-02-08,FWN vs. OKL,,69,27,47,0.574,...,0.714,4,13,17,12,6,2,8,8,5.4
4,1612709893,Cleveland Charge,2022200243,2023-02-08,CLC vs. DEL,L,112,42,88,0.477,...,0.8,15,23,38,27,12,6,22,18,-14.6


In [131]:
# Suppression des tirets dans les dates et des espaces dans les noms des équipes
score_df.GAME_DATE = score_df.GAME_DATE.str.replace('-', '')
score_df.TEAM_NAME = score_df.TEAM_NAME.str.replace(' ', '')
score_df.head()

Unnamed: 0,TEAM_ID,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,PTS,FGM,FGA,FG_PCT,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
0,1612709920,Raptors905,2022200244,20230208,RAP vs. MNE,L,117,46,91,0.505,...,0.667,8,24,32,29,6,5,9,19,-9.0
1,1612709909,DelawareBlueCoats,2022200243,20230208,DEL @ CLC,W,122,45,86,0.523,...,0.909,15,27,42,26,14,4,25,16,13.6
2,1612709889,OklahomaCityBlue,2022200245,20230208,OKL @ FWN,,62,24,42,0.571,...,0.571,4,17,21,15,4,2,12,11,-5.0
3,1612709910,FortWayneMadAnts,2022200245,20230208,FWN vs. OKL,,69,27,47,0.574,...,0.714,4,13,17,12,6,2,8,8,5.4
4,1612709893,ClevelandCharge,2022200243,20230208,CLC vs. DEL,L,112,42,88,0.477,...,0.8,15,23,38,27,12,6,22,18,-14.6


In [132]:
# Suppression de la colonne MATCHUP et création des colonnes HOME et AWAY
score_df['HOME'] = [1 if '@' in score_df.MATCHUP[index] else 0 for index, row in score_df.iterrows()]
score_df.drop(['MATCHUP'], axis = 1, inplace = True)
score_df.head()

Unnamed: 0,TEAM_ID,TEAM_NAME,GAME_ID,GAME_DATE,WL,PTS,FGM,FGA,FG_PCT,FG3M,...,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,HOME
0,1612709920,Raptors905,2022200244,20230208,L,117,46,91,0.505,8,...,8,24,32,29,6,5,9,19,-9.0,0
1,1612709909,DelawareBlueCoats,2022200243,20230208,W,122,45,86,0.523,15,...,15,27,42,26,14,4,25,16,13.6,1
2,1612709889,OklahomaCityBlue,2022200245,20230208,,62,24,42,0.571,6,...,4,17,21,15,4,2,12,11,-5.0,1
3,1612709910,FortWayneMadAnts,2022200245,20230208,,69,27,47,0.574,7,...,4,13,17,12,6,2,8,8,5.4,0
4,1612709893,ClevelandCharge,2022200243,20230208,L,112,42,88,0.477,13,...,15,23,38,27,12,6,22,18,-14.6,0


In [133]:
# Suppression de la colonne WL et création des colonnes W 
score_df['WL'] = score_df['WL'].astype(str)
score_df['WIN'] = [1 if 'W' in score_df.WL[index] else 0 for index, row in score_df.iterrows()]
score_df.drop(['WL'], axis = 1, inplace = True)
score_df.head()

Unnamed: 0,TEAM_ID,TEAM_NAME,GAME_ID,GAME_DATE,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,...,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,HOME,WIN
0,1612709920,Raptors905,2022200244,20230208,117,46,91,0.505,8,31,...,24,32,29,6,5,9,19,-9.0,0,0
1,1612709909,DelawareBlueCoats,2022200243,20230208,122,45,86,0.523,15,33,...,27,42,26,14,4,25,16,13.6,1,1
2,1612709889,OklahomaCityBlue,2022200245,20230208,62,24,42,0.571,6,16,...,17,21,15,4,2,12,11,-5.0,1,0
3,1612709910,FortWayneMadAnts,2022200245,20230208,69,27,47,0.574,7,17,...,13,17,12,6,2,8,8,5.4,0,0
4,1612709893,ClevelandCharge,2022200243,20230208,112,42,88,0.477,13,36,...,23,38,27,12,6,22,18,-14.6,0,0


In [134]:
# On supprime toutes les lignes contenant une équipe qui ne fait pas partie des 30 équipes de la NBA 
teams = ['AtlantaHawks','BrooklynNets','BostonCeltics', 'CharlotteHornets', 'ChicagoBulls', 'ClevelandCavaliers', 
'DallasMavericks', 'DenverNuggets', 'DetroitPistons', 'GoldenStateWarriors', 'HoustonRockets', 'IndianaPacers', 
'LAClippers', 'LosAngelesLakers', 'MemphisGrizzlies', 'MiamiHeat', 'MilwaukeeBucks', 'MinnesotaTimberwolves',
'NewOrleansPelicans', 'NewYorkKnicks', 'OklahomaCityThunder', 'OrlandoMagic', 'Philadelphia76ers', 'PhoenixSuns', 
'PortlandTrailBlazers', 'SacramentoKings', 'SanAntonioSpurs', 'TorontoRaptors', 'UtahJazz', 'WashingtonWizards']

team_serie = pd.Series(score_df["TEAM_NAME"])
score_df = score_df[team_serie.str.contains('|'.join(teams))]

In [135]:
# On supprime toutes les lignes qui possède un GAME_ID unique
game_id_counts = score_df["GAME_ID"].value_counts()
single_occurrence_game_ids = game_id_counts[game_id_counts == 1].index

score_df = score_df[~score_df["GAME_ID"].isin(single_occurrence_game_ids)]
score_df.head()

Unnamed: 0,TEAM_ID,TEAM_NAME,GAME_ID,GAME_DATE,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,...,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,HOME,WIN
7,1610612747,LosAngelesLakers,22200821,20230207,130,49,85,0.576,14,31,...,35,41,27,8,3,20,19,-3.0,0,0
12,1610612743,DenverNuggets,22200820,20230207,146,58,93,0.624,13,27,...,38,44,44,12,5,10,21,34.0,0,1
13,1610612750,MinnesotaTimberwolves,22200820,20230207,112,40,85,0.471,10,35,...,28,36,24,6,7,17,16,-34.0,1,0
15,1610612756,PhoenixSuns,22200817,20230207,116,43,92,0.467,9,31,...,32,48,31,10,4,15,24,4.0,1,1
16,1610612740,NewOrleansPelicans,22200818,20230207,116,45,91,0.495,12,32,...,34,47,28,8,2,9,19,9.0,0,1


In [136]:
# Création d'un indice d'efficaicité lors des matchs et suppression des colonnes inutiles
score_df['EFF'] = (score_df['PTS'] + score_df['REB'] + score_df['AST'] + score_df['STL'] + score_df['BLK'] - ((score_df['FGA'] - score_df['FGM'])+(score_df['FTA']-score_df['FTM'])+score_df['TOV']))
score_df.drop(['PTS', 'REB', 'AST', 'STL', 'BLK', 'FGA', 'FGM', 'FTA', 'FTM', 'TOV', 'FG3M', 'FG3A', 'OREB', 'DREB'], axis = 1, inplace = True)
score_df.head()

Unnamed: 0,TEAM_ID,TEAM_NAME,GAME_ID,GAME_DATE,FG_PCT,FG3_PCT,FT_PCT,PF,PLUS_MINUS,HOME,WIN,EFF
7,1610612747,LosAngelesLakers,22200821,20230207,0.576,0.452,0.692,19,-3.0,0,0,145
12,1610612743,DenverNuggets,22200820,20230207,0.624,0.481,0.773,21,34.0,0,1,201
13,1610612750,MinnesotaTimberwolves,22200820,20230207,0.471,0.286,0.846,16,-34.0,1,0,119
15,1610612756,PhoenixSuns,22200817,20230207,0.467,0.29,0.75,24,4.0,1,1,138
16,1610612740,NewOrleansPelicans,22200818,20230207,0.495,0.375,0.667,19,9.0,0,1,139


In [137]:
# Création de notre identifiant pour le merge de nos deux dataframes
score_df['ID'] = score_df['GAME_DATE'] + score_df['TEAM_NAME']
score_df.head()

Unnamed: 0,TEAM_ID,TEAM_NAME,GAME_ID,GAME_DATE,FG_PCT,FG3_PCT,FT_PCT,PF,PLUS_MINUS,HOME,WIN,EFF,ID
7,1610612747,LosAngelesLakers,22200821,20230207,0.576,0.452,0.692,19,-3.0,0,0,145,20230207LosAngelesLakers
12,1610612743,DenverNuggets,22200820,20230207,0.624,0.481,0.773,21,34.0,0,1,201,20230207DenverNuggets
13,1610612750,MinnesotaTimberwolves,22200820,20230207,0.471,0.286,0.846,16,-34.0,1,0,119,20230207MinnesotaTimberwolves
15,1610612756,PhoenixSuns,22200817,20230207,0.467,0.29,0.75,24,4.0,1,1,138,20230207PhoenixSuns
16,1610612740,NewOrleansPelicans,22200818,20230207,0.495,0.375,0.667,19,9.0,0,1,139,20230207NewOrleansPelicans


In [138]:
# Merge des deux dataframes
df_merge = score_df.merge(odds_df, left_on = 'ID', right_on='ID')
df_merge.head()

Unnamed: 0,TEAM_ID,TEAM_NAME,GAME_ID,GAME_DATE,FG_PCT,FG3_PCT,FT_PCT,PF,PLUS_MINUS,HOME,WIN,EFF,ID,Date,Team,ML,ODDS
0,1610612738,BostonCeltics,42100406,20220616,0.425,0.393,0.917,16,-13.0,0,0,105,20220616BostonCeltics,20220616,BostonCeltics,-175,1.57
1,1610612744,GoldenStateWarriors,42100406,20220616,0.413,0.413,1.0,20,13.0,1,1,125,20220616GoldenStateWarriors,20220616,GoldenStateWarriors,155,2.55
2,1610612738,BostonCeltics,42100405,20220613,0.413,0.344,0.677,16,-10.0,1,0,91,20220613BostonCeltics,20220613,BostonCeltics,145,2.45
3,1610612744,GoldenStateWarriors,42100405,20220613,0.466,0.225,0.867,28,10.0,0,1,122,20220613GoldenStateWarriors,20220613,GoldenStateWarriors,-165,1.61
4,1610612738,BostonCeltics,42100404,20220610,0.4,0.395,0.737,17,-10.0,0,0,104,20220610BostonCeltics,20220610,BostonCeltics,-165,1.61


In [139]:
# Suppression des dernières colonnes inutiles
df_merge.drop(['PF', 'ID', 'Date', 'Team', 'ML'], axis = 1, inplace = True)
df_merge.head()

Unnamed: 0,TEAM_ID,TEAM_NAME,GAME_ID,GAME_DATE,FG_PCT,FG3_PCT,FT_PCT,PLUS_MINUS,HOME,WIN,EFF,ODDS
0,1610612738,BostonCeltics,42100406,20220616,0.425,0.393,0.917,-13.0,0,0,105,1.57
1,1610612744,GoldenStateWarriors,42100406,20220616,0.413,0.413,1.0,13.0,1,1,125,2.55
2,1610612738,BostonCeltics,42100405,20220613,0.413,0.344,0.677,-10.0,1,0,91,2.45
3,1610612744,GoldenStateWarriors,42100405,20220613,0.466,0.225,0.867,10.0,0,1,122,1.61
4,1610612738,BostonCeltics,42100404,20220610,0.4,0.395,0.737,-10.0,0,0,104,1.61


In [140]:
# Tri des données par date dans l'ordre décroissant
df_merge.sort_values(by = 'GAME_DATE', ascending = False, inplace = True)

In [141]:
# TODO: Rajouter une cartouche pour expliquer ce que l'on fait
# Remplacement des valeurs par des moyennes mobiles
def replace_with_rolling_mean(df_merge, window=10):
    for team in teams:
        df_team = df_merge[df_merge['TEAM_NAME'] == team]
        max_len = window
        for index, row in df_team.iterrows():
            if index + max_len > len(df_team)-1:
                max_len = len(df_team)-1 - index
            if (index + 1) <= len(df_team)-1:
                df_team.loc[index, 'FG_PCT'] = df_team.loc[(index + 1):(index + max_len), 'FG_PCT'].mean()
                df_team.loc[index, 'FG3_PCT'] = df_team.loc[(index + 1):(index + max_len), 'FG3_PCT'].mean()
                df_team.loc[index, 'FT_PCT'] = df_team.loc[(index + 1):(index + max_len), 'FT_PCT'].mean()
                df_team.loc[index, 'PLUS_MINUS'] = df_team.loc[(index + 1):(index + max_len), 'PLUS_MINUS'].mean()
                df_team.loc[index, 'EFF'] = df_team.loc[(index + 1):(index + max_len), 'EFF'].mean()
        df_merge[df_merge['TEAM_NAME'] == team] = df_team


replace_with_rolling_mean(df_merge, window=100)
df_merge.head()

Unnamed: 0,TEAM_ID,TEAM_NAME,GAME_ID,GAME_DATE,FG_PCT,FG3_PCT,FT_PCT,PLUS_MINUS,HOME,WIN,EFF,ODDS
0,1610612738,BostonCeltics,42100406,20220616,0.4455,0.368333,0.790167,5.555556,0,0,122.333333,1.57
1,1610612744,GoldenStateWarriors,42100406,20220616,0.476562,0.361687,0.787562,2.6875,1,1,130.6875,2.55
2,1610612738,BostonCeltics,42100405,20220613,0.447412,0.369765,0.796824,6.470588,1,0,124.176471,2.45
3,1610612744,GoldenStateWarriors,42100405,20220613,0.477267,0.3708,0.782267,2.2,0,1,131.266667,1.61
4,1610612738,BostonCeltics,42100404,20220610,0.450375,0.368188,0.800562,7.5,0,0,125.4375,1.61


In [142]:
# On supprime les lignes qui possèdent des valeurs nulles
df_merge.dropna(inplace=True)

# On supprime toutes les lignes qui possède un GAME_ID unique
game_id_counts = df_merge["GAME_ID"].value_counts()
single_occurrence_game_ids = game_id_counts[game_id_counts == 1].index

df_merge = df_merge[~df_merge["GAME_ID"].isin(single_occurrence_game_ids)]
df_merge.head()

Unnamed: 0,TEAM_ID,TEAM_NAME,GAME_ID,GAME_DATE,FG_PCT,FG3_PCT,FT_PCT,PLUS_MINUS,HOME,WIN,EFF,ODDS
0,1610612738,BostonCeltics,42100406,20220616,0.4455,0.368333,0.790167,5.555556,0,0,122.333333,1.57
1,1610612744,GoldenStateWarriors,42100406,20220616,0.476562,0.361687,0.787562,2.6875,1,1,130.6875,2.55
2,1610612738,BostonCeltics,42100405,20220613,0.447412,0.369765,0.796824,6.470588,1,0,124.176471,2.45
3,1610612744,GoldenStateWarriors,42100405,20220613,0.477267,0.3708,0.782267,2.2,0,1,131.266667,1.61
4,1610612738,BostonCeltics,42100404,20220610,0.450375,0.368188,0.800562,7.5,0,0,125.4375,1.61


In [143]:
# Séparation des données en 2 datasets : un dataset par equipe pour chaque match
df_home = df_merge[df_merge['HOME'] == 1]
df_away = df_merge[df_merge['HOME'] == 0]

# Fusion des deux dataset pour avoir un match par ligne
df_final = pd.merge(df_home, df_away, on = 'GAME_ID', suffixes = ('_home', '_away'))
df_final.head()

Unnamed: 0,TEAM_ID_home,TEAM_NAME_home,GAME_ID,GAME_DATE_home,FG_PCT_home,FG3_PCT_home,FT_PCT_home,PLUS_MINUS_home,HOME_home,WIN_home,...,TEAM_NAME_away,GAME_DATE_away,FG_PCT_away,FG3_PCT_away,FT_PCT_away,PLUS_MINUS_away,HOME_away,WIN_away,EFF_away,ODDS_away
0,1610612744,GoldenStateWarriors,42100406,20220616,0.476562,0.361687,0.787562,2.6875,1,1,...,BostonCeltics,20220616,0.4455,0.368333,0.790167,5.555556,0,0,122.333333,1.57
1,1610612738,BostonCeltics,42100405,20220613,0.447412,0.369765,0.796824,6.470588,1,0,...,GoldenStateWarriors,20220613,0.477267,0.3708,0.782267,2.2,0,1,131.266667,1.61
2,1610612744,GoldenStateWarriors,42100404,20220610,0.479929,0.372357,0.781,1.642857,1,1,...,BostonCeltics,20220610,0.450375,0.368188,0.800562,7.5,0,0,125.4375,1.61
3,1610612744,GoldenStateWarriors,42100403,20220608,0.481308,0.372154,0.774385,3.0,1,0,...,BostonCeltics,20220608,0.4482,0.368,0.806733,6.933333,0,1,124.466667,1.65
4,1610612738,BostonCeltics,42100401,20220602,0.444071,0.357714,0.806286,6.571429,1,1,...,GoldenStateWarriors,20220602,0.4845,0.368,0.777833,4.25,0,0,134.916667,1.61


In [144]:
# TODO: Remplacer les valeurs H et A par le nom de l'équipe
# Création de la colonne WIN qui contient H si l'équipe à domicile a gagné, A si l'équipe à l'extérieur a gagné
df_final['WIN'] = ['H' if df_final.WIN_home[index] == 1 else 'A' for index, row in df_final.iterrows()]

# Suppression des colonnes inutiles
df_final.drop(['TEAM_ID_home', 'HOME_home', 'WIN_home', 'TEAM_ID_away', 'GAME_DATE_away', 'HOME_away', 'WIN_away'], axis = 1, inplace = True)

# Renommage des colonnes
df_final.rename(columns = {'GAME_DATE_home' : 'DATE', 'TEAM_NAME_home' : 'HOME', 'TEAM_NAME_away' : 'AWAY'}, inplace = True)

# Déplacement des colonnes GANME_ID et DATE au début du dataframe
cols = df_final.columns.tolist()
cols = cols[1:3] + cols[0:1] + cols[3:]
df_final = df_final[cols]

# Renommage des colonnes pour passer tout en uppercase
df_final.columns = df_final.columns.str.upper()

df_final.head()

Unnamed: 0,GAME_ID,DATE,HOME,FG_PCT_HOME,FG3_PCT_HOME,FT_PCT_HOME,PLUS_MINUS_HOME,EFF_HOME,ODDS_HOME,AWAY,FG_PCT_AWAY,FG3_PCT_AWAY,FT_PCT_AWAY,PLUS_MINUS_AWAY,EFF_AWAY,ODDS_AWAY,WIN
0,42100406,20220616,GoldenStateWarriors,0.476562,0.361687,0.787562,2.6875,130.6875,2.55,BostonCeltics,0.4455,0.368333,0.790167,5.555556,122.333333,1.57,H
1,42100405,20220613,BostonCeltics,0.447412,0.369765,0.796824,6.470588,124.176471,2.45,GoldenStateWarriors,0.477267,0.3708,0.782267,2.2,131.266667,1.61,A
2,42100404,20220610,GoldenStateWarriors,0.479929,0.372357,0.781,1.642857,131.571429,2.45,BostonCeltics,0.450375,0.368188,0.800562,7.5,125.4375,1.61,H
3,42100403,20220608,GoldenStateWarriors,0.481308,0.372154,0.774385,3.0,133.615385,2.35,BostonCeltics,0.4482,0.368,0.806733,6.933333,124.466667,1.65,A
4,42100401,20220602,BostonCeltics,0.444071,0.357714,0.806286,6.571429,122.785714,2.45,GoldenStateWarriors,0.4845,0.368,0.777833,4.25,134.916667,1.61,H


In [145]:
# Sauvegarde de notre dataframe
df_final.to_csv('preprocessed_data.csv', index = False)