# Libraries

In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')

pd.set_option('max_columns', None)



# Data

In [2]:
games = pd.read_csv('./data/games.csv')
games_details = pd.read_csv('./data/games_details.csv')

players = pd.read_csv('./data/players.csv')

teams = pd.read_csv('./data/teams.csv')
ranking = pd.read_csv('./data/ranking.csv')

In [3]:
# games.head()
# games_details.head()

In [4]:
games['GAME_DATE_EST'] = pd.to_datetime(games['GAME_DATE_EST'], format='%Y-%m-%d')

## GAMES DETAILS

In [5]:
df = pd.merge(games_details, games[['GAME_ID', 'SEASON', 'GAME_DATE_EST']], on='GAME_ID', how='inner')
df = df.sort_values('GAME_DATE_EST')

df['MIN'] = pd.to_numeric(df['MIN'].str.strip(':').str[0:2], errors='coerce')

In [6]:
last_n_games = [5, 10]
columns_to_agg = ['MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 
                  'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 
                  'STL', 'BLK', 'TO', 'PF', 'PTS', 'PLUS_MINUS']

for game in last_n_games:
    for col in columns_to_agg:
        df[f'AVG_{col}_LAST_{game}'] = df.groupby(['SEASON', 'PLAYER_ID'])[col].shift(1).rolling(game, min_periods=1).mean()
        
    df[f'GAMES_PLAYED_LAST_{game}'] = df.groupby(['SEASON', 'PLAYER_ID'])['MIN'].shift(1).rolling(game + 1, min_periods=1).count()/game

In [7]:
df.tail()

Unnamed: 0,GAME_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_CITY,PLAYER_ID,PLAYER_NAME,START_POSITION,COMMENT,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TO,PF,PTS,PLUS_MINUS,SEASON,GAME_DATE_EST,AVG_MIN_LAST_5,AVG_FGM_LAST_5,AVG_FGA_LAST_5,AVG_FG_PCT_LAST_5,AVG_FG3M_LAST_5,AVG_FG3A_LAST_5,AVG_FG3_PCT_LAST_5,AVG_FTM_LAST_5,AVG_FTA_LAST_5,AVG_FT_PCT_LAST_5,AVG_OREB_LAST_5,AVG_DREB_LAST_5,AVG_REB_LAST_5,AVG_AST_LAST_5,AVG_STL_LAST_5,AVG_BLK_LAST_5,AVG_TO_LAST_5,AVG_PF_LAST_5,AVG_PTS_LAST_5,AVG_PLUS_MINUS_LAST_5,GAMES_PLAYED_LAST_5,AVG_MIN_LAST_10,AVG_FGM_LAST_10,AVG_FGA_LAST_10,AVG_FG_PCT_LAST_10,AVG_FG3M_LAST_10,AVG_FG3A_LAST_10,AVG_FG3_PCT_LAST_10,AVG_FTM_LAST_10,AVG_FTA_LAST_10,AVG_FT_PCT_LAST_10,AVG_OREB_LAST_10,AVG_DREB_LAST_10,AVG_REB_LAST_10,AVG_AST_LAST_10,AVG_STL_LAST_10,AVG_BLK_LAST_10,AVG_TO_LAST_10,AVG_PF_LAST_10,AVG_PTS_LAST_10,AVG_PLUS_MINUS_LAST_10,GAMES_PLAYED_LAST_10
59,42000142,1610612763,MEM,Memphis,203937,Kyle Anderson,F,,36.0,5.0,7.0,0.714,1.0,2.0,0.5,0.0,0.0,0.0,1.0,5.0,6.0,3.0,4.0,0.0,2.0,2.0,11.0,2.0,2020,2021-05-26,32.6,7.6,15.8,0.4548,0.8,2.6,0.22,2.6,3.6,0.75,2.4,3.6,6.0,2.4,2.4,0.8,1.2,3.6,18.6,5.0,1.0,25.333333,4.5,10.0,0.3857,0.7,2.0,0.2267,1.5,2.1,0.475,1.6,2.6,4.2,1.8,1.2,0.5,0.9,2.3,11.2,1.5,0.9
58,42000132,1610612752,NYK,New York,1629033,Theo Pinson,,DNP - Coach's Decision,,,,,,,,,,,,,,,,,,,,,2020,2021-05-26,32.0,6.75,14.5,0.4375,1.0,3.0,0.275,2.25,3.5,0.6875,2.75,3.75,6.5,2.0,2.75,1.0,1.25,4.0,16.75,4.75,0.8,27.0,4.777778,10.444444,0.391556,0.777778,2.222222,0.251889,1.444444,2.111111,0.416667,1.666667,2.666667,4.333333,1.666667,1.333333,0.555556,1.0,2.555556,11.777778,2.0,0.8
57,42000132,1610612752,NYK,New York,203658,Norvel Pelle,,DNP - Coach's Decision,,,,,,,,,,,,,,,,,,,,,2020,2021-05-26,30.666667,4.666667,10.666667,0.416667,0.666667,2.333333,0.233333,2.0,2.666667,0.75,2.666667,3.666667,6.333333,2.0,3.0,0.666667,1.333333,4.666667,12.0,1.666667,0.6,27.0,5.375,11.375,0.4405,0.875,2.5,0.283375,1.625,2.375,0.46875,1.75,2.875,4.625,1.875,1.5,0.625,1.0,2.625,13.25,3.5,0.8
71,42000142,1610612763,MEM,Memphis,1629723,John Konchar,,DNP - Coach's Decision,,,,,,,,,,,,,,,,,,,,,2020,2021-05-26,26.5,3.5,9.0,0.375,1.0,3.5,0.35,2.5,3.0,0.875,1.0,2.5,3.5,1.5,4.5,0.5,1.0,4.5,10.5,-5.0,0.4,29.0,6.142857,12.428571,0.503429,1.0,2.571429,0.323857,1.857143,2.714286,0.535714,1.857143,3.142857,5.0,1.857143,1.714286,0.571429,1.0,2.857143,15.142857,5.428571,0.7
0,42000102,1610612764,WAS,Washington,203078,Bradley Beal,F,,34.0,14.0,28.0,0.5,1.0,6.0,0.167,4.0,6.0,0.667,0.0,4.0,4.0,3.0,1.0,0.0,1.0,0.0,33.0,-22.0,2020,2021-05-26,35.0,9.0,16.5,0.5325,1.0,4.0,0.3335,4.5,5.0,0.875,1.5,5.5,7.0,4.5,3.5,0.0,3.5,2.5,23.5,2.5,0.4,31.571429,7.714286,15.142857,0.512714,1.0,3.142857,0.276286,2.714286,3.428571,0.678571,2.142857,4.142857,6.285714,2.714286,1.857143,0.571429,1.857143,2.571429,19.142857,6.285714,0.7


## GAMES

In [None]:
col_home = ['GAME_DATE_EST', 'GAME_ID', 'HOME_TEAM_ID', 'SEASON', 
            'PTS_home', 'FG_PCT_home', 'FT_PCT_home', 'FG3_PCT_home', 
            'AST_home', 'REB_home', 'HOME_TEAM_WINS']

col_guest = ['GAME_DATE_EST', 'GAME_ID', 'VISITOR_TEAM_ID', 'SEASON', 
            'PTS_away', 'FG_PCT_away', 'FT_PCT_away', 'FG3_PCT_away', 
            'AST_away', 'REB_away', 'HOME_TEAM_WINS']

col_names = ['GAME_DATE_EST', 'GAME_ID', 'TEAM_ID', 'SEASON', 
            'PTS', 'FG_PCT', 'FT_PCT', 'FG3_PCT', 
            'AST', 'REB', 'TEAM_WINS']

games_home = games[col_home]
games_home.columns = col_names
games_home = games_home.sort_values('GAME_DATE_EST')

games_guest = games[col_guest]
games_guest['HOME_TEAM_WINS'] = 1 - games['HOME_TEAM_WINS']
games_guest.columns = col_names
games_guest = games_guest.sort_values('GAME_DATE_EST')

all_games = pd.concat([games_home, games_guest])
all_games = all_games.sort_values('GAME_DATE_EST')

# FIRST VERSION

In [None]:
df = df.drop('COMMENT',axis=1)

In [None]:
def fill_na_cat(dataset, columns):
    for column in columns:
        value_for_swap = dataset[column].describe().top
        dataset[column] = dataset[column].fillna(value_for_swap)
    return pd.DataFrame(dataset)
        

def fill_na_num(dataset, columns):
    for column in columns:
        value_for_swap = 0
        dataset[column] = dataset[column].fillna(value_for_swap)
    return pd.DataFrame(dataset)

numerics = df.select_dtypes(np.number)
cats = df.select_dtypes(object)


num_cols = list(numerics.columns)
cat_cols = list(cats.columns)

In [None]:
numerics = fill_na_num(numerics, num_cols)

df = pd.concat([cats, numerics], axis=1)

In [None]:
df['MIN'] = df['MIN'].fillna(0)

In [None]:
df['START_POSITION'] = df['START_POSITION'].fillna('G')

In [None]:
df.info()

In [None]:
testcol = df.groupby(['PLAYER_ID', 'SEASON'])['PTS'].shift(1).rolling(5, min_periods=1).mean()

In [None]:
testcol