# Libraries

In [3]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')

pd.set_option('max_columns', None)

# Data

In [4]:
games = pd.read_csv('./data/games.csv')
games_details = pd.read_csv('./data/games_details.csv')

players = pd.read_csv('./data/players.csv')

teams = pd.read_csv('./data/teams.csv')
ranking = pd.read_csv('./data/ranking.csv')

In [5]:
games['GAME_DATE_EST'] = pd.to_datetime(games['GAME_DATE_EST'], format='%Y-%m-%d')

## GAMES DETAILS

In [6]:
df = pd.merge(games_details, games[['GAME_ID', 'SEASON', 'GAME_DATE_EST']], on='GAME_ID', how='inner')
df = df.sort_values('GAME_DATE_EST')

df['MIN'] = pd.to_numeric(df['MIN'].str.strip(':').str[0:2], errors='coerce')

In [7]:
last_n_games = [5, 10]
columns_to_agg = ['MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 
                  'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 
                  'STL', 'BLK', 'TO', 'PF', 'PTS', 'PLUS_MINUS']

for game in last_n_games:
    for col in columns_to_agg:
        df[f'AVG_{col}_LAST_{game}'] = df.groupby(['SEASON', 'PLAYER_ID'])[col].shift(1).rolling(game, min_periods=1).mean()
        
    df[f'GAMES_PLAYED_LAST_{game}'] = df.groupby(['SEASON', 'PLAYER_ID'])['MIN'].shift(1).rolling(game + 1, min_periods=1).count()/game

In [8]:
df.tail()

Unnamed: 0,GAME_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_CITY,PLAYER_ID,PLAYER_NAME,START_POSITION,COMMENT,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TO,PF,PTS,PLUS_MINUS,SEASON,GAME_DATE_EST,AVG_MIN_LAST_5,AVG_FGM_LAST_5,AVG_FGA_LAST_5,AVG_FG_PCT_LAST_5,AVG_FG3M_LAST_5,AVG_FG3A_LAST_5,AVG_FG3_PCT_LAST_5,AVG_FTM_LAST_5,AVG_FTA_LAST_5,AVG_FT_PCT_LAST_5,AVG_OREB_LAST_5,AVG_DREB_LAST_5,AVG_REB_LAST_5,AVG_AST_LAST_5,AVG_STL_LAST_5,AVG_BLK_LAST_5,AVG_TO_LAST_5,AVG_PF_LAST_5,AVG_PTS_LAST_5,AVG_PLUS_MINUS_LAST_5,GAMES_PLAYED_LAST_5,AVG_MIN_LAST_10,AVG_FGM_LAST_10,AVG_FGA_LAST_10,AVG_FG_PCT_LAST_10,AVG_FG3M_LAST_10,AVG_FG3A_LAST_10,AVG_FG3_PCT_LAST_10,AVG_FTM_LAST_10,AVG_FTA_LAST_10,AVG_FT_PCT_LAST_10,AVG_OREB_LAST_10,AVG_DREB_LAST_10,AVG_REB_LAST_10,AVG_AST_LAST_10,AVG_STL_LAST_10,AVG_BLK_LAST_10,AVG_TO_LAST_10,AVG_PF_LAST_10,AVG_PTS_LAST_10,AVG_PLUS_MINUS_LAST_10,GAMES_PLAYED_LAST_10
59,42000142,1610612763,MEM,Memphis,203937,Kyle Anderson,F,,36.0,5.0,7.0,0.714,1.0,2.0,0.5,0.0,0.0,0.0,1.0,5.0,6.0,3.0,4.0,0.0,2.0,2.0,11.0,2.0,2020,2021-05-26,32.6,7.6,15.8,0.4548,0.8,2.6,0.22,2.6,3.6,0.75,2.4,3.6,6.0,2.4,2.4,0.8,1.2,3.6,18.6,5.0,1.2,25.333333,4.5,10.0,0.3857,0.7,2.0,0.2267,1.5,2.1,0.475,1.6,2.6,4.2,1.8,1.2,0.5,0.9,2.3,11.2,1.5,0.9
58,42000132,1610612752,NYK,New York,1629033,Theo Pinson,,DNP - Coach's Decision,,,,,,,,,,,,,,,,,,,,,2020,2021-05-26,32.0,6.75,14.5,0.4375,1.0,3.0,0.275,2.25,3.5,0.6875,2.75,3.75,6.5,2.0,2.75,1.0,1.25,4.0,16.75,4.75,1.0,27.0,4.777778,10.444444,0.391556,0.777778,2.222222,0.251889,1.444444,2.111111,0.416667,1.666667,2.666667,4.333333,1.666667,1.333333,0.555556,1.0,2.555556,11.777778,2.0,0.9
57,42000132,1610612752,NYK,New York,203658,Norvel Pelle,,DNP - Coach's Decision,,,,,,,,,,,,,,,,,,,,,2020,2021-05-26,30.666667,4.666667,10.666667,0.416667,0.666667,2.333333,0.233333,2.0,2.666667,0.75,2.666667,3.666667,6.333333,2.0,3.0,0.666667,1.333333,4.666667,12.0,1.666667,0.8,27.0,5.375,11.375,0.4405,0.875,2.5,0.283375,1.625,2.375,0.46875,1.75,2.875,4.625,1.875,1.5,0.625,1.0,2.625,13.25,3.5,0.8
71,42000142,1610612763,MEM,Memphis,1629723,John Konchar,,DNP - Coach's Decision,,,,,,,,,,,,,,,,,,,,,2020,2021-05-26,26.5,3.5,9.0,0.375,1.0,3.5,0.35,2.5,3.0,0.875,1.0,2.5,3.5,1.5,4.5,0.5,1.0,4.5,10.5,-5.0,0.6,29.0,6.142857,12.428571,0.503429,1.0,2.571429,0.323857,1.857143,2.714286,0.535714,1.857143,3.142857,5.0,1.857143,1.714286,0.571429,1.0,2.857143,15.142857,5.428571,0.8
0,42000102,1610612764,WAS,Washington,203078,Bradley Beal,F,,34.0,14.0,28.0,0.5,1.0,6.0,0.167,4.0,6.0,0.667,0.0,4.0,4.0,3.0,1.0,0.0,1.0,0.0,33.0,-22.0,2020,2021-05-26,35.0,9.0,16.5,0.5325,1.0,4.0,0.3335,4.5,5.0,0.875,1.5,5.5,7.0,4.5,3.5,0.0,3.5,2.5,23.5,2.5,0.6,31.571429,7.714286,15.142857,0.512714,1.0,3.142857,0.276286,2.714286,3.428571,0.678571,2.142857,4.142857,6.285714,2.714286,1.857143,0.571429,1.857143,2.571429,19.142857,6.285714,0.8


## GAMES

In [9]:
col_home = ['GAME_DATE_EST', 'GAME_ID', 'HOME_TEAM_ID','HOME_TEAM_ID','SEASON', 
            'PTS_home', 'FG_PCT_home', 'FT_PCT_home', 'FG3_PCT_home', 
            'AST_home', 'REB_home', 'HOME_TEAM_WINS']

col_guest = ['GAME_DATE_EST', 'GAME_ID','HOME_TEAM_ID','VISITOR_TEAM_ID', 'SEASON', 
            'PTS_away', 'FG_PCT_away', 'FT_PCT_away', 'FG3_PCT_away', 
            'AST_away', 'REB_away', 'HOME_TEAM_WINS']

col_names = ['GAME_DATE_EST', 'GAME_ID', 'HOME_TEAM_ID', 'TEAM_ID', 'SEASON', 
            'PTS', 'FG_PCT', 'FT_PCT', 'FG3_PCT', 
            'AST', 'REB', 'TEAM_WINS']

games_home = games[col_home]
games_home.columns = col_names
games_home = games_home.sort_values('GAME_DATE_EST')

games_guest = games[col_guest]
games_guest['HOME_TEAM_WINS'] = 1 - games['HOME_TEAM_WINS']
games_guest.columns = col_names
games_guest = games_guest.sort_values('GAME_DATE_EST')

all_games = pd.concat([games_home, games_guest])
all_games = all_games.sort_values('GAME_DATE_EST')

In [10]:
last_n_games = [5, 10]
columns_to_agg = ['PTS','FG_PCT','FT_PCT','FG3_PCT',
                  'AST','REB','TEAM_WINS']
for game in last_n_games:
    for col in columns_to_agg:
        all_games[f'AVG_{col}_LAST_{game}'] = all_games.groupby(['SEASON', 'GAME_ID'])[col].shift(1).rolling(game, min_periods=1).mean()

In [11]:
all_games.tail()

Unnamed: 0,GAME_DATE_EST,GAME_ID,HOME_TEAM_ID,TEAM_ID,SEASON,PTS,FG_PCT,FT_PCT,FG3_PCT,AST,REB,TEAM_WINS,AVG_PTS_LAST_5,AVG_FG_PCT_LAST_5,AVG_FT_PCT_LAST_5,AVG_FG3_PCT_LAST_5,AVG_AST_LAST_5,AVG_REB_LAST_5,AVG_TEAM_WINS_LAST_5,AVG_PTS_LAST_10,AVG_FG_PCT_LAST_10,AVG_FT_PCT_LAST_10,AVG_FG3_PCT_LAST_10,AVG_AST_LAST_10,AVG_REB_LAST_10,AVG_TEAM_WINS_LAST_10
0,2021-05-26,42000102,1610612755,1610612755,2020,120.0,0.557,0.684,0.429,26.0,45.0,1,114.5,0.525,0.7375,0.4185,23.0,32.5,0.5,108.75,0.469,0.736,0.369,22.25,36.0,0.25
1,2021-05-26,42000132,1610612752,1610612752,2020,101.0,0.383,0.739,0.364,15.0,54.0,1,114.5,0.525,0.7375,0.4185,23.0,32.5,0.5,108.75,0.469,0.736,0.369,22.25,36.0,0.25
2,2021-05-26,42000142,1610612762,1610612762,2020,141.0,0.544,0.774,0.487,28.0,42.0,1,115.5,0.503,0.848,0.328,20.5,32.0,0.0,116.5,0.50375,0.75525,0.3845,22.25,35.25,0.25
1,2021-05-26,42000132,1610612752,1610612737,2020,92.0,0.369,0.818,0.273,17.0,41.0,0,115.0,0.462,0.751,0.356,17.5,43.5,0.5,113.4,0.4796,0.752,0.3804,20.8,39.0,0.4
0,2021-05-26,42000102,1610612755,1610612764,2020,95.0,0.402,0.633,0.091,22.0,40.0,0,116.666667,0.493667,0.728667,0.380333,20.333333,44.0,0.666667,114.5,0.4925,0.740667,0.3885,21.666667,40.0,0.5


In [12]:
nightlife = pd.read_excel('./data/nightlife.xlsx')

In [14]:
df1 = pd.merge(df, all_games, on=['GAME_ID', 'TEAM_ID'], how='inner')

In [16]:
df1.tail()

Unnamed: 0,GAME_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_CITY,PLAYER_ID,PLAYER_NAME,START_POSITION,COMMENT,MIN,FGM,FGA,FG_PCT_x,FG3M,FG3A,FG3_PCT_x,FTM,FTA,FT_PCT_x,OREB,DREB,REB_x,AST_x,STL,BLK,TO,PF,PTS_x,PLUS_MINUS,SEASON_x,GAME_DATE_EST_x,AVG_MIN_LAST_5,AVG_FGM_LAST_5,AVG_FGA_LAST_5,AVG_FG_PCT_LAST_5_x,AVG_FG3M_LAST_5,AVG_FG3A_LAST_5,AVG_FG3_PCT_LAST_5_x,AVG_FTM_LAST_5,AVG_FTA_LAST_5,AVG_FT_PCT_LAST_5_x,AVG_OREB_LAST_5,AVG_DREB_LAST_5,AVG_REB_LAST_5_x,AVG_AST_LAST_5_x,AVG_STL_LAST_5,AVG_BLK_LAST_5,AVG_TO_LAST_5,AVG_PF_LAST_5,AVG_PTS_LAST_5_x,AVG_PLUS_MINUS_LAST_5,GAMES_PLAYED_LAST_5,AVG_MIN_LAST_10,AVG_FGM_LAST_10,AVG_FGA_LAST_10,AVG_FG_PCT_LAST_10_x,AVG_FG3M_LAST_10,AVG_FG3A_LAST_10,AVG_FG3_PCT_LAST_10_x,AVG_FTM_LAST_10,AVG_FTA_LAST_10,AVG_FT_PCT_LAST_10_x,AVG_OREB_LAST_10,AVG_DREB_LAST_10,AVG_REB_LAST_10_x,AVG_AST_LAST_10_x,AVG_STL_LAST_10,AVG_BLK_LAST_10,AVG_TO_LAST_10,AVG_PF_LAST_10,AVG_PTS_LAST_10_x,AVG_PLUS_MINUS_LAST_10,GAMES_PLAYED_LAST_10,GAME_DATE_EST_y,HOME_TEAM_ID,SEASON_y,PTS_y,FG_PCT_y,FT_PCT_y,FG3_PCT_y,AST_y,REB_y,TEAM_WINS,AVG_PTS_LAST_5_y,AVG_FG_PCT_LAST_5_y,AVG_FT_PCT_LAST_5_y,AVG_FG3_PCT_LAST_5_y,AVG_AST_LAST_5_y,AVG_REB_LAST_5_y,AVG_TEAM_WINS_LAST_5,AVG_PTS_LAST_10_y,AVG_FG_PCT_LAST_10_y,AVG_FT_PCT_LAST_10_y,AVG_FG3_PCT_LAST_10_y,AVG_AST_LAST_10_y,AVG_REB_LAST_10_y,AVG_TEAM_WINS_LAST_10
621302,42000142,1610612763,MEM,Memphis,1628415,Dillon Brooks,G,,28.0,10.0,14.0,0.714,2.0,2.0,1.0,1.0,1.0,1.0,0.0,2.0,2.0,1.0,1.0,0.0,1.0,5.0,23.0,3.0,2020,2021-05-26,24.8,5.8,11.8,0.4548,1.0,2.6,0.3134,1.4,2.2,0.3,1.2,2.4,3.6,1.8,0.6,0.6,0.8,1.4,14.0,4.6,1.0,22.666667,4.428571,9.714286,0.372429,0.714286,1.857143,0.223857,1.285714,1.857143,0.357143,1.142857,2.142857,3.285714,1.714286,0.428571,0.428571,0.714286,1.285714,10.857143,1.428571,0.6,2021-05-26,1610612762,2020,129.0,0.541,0.763,0.348,20.0,33.0,0,112.333333,0.491333,0.752667,0.396667,23.0,36.0,0.333333,112.6,0.4822,0.7488,0.381,23.6,35.8,0.4
621303,42000142,1610612763,MEM,Memphis,202685,Jonas Valanciunas,C,,29.0,7.0,12.0,0.583,0.0,1.0,0.0,4.0,4.0,1.0,3.0,3.0,6.0,3.0,0.0,1.0,2.0,4.0,18.0,5.0,2020,2021-05-26,30.0,7.2,13.8,0.5548,1.0,2.2,0.3134,1.6,2.6,0.4,2.2,3.4,5.6,2.0,0.6,0.6,1.0,2.2,17.0,9.6,1.2,25.0,4.75,10.25,0.388375,0.625,1.625,0.195875,1.25,1.875,0.375,1.75,2.625,4.375,1.875,0.375,0.5,0.875,1.75,11.375,3.125,0.7,2021-05-26,1610612762,2020,129.0,0.541,0.763,0.348,20.0,33.0,0,112.333333,0.491333,0.752667,0.396667,23.0,36.0,0.333333,112.6,0.4822,0.7488,0.381,23.6,35.8,0.4
621304,42000142,1610612763,MEM,Memphis,1628991,Jaren Jackson Jr.,F,,31.0,3.0,6.0,0.5,1.0,2.0,0.5,9.0,11.0,0.818,1.0,2.0,3.0,1.0,0.0,1.0,2.0,4.0,16.0,3.0,2020,2021-05-26,30.2,7.2,14.6,0.5048,1.0,2.8,0.2534,2.0,2.8,0.6,2.4,3.6,6.0,2.0,1.2,0.8,1.2,2.6,17.4,7.8,1.2,24.75,4.444444,10.0,0.373,0.666667,2.0,0.196333,1.333333,1.888889,0.444444,1.666667,2.555556,4.222222,1.666667,0.666667,0.555556,0.888889,2.0,10.888889,0.666667,0.8,2021-05-26,1610612762,2020,129.0,0.541,0.763,0.348,20.0,33.0,0,112.333333,0.491333,0.752667,0.396667,23.0,36.0,0.333333,112.6,0.4822,0.7488,0.381,23.6,35.8,0.4
621305,42000142,1610612763,MEM,Memphis,203937,Kyle Anderson,F,,36.0,5.0,7.0,0.714,1.0,2.0,0.5,0.0,0.0,0.0,1.0,5.0,6.0,3.0,4.0,0.0,2.0,2.0,11.0,2.0,2020,2021-05-26,32.6,7.6,15.8,0.4548,0.8,2.6,0.22,2.6,3.6,0.75,2.4,3.6,6.0,2.4,2.4,0.8,1.2,3.6,18.6,5.0,1.2,25.333333,4.5,10.0,0.3857,0.7,2.0,0.2267,1.5,2.1,0.475,1.6,2.6,4.2,1.8,1.2,0.5,0.9,2.3,11.2,1.5,0.9,2021-05-26,1610612762,2020,129.0,0.541,0.763,0.348,20.0,33.0,0,112.333333,0.491333,0.752667,0.396667,23.0,36.0,0.333333,112.6,0.4822,0.7488,0.381,23.6,35.8,0.4
621306,42000142,1610612763,MEM,Memphis,1629723,John Konchar,,DNP - Coach's Decision,,,,,,,,,,,,,,,,,,,,,2020,2021-05-26,26.5,3.5,9.0,0.375,1.0,3.5,0.35,2.5,3.0,0.875,1.0,2.5,3.5,1.5,4.5,0.5,1.0,4.5,10.5,-5.0,0.6,29.0,6.142857,12.428571,0.503429,1.0,2.571429,0.323857,1.857143,2.714286,0.535714,1.857143,3.142857,5.0,1.857143,1.714286,0.571429,1.0,2.857143,15.142857,5.428571,0.8,2021-05-26,1610612762,2020,129.0,0.541,0.763,0.348,20.0,33.0,0,112.333333,0.491333,0.752667,0.396667,23.0,36.0,0.333333,112.6,0.4822,0.7488,0.381,23.6,35.8,0.4


In [17]:
df1 = pd.merge(df1, players, on='PLAYER_ID', how='inner')

In [20]:
df1 = df1.loc[df1['MIN'] > 20]

In [21]:
df1

Unnamed: 0,GAME_ID,TEAM_ID_x,TEAM_ABBREVIATION,TEAM_CITY,PLAYER_ID,PLAYER_NAME_x,START_POSITION,COMMENT,MIN,FGM,FGA,FG_PCT_x,FG3M,FG3A,FG3_PCT_x,FTM,FTA,FT_PCT_x,OREB,DREB,REB_x,AST_x,STL,BLK,TO,PF,PTS_x,PLUS_MINUS,SEASON_x,GAME_DATE_EST_x,AVG_MIN_LAST_5,AVG_FGM_LAST_5,AVG_FGA_LAST_5,AVG_FG_PCT_LAST_5_x,AVG_FG3M_LAST_5,AVG_FG3A_LAST_5,AVG_FG3_PCT_LAST_5_x,AVG_FTM_LAST_5,AVG_FTA_LAST_5,AVG_FT_PCT_LAST_5_x,AVG_OREB_LAST_5,AVG_DREB_LAST_5,AVG_REB_LAST_5_x,AVG_AST_LAST_5_x,AVG_STL_LAST_5,AVG_BLK_LAST_5,AVG_TO_LAST_5,AVG_PF_LAST_5,AVG_PTS_LAST_5_x,AVG_PLUS_MINUS_LAST_5,GAMES_PLAYED_LAST_5,AVG_MIN_LAST_10,AVG_FGM_LAST_10,AVG_FGA_LAST_10,AVG_FG_PCT_LAST_10_x,AVG_FG3M_LAST_10,AVG_FG3A_LAST_10,AVG_FG3_PCT_LAST_10_x,AVG_FTM_LAST_10,AVG_FTA_LAST_10,AVG_FT_PCT_LAST_10_x,AVG_OREB_LAST_10,AVG_DREB_LAST_10,AVG_REB_LAST_10_x,AVG_AST_LAST_10_x,AVG_STL_LAST_10,AVG_BLK_LAST_10,AVG_TO_LAST_10,AVG_PF_LAST_10,AVG_PTS_LAST_10_x,AVG_PLUS_MINUS_LAST_10,GAMES_PLAYED_LAST_10,GAME_DATE_EST_y,HOME_TEAM_ID,SEASON_y,PTS_y,FG_PCT_y,FT_PCT_y,FG3_PCT_y,AST_y,REB_y,TEAM_WINS,AVG_PTS_LAST_5_y,AVG_FG_PCT_LAST_5_y,AVG_FT_PCT_LAST_5_y,AVG_FG3_PCT_LAST_5_y,AVG_AST_LAST_5_y,AVG_REB_LAST_5_y,AVG_TEAM_WINS_LAST_5,AVG_PTS_LAST_10_y,AVG_FG_PCT_LAST_10_y,AVG_FT_PCT_LAST_10_y,AVG_FG3_PCT_LAST_10_y,AVG_AST_LAST_10_y,AVG_REB_LAST_10_y,AVG_TEAM_WINS_LAST_10,PLAYER_NAME_y,TEAM_ID_y,SEASON
20,20300075,1610612762,UTA,Utah,2260,Jarron Collins,,,25.0,0.0,2.0,0.000,0.0,0.0,0.0,0.0,0.0,0.000,4.0,2.0,6.0,0.0,0.0,0.0,4.0,2.0,0.0,-9.0,2003,2003-11-07,24.750000,3.000000,6.600000,0.50220,0.000000,0.400000,4.440892e-17,1.400000,2.200000,0.311200,2.000000,3.600000,5.600000,1.600000,0.400000,0.000000,1.600000,1.800000,7.400000,11.800000,1.0,26.625000,3.777778,8.444444,0.498667,0.444444,1.777778,0.170333,2.000000,3.000000,0.506222,1.888889,4.111111,6.000000,2.222222,1.000000,0.000000,1.333333,2.000000,10.000000,14.000000,0.9,2003-11-07,1610612744,2003,89.0,0.384,0.952,0.273,20.0,43.0,0,100.200000,0.453000,0.691800,0.466800,24.200000,43.600000,0.600000,89.250000,0.432500,0.637125,0.385750,21.125000,41.625000,0.375000,Jarron Collins,1610612757,2010
21,20300075,1610612762,UTA,Utah,2260,Jarron Collins,,,25.0,0.0,2.0,0.000,0.0,0.0,0.0,0.0,0.0,0.000,4.0,2.0,6.0,0.0,0.0,0.0,4.0,2.0,0.0,-9.0,2003,2003-11-07,24.750000,3.000000,6.600000,0.50220,0.000000,0.400000,4.440892e-17,1.400000,2.200000,0.311200,2.000000,3.600000,5.600000,1.600000,0.400000,0.000000,1.600000,1.800000,7.400000,11.800000,1.0,26.625000,3.777778,8.444444,0.498667,0.444444,1.777778,0.170333,2.000000,3.000000,0.506222,1.888889,4.111111,6.000000,2.222222,1.000000,0.000000,1.333333,2.000000,10.000000,14.000000,0.9,2003-11-07,1610612744,2003,89.0,0.384,0.952,0.273,20.0,43.0,0,100.200000,0.453000,0.691800,0.466800,24.200000,43.600000,0.600000,89.250000,0.432500,0.637125,0.385750,21.125000,41.625000,0.375000,Jarron Collins,1610612746,2010
22,20300075,1610612762,UTA,Utah,2260,Jarron Collins,,,25.0,0.0,2.0,0.000,0.0,0.0,0.0,0.0,0.0,0.000,4.0,2.0,6.0,0.0,0.0,0.0,4.0,2.0,0.0,-9.0,2003,2003-11-07,24.750000,3.000000,6.600000,0.50220,0.000000,0.400000,4.440892e-17,1.400000,2.200000,0.311200,2.000000,3.600000,5.600000,1.600000,0.400000,0.000000,1.600000,1.800000,7.400000,11.800000,1.0,26.625000,3.777778,8.444444,0.498667,0.444444,1.777778,0.170333,2.000000,3.000000,0.506222,1.888889,4.111111,6.000000,2.222222,1.000000,0.000000,1.333333,2.000000,10.000000,14.000000,0.9,2003-11-07,1610612744,2003,89.0,0.384,0.952,0.273,20.0,43.0,0,100.200000,0.453000,0.691800,0.466800,24.200000,43.600000,0.600000,89.250000,0.432500,0.637125,0.385750,21.125000,41.625000,0.375000,Jarron Collins,1610612756,2009
23,20300075,1610612762,UTA,Utah,2260,Jarron Collins,,,25.0,0.0,2.0,0.000,0.0,0.0,0.0,0.0,0.0,0.000,4.0,2.0,6.0,0.0,0.0,0.0,4.0,2.0,0.0,-9.0,2003,2003-11-07,24.750000,3.000000,6.600000,0.50220,0.000000,0.400000,4.440892e-17,1.400000,2.200000,0.311200,2.000000,3.600000,5.600000,1.600000,0.400000,0.000000,1.600000,1.800000,7.400000,11.800000,1.0,26.625000,3.777778,8.444444,0.498667,0.444444,1.777778,0.170333,2.000000,3.000000,0.506222,1.888889,4.111111,6.000000,2.222222,1.000000,0.000000,1.333333,2.000000,10.000000,14.000000,0.9,2003-11-07,1610612744,2003,89.0,0.384,0.952,0.273,20.0,43.0,0,100.200000,0.453000,0.691800,0.466800,24.200000,43.600000,0.600000,89.250000,0.432500,0.637125,0.385750,21.125000,41.625000,0.375000,Jarron Collins,1610612757,2009
24,20300083,1610612762,UTA,Utah,2260,Jarron Collins,,,21.0,1.0,2.0,0.500,0.0,0.0,0.0,2.0,2.0,1.000,0.0,2.0,2.0,0.0,1.0,2.0,0.0,2.0,4.0,14.0,2003,2003-11-08,20.250000,2.750000,8.000000,0.28850,0.250000,0.500000,2.500000e-01,1.750000,1.750000,0.500000,1.500000,2.000000,3.500000,1.500000,0.750000,0.000000,1.500000,2.750000,7.500000,-3.000000,0.8,21.666667,1.777778,5.111111,0.220778,0.333333,0.888889,0.194444,0.777778,0.777778,0.222222,0.777778,1.444444,2.222222,1.333333,0.555556,0.000000,1.333333,1.444444,4.666667,-4.000000,0.7,2003-11-08,1610612762,2003,96.0,0.450,0.815,0.667,23.0,39.0,1,97.500000,0.456500,0.771250,0.356750,22.500000,43.000000,0.750000,96.000000,0.440222,0.741556,0.383667,22.222222,44.111111,0.555556,Jarron Collins,1610612757,2010
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4004721,21900528,1610612764,WAS,Washington,1628394,Anzejs Pasecniks,,,22.0,6.0,8.0,0.750,0.0,0.0,0.0,1.0,2.0,0.500,3.0,5.0,8.0,1.0,0.0,1.0,1.0,5.0,13.0,26.0,2019,2020-01-04,20.000000,1.800000,5.400000,0.45560,0.000000,1.400000,5.919709e-14,0.200000,1.200000,0.050000,2.000000,2.400000,4.400000,1.000000,0.400000,0.600000,1.000000,2.000000,3.800000,-3.400000,1.0,25.333333,4.300000,9.400000,0.491700,0.700000,2.700000,0.130000,2.000000,3.200000,0.314500,1.900000,3.700000,5.600000,2.200000,0.800000,0.900000,1.600000,2.400000,11.300000,-2.200000,0.9,2020-01-04,1610612764,2019,128.0,0.549,0.815,0.375,20.0,49.0,1,119.000000,0.458000,0.773400,0.369000,24.200000,44.000000,0.600000,117.666667,0.469000,0.779500,0.377667,23.833333,44.000000,0.666667,Anzejs Pasecniks,1610612764,2019
4004722,21900542,1610612764,WAS,Washington,1628394,Anzejs Pasecniks,,,22.0,2.0,3.0,0.667,0.0,0.0,0.0,3.0,4.0,0.750,0.0,4.0,4.0,0.0,1.0,1.0,3.0,5.0,7.0,11.0,2019,2020-01-06,25.333333,7.333333,12.666667,0.51400,0.666667,2.333333,2.333333e-01,2.000000,3.000000,0.600000,1.000000,3.000000,4.000000,3.000000,1.000000,0.333333,0.333333,3.333333,17.333333,23.333333,0.8,27.875000,6.000000,12.000000,0.448000,0.750000,3.250000,0.182750,4.000000,4.750000,0.796375,1.125000,3.375000,4.500000,2.500000,0.875000,0.250000,0.875000,2.750000,16.750000,9.750000,0.9,2020-01-06,1610612764,2019,99.0,0.464,0.609,0.333,18.0,47.0,1,107.000000,0.419333,0.813000,0.294000,22.666667,44.666667,0.333333,107.000000,0.419333,0.813000,0.294000,22.666667,44.666667,0.333333,Anzejs Pasecniks,1610612764,2019
4004723,21900557,1610612764,WAS,Washington,1628394,Anzejs Pasecniks,,,35.0,6.0,8.0,0.750,0.0,0.0,0.0,4.0,6.0,0.667,2.0,3.0,5.0,3.0,0.0,2.0,3.0,2.0,16.0,-16.0,2019,2020-01-08,21.500000,2.000000,2.666667,0.48900,0.666667,0.666667,3.333333e-01,1.666667,2.666667,0.416667,1.000000,2.666667,3.666667,0.666667,0.333333,1.000000,1.333333,2.333333,6.333333,6.000000,0.6,26.714286,3.500000,6.750000,0.489125,1.125000,2.375000,0.354125,1.250000,2.000000,0.302125,1.375000,4.125000,5.500000,2.750000,1.000000,0.875000,1.375000,2.750000,9.375000,6.875000,0.8,2020-01-08,1610612753,2019,89.0,0.429,0.579,0.261,24.0,40.0,0,117.400000,0.451200,0.791000,0.354600,26.600000,51.400000,0.600000,115.833333,0.455333,0.785833,0.351000,26.333333,49.000000,0.500000,Anzejs Pasecniks,1610612764,2019
4004727,21900621,1610612764,WAS,Washington,1628394,Anzejs Pasecniks,,,23.0,4.0,6.0,0.667,0.0,0.0,0.0,3.0,5.0,0.600,1.0,5.0,6.0,0.0,0.0,0.0,2.0,0.0,11.0,-22.0,2019,2020-01-17,23.250000,4.250000,7.500000,0.59425,0.750000,1.750000,2.500000e-01,2.250000,3.000000,0.875000,1.250000,3.250000,4.500000,3.500000,1.000000,1.500000,2.250000,2.750000,11.500000,0.500000,1.0,24.222222,4.444444,8.333333,0.538111,1.333333,3.333333,0.336111,1.666667,2.222222,0.677778,0.777778,3.777778,4.555556,3.000000,1.000000,0.888889,1.111111,2.111111,11.888889,1.777778,0.9,2020-01-17,1610612761,2019,111.0,0.449,0.696,0.429,19.0,43.0,0,112.666667,0.483000,0.766667,0.330333,24.666667,42.666667,0.333333,112.666667,0.483000,0.766667,0.330333,24.666667,42.666667,0.333333,Anzejs Pasecniks,1610612764,2019


## TEST 

In [43]:
# prosek minuta svakog igraca po sezoni, mozda zatreba

In [40]:
df_test = df1.groupby(['PLAYER_ID', 'SEASON'])['MIN'].mean()

In [41]:
df_test

PLAYER_ID   SEASON
255         2009      31.075901
            2010      31.075901
            2011      31.075901
            2012      31.075901
283         2009      24.958333
                        ...    
1629962     2019      24.000000
1962936250  2009      24.500000
1962936483  2011      22.000000
1962936489  2011      28.000000
1962936495  2011      23.000000
Name: MIN, Length: 5788, dtype: float64

In [44]:
df_test.to_csv('./data/dt_test.csv')

In [19]:
## FIRST VERSION

In [None]:
df = df.drop('COMMENT',axis=1)

In [None]:
def fill_na_cat(dataset, columns):
    for column in columns:
        value_for_swap = dataset[column].describe().top
        dataset[column] = dataset[column].fillna(value_for_swap)
    return pd.DataFrame(dataset)
        

def fill_na_num(dataset, columns):
    for column in columns:
        value_for_swap = 0
        dataset[column] = dataset[column].fillna(value_for_swap)
    return pd.DataFrame(dataset)

numerics = df.select_dtypes(np.number)
cats = df.select_dtypes(object)


num_cols = list(numerics.columns)
cat_cols = list(cats.columns)

In [None]:
numerics = fill_na_num(numerics, num_cols)

df = pd.concat([cats, numerics], axis=1)

In [None]:
df['MIN'] = df['MIN'].fillna(0)

In [None]:
df['START_POSITION'] = df['START_POSITION'].fillna('G')

In [None]:
df.info()

In [None]:
testcol = df.groupby(['PLAYER_ID', 'SEASON'])['PTS'].shift(1).rolling(5, min_periods=1).mean()

In [None]:
testcol