# Player Statistics and Attributes

In [49]:
# This tells matplotlib not to try opening a new window for each plot.
%matplotlib inline

# General libraries.
import re
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import requests
import plotly.express as px
from datetime import datetime

import sqlite3

from nba_api.stats.endpoints import PlayerCareerStats

In [12]:
# you might need to pip install the following libraries
#pip install nba_api
#pip install plotly.express
#pip install kaleido

## Get all player ID

In [13]:
#path = ".." #Everything preceding the file name
database = 'basketball.sqlite' #The path + the file name

conn = sqlite3.connect(database)

tables = pd.read_sql("""SELECT *
                        FROM sqlite_master
                        WHERE type='table';""", conn)
#tables

In [14]:
Player = pd.read_sql(
    """
        SELECT cast(id as int) as id, full_name, first_name, last_name, is_active from Player order by id
    """,
    conn)
#Player.to_csv('Player.csv', sep=',')

#Player['id'] = Player['id'].astype('int')
Player

Unnamed: 0,id,full_name,first_name,last_name,is_active
0,2,Byron Scott,Byron,Scott,0
1,3,Grant Long,Grant,Long,0
2,7,Dan Schayes,Dan,Schayes,0
3,9,Sedale Threatt,Sedale,Threatt,0
4,12,Chris King,Chris,King,0
...,...,...,...,...,...
4496,1629744,Matt Thomas,Matt,Thomas,1
4497,1629745,Tariq Owens,Tariq,Owens,1
4498,1629750,Javonte Green,Javonte,Green,1
4499,1629752,Juwan Morgan,Juwan,Morgan,1


## Get Player attribute

In [26]:
Player_Attributes = pd.read_sql(
    """
    SELECT 
    cast(pa.id as int) as id, p.is_active, cast(BIRTHDATE as date) as BIRTHDATE, HEIGHT, WEIGHT, JERSEY, POSITION,SCHOOL, COUNTRY, 
    TEAM_ID, GAMES_PLAYED_CURRENT_SEASON_FLAG,SEASON_EXP,
    FROM_YEAR,TO_YEAR,DLEAGUE_FLAG,NBA_FLAG,GAMES_PLAYED_FLAG,
    DRAFT_YEAR,DRAFT_ROUND,DRAFT_NUMBER,PTS,AST,REB,ALL_STAR_APPEARANCES,PIE
    FROM Player_Attributes pa
    left join Player p on p.id = pa.id
    where team_id <>0

    """, 
    conn)

Player_Attributes

Unnamed: 0,id,is_active,BIRTHDATE,HEIGHT,WEIGHT,JERSEY,POSITION,SCHOOL,COUNTRY,TEAM_ID,...,NBA_FLAG,GAMES_PLAYED_FLAG,DRAFT_YEAR,DRAFT_ROUND,DRAFT_NUMBER,PTS,AST,REB,ALL_STAR_APPEARANCES,PIE
0,76001,0,1968,82.0,240.0,30,Forward,Duke,USA,1610612757,...,Y,Y,1990,1,25,5.7,0.3,3.3,0.0,
1,76002,0,1946,81.0,235.0,54,Center,Iowa State,USA,1610612745,...,Y,Y,1968,1,5,9.0,1.2,8.0,0.0,
2,76003,0,1947,86.0,225.0,33,Center,UCLA,USA,1610612747,...,Y,Y,1969,1,1,24.6,3.6,11.2,18.0,
3,51,0,1969,73.0,162.0,1,Guard,Louisiana State,USA,1610612743,...,Y,Y,1990,1,3,14.6,3.5,1.9,0.0,
4,1505,0,1974,78.0,235.0,9,Forward-Guard,San Jose State,France,1610612758,...,Y,Y,1997,1,11,7.8,1.1,3.3,0.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3831,1627790,1,1997,82.0,266.0,41,Center,,Croatia,1610612739,...,Y,Y,2016,1,23,6.0,0.6,3.9,0.0,
3832,78647,0,1953,85.0,240.0,34,Center,Kent State,USA,1610612765,...,Y,Y,Undrafted,Undrafted,Undrafted,0.3,0.1,1.1,0.0,
3833,78648,0,1948,73.0,170.0,6,Guard,Duquesne,USA,1610612749,...,Y,Y,1970,2,33,2.2,1.4,0.9,0.0,
3834,1627826,1,1997,84.0,240.0,40,Center,Mega Basket,Croatia,1610612746,...,Y,Y,2016,2,32,8.5,1.1,7.0,,0.126


## Get Player stats from nba_api
https://github.com/swar/nba_api/blob/master/docs/nba_api/stats/endpoints/playercareerstats.md

Parameter
- per_mode36: (Totals)|(PerGame)|(Per36)
- player_id: ex) 2544
- LeagueID: nullable


#options of tables:   
player_info.career_totals_all_star_season.get_data_frame()    
player_info.career_totals_college_season.get_data_frame()    
player_info.career_totals_post_season.get_data_frame()    
player_info.career_totals_regular_season.get_data_frame()    
player_info.season_rankings_post_season.get_data_frame()    
player_info.season_rankings_regular_season.get_data_frame()    
player_info.season_totals_all_star_season.get_data_frame()    
player_info.season_totals_college_season.get_data_frame()    
player_info.season_totals_regular_season.get_data_frame()    

**Don't following blocks if you don't want to wait for API calls. Just read the csv file I pasted in slack 'player_stats.csv'**

In [78]:
player_info_0 = PlayerCareerStats(player_id=Player['id'][0], per_mode36 = 'Per36')
player_stats_df = player_info_0.career_totals_regular_season.get_data_frame()

In [17]:
player_stats_df

Unnamed: 0,PLAYER_ID,LEAGUE_ID,Team_ID,GP,GS,MIN,FGM,FGA,FG_PCT,FG3M,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS
0,2,0,0,1073,717,30152.0,7.1,14.6,0.482,0.9,...,0.833,0.8,2.8,3.6,3.3,1.5,0.0,1.9,2.4,18.0


In [87]:
for i,v in enumerate(Player['id'][1:]):
    player_info = PlayerCareerStats(player_id=v, per_mode36 = 'Per36')
    player_info_df = player_info.career_totals_regular_season.get_data_frame()
    player_stats_df = player_stats_df.append(player_info_df)
    #print(v)   
    

In [44]:
player_stats_df = player_stats_df.drop_duplicates(subset=['PLAYER_ID'], keep=False)


In [19]:
player_stats_df

Unnamed: 0,PLAYER_ID,LEAGUE_ID,Team_ID,GP,GS,MIN,FGM,FGA,FG_PCT,FG3M,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS
0,2,00,0,1073,717,30152.0,7.1,14.6,0.482,0.9,...,0.833,0.8,2.8,3.6,3.3,1.5,0,1.9,2.4,18.0
0,3,00,0,1003,673,28518.0,4.4,9.4,0.467,0.1,...,0.761,2.7,5,7.8,2.2,1.5,0,2.1,3.7,12.0
0,7,00,0,1138,427,21976.0,4.9,10.2,0.481,0,...,0.806,3,6.3,9.3,2.1,0.9,0,2.6,5.7,14.4
0,9,00,0,951,304,22437.0,6.3,13.0,0.485,0.3,...,0.815,0.6,2.1,2.7,5.8,1.8,0,2.1,3.5,15.0
0,12,00,0,103,66,2058.0,4.7,11.2,0.423,0.8,...,0.633,1.9,3.6,5.4,2.0,1.3,0,2.1,3.2,12.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,1629744,00,0,87,1,769.0,5.8,13.0,0.442,3.5,...,0.808,0.7,4.2,4.9,1.9,0.7,0,1.4,2.6,16.0
0,1629745,00,0,3,0,15.0,2.4,12.0,0.200,0,...,1.000,4.8,2.4,7.2,0.0,2.4,0,0,2.4,9.6
0,1629750,00,0,95,5,1043.0,4.4,8.4,0.523,0.8,...,0.722,1.8,4.4,6.2,1.6,1.9,0,1.5,3.5,11.8
0,1629752,00,0,50,0,282.0,3.7,7.1,0.518,0.9,...,0.545,2.4,4.9,7.3,1.9,0.6,0,1.3,5.1,9.1


In [20]:
player_stats_df.to_csv('player_stats.csv', sep=',')

# Read the 'player_stats.csv' file and run below

In [102]:
player_stats_df = pd.read_csv('player_stats.csv')

In [103]:
player_stats_df

Unnamed: 0.1,Unnamed: 0,PLAYER_ID,LEAGUE_ID,Team_ID,GP,GS,MIN,FGM,FGA,FG_PCT,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS
0,0,2,0,0,1073,717.0,30152.0,7.1,14.6,0.482,...,0.833,0.8,2.8,3.6,3.3,1.5,0.0,1.9,2.4,18.0
1,0,3,0,0,1003,673.0,28518.0,4.4,9.4,0.467,...,0.761,2.7,5.0,7.8,2.2,1.5,0.0,2.1,3.7,12.0
2,0,7,0,0,1138,427.0,21976.0,4.9,10.2,0.481,...,0.806,3.0,6.3,9.3,2.1,0.9,0.0,2.6,5.7,14.4
3,0,9,0,0,951,304.0,22437.0,6.3,13.0,0.485,...,0.815,0.6,2.1,2.7,5.8,1.8,0.0,2.1,3.5,15.0
4,0,12,0,0,103,66.0,2058.0,4.7,11.2,0.423,...,0.633,1.9,3.6,5.4,2.0,1.3,0.0,2.1,3.2,12.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4133,0,1629744,0,0,87,1.0,769.0,5.8,13.0,0.442,...,0.808,0.7,4.2,4.9,1.9,0.7,0.0,1.4,2.6,16.0
4134,0,1629745,0,0,3,0.0,15.0,2.4,12.0,0.200,...,1.000,4.8,2.4,7.2,0.0,2.4,0.0,0.0,2.4,9.6
4135,0,1629750,0,0,95,5.0,1043.0,4.4,8.4,0.523,...,0.722,1.8,4.4,6.2,1.6,1.9,0.0,1.5,3.5,11.8
4136,0,1629752,0,0,50,0.0,282.0,3.7,7.1,0.518,...,0.545,2.4,4.9,7.3,1.9,0.6,0.0,1.3,5.1,9.1


In [104]:
player_data_df = pd.merge(player_stats_df, Player_Attributes, left_on='PLAYER_ID', right_on='id')
#pd.concat([player_stats_df,Player_Attributes], keys=['PLAYER_ID', 'ID'],axis=1)


In [105]:
player_data_df

Unnamed: 0.1,Unnamed: 0,PLAYER_ID,LEAGUE_ID,Team_ID,GP,GS,MIN,FGM,FGA,FG_PCT,...,NBA_FLAG,GAMES_PLAYED_FLAG,DRAFT_YEAR,DRAFT_ROUND,DRAFT_NUMBER,PTS_y,AST_y,REB_y,ALL_STAR_APPEARANCES,PIE
0,0,2,0,0,1073,717.0,30152.0,7.1,14.6,0.482,...,Y,Y,1983,1,4,14.1,2.5,2.8,0.0,
1,0,7,0,0,1138,427.0,21976.0,4.9,10.2,0.481,...,Y,Y,1981,1,13,7.7,1.1,5.0,0.0,
2,0,9,0,0,951,304.0,22437.0,6.3,13.0,0.485,...,Y,Y,1983,6,139,9.8,3.8,1.8,0.0,
3,0,15,0,0,789,250.0,14612.0,5.0,11.6,0.434,...,Y,Y,1994,1,15,7.5,1.0,2.2,0.0,
4,0,17,0,0,1086,950.0,37538.0,8.0,16.9,0.472,...,Y,Y,1983,1,14,20.4,5.6,6.1,9.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3473,0,1629744,0,0,87,1.0,769.0,5.8,13.0,0.442,...,Y,Y,Undrafted,Undrafted,Undrafted,2.6,0.4,0.8,,0.058
3474,0,1629745,0,0,3,0.0,15.0,2.4,12.0,0.200,...,Y,Y,Undrafted,Undrafted,Undrafted,1.3,0.0,1.0,0.0,
3475,0,1629750,0,0,95,5.0,1043.0,4.4,8.4,0.523,...,Y,Y,Undrafted,Undrafted,Undrafted,4.2,0.4,2.1,,0.069
3476,0,1629752,0,0,50,0.0,282.0,3.7,7.1,0.518,...,Y,Y,Undrafted,Undrafted,Undrafted,0.9,0.2,0.8,,0.035


In [106]:
#create age colume, year_in_game column and draft boolean flag
player_data_df['draft_flag'] = player_data_df['DRAFT_YEAR']!= 'Undrafted'
player_data_df['year_in_game'] = player_data_df['TO_YEAR'].astype(int) - player_data_df['FROM_YEAR'].astype(int)

#Player age should be calculated after final table to get age at the year of game
player_data_df['player_age'] = 2020 - player_data_df['BIRTHDATE'].astype(int)

In [107]:
player_data_df = player_data_df.drop(['POSITION','SCHOOL','COUNTRY','TEAM_ID','GAMES_PLAYED_CURRENT_SEASON_FLAG',
                                    'DRAFT_NUMBER','DRAFT_ROUND','NBA_FLAG','GAMES_PLAYED_FLAG','id','is_active',
                                      'LEAGUE_ID', 'Team_ID','DLEAGUE_FLAG','DRAFT_YEAR','TO_YEAR','FROM_YEAR',
                                      'PIE','ALL_STAR_APPEARANCES'
                                     ], axis = 1)

In [108]:
# dimension
player_data_df.shape

(3478, 34)

Check NA's    
**What do we want to do with NAs from GS, FG3M, FG3A, FG3_PCT, OREB, DREB, STL, BLK, TOV**

In [109]:
player_data_df.isnull().sum()

Unnamed: 0         0
PLAYER_ID          0
GP                 0
GS              1052
MIN                0
FGM                0
FGA                0
FG_PCT             0
FG3M             939
FG3A             939
FG3_PCT         1185
FTM                0
FTA                0
FT_PCT             0
OREB             635
DREB             635
REB_x              0
AST_x              0
STL              635
BLK              635
TOV              837
PF                 0
PTS_x              0
BIRTHDATE          0
HEIGHT             1
WEIGHT             2
JERSEY             0
SEASON_EXP         0
PTS_y              0
AST_y              0
REB_y              0
draft_flag         0
year_in_game       0
player_age         0
dtype: int64

for NAs in height and weight, I filled in with average

In [110]:
player_data_df["HEIGHT"].fillna(player_data_df['HEIGHT'].mean(), inplace = True)
player_data_df["WEIGHT"].fillna(player_data_df['WEIGHT'].mean(), inplace = True)

In [111]:
player_data_df.to_csv('player_data.csv', sep=',')