# Player Statistics and Attributes
get player data from sqlite and NBA api

In [1]:
# This tells matplotlib not to try opening a new window for each plot.
%matplotlib inline

# General libraries.
import re
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import requests
import plotly.express as px
from datetime import datetime

import sqlite3

from nba_api.stats.endpoints import PlayerCareerStats

In [12]:
# you might need to pip install the following libraries
#pip install nba_api
#pip install plotly.express
#pip install kaleido

## Get all player ID

In [2]:
#path = ".." #Everything preceding the file name
database = 'basketball.sqlite' #The path + the file name

conn = sqlite3.connect(database)

tables = pd.read_sql("""SELECT *
                        FROM sqlite_master
                        WHERE type='table';""", conn)
#tables

In [3]:
Player = pd.read_sql(
    """
        SELECT cast(id as int) as id, full_name, first_name, last_name, is_active from Player order by id
    """,
    conn)
#Player.to_csv('Player.csv', sep=',')

#Player['id'] = Player['id'].astype('int')
Player

Unnamed: 0,id,full_name,first_name,last_name,is_active
0,2,Byron Scott,Byron,Scott,0
1,3,Grant Long,Grant,Long,0
2,7,Dan Schayes,Dan,Schayes,0
3,9,Sedale Threatt,Sedale,Threatt,0
4,12,Chris King,Chris,King,0
...,...,...,...,...,...
4496,1629744,Matt Thomas,Matt,Thomas,1
4497,1629745,Tariq Owens,Tariq,Owens,1
4498,1629750,Javonte Green,Javonte,Green,1
4499,1629752,Juwan Morgan,Juwan,Morgan,1


## Get Player attribute

In [4]:
Player_Attributes = pd.read_sql(
    """
    SELECT 
    cast(pa.id as int) as id, p.is_active, cast(BIRTHDATE as date) as BIRTHDATE, HEIGHT, WEIGHT, JERSEY, POSITION,SCHOOL, COUNTRY, 
    TEAM_ID, GAMES_PLAYED_CURRENT_SEASON_FLAG,SEASON_EXP,
    FROM_YEAR,TO_YEAR,DLEAGUE_FLAG,NBA_FLAG,GAMES_PLAYED_FLAG,
    DRAFT_YEAR,DRAFT_ROUND,DRAFT_NUMBER,PTS,AST,REB,ALL_STAR_APPEARANCES,PIE
    FROM Player_Attributes pa
    left join Player p on p.id = pa.id
    where team_id <>0

    """, 
    conn)

Player_Attributes

Unnamed: 0,id,is_active,BIRTHDATE,HEIGHT,WEIGHT,JERSEY,POSITION,SCHOOL,COUNTRY,TEAM_ID,...,NBA_FLAG,GAMES_PLAYED_FLAG,DRAFT_YEAR,DRAFT_ROUND,DRAFT_NUMBER,PTS,AST,REB,ALL_STAR_APPEARANCES,PIE
0,76001,0,1968,82.0,240.0,30,Forward,Duke,USA,1610612757,...,Y,Y,1990,1,25,5.7,0.3,3.3,0.0,
1,76002,0,1946,81.0,235.0,54,Center,Iowa State,USA,1610612745,...,Y,Y,1968,1,5,9.0,1.2,8.0,0.0,
2,76003,0,1947,86.0,225.0,33,Center,UCLA,USA,1610612747,...,Y,Y,1969,1,1,24.6,3.6,11.2,18.0,
3,51,0,1969,73.0,162.0,1,Guard,Louisiana State,USA,1610612743,...,Y,Y,1990,1,3,14.6,3.5,1.9,0.0,
4,1505,0,1974,78.0,235.0,9,Forward-Guard,San Jose State,France,1610612758,...,Y,Y,1997,1,11,7.8,1.1,3.3,0.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3831,1627790,1,1997,82.0,266.0,41,Center,,Croatia,1610612739,...,Y,Y,2016,1,23,6.0,0.6,3.9,0.0,
3832,78647,0,1953,85.0,240.0,34,Center,Kent State,USA,1610612765,...,Y,Y,Undrafted,Undrafted,Undrafted,0.3,0.1,1.1,0.0,
3833,78648,0,1948,73.0,170.0,6,Guard,Duquesne,USA,1610612749,...,Y,Y,1970,2,33,2.2,1.4,0.9,0.0,
3834,1627826,1,1997,84.0,240.0,40,Center,Mega Basket,Croatia,1610612746,...,Y,Y,2016,2,32,8.5,1.1,7.0,,0.126


## Get Player stats from nba_api
https://github.com/swar/nba_api/blob/master/docs/nba_api/stats/endpoints/playercareerstats.md

Parameter
- per_mode36: (Totals)|(PerGame)|(Per36)
- player_id: ex) 2544
- LeagueID: nullable


#options of tables:   
player_info.career_totals_all_star_season.get_data_frame()    
player_info.career_totals_college_season.get_data_frame()    
player_info.career_totals_post_season.get_data_frame()    
player_info.career_totals_regular_season.get_data_frame()    
player_info.season_rankings_post_season.get_data_frame()    
player_info.season_rankings_regular_season.get_data_frame()    
player_info.season_totals_all_star_season.get_data_frame()    
player_info.season_totals_college_season.get_data_frame()    
player_info.season_totals_regular_season.get_data_frame()    

**Don't run following blocks if you don't want to wait for API calls. Just read the csv file I pasted in slack 'player_stats.csv'**

In [6]:
player_info_0 = PlayerCareerStats(player_id=Player['id'][0], per_mode36 = 'Per36')
player_stats_df = player_info_0.season_totals_regular_season.get_data_frame()

In [7]:
player_stats_df

Unnamed: 0,PLAYER_ID,SEASON_ID,LEAGUE_ID,TEAM_ID,TEAM_ABBREVIATION,PLAYER_AGE,GP,GS,MIN,FGM,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS
0,2,1983-84,0,1610612747,LAL,23.0,74,49,1637.0,7.3,...,0.806,1.1,2.5,3.6,3.9,1.8,0.4,2.6,3.8,17.3
1,2,1984-85,0,1610612747,LAL,24.0,81,65,2305.0,8.4,...,0.82,0.9,2.4,3.3,3.8,1.6,0.3,2.2,3.1,20.2
2,2,1985-86,0,1610612747,LAL,25.0,76,62,2190.0,8.3,...,0.784,0.9,2.2,3.1,2.7,1.4,0.2,1.8,2.7,19.3
3,2,1986-87,0,1610612747,LAL,26.0,82,82,2729.0,7.3,...,0.892,0.8,2.9,3.8,3.7,1.6,0.2,1.9,2.2,18.4
4,2,1987-88,0,1610612747,LAL,27.0,81,81,3048.0,8.4,...,0.858,0.9,3.0,3.9,4.0,1.8,0.3,1.9,2.4,20.7
5,2,1988-89,0,1610612747,LAL,28.0,74,73,2605.0,8.1,...,0.863,1.0,3.2,4.2,3.2,1.6,0.4,2.2,2.5,20.0
6,2,1989-90,0,1610612747,LAL,29.0,77,77,2593.0,6.6,...,0.766,0.7,2.7,3.4,3.8,1.1,0.4,1.7,2.5,16.6
7,2,1990-91,0,1610612747,LAL,30.0,82,82,2630.0,6.9,...,0.797,0.7,2.6,3.4,2.4,1.3,0.3,1.2,2.0,16.3
8,2,1991-92,0,1610612747,LAL,31.0,82,82,2679.0,6.2,...,0.838,1.0,3.2,4.2,3.0,1.4,0.4,1.6,1.9,16.4
9,2,1992-93,0,1610612747,LAL,32.0,58,53,1677.0,6.4,...,0.848,0.6,2.3,2.9,3.4,1.2,0.3,1.5,2.1,17.0


In [12]:
for i,v in enumerate(Player['id'][1:]):
    player_info = PlayerCareerStats(player_id=v, per_mode36 = 'Per36')
    player_info_df = player_info.season_totals_regular_season.get_data_frame()
    player_stats_df = player_stats_df.append(player_info_df)
    print(v)   
    

3
7
9
12
15
17
21
22
23
24
26
28
29
30
31
32
35
36
37
38
41
42
43
45
46
47
49
51
52
53
54
55
56
57
61
63
64
65
66
67
70
71
72
73
74
76
77
78
80
81
82
84
85
87
88
89
93
95
96
97
98
100
101
103
104
105
107
109
111
112
114
116
117
120
121
122
123
124
128
129
132
133
134
136
137
138
140
141
143
145
146
147
149
154
156
157
160
164
165
166
168
170
173
174
175
176
177
178
179
180
181
182
183
184
185
187
189
190
192
193
194
195
197
198
199
201
202
203
204
208
209
210
211
212
213
216
219
221
223
224
226
228
229
234
236
238
239
240
241
243
244
246
247
248
251
252
255
258
262
265
270
271
273
275
278
279
280
281
283
285
287
288
289
291
292
293
296
297
299
302
304
305
306
308
310
313
316
317
320
321
323
324
328
330
333
335
339
340
341
342
344
345
346
348
349
351
353
355
356
357
358
359
361
363
364
365
368
369
371
375
376
378
380
381
383
384
386
389
390
393
397
399
400
402
404
406
412
416
417
418
420
422
423
426
428
431
432
433
434
435
436
438
440
441
442
445
446
448
452
456
457
458
460
461
462
467


76669
76670
76671
76672
76673
76674
76675
76676
76677
76678
76679
76680
76681
76682
76683
76685
76686
76687
76689
76690
76691
76692
76693
76694
76695
76696
76697
76698
76699
76701
76702
76703
76704
76705
76706
76707
76708
76709
76711
76712
76713
76714
76715
76716
76717
76719
76720
76721
76722
76723
76725
76726
76727
76728
76729
76731
76732
76733
76734
76735
76736
76738
76739
76740
76742
76744
76745
76746
76747
76748
76749
76750
76751
76752
76753
76754
76755
76756
76757
76758
76759
76760
76761
76762
76763
76764
76765
76767
76768
76769
76770
76771
76772
76773
76774
76776
76778
76779
76780
76781
76782
76783
76785
76786
76788
76789
76790
76791
76793
76794
76797
76800
76801
76802
76803
76804
76805
76806
76807
76808
76809
76810
76811
76812
76813
76815
76817
76818
76819
76820
76821
76822
76823
76824
76825
76826
76827
76828
76829
76830
76831
76832
76833
76834
76835
76836
76837
76838
76839
76841
76842
76843
76844
76847
76850
76852
76853
76854
76855
76859
76860
76861
76862
76863
76865
76867
7686

78347
78348
78349
78350
78351
78352
78353
78355
78356
78357
78358
78359
78360
78361
78363
78364
78365
78366
78367
78368
78369
78370
78371
78372
78374
78375
78377
78378
78379
78380
78381
78382
78385
78386
78387
78388
78390
78391
78392
78393
78394
78395
78396
78397
78398
78399
78400
78401
78402
78403
78404
78405
78406
78407
78409
78411
78413
78414
78415
78416
78417
78418
78419
78420
78421
78422
78423
78425
78426
78427
78428
78429
78430
78431
78432
78433
78434
78435
78436
78437
78438
78439
78440
78441
78442
78443
78444
78445
78446
78447
78448
78449
78450
78453
78454
78455
78456
78457
78458
78459
78460
78461
78462
78463
78464
78465
78466
78467
78468
78469
78470
78471
78473
78474
78475
78477
78478
78479
78481
78482
78483
78484
78485
78486
78488
78489
78492
78494
78495
78497
78499
78500
78501
78504
78506
78507
78508
78509
78510
78512
78513
78514
78515
78516
78519
78520
78521
78522
78523
78524
78525
78526
78527
78528
78529
78530
78531
78532
78533
78534
78537
78538
78539
78540
78541
78543
7854

1627863
1627866
1627868
1627875
1627879
1627883
1627884
1627885
1627936
1627982
1627988
1628021
1628035
1628070
1628249
1628365
1628366
1628367
1628368
1628369
1628370
1628371
1628372
1628373
1628374
1628378
1628379
1628380
1628381
1628382
1628383
1628384
1628385
1628386
1628387
1628388
1628389
1628390
1628391
1628392
1628393
1628394
1628395
1628396
1628397
1628398
1628399
1628400
1628401
1628402
1628403
1628404
1628405
1628407
1628408
1628409
1628410
1628411
1628412
1628413
1628414
1628415
1628416
1628417
1628418
1628420
1628421
1628422
1628424
1628425
1628427
1628429
1628430
1628432
1628436
1628439
1628443
1628444
1628449
1628451
1628455
1628462
1628463
1628464
1628467
1628469
1628470
1628475
1628476
1628492
1628493
1628495
1628499
1628500
1628502
1628503
1628504
1628505
1628506
1628510
1628513
1628515
1628518
1628537
1628571
1628605
1628656
1628681
1628769
1628778
1628935
1628959
1628960
1628961
1628963
1628964
1628966
1628968
1628969
1628970
1628971
1628972
1628973
1628975
1628976


In [13]:
player_stats_df

Unnamed: 0,PLAYER_ID,SEASON_ID,LEAGUE_ID,TEAM_ID,TEAM_ABBREVIATION,PLAYER_AGE,GP,GS,MIN,FGM,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS
0,66,1995-96,00,1610612750,MIN,26.0,53,0,369.0,5.3,...,0.561,3.7,4,7.7,0.8,1.7,0.8,2.5,6.9,12.8
0,74,1994-95,00,1610612758,SAC,22.0,3,0,5.0,0.0,...,0,0,0,0.0,7.2,0,0,0,21.6,0.0
0,132,1994-95,00,1610612744,GOS,23.0,41,6,395.0,4.6,...,0.682,1.9,3.4,5.3,1.6,1,1.4,2.5,4.1,15.2
0,141,1994-95,00,1610612764,WAS,26.0,62,13,982.0,3.5,...,0.614,1.6,4.6,6.2,2.5,1.7,0.4,2.1,4.7,8.9
0,189,1994-95,00,1610612755,PHL,24.0,55,8,809.0,3.2,...,0.7,0.6,2.2,2.8,7.7,1.6,0.1,4.3,2.6,8.7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3,1629750,2020-21,00,0,TOT,27.0,41,2,474.0,4.0,...,0.75,2,3.4,5.4,1.3,2.1,0.5,1.4,3.4,11.1
4,1629750,2021-22,00,1610612741,CHI,28.0,7,2,123.0,4.1,...,0.875,2,5,7.0,1.2,0.9,1.2,0.6,4.1,11.1
0,1629752,2019-20,00,1610612762,UTA,23.0,21,0,134.0,4.0,...,0.75,2.7,5.1,7.8,1.6,0.3,0.5,1.6,5.1,9.6
1,1629752,2020-21,00,1610612762,UTA,24.0,29,0,147.0,3.4,...,0.429,2.2,4.6,6.9,2.2,1,0.2,1,5.1,8.6


In [14]:
player_stats_df.to_csv('player_stats.csv', sep=',')

# Read the 'player_stats.csv' file and run below

In [15]:
player_stats_df = pd.read_csv('player_stats.csv')

In [16]:
player_stats_df

Unnamed: 0.1,Unnamed: 0,PLAYER_ID,SEASON_ID,LEAGUE_ID,TEAM_ID,TEAM_ABBREVIATION,PLAYER_AGE,GP,GS,MIN,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS
0,0,66,1995-96,0,1610612750,MIN,26.0,53,0.0,369.0,...,0.561,3.7,4.0,7.7,0.8,1.7,0.8,2.5,6.9,12.8
1,0,74,1994-95,0,1610612758,SAC,22.0,3,0.0,5.0,...,0.000,0.0,0.0,0.0,7.2,0.0,0.0,0.0,21.6,0.0
2,0,132,1994-95,0,1610612744,GOS,23.0,41,6.0,395.0,...,0.682,1.9,3.4,5.3,1.6,1.0,1.4,2.5,4.1,15.2
3,0,141,1994-95,0,1610612764,WAS,26.0,62,13.0,982.0,...,0.614,1.6,4.6,6.2,2.5,1.7,0.4,2.1,4.7,8.9
4,0,189,1994-95,0,1610612755,PHL,24.0,55,8.0,809.0,...,0.700,0.6,2.2,2.8,7.7,1.6,0.1,4.3,2.6,8.7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27743,3,1629750,2020-21,0,0,TOT,27.0,41,2.0,474.0,...,0.750,2.0,3.4,5.4,1.3,2.1,0.5,1.4,3.4,11.1
27744,4,1629750,2021-22,0,1610612741,CHI,28.0,7,2.0,123.0,...,0.875,2.0,5.0,7.0,1.2,0.9,1.2,0.6,4.1,11.1
27745,0,1629752,2019-20,0,1610612762,UTA,23.0,21,0.0,134.0,...,0.750,2.7,5.1,7.8,1.6,0.3,0.5,1.6,5.1,9.6
27746,1,1629752,2020-21,0,1610612762,UTA,24.0,29,0.0,147.0,...,0.429,2.2,4.6,6.9,2.2,1.0,0.2,1.0,5.1,8.6


In [17]:
player_data_df = pd.merge(player_stats_df, Player_Attributes, left_on='PLAYER_ID', right_on='id')
#pd.concat([player_stats_df,Player_Attributes], keys=['PLAYER_ID', 'ID'],axis=1)


In [18]:
player_data_df

Unnamed: 0.1,Unnamed: 0,PLAYER_ID,SEASON_ID,LEAGUE_ID,TEAM_ID_x,TEAM_ABBREVIATION,PLAYER_AGE,GP,GS,MIN,...,NBA_FLAG,GAMES_PLAYED_FLAG,DRAFT_YEAR,DRAFT_ROUND,DRAFT_NUMBER,PTS_y,AST_y,REB_y,ALL_STAR_APPEARANCES,PIE
0,0,66,1995-96,0,1610612750,MIN,26.0,53,0.0,369.0,...,Y,Y,Undrafted,Undrafted,Undrafted,2.5,0.2,1.5,0.0,
1,0,66,1995-96,0,1610612750,MIN,26.0,53,0.0,369.0,...,Y,Y,Undrafted,Undrafted,Undrafted,2.5,0.2,1.5,0.0,
2,0,74,1994-95,0,1610612758,SAC,22.0,3,0.0,5.0,...,Y,Y,Undrafted,Undrafted,Undrafted,0.0,0.3,0.0,0.0,
3,0,74,1994-95,0,1610612758,SAC,22.0,3,0.0,5.0,...,Y,Y,Undrafted,Undrafted,Undrafted,0.0,0.3,0.0,0.0,
4,0,132,1994-95,0,1610612744,GOS,23.0,41,6.0,395.0,...,Y,Y,1994,2,45,4.1,0.4,1.4,0.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23179,2,1629750,2020-21,0,1610612741,CHI,27.0,16,0.0,128.0,...,Y,Y,Undrafted,Undrafted,Undrafted,4.2,0.4,2.1,,0.069
23180,3,1629750,2020-21,0,0,TOT,27.0,41,2.0,474.0,...,Y,Y,Undrafted,Undrafted,Undrafted,4.2,0.4,2.1,,0.069
23181,4,1629750,2021-22,0,1610612741,CHI,28.0,7,2.0,123.0,...,Y,Y,Undrafted,Undrafted,Undrafted,4.2,0.4,2.1,,0.069
23182,0,1629752,2019-20,0,1610612762,UTA,23.0,21,0.0,134.0,...,Y,Y,Undrafted,Undrafted,Undrafted,0.9,0.2,0.8,,0.035


In [19]:
#create age colume, year_in_game column and draft boolean flag
player_data_df['draft_flag'] = player_data_df['DRAFT_YEAR']!= 'Undrafted'
player_data_df['year_in_game'] = player_data_df['TO_YEAR'].astype(int) - player_data_df['FROM_YEAR'].astype(int)

#Player age should be calculated after final table to get age at the year of game
player_data_df['player_age'] = 2020 - player_data_df['BIRTHDATE'].astype(int)

In [22]:
player_data_df = player_data_df.drop(['POSITION','SCHOOL','COUNTRY','GAMES_PLAYED_CURRENT_SEASON_FLAG',
                                    'DRAFT_NUMBER','DRAFT_ROUND','NBA_FLAG','GAMES_PLAYED_FLAG','id','is_active',
                                      'LEAGUE_ID','DLEAGUE_FLAG','DRAFT_YEAR','TO_YEAR','FROM_YEAR',
                                      'PIE','ALL_STAR_APPEARANCES','Unnamed: 0'
                                     ], axis = 1)

In [23]:
# dimension
player_data_df.shape

(23184, 38)

Check NA's    
**What do we want to do with NAs from GS, FG3M, FG3A, FG3_PCT, OREB, DREB, STL, BLK, TOV**

In [24]:
player_data_df.isnull().sum()

PLAYER_ID               0
SEASON_ID               0
TEAM_ID_x               0
TEAM_ABBREVIATION       0
PLAYER_AGE              0
GP                      0
GS                   5836
MIN                     0
FGM                     0
FGA                     0
FG_PCT                 11
FG3M                 5350
FG3A                 5350
FG3_PCT              5607
FTM                     0
FTA                     0
FT_PCT                137
OREB                 3401
DREB                 3401
REB_x                   0
AST_x                   0
STL                  3401
BLK                  3401
TOV                  4610
PF                      0
PTS_x                   0
BIRTHDATE               0
HEIGHT                  2
WEIGHT                  4
JERSEY                  0
TEAM_ID_y               0
SEASON_EXP              0
PTS_y                   0
AST_y                   0
REB_y                   0
draft_flag              0
year_in_game            0
player_age              0
dtype: int64

for NAs in height and weight, I filled in with average

In [25]:
player_data_df["HEIGHT"].fillna(player_data_df['HEIGHT'].mean(), inplace = True)
player_data_df["WEIGHT"].fillna(player_data_df['WEIGHT'].mean(), inplace = True)

### Save player stats to file

In [26]:
player_data_df.to_csv('player_data.csv', sep=',')