In [1]:
# Created 6/28/2021
# Updated 9/27

import pandas as pd
import numpy as np
import nba_api.stats.endpoints as ep
import datetime
import re
from os import path

In [2]:
def lshelper(game_arr, gd):
    """Record one ``(matchup, plus_minus, winner)`` tuple per game ID in ``gd``.

    ``game_arr`` is read positionally as a 2-D array. For every row:

    * column 4 is used as the dictionary key (the game ID),
    * column 6 is stored as the matchup string,
    * column 27 is stored in the plus-minus slot,
    * column 7 is compared against ``'W'``; on a match column 2 of that row
      is taken as the winner, otherwise the winner is the last three
      characters of the matchup string.

    A game ID that occurs in several rows keeps the values of the last such
    row, because every row overwrites ``gd[game_id]``.

    Returns the same ``gd`` object that was passed in (updated in place).
    """
    # NOTE(review): the column positions presumably follow the
    # LeagueGameFinder frame layout -- confirm if the API schema changes.
    for row in range(len(game_arr)):
        matchup = game_arr[row, 6]
        row_team_won = game_arr[row, 7] == 'W'
        winner = game_arr[row, 2] if row_team_won else matchup[-3:]
        gd[game_arr[row, 4]] = (matchup, game_arr[row, 27], winner)
    return gd


class StatBucket:
    """Container for one season's game logs from ``LeagueGameFinder``.

    Attributes (all ``None`` until populated):
        log: rows whose SEASON_ID starts with '2' and GAME_ID with '002'
            (regular season); set by ``update_log``.
        oslog: rows whose SEASON_ID starts with '4' and GAME_ID with '004'
            (postseason); set by ``update_log``.
        clog: ``log`` rows followed by ``oslog`` rows; set by ``update_log``.
        outcomes: dict keyed by game ID, built by ``get_log_stats``.
        data: not written by any method of this class.
    """

    def __init__(self):
        self.clog = None
        self.log = None
        self.oslog = None
        self.outcomes = None
        self.data = None

    def update_log(self, season):
        """Fetch the league game log for ``season`` and split it by game type.

        Args:
            season: season string such as '2019-20'. It is passed to the API
                as ``season_nullable``; its first four characters are used to
                build the SEASON_ID values that are matched.
        """
        result = ep.leaguegamefinder.LeagueGameFinder(season_nullable=season)
        all_games = result.get_data_frames()[0]
        start_year = season[:4]

        regular = all_games[all_games.SEASON_ID == '2' + start_year]
        regular = regular[regular.GAME_ID.str[:3] == '002']  # may need to update
        post = all_games[all_games.SEASON_ID == '4' + start_year]
        post = post[post.GAME_ID.str[:3] == '004']

        self.log = regular
        self.oslog = post
        # DataFrame.append was removed in pandas 2.0. pd.concat keeps the same
        # row order and index labels that append produced.
        self.clog = pd.concat([regular, post])

    def get_log_stats(self):
        """Build ``self.outcomes`` from ``self.log`` and then ``self.oslog``.

        ``update_log`` must have been called first so that both logs are
        DataFrames. Both logs are fed through ``lshelper`` into one dict.
        """
        outcomes = lshelper(self.log.to_numpy(), {})
        self.outcomes = lshelper(self.oslog.to_numpy(), outcomes)
        
        
def Season_csv(statfunc, gidset):
    """Fetch frame 1 of ``statfunc`` for every game ID and stack the results.

    Args:
        statfunc: callable accepting ``game_id=...`` whose return value
            exposes ``get_data_frames()``; the frame at index 1 is kept for
            each game.
        gidset: iterable of game IDs. It is not modified.

    Returns:
        A single DataFrame with every per-game frame stacked. Row labels of
        the individual frames are kept as they are.

    Raises:
        ValueError: if ``gidset`` is empty.
    """
    game_ids = list(gidset)  # snapshot so the caller's collection is left intact
    if not game_ids:
        raise ValueError('Season_csv needs at least one game id')
    # Collect first and concatenate once: DataFrame.append was removed in
    # pandas 2.0 and re-copied the accumulated frame on every iteration.
    frames = [statfunc(game_id=gid).get_data_frames()[1] for gid in game_ids]
    return pd.concat(frames)

### advanced: ep.boxscoreadvancedv2.BoxScoreAdvancedV2
['GAME_ID', 'TEAM_ID', 'TEAM_NAME', 'TEAM_ABBREVIATION', 'TEAM_CITY', 'MIN', 'E_OFF_RATING', 'OFF_RATING', 'E_DEF_RATING', 'DEF_RATING', 'E_NET_RATING', 'NET_RATING', 'AST_PCT', 'AST_TOV', 'AST_RATIO', 'OREB_PCT', 'DREB_PCT', 'REB_PCT', 'E_TM_TOV_PCT', 'TM_TOV_PCT', 'EFG_PCT', 'TS_PCT', 'USG_PCT', 'E_USG_PCT', 'E_PACE', 'PACE', 'PACE_PER40', 'POSS', 'PIE']

### fourfactors: ep.boxscorefourfactorsv2.BoxScoreFourFactorsV2
['GAME_ID', 'TEAM_ID', 'TEAM_NAME', 'TEAM_ABBREVIATION', 'TEAM_CITY', 'MIN', 'EFG_PCT', 'FTA_RATE', 'TM_TOV_PCT', 'OREB_PCT', 'OPP_EFG_PCT', 'OPP_FTA_RATE', 'OPP_TOV_PCT', 'OPP_OREB_PCT']

### misc: ep.boxscoremiscv2.BoxScoreMiscV2
['GAME_ID', 'TEAM_ID', 'TEAM_NAME', 'TEAM_ABBREVIATION', 'TEAM_CITY', 'MIN', 'PTS_OFF_TOV', 'PTS_2ND_CHANCE', 'PTS_FB', 'PTS_PAINT', 'OPP_PTS_OFF_TOV', 'OPP_PTS_2ND_CHANCE', 'OPP_PTS_FB', 'OPP_PTS_PAINT', 'BLK', 'BLKA', 'PF', 'PFD']

### scoring: ep.boxscorescoringv2.BoxScoreScoringV2

['GAME_ID', 'TEAM_ID', 'TEAM_NAME', 'TEAM_ABBREVIATION', 'TEAM_CITY', 'MIN', 'PCT_FGA_2PT', 'PCT_FGA_3PT', 'PCT_PTS_2PT', 'PCT_PTS_2PT_MR', 'PCT_PTS_3PT', 'PCT_PTS_FB', 'PCT_PTS_FT', 'PCT_PTS_OFF_TOV', 'PCT_PTS_PAINT', 'PCT_AST_2PM', 'PCT_UAST_2PM', 'PCT_AST_3PM', 'PCT_UAST_3PM', 'PCT_AST_FGM', 'PCT_UAST_FGM']



## ALL DATA: combined stat columns (basic box-score stats plus the four endpoints above)
['PTS', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A',
 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'STL',
       'BLK', 'TOV', 'PF', 'E_OFF_RATING',
       'OFF_RATING', 'E_DEF_RATING', 'DEF_RATING', 'E_NET_RATING',
       'NET_RATING', 'AST_PCT', 'AST_TOV', 'AST_RATIO', 'OREB_PCT', 'DREB_PCT',
       'REB_PCT', 'E_TM_TOV_PCT', 'TM_TOV_PCT', 'EFG_PCT', 'TS_PCT', 'USG_PCT',
       'E_USG_PCT', 'E_PACE', 'PACE', 'PACE_PER40', 'POSS', 'PIE', 'FTA_RATE',
       'OPP_EFG_PCT', 'OPP_FTA_RATE', 'OPP_TOV_PCT', 'OPP_OREB_PCT',
       'PTS_OFF_TOV', 'PTS_2ND_CHANCE', 'PTS_FB', 'PTS_PAINT',
       'OPP_PTS_OFF_TOV', 'OPP_PTS_2ND_CHANCE', 'OPP_PTS_FB', 'OPP_PTS_PAINT',
       'BLKA', 'PFD', 'PCT_FGA_2PT', 'PCT_FGA_3PT', 'PCT_PTS_2PT',
       'PCT_PTS_2PT_MR', 'PCT_PTS_3PT', 'PCT_PTS_FB', 'PCT_PTS_FT',
       'PCT_PTS_OFF_TOV', 'PCT_PTS_PAINT', 'PCT_AST_2PM', 'PCT_UAST_2PM',
       'PCT_AST_3PM', 'PCT_UAST_3PM', 'PCT_AST_FGM', 'PCT_UAST_FGM']

In [3]:
# CONFIGURATION: seasons and box-score endpoints

# Seasons handled by the multi-season cell.
SEASONS = [
    '2010-11',
    '2009-10',
    '2008-09',
]

# Other seasons, kept for reference:
# ['2012-13',
#           '2013-14',
#           '2014-15',
#           '2015-16',
#           '2016-17',
#           '2017-18',
#           '2018-19',
#           '2019-20',
#           '2020-21']

# Endpoint names to download; each one must be a key of FD.
FN = [
    'advanced',
    'fourfactors',
    'misc',
    'scoring',
]
#      'summary',
#      'traditional'

# Endpoint name -> nba_api box-score endpoint class.
FD = {
    'advanced': ep.boxscoreadvancedv2.BoxScoreAdvancedV2,
    'fourfactors': ep.boxscorefourfactorsv2.BoxScoreFourFactorsV2,
    'misc': ep.boxscoremiscv2.BoxScoreMiscV2,
    'scoring': ep.boxscorescoringv2.BoxScoreScoringV2,
    'summary': ep.boxscoresummaryv2.BoxScoreSummaryV2,
    'traditional': ep.boxscoretraditionalv2.BoxScoreTraditionalV2,
}

In [None]:
# FOR MULTIPLE SEASONS
# For every season, download each endpoint in FN whose CSV is not on disk yet.
for season in SEASONS:
    # Endpoints that still need a file for this season.
    pending = [fn for fn in FN
               if not path.exists(f'DATA/raw/{fn}/{fn}{season}.csv')]
    if not pending:
        continue

    # Build the season log once per season instead of once per endpoint.
    # The full 'YYYY-YY' string is passed, as in the single-season cell;
    # update_log slices the start year out of it itself.
    sb = StatBucket()
    sb.update_log(season)
    sb.get_log_stats()
    gidset = set(sb.outcomes.keys())

    for fn in pending:
        # Pass a copy so every endpoint sees the complete set of game IDs,
        # even if Season_csv consumes the set it is given.
        ns = Season_csv(FD[fn], set(gidset))
        # One multi-key sort gives the same order as the previous pair of
        # stable sorts (TEAM_ID first, then GAME_ID).
        ns = ns.sort_values(['GAME_ID', 'TEAM_ID'])
        ns.to_csv(f'DATA/raw/{fn}/{fn}{season}.csv')

In [9]:
## FOR ONE SEASON / FUNCTION
# Update this cell, then run the cell below.
# SEASON is the season string handed to StatBucket.update_log.
# FUNC is an endpoint name; the commented-out fetch lines in the cell below
# use it as a key into FD and in the output CSV path.
SEASON = '2019-20'
FUNC = 'advanced'

In [12]:
# Pull the log for SEASON and collect every game ID found in it.
sb = StatBucket()
sb.update_log(SEASON)
sb.get_log_stats()
gidset = set(sb.outcomes)
# Download step, currently disabled:
# ns = Season_csv(FD[FUNC], gidset)
# ns = ns.sort_values('GAME_ID')
# ns.to_csv(f'DATA/raw/{FUNC}/{FUNC}{SEASON}.csv')

In [14]:
# Inspect the game IDs collected from the season log.
gidset

{'0041900222',
 '0021900068',
 '0021900643',
 '0021900011',
 '0021900594',
 '0021900144',
 '0041900156',
 '0021900719',
 '0021900555',
 '0021901307',
 '0021900910',
 '0021900826',
 '0021900699',
 '0021900619',
 '0021900825',
 '0021900100',
 '0021900579',
 '0041900102',
 '0021901232',
 '0021900940',
 '0021901268',
 '0021900263',
 '0041900401',
 '0021900687',
 '0021900195',
 '0021900595',
 '0021900220',
 '0021900854',
 '0041900217',
 '0021900432',
 '0021900468',
 '0021900463',
 '0021900464',
 '0021900447',
 '0021900335',
 '0021900845',
 '0021900618',
 '0021900766',
 '0021900320',
 '0021900488',
 '0021900236',
 '0021900370',
 '0021901239',
 '0021900118',
 '0021900346',
 '0021900567',
 '0021900551',
 '0041900167',
 '0021900760',
 '0021900623',
 '0021900592',
 '0021900818',
 '0021900722',
 '0021900355',
 '0021900415',
 '0021900274',
 '0021900177',
 '0021900073',
 '0021900096',
 '0021901309',
 '0021900915',
 '0021900206',
 '0041900402',
 '0041900104',
 '0041900224',
 '0021900228',
 '00219003

In [16]:

# Look at frame 0 of the advanced box score for a single game.
# Season_csv keeps frame 1 of the same call instead.
ep.boxscoreadvancedv2.BoxScoreAdvancedV2(game_id='0021900263').get_data_frames()[0]

Unnamed: 0,GAME_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_CITY,PLAYER_ID,PLAYER_NAME,NICKNAME,START_POSITION,COMMENT,MIN,...,TM_TOV_PCT,EFG_PCT,TS_PCT,USG_PCT,E_USG_PCT,E_PACE,PACE,PACE_PER40,POSS,PIE
0,21900263,1610612764,WAS,Washington,1628972,Troy Brown Jr.,Troy,F,,14:11,...,28.6,0.5,0.5,0.212,0.217,107.08,108.3,90.25,32,0.031
1,21900263,1610612764,WAS,Washington,1629060,Rui Hachimura,Rui,F,,31:02,...,0.0,0.25,0.338,0.122,0.122,107.0,105.18,87.65,68,0.027
2,21900263,1610612764,WAS,Washington,1628418,Thomas Bryant,Thomas,C,,29:48,...,5.6,0.909,0.933,0.171,0.174,109.79,108.72,90.6,67,0.216
3,21900263,1610612764,WAS,Washington,203078,Bradley Beal,Bradley,G,,39:42,...,3.3,0.722,0.781,0.245,0.251,108.65,107.61,89.67,89,0.164
4,21900263,1610612764,WAS,Washington,202738,Isaiah Thomas,Isaiah,G,,25:26,...,15.0,0.5,0.5,0.238,0.243,109.05,107.58,89.65,58,0.052
5,21900263,1610612764,WAS,Washington,1629021,Moritz Wagner,Moritz,,,18:12,...,23.1,0.786,0.786,0.25,0.255,105.55,105.49,87.91,40,0.095
6,21900263,1610612764,WAS,Washington,203895,Jordan McRae,Jordan,,,24:20,...,8.3,0.688,0.688,0.145,0.154,110.54,112.44,93.7,58,0.075
7,21900263,1610612764,WAS,Washington,202722,Davis Bertans,Davis,,,27:23,...,7.1,0.778,0.81,0.162,0.168,108.85,109.56,91.3,62,0.089
8,21900263,1610612764,WAS,Washington,202397,Ish Smith,Ish,,,25:38,...,9.5,0.818,0.852,0.23,0.237,106.66,105.8,88.17,56,0.174
9,21900263,1610612764,WAS,Washington,1629067,Isaac Bonga,Isaac,,,4:18,...,0.0,0.0,0.0,0.0,0.0,102.25,100.47,83.72,9,0.016
