In [None]:
# Created 6/28/2021
# Updated 9/27

import pandas as pd
import numpy as np
import nba_api.stats.endpoints as ep
import datetime
import re
from os import path

In [None]:
def lshelper(game_arr, gd):
    """Fold the rows of a game-log array into ``gd``, keyed by game id.

    Each row is read by fixed column position: 2 (team), 4 (game id),
    6 (matchup string), 7 (result flag) and 27 (plus/minus).
    NOTE(review): these meanings are taken from how the values are used here
    and presumably match the LeagueGameFinder column order - confirm.

    Args:
        game_arr: 2-D array-like (e.g. ``DataFrame.to_numpy()``), one row per
            team per game.
        gd: dict to fill; it is mutated in place.

    Returns:
        The same ``gd`` object, mapping game id -> (matchup, plus/minus, winner).
        When a game id occurs in several rows, the last row wins.
    """
    for row in game_arr:
        matchup = row[6]
        # On a win the row's own team is the winner; otherwise take the last
        # three characters of the matchup string (presumably the opponent's
        # abbreviation).
        victor = row[2] if row[7] == 'W' else matchup[-3:]
        gd[row[4]] = (matchup, row[27], victor)
    return gd


class StatBucket:
    """Container for one season's game logs and the outcomes derived from them.

    Attributes (all ``None`` until populated):
        log: rows whose SEASON_ID is '2' + year and whose GAME_ID starts
            with '002' (presumably the regular season).
        oslog: rows whose SEASON_ID is '4' + year and whose GAME_ID starts
            with '004' (presumably the postseason).
        clog: ``log`` followed by ``oslog`` in a single frame.
        outcomes: dict built by ``get_log_stats``.
        data: initialised to ``None``; not used by the methods in this class.
    """

    def __init__(self):
        self.clog = None
        self.log = None
        self.oslog = None
        self.outcomes = None
        self.data = None

    def update_log(self, season):
        """Fetch the season's games and split them into ``log`` / ``oslog``.

        Args:
            season: season string; it is passed as-is to ``LeagueGameFinder``
                and its first four characters are used to build the
                SEASON_ID values that are filtered on.
        """
        result = ep.leaguegamefinder.LeagueGameFinder(season_nullable=season)
        all_games = result.get_data_frames()[0]
        year = season[:4]

        # The GAME_ID prefixes are hard-coded; the original author noted they
        # "may need to update".
        regular = all_games[all_games.SEASON_ID == '2' + year]
        regular = regular[regular.GAME_ID.str[:3] == '002']
        post = all_games[all_games.SEASON_ID == '4' + year]
        post = post[post.GAME_ID.str[:3] == '004']

        self.log = regular
        self.oslog = post
        # DataFrame.append was removed in pandas 2.0; pd.concat gives the same
        # result (original index labels are kept).
        self.clog = pd.concat([regular, post])

    def get_log_stats(self):
        """Build ``self.outcomes`` from ``log`` and ``oslog`` via ``lshelper``.

        ``update_log`` must have been called first; otherwise ``log`` is still
        ``None`` and this raises ``AttributeError``.
        """
        outcomes = lshelper(self.log.to_numpy(), dict())
        # Same dict is passed again, so the oslog rows are added to it.
        self.outcomes = lshelper(self.oslog.to_numpy(), outcomes)
        
        
def Season_csv(statfunc, gidset):
    """Download one box-score table per game and stack them into one frame.

    For every game id, ``statfunc(game_id=...)`` is called and the frame at
    index 1 of its ``get_data_frames()`` result is kept.

    Args:
        statfunc: callable accepting a ``game_id`` keyword and returning an
            object with ``get_data_frames()`` (e.g. one of the values of FD).
        gidset: iterable of game ids. It is not modified.

    Returns:
        A DataFrame with the per-game frames concatenated in iteration order;
        each frame keeps its own index labels. Callers that need a fixed row
        order should sort the result.

    Raises:
        ValueError: if ``gidset`` is empty.
    """
    # Snapshot the ids so the caller's collection is left untouched.
    game_ids = list(gidset)
    if not game_ids:
        raise ValueError('Season_csv needs at least one game id')
    # Collect first and concatenate once: growing a frame inside the loop is
    # quadratic, and DataFrame.append no longer exists in pandas 2.x.
    frames = [statfunc(game_id=gid).get_data_frames()[1] for gid in game_ids]
    return pd.concat(frames)

In [None]:
# CONFIGURATION
# Season labels processed by the multi-season cell; each label is also part of
# the output CSV file name.
SEASONS = ['2010-11', '2009-10', '2008-09']

# Other season labels, currently disabled:
# ['2012-13',
#  '2013-14',
#  '2014-15',
#  '2015-16',
#  '2016-17',
#  '2017-18',
#  '2018-19',
#  '2019-20',
#  '2020-21']

# Stat families to download; each name is a key of FD and a directory under
# DATA/raw. 'summary' and 'traditional' are currently disabled.
FN = [
    'advanced',
    'fourfactors',
    'misc',
    'scoring',
    # 'summary',
    # 'traditional',
]

# Maps a stat-family name to the nba_api endpoint class that serves it.
# The values are passed to Season_csv, which calls them with a game_id keyword.
FD = {
    'advanced': ep.boxscoreadvancedv2.BoxScoreAdvancedV2,
    'fourfactors': ep.boxscorefourfactorsv2.BoxScoreFourFactorsV2,
    'misc': ep.boxscoremiscv2.BoxScoreMiscV2,
    'scoring': ep.boxscorescoringv2.BoxScoreScoringV2,
    'summary': ep.boxscoresummaryv2.BoxScoreSummaryV2,
    'traditional': ep.boxscoretraditionalv2.BoxScoreTraditionalV2,
}

In [None]:
# FOR MULTIPLE SEASONS
# Writes DATA/raw/<stat>/<stat><season>.csv for every season in SEASONS and
# every stat family in FN, skipping files that already exist.
for season in SEASONS:
    # The season's game ids are the same for every stat family, so they are
    # fetched at most once per season - and only if some file is still missing.
    season_game_ids = None
    for stat_name in FN:
        out_file = f'DATA/raw/{stat_name}/{stat_name}{season}.csv'
        if path.exists(out_file):
            continue
        if season_game_ids is None:
            sb = StatBucket()
            # NOTE(review): only the start year is passed here, while
            # update_log itself slices season[:4]; it may have been written
            # for the full 'YYYY-YY' label - confirm what LeagueGameFinder expects.
            sb.update_log(season[:4])
            sb.get_log_stats()
            season_game_ids = frozenset(sb.outcomes.keys())
        # Hand over a fresh mutable copy so the cached ids stay intact even if
        # the callee consumes its argument.
        ns = Season_csv(FD[stat_name], set(season_game_ids))
        # Two stable sorts: final order is GAME_ID, then TEAM_ID within a game.
        ns = (ns.sort_values('TEAM_ID', kind='mergesort')
                .sort_values('GAME_ID', kind='mergesort'))
        ns.to_csv(out_file)

In [None]:
## FOR ONE SEASON / FUNCTION
# UPDATE THIS CELL THEN RUN CELL BELOW
# SEASON: season label; the cell below passes its first four characters to
# StatBucket.update_log and uses the full label in the output file name.
SEASON = '2020-21'
# FUNC: stat family to download; used as a key into FD and in the output path.
FUNC = 'scoring'

In [None]:
# Download the FUNC stat family for SEASON and write it to
# DATA/raw/<FUNC>/<FUNC><SEASON>.csv (an existing file is overwritten).
sb = StatBucket()
# NOTE(review): only the start year is passed; confirm the season format that
# LeagueGameFinder expects.
sb.update_log(SEASON[:4])
sb.get_log_stats()
gidset = set(sb.outcomes.keys())
ns = Season_csv(FD[FUNC], gidset)
# Use the same ordering as the multi-season cell (GAME_ID, then TEAM_ID within
# a game, via two stable sorts) so CSVs produced by either cell are laid out
# identically; a single default sort on GAME_ID leaves the order of rows
# within a game unspecified.
ns = (ns.sort_values('TEAM_ID', kind='mergesort')
        .sort_values('GAME_ID', kind='mergesort'))
ns.to_csv(f'DATA/raw/{FUNC}/{FUNC}{SEASON}.csv')