This script is my personal exploration into data classes in python for the future purpose of pulling NBA game data for input into a moneyline prediction model. 

In [None]:
#Run to install the NBA STATS API
import sys
!{sys.executable} -m pip install nba_api

In [1]:
import warnings
warnings.filterwarnings("ignore")

from dataclasses import dataclass
import pandas as pd
from nba_api.stats.static import teams, players
from nba_api.stats.endpoints import cumestatsteamgames, cumestatsteam, gamerotation
import pandas as pd
import json
import time
import numpy as np
import datetime as dt

# Class 1 - Gets data to build the team and games datasets

In [2]:
# Get a single season's schedule for all teams

@dataclass
class rawGameData():
    begYr: int # How do I throw an error if input is greater than current date?
    teams = pd.DataFrame(teams.get_teams())
    games = pd.DataFrame()
    
    def getGames(self):
        seasons = []
        for i in range(self.begYr,self.begYr + 1): # Add condition to account for seasons that don't exist
            seasons.append(str(i)+"-"+str(i+1)[-2:])
        print(seasons)
        
        # This code will get the games for a given season
        for i in range(0,len(self.teams)-1):
            for season in seasons:
                time.sleep(1) # throttle ourselves
                # Pull games from NBA stats API
                teamGames = cumestatsteamgames.CumeStatsTeamGames(league_id = '00',season = season ,
                                                                  season_type_all_star='Regular Season',
                                                                  team_id = self.teams['id'][i]).get_normalized_json()
                teamGames = pd.DataFrame(json.loads(teamGames)['CumeStatsTeamGames'])
                teamGames['SEASON'] = season
                self.games = self.games.append(teamGames)
        self.games['DATE'] = pd.to_datetime(self.games['MATCHUP'].astype(str).str[0:10])
        self.games['MATCHUP'] = self.games['MATCHUP'].astype(str).str[10:]
        
        # This code identifies home and away team for each game
        
        #add columns for home and away teams
        self.games['HomeTeamID'] = 0
        self.games['AwayTeamID'] = 0
        self.games = self.games.reset_index().drop(['index'],axis=1)
        
        # for each game id get the home and away team's ids 
        for index,row in self.games.iterrows():
            #print(index,row['GAME_ID'])
            time.sleep(1) # throttle self
            try:
                data = json.loads(gamerotation.GameRotation(league_id = "00",game_id = row['GAME_ID']).get_normalized_json())
            except:
                try:
                    data = json.loads(gamerotation.GameRotation(game_id = row['GAME_ID'],league_id = "00").get_normalized_json())
                except:
                    try:
                        data = gamerotation.GameRotation(game_id = row['GAME_ID'],league_id = "00")
                        data = data.get_normalized_json()
                        data = json.loads(data)
                    except:
                        print(row['GAME_ID'],row['DATE'])
                        pass
            #Home and Away Team IDs
            try:
                AwayTeam = data['AwayTeam'][0]['TEAM_ID']
                HomeTeam = data['HomeTeam'][0]['TEAM_ID']
            except:
                pass
            
            
            self.games.at[index,'HomeTeamID'] = data['HomeTeam'][0]['TEAM_ID']
            self.games.at[index,'AwayTeamID'] = data['AwayTeam'][0]['TEAM_ID']
            
        self.games.to_csv('GamesInfo.csv')
        return self.games 

        
        
    

# Class 2 - Uses dataset from Class 1 to create final dataset 

In [3]:
#Get Basic Data and Calculate Efficiency Metrics 

@dataclass
class gameModelData():
    games : pd.DataFrame()
        
    def getBasicData(self):
        self.games['HomePts'] = 0
        self.games['HomeWinFlag'] = 0
        self.games['HomeScoringMargin'] = 0
        self.games['HomeOffensiveEfficiency'] = 0.00
        
        self.games['AwayPts'] = 0
        self.games['AwayWinFlag'] = 0
        self.games['AwayScoringMargin'] = 0
        self.games['AwayOffensiveEfficiency'] = 0.00
        
        # for every team and game in a season pull stats to calculate efficiency metrics
        for index,row in self.games.iterrows(): 
            #print(index,row['GAME_ID'])
            time.sleep(1) #Throttle Self
            data = cumestatsteam.CumeStatsTeam(game_ids=row['GAME_ID'],league_id ="00",
                                           season=row['SEASON'],season_type_all_star="Regular Season",
                                           team_id = row['HomeTeamID']).get_normalized_json() #Pull basic stats form
            
            # Data conversion of API into pandas dataframe
            homeData = pd.DataFrame(json.loads(data)['TotalTeamStats'])
            
            
            if homeData.iloc[0]['TEAM_ID'] == row['HomeTeamID']: #Ensure IDs Match, then pull data
                self.games.at[index,'HomePts'] = homeData.iloc[0]['PTS']
                self.games.at[index,'HomeWinFlag'] = homeData.iloc[0]['W']

                #Make Sure There are 2 Datasets
        
                try:
                    self.games.at[index,'AwayPts'] = homeData.iloc[1]['PTS']
                    self.games.at[index,'AwayWinFlag'] = homeData.iloc[1]['W']

                    #Scoring Margin
                    self.games.at[index,'HomeScoringMargin'] = self.games.at[index,'HomePts'] - self.games.at[index,'AwayPts']
                    self.games.at[index,'AwayScoringMargin'] = self.games.at[index,'AwayPts'] - self.games.at[index,'HomePts']

                    #Offensive efficiency
                    self.games.at[index,'HomeOffensiveEfficiency'] = (homeData.iloc[0]['FG'] + homeData.iloc[0]['AST'])/(homeData.iloc[0]['FGA']-homeData.iloc[0]['OFF_REB']+homeData.iloc[0]['AST']+homeData.iloc[0]['TOTAL_TURNOVERS'])
                    #print(self.games.at[index,'HomeOffensiveEfficiency'])
                    self.games.at[index,'AwayOffensiveEfficiency'] = (homeData.iloc[1]['FG'] + homeData.iloc[1]['AST'])/(homeData.iloc[1]['FGA']-homeData.iloc[1]['OFF_REB']+homeData.iloc[1]['AST']+homeData.iloc[1]['TOTAL_TURNOVERS'])   
    
    
                
                except:
                    pass
                
            
        self.games.to_csv('basicData.csv')
        
        return print("basic load complete")
    
    def getAggData(self):
        # Home Team Aggregate Stats
        self.games = self.games.drop_duplicates()
        self.games['TotalHomeGame'] = self.games.groupby(['HomeTeamID'])['DATE'].rank(ascending=True)
        self.games['NumHomeWins'] = self.games.sort_values(by='DATE').groupby(['HomeTeamID'])['HomeWinFlag'].cumsum()
        self.games['HomeWinPctg'] = self.games['NumHomeWins'] / self.games['TotalHomeGame']
        self.games['HomeRollingOE'] = self.games.sort_values(by='DATE').groupby(['HomeTeamID'])['HomeOffensiveEfficiency'].rolling(3).mean().reset_index(0,drop=True)
        self.games['HomeAvgScoringMargin'] = self.games.sort_values(by='DATE').groupby(['HomeTeamID'])['HomeScoringMargin'].rolling(5).mean().reset_index(0,drop=True)
        self.games['HomeNxtGameWinPctg'] = self.games.sort_values(by='DATE').groupby(['HomeTeamID'])['HomeWinPctg'].shift(1)
        self.games['HomeNxtGameOE'] = self.games.sort_values(by='DATE').groupby(['HomeTeamID'])['HomeRollingOE'].shift(1)
        self.games['HomeNxtGameASM'] = self.games.sort_values(by='DATE').groupby(['HomeTeamID'])['HomeAvgScoringMargin'].shift(1)

        # Away Team Aggregate Stats
        self.games['TotalAwayGame'] = self.games.groupby(['AwayTeamID'])['DATE'].rank(ascending=True)
        self.games['NumAwayWins'] = self.games.sort_values(by='DATE').groupby(['AwayTeamID'])['AwayWinFlag'].cumsum()
        self.games['AwayWinPctg'] = self.games['NumAwayWins'] / self.games['TotalAwayGame']
        self.games['AwayRollingOE'] = self.games.sort_values(by='DATE').groupby(['HomeTeamID'])['HomeOffensiveEfficiency'].rolling(3).mean().reset_index(0,drop=True)
        self.games['AwayAvgScoringMargin'] = self.games.sort_values(by='DATE').groupby(['AwayTeamID'])['AwayScoringMargin'].rolling(5).mean().reset_index(0,drop=True)
        self.games['AwayNxtGameWinPctg'] = self.games.sort_values(by='DATE').groupby(['AwayTeamID'])['AwayWinPctg'].shift(1)
        self.games['AwayNxtGameOE'] = self.games.sort_values(by='DATE').groupby(['AwayTeamID'])['AwayRollingOE'].shift(1)
        self.games['AwayNxtGameASM'] = self.games.sort_values(by='DATE').groupby(['AwayTeamID'])['AwayAvgScoringMargin'].shift(1)

        return print("Aggregation Dataset Complete")
            
            

In [4]:
start = time.perf_counter_ns()

# From rawGameData class
data = rawGameData(2021);
games = data.getGames();

#from gameModelData class
seasonData = gameModelData(games);
seasonData.getBasicData();
seasonData.getAggData();

end  = time.perf_counter_ns()

# How long it took me to run in milliseconds
print((end - start)/1000000) 


['2021-22']
0022100836 2022-02-10 00:00:00
0022100822 2022-02-08 00:00:00
basic load complete
Aggregation Dataset Complete
8010283.9245


In [5]:
seasonData.games

Unnamed: 0,MATCHUP,GAME_ID,SEASON,DATE,HomeTeamID,AwayTeamID,HomePts,HomeWinFlag,HomeScoringMargin,HomeOffensiveEfficiency,...,HomeNxtGameOE,HomeNxtGameASM,TotalAwayGame,NumAwayWins,AwayWinPctg,AwayRollingOE,AwayAvgScoringMargin,AwayNxtGameWinPctg,AwayNxtGameOE,AwayNxtGameASM
0,Hawks at Rockets,0022101221,2021-22,2022-04-10,1610612745,1610612737,114,0,-16,0.565217,...,0.567595,0.0,41.0,16,0.390244,0.577139,5.8,0.375000,0.613360,-1.6
1,Hawks at Heat,0022101207,2021-22,2022-04-08,1610612748,1610612737,113,1,4,0.571429,...,0.579226,3.0,40.0,15,0.375000,0.613360,-1.6,0.384615,0.556935,0.4
2,Wizards at Hawks,0022101192,2021-22,2022-04-06,1610612737,1610612764,118,1,15,0.589744,...,0.596684,10.4,40.0,14,0.350000,0.583495,-9.6,0.358974,0.582609,-10.2
3,Hawks at Raptors,0022101182,2021-22,2022-04-05,1610612761,1610612737,118,1,10,0.587156,...,0.539377,14.8,39.0,15,0.384615,0.556935,0.4,0.394737,0.576556,0.4
4,Nets at Hawks,0022101163,2021-22,2022-04-02,1610612737,1610612751,122,1,7,0.534653,...,0.596432,10.8,40.0,23,0.575000,0.596684,13.4,0.589744,0.528148,17.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2279,Hornets at Wizards,0022100254,2021-22,2021-11-22,1610612764,1610612766,103,0,-6,0.513043,...,0.554387,6.8,11.0,5,0.454545,0.544846,-2.2,0.400000,0.620900,-9.4
2282,Wizards at Hornets,0022100213,2021-22,2021-11-17,1610612766,1610612764,97,1,10,0.595041,...,0.546892,2.0,7.0,4,0.571429,0.561239,1.4,0.666667,0.500818,0.6
2317,Pistons at Hornets,0022100915,2021-22,2022-02-27,1610612766,1610612765,126,0,-1,0.588235,...,0.571798,-1.2,31.0,6,0.193548,0.587769,-10.0,0.166667,0.556124,-13.4
2322,Hornets at Pistons,0022100838,2021-22,2022-02-11,1610612765,1610612766,119,0,-22,0.588235,...,0.525293,-9.0,31.0,15,0.483871,0.536799,9.0,0.466667,0.615543,6.6
