In [1]:
#For documentation on using this program refer to: https://github.com/RohanPankaj/NBA-Prediction/blob/master/docs/Team-Prediction.md

In [2]:
#imports
import pandas as pd 
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import time
import matplotlib.pyplot as plt
import math

import nba_api
from nba_api.stats.static import teams, players
from nba_api.stats.endpoints import playercareerstats, leaguegamefinder, playerdashboardbyclutch, playergamelogs, commonplayerinfo, teamplayeronoffdetails, teamgamelogs


In [3]:
headers = {
    'Host': 'stats.nba.com',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.114 Safari/537.36',
    'Accept': 'application/json, text/plain, */*',
    'Accept-Language': 'en-US,en;q=0.5',
    'Referer': 'https://stats.nba.com/',
    'Accept-Encoding': 'gzip, deflate, br',
    'Connection': 'keep-alive',
}

In [9]:
class Team(object):
    def __init__(self, team_abbreviation):
        
        self.team_abbreviation = team_abbreviation
        self.TeamID =  int(teams.find_team_by_abbreviation(self.team_abbreviation)["id"])
         
        self.TeamName = teams.find_team_name_by_id(team_id=self.TeamID)['full_name']

        self.StartYear = 2009 #First year data is avalible
        self.LastYear = 2021 #Current year
        
        
        
        

        
    #Complile the Team Data
    def compile_active_team_data(self):
        

    
    
        team_season_point_average_raw_data = []
        team_points_raw_data = []
        opponent_season_point_average_raw_data = []
        opponent_season_points_raw_data = []
        
        team_season_FT_average_raw_data = []
        team_season_FG_average_raw_data = []
        team_season_FG3_average_raw_data = []
        opponent_season_FT_average_raw_data = []
        opponent_season_FG_average_raw_data = []
        opponent_season_FG3_average_raw_data = []
            
        
        print(self.StartYear, self.LastYear)
        for year in range((self.StartYear), (self.LastYear)):
            print("-------------------------------------------------")
            season = str(year) + "-" + str(((year+1)-2000))
            teamLogs = teamgamelogs.TeamGameLogs(team_id_nullable=self.TeamID, season_nullable=season)
            print(season)
            

            matchup_api_data = teamLogs.get_data_frames()[0]["MATCHUP"]
            team_season_points_api_data=teamLogs.get_data_frames()[0]["PTS"]
        

            print(teamLogs.get_data_frames()[0])


            team_season_points = []
            
            
            for game in range(0, len(teamLogs.get_data_frames()[0])):
        
                        
                    #Accounting for special cases where team names have changed since 2009
                    if (self.team_abbreviation == 'BKN' and matchup_api_data[game].find("NJN") != -1):
                        self.team_abbreviation = 'NJN'
                    elif (self.team_abbreviation == 'NOP' and matchup_api_data[game].find("NOH") != -1):
                        self.team_abbreviation = 'NOH'
                    else: 
                        self.team_abbreviation = teams.find_team_name_by_id(team_id=self.TeamID)['abbreviation']
                    
                    
                    #Isoltate the opponent Abbreviation 
                    try:   
                        
                        matchup_api_data[game] = matchup_api_data[game].replace(self.team_abbreviation, "")
                        
                    except:
                        print("Unexpected Error with removing player team")
                    try:
                        matchup_api_data[game] = matchup_api_data[game].replace(" @ ", "")
                        matchup_api_data[game] = matchup_api_data[game].replace(" vs. ", "")
                    except:
                        print("Error")
                    
                    
                    
                    opponentAbbreviation = matchup_api_data[game]
                    
                    matchup_api_data[game] = matchup_api_data[game].replace('NOH', 'NOP')
                    matchup_api_data[game] = matchup_api_data[game].replace('NJN', 'BKN')
                    
                    opponentAbbreviation_for_search = matchup_api_data[game]
                    print(opponentAbbreviation_for_search)                                                        
                    opponentID = int(teams.find_team_by_abbreviation(opponentAbbreviation_for_search)["id"])
                    if (game%10==0):
                        print(game)
                        
                    opponentLogs = teamgamelogs.TeamGameLogs(team_id_nullable=opponentID, season_nullable=season, headers=headers)
                    opponent_season_points_api_data = opponentLogs.get_data_frames()[0]['PTS']
                    
                    
                    team_points_raw_data.append(team_season_points_api_data[game])
                    team_season_points.append(team_season_points_api_data[game])
                    
                    past_games = teamLogs.get_data_frames()[0].head(game+1).copy() #
                    
                    team_season_FT_average_raw_data.append(past_games.loc[past_games['MATCHUP'].isin(["{} @ {}".format(self.team_abbreviation, opponentAbbreviation),"{} vs. {}".format(self.team_abbreviation, opponentAbbreviation)])]["FTM"].mean())
                    team_season_FG_average_raw_data.append(past_games.loc[past_games['MATCHUP'].isin(["{} @ {}".format(self.team_abbreviation, opponentAbbreviation),"{} vs. {}".format(self.team_abbreviation, opponentAbbreviation)])]["FGM"].mean())
                    team_season_FG3_average_raw_data.append(past_games.loc[past_games['MATCHUP'].isin(["{} @ {}".format(self.team_abbreviation, opponentAbbreviation),"{} vs. {}".format(self.team_abbreviation, opponentAbbreviation)])]["FG3M"].mean())
                    
                    
                    if game == 0:
                        opponent_season_point_average_raw_data.append(opponent_season_points_api_data[game])
                        team_season_point_average_raw_data.append(teamLogs.get_data_frames()[0]["PTS"][game])
                        
                        
                    elif game<(len(opponentLogs.get_data_frames()[0]['PTS'])):
                        opponent_season_point_average_raw_data.append(opponent_season_points_api_data.head(game).mean())
                        team_season_point_average_raw_data.append((sum(team_season_points)/len(team_season_points)))
                                              
                        
                    else:
                        opponent_season_point_average_raw_data.append(opponent_season_points_api_data.mean())
                        team_season_point_average_raw_data.append((sum(team_season_points)/len(team_season_points)))
                    
                                            
                    time.sleep(1)
                    

        #Lengths of lists used for troublshooting
        print("the lengths")
        print(len(team_points_raw_data))
    
        print(len(team_season_point_average_raw_data))
        print(len(opponent_season_point_average_raw_data))
        print(len(team_season_FT_average_raw_data)) 
        print(len(team_season_FG_average_raw_data)) 
        print(len(team_season_FG3_average_raw_data)) 
        
       
        #adding the lists to the team data dictionary to be made into a dataframe
        team_data = {
        "Points Scored": team_points_raw_data,
        "Team Point Average" : team_season_point_average_raw_data,
        "Opponent Team Point Average": opponent_season_point_average_raw_data, 
        "Team Season FTM Average Specific": team_season_FT_average_raw_data,
        "Team Sesason FGM Average Specific": team_season_FG_average_raw_data,
        "Team Season FG3M Average Specific": team_season_FG3_average_raw_data,
        }

        team_dataFrame = pd.DataFrame(data=team_data)
        #print(team_dataFrame) #print for troublshooting

        return(team_dataFrame)


       
    #Get the Team data for outside calls
    def get_active_team_data(self):
        return(self.compile_active_team_data())
    def compile_data_for_prediction(self, opponentAbbreviation_for_prediction):
        
        season = "2020-21"
        teamLogs = teamgamelogs.TeamGameLogs(team_id_nullable=self.TeamID, season_nullable=season)
        team_season_points_api_data = teamLogs.get_data_frames()[0]["PTS"]
       
        team_season_point_average = [team_season_points_api_data.mean()]
        
        
        opponentID = int(teams.find_team_by_abbreviation(opponentAbbreviation_for_prediction)["id"])
        opponentLogs = teamgamelogs.TeamGameLogs(team_id_nullable=opponentID, season_nullable=season)
        opponent_season_points_api_data = opponentLogs.get_data_frames()[0]["PTS"]
        opponent_season_point_average = [opponent_season_points_api_data.mean()]

        past_games = teamLogs.get_data_frames()[0].copy()
        
        
        print(past_games.loc[past_games['MATCHUP']=="DEN"])
        print(past_games.loc[past_games['MATCHUP'].isin(["{} @ {}".format(self.team_abbreviation, opponentAbbreviation_for_prediction),"{} vs. {}".format(self.team_abbreviation, opponentAbbreviation_for_prediction)])]["FTM"])
        team_season_FT_average = (past_games.loc[past_games['MATCHUP'].isin(["{} @ {}".format(self.team_abbreviation, opponentAbbreviation_for_prediction),"{} vs. {}".format(self.team_abbreviation, opponentAbbreviation_for_prediction)])]["FTM"].mean())
        team_season_FG_average = (past_games.loc[past_games['MATCHUP'].isin(["{} @ {}".format(self.team_abbreviation, opponentAbbreviation_for_prediction),"{} vs. {}".format(self.team_abbreviation, opponentAbbreviation_for_prediction)])]["FGM"].mean())
        team_season_FG3_average = (past_games.loc[past_games['MATCHUP'].isin(["{} @ {}".format(self.team_abbreviation, opponentAbbreviation_for_prediction),"{} vs. {}".format(self.team_abbreviation, opponentAbbreviation_for_prediction)])]["FG3M"].mean())
        
        if math.isnan(team_season_FT_average):
            print("-----Insufficent data for the current season. Pulling data from the previous season.-----")
            teamLogs = teamgamelogs.TeamGameLogs(team_id_nullable=self.TeamID, season_nullable="2019-20")
            past_games = teamLogs.get_data_frames()[0].copy()                                     
            team_season_FT_average = (past_games.loc[past_games['MATCHUP'].isin(["{} @ {}".format(self.team_abbreviation, opponentAbbreviation_for_prediction),"{} vs. {}".format(self.team_abbreviation, opponentAbbreviation_for_prediction)])]["FTM"].mean())
            team_season_FG_average = (past_games.loc[past_games['MATCHUP'].isin(["{} @ {}".format(self.team_abbreviation, opponentAbbreviation_for_prediction),"{} vs. {}".format(self.team_abbreviation, opponentAbbreviation_for_prediction)])]["FGM"].mean())
            team_season_FG3_average = (past_games.loc[past_games['MATCHUP'].isin(["{} @ {}".format(self.team_abbreviation, opponentAbbreviation_for_prediction),"{} vs. {}".format(self.team_abbreviation, opponentAbbreviation_for_prediction)])]["FG3M"].mean())
       
        
        
        prediction_data = {
        "Team Point Average" : team_season_point_average,
        "Opponent Team Point Average": opponent_season_point_average, 
        "Team Season FTM Average Specific": team_season_FT_average,
        "Team Sesason FGM Average Specific": team_season_FG_average,
        "Team Season FG3M Average Specific": team_season_FG3_average
        }
        prediction_dataframe = pd.DataFrame(data=prediction_data)
        print(prediction_dataframe)
        return(prediction_dataframe)
        
    def get_data_for_prediction(self, opponentAbbreviation_for_prediction):
        return(self.compile_data_for_prediction(opponentAbbreviation_for_prediction))
        
    



In [5]:
class Prediction(object):
    
    def __init__(self, data):
        self.data = data.copy()
        
    def sigmoid(self, dataframe):
        
        return((dataframe - self.train_stats['mean'])/self.train_stats['std'])
        #Defining the model
    def build_model(self):
        #below comented out portion is another model under testing
        
        input_layer=tf.keras.layers.Input(([len(self.train_dataset.keys())]))
        densel_layer = tf.keras.layers.Dense(units=1, input_shape=([len(self.train_dataset.keys())],))
        output = densel_layer(input_layer)
        model = tf.keras.Model(inputs=input_layer,outputs=output)

        model.compile(loss="mse",optimizer=tf.keras.optimizers.Adam(0.01), metrics=['mae', 'mse'])

     
        return model
        
    def test_prediction_accuracy(self ):
        self.train_dataset = self.data.sample(frac=0.90, random_state=0)
        self.test_dataset = self.data.drop(self.train_dataset.index)
    
        self.train_labels = self.train_dataset.pop("Points Scored")
        self.test_labels = self.test_dataset.pop("Points Scored")
        
        self.train_stats = self.train_dataset.describe()
        #self.train_stats.pop("Points Scored")
        self.train_stats = self.train_stats.transpose()
        
        normed_train_data = self.sigmoid(self.train_dataset)
        normed_test_data = self.sigmoid(self.test_dataset)

        model = self.build_model()
        model.summary()
        example_batch = normed_train_data
        print((example_batch.keys()))
        example_result = model.predict(example_batch)
        example_result

        EPOCHS = 1000
        early_stop = keras.callbacks.EarlyStopping(monitor='loss', patience=10)
        history = model.fit(normed_train_data, self.train_labels, epochs=EPOCHS, callbacks=[early_stop])

        loss, mae, mse = model.evaluate(normed_test_data, self.test_labels, verbose=0)

        print("Testing set Mean Abs Error: {:5.2f} Points Scored".format(mae))
        #make predictions and then plot them in realation to the actual values
        test_predictions = model.predict(normed_test_data).flatten()

        plt.scatter(self.test_labels, test_predictions)
        plt.xlabel("True Values [Points Scored]")
        plt.ylabel("Predictions [Points Scored]")
        plt.axis('equal')
        plt.axis('square')
        plt.xlim([60, plt.ylim()[1]])
        plt.ylim([60, plt.ylim()[1]])
        _ = plt.plot([-100,200], [-100, 200])

        print(self.test_labels)
        print(test_predictions)
        return(mae)
    def make_prediction(self, prediction_circumstances):
        self.train_dataset = self.data.copy()
        print(self.data)
        self.train_labels = self.train_dataset.pop("Points Scored")
        
        self.train_stats = self.train_dataset.describe()
        #self.train_stats.pop("Points Scored")
        self.train_stats = self.train_stats.transpose()
        
        normed_train_data = self.sigmoid(self.train_dataset)
        normed_prediction_data = self.sigmoid(prediction_circumstances)
        model = self.build_model()
        model.summary()
        
        #following not nessicary
        example_batch = normed_train_data
        print((example_batch.keys()))
        example_result = model.predict(example_batch)
        example_result
        
        EPOCHS = 1000
        early_stop = keras.callbacks.EarlyStopping(monitor='loss', patience=15)
        history = model.fit(normed_train_data, self.train_labels, epochs=EPOCHS, callbacks=[early_stop])
        
        prediction = model.predict(normed_prediction_data).flatten()
        print(prediction)
        return(prediction)

In [11]:
team1_abbreviation = str(input("What is the abbreviation of the 1st team that is playing: "))
team2_abbreviation = str(input("What is the abbreviation of the 2nd team that is playing: "))

team1 = Team(team1_abbreviation)
team2 = Team(team2_abbreviation)

team1_data = team1.get_active_team_data() 
team2_data = team2.get_active_team_data() 

team1_prediction_class = Prediction(team1_data)
team2_prediction_class = Prediction(team2_data)

print("--------------------------Testing Prediction Accuracy for {}--------------------------".format(team1_abbreviation))
team1_mae = team1_prediction_class.test_prediction_accuracy()

print("--------------------------Prediction for {}--------------------------".format(team1_abbreviation))

team1_prediction_data = team1.get_data_for_prediction(team2_abbreviation)
team1_prediction = team1_prediction_class.make_prediction(team1_prediction_data)

print("-------------------------------------------------------------------------------------------------")


print("--------------------------Testing Prediction Accuracy for {}--------------------------".format(team2_abbreviation))
team2_mae = team2_prediction_class.test_prediction_accuracy()
      
print("--------------------------Prediction for {}--------------------------".format(team2_abbreviation))

team2_prediction_data = team2.get_data_for_prediction(team1_abbreviation)
team2_prediction = team2_prediction_class.make_prediction(team2_prediction_data)

print("-------------------------------------------------------------------------------------------------")
print("{} is predicted to score: {} ± {} ".format(team1_abbreviation, team1_prediction, team1_mae))
print("{} is predicted to score: {} ± {} ".format(team2_abbreviation, team2_prediction, team2_mae))

print("-------------------------------------------------------------------------------------------------")



What is the abbreviation of the 1st team that is playing: BOS
What is the abbreviation of the 2nd team that is playing: DAL
2009 2021
-------------------------------------------------
2009-10
   SEASON_YEAR     TEAM_ID TEAM_ABBREVIATION       TEAM_NAME     GAME_ID  \
0      2009-10  1610612738               BOS  Boston Celtics  0020901218   
1      2009-10  1610612738               BOS  Boston Celtics  0020901213   
2      2009-10  1610612738               BOS  Boston Celtics  0020901191   
3      2009-10  1610612738               BOS  Boston Celtics  0020901177   
4      2009-10  1610612738               BOS  Boston Celtics  0020901159   
..         ...         ...               ...             ...         ...   
77     2009-10  1610612738               BOS  Boston Celtics  0020900053   
78     2009-10  1610612738               BOS  Boston Celtics  0020900040   
79     2009-10  1610612738               BOS  Boston Celtics  0020900022   
80     2009-10  1610612738               BOS  Bo

ReadTimeout: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)