In [None]:
#For documentation on using this program refer to: https://github.com/RohanPankaj/NBA-Prediction/blob/master/docs/Team-Prediction.md

In [None]:
#imports
import pandas as pd 
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import time
import matplotlib.pyplot as plt
import math

import nba_api
from nba_api.stats.static import teams, players
from nba_api.stats.endpoints import playercareerstats, leaguegamefinder, playerdashboardbyclutch, playergamelogs, commonplayerinfo, teamplayeronoffdetails, teamgamelogs


In [None]:
class Team(object):
    """Gathers game-log statistics for one NBA team through ``nba_api``.

    Two tables can be produced:

    * ``get_active_team_data`` - one row per game played between ``StartYear``
      and ``LastYear``, including the ``"Points Scored"`` label column.
    * ``get_data_for_prediction`` - a single row with the same feature columns
      (no label) describing an upcoming game against a given opponent.

    Attributes:
        team_abbreviation: upper-cased abbreviation supplied by the caller; it is
            overwritten with the abbreviation found in the game logs while the
            training table is compiled.
        TeamID: integer team id returned by the static team lookup.
        TeamName: full team name returned by the static team lookup.
        StartYear: start year of the first season to compile (inclusive).
        LastYear: start year at which compilation stops (exclusive).
    """

    # (game-log column, feature column) pairs for the head-to-head averages.
    # The "Sesason" spelling is kept unchanged so column names stay the same as before.
    _SPECIFIC_FEATURES = (
        ("FTM", "Team Season FTM Average Specific"),
        ("FGM", "Team Sesason FGM Average Specific"),
        ("FG3M", "Team Season FG3M Average Specific"),
        ("FTA", "Team Season FTA Average Specific"),
        ("FGA", "Team Sesason FGA Average Specific"),
        ("FG3A", "Team Season FG3A Average Specific"),
    )

    # (game-log column, feature column) pairs for the opponent's season averages.
    _OPPONENT_FEATURES = (
        ("FTM", "Opponent Season FTM Average"),
        ("FGM", "Opponent Season FGM Average"),
        ("FG3M", "Opponent Season FG3M Average"),
    )

    # Abbreviations found in older MATCHUP strings, mapped to the abbreviation
    # used for the static team lookup (teams renamed since 2009).
    _RENAMED_TEAMS = (("NOH", "NOP"), ("NJN", "BKN"))

    def __init__(self, team_abbreviation):
        """Resolve ``team_abbreviation`` to a team id and name.

        Args:
            team_abbreviation: team abbreviation such as ``"LAL"``; surrounding
                whitespace and letter case are ignored.

        Raises:
            ValueError: if the static team lookup does not know the abbreviation.
        """
        # MATCHUP strings in the game logs are upper case, so normalise early.
        self.team_abbreviation = str(team_abbreviation).strip().upper()
        self.TeamID = self._team_id_for(self.team_abbreviation)
        print("confirmed", self.team_abbreviation)
        self.TeamName = teams.find_team_name_by_id(team_id=self.TeamID)['full_name']
        self.StartYear = 2009  # First year data is available
        self.LastYear = 2021  # Current year

    @staticmethod
    def _format_season(year):
        """Return the API season label for a start year, e.g. 2009 -> "2009-10"."""
        return "{}-{:02d}".format(year, (year + 1) % 100)

    @staticmethod
    def _team_id_for(abbreviation):
        """Return the integer team id for ``abbreviation`` or raise ``ValueError``."""
        team_info = teams.find_team_by_abbreviation(abbreviation)
        if team_info is None:
            raise ValueError("{} is not a valid team abbreviation".format(abbreviation))
        return int(team_info["id"])

    @staticmethod
    def _fetch_team_logs(team_id, season, include_playoffs=True):
        """Download a team's game logs for ``season`` as one DataFrame.

        With ``include_playoffs`` the playoff rows come first, followed by the
        regular-season rows, and the index is renumbered from 0.
        """
        regular = teamgamelogs.TeamGameLogs(
            team_id_nullable=team_id, season_nullable=season
        ).get_data_frames()[0]
        if not include_playoffs:
            return regular.reset_index(drop=True)

        playoffs = teamgamelogs.TeamGameLogs(
            team_id_nullable=team_id, season_nullable=season, season_type_nullable="Playoffs"
        ).get_data_frames()[0]
        # Leave empty frames out of the concat (a team with no playoff games) so
        # pandas does not have to guess dtypes from an empty entry.
        frames = [frame for frame in (playoffs, regular) if not frame.empty]
        if not frames:
            return regular.reset_index(drop=True)
        return pd.concat(frames).reset_index(drop=True)

    @staticmethod
    def _head_to_head(game_logs, team_abbreviation, opponent_abbreviation):
        """Return the rows of ``game_logs`` played against ``opponent_abbreviation``."""
        matchup_labels = [
            "{} @ {}".format(team_abbreviation, opponent_abbreviation),
            "{} vs. {}".format(team_abbreviation, opponent_abbreviation),
        ]
        return game_logs.loc[game_logs["MATCHUP"].isin(matchup_labels)]

    @staticmethod
    def _opponent_from_matchup(matchup, team_abbreviation):
        """Strip the team's own abbreviation and the separator from a MATCHUP string."""
        return matchup.replace(team_abbreviation, "").replace(" @ ", "").replace(" vs. ", "")

    @classmethod
    def _feature_columns(cls):
        """Return the feature column names shared by the training and prediction tables."""
        names = ["Team Point Average", "Opponent Team Point Average"]
        names.extend(name for _, name in cls._SPECIFIC_FEATURES)
        names.extend(name for _, name in cls._OPPONENT_FEATURES)
        return names

    #Compile the Team Data
    def compile_active_team_data(self):
        """Build the per-game training table for every season in [StartYear, LastYear).

        Returns:
            pandas.DataFrame with a ``"Points Scored"`` label column followed by
            the feature columns, one row per game. Rows are produced season by
            season, walking each season's log from the last index to the first.
        """
        records = []

        for year in range(self.StartYear, self.LastYear):
            print("-------------------------------------------------")
            season = self._format_season(year)  # format the season correctly for the API
            teamLogs = self._fetch_team_logs(self.TeamID, season)

            print(season)
            print(teamLogs)

            if teamLogs.empty:
                # Nothing to process (and no abbreviation to read) for this season.
                print("no games found for the {} season".format(season))
                continue

            # The logs carry the abbreviation the team used in that season, which
            # is what the MATCHUP strings contain.
            season_abbreviation = teamLogs["TEAM_ABBREVIATION"][0]
            self.team_abbreviation = season_abbreviation

            total_games = len(teamLogs)
            team_points_column = teamLogs["PTS"]
            team_season_points = []
            opponent_logs_cache = {}  # opponent id -> that opponent's logs for this season
            cycle_count = 0

            # NOTE(review): assumes the API lists the newest game first, so walking
            # the index backwards visits games oldest-first - confirm.
            for game in reversed(range(total_games)):
                # Isolate the opponent abbreviation as it appears in MATCHUP
                opponentAbbreviation = self._opponent_from_matchup(
                    teamLogs["MATCHUP"][game], season_abbreviation
                )

                # Account for special cases where team names have changed since 2009
                opponentAbbreviation_for_search = opponentAbbreviation
                for old_name, new_name in self._RENAMED_TEAMS:
                    opponentAbbreviation_for_search = opponentAbbreviation_for_search.replace(old_name, new_name)
                opponentID = self._team_id_for(opponentAbbreviation_for_search)

                if (game % 10 == 0):
                    game_processed = cycle_count * 10
                    print("processing game {} out of {} for the {} season".format(game_processed, total_games, season))
                    cycle_count += 1

                # Download each opponent's season logs once instead of once per game;
                # pause only after real downloads to stay gentle on the API.
                if opponentID not in opponent_logs_cache:
                    opponent_logs_cache[opponentID] = self._fetch_team_logs(opponentID, season)
                    time.sleep(1)
                opponentLogs = opponent_logs_cache[opponentID]
                opponent_season_points_api_data = opponentLogs['PTS']

                team_season_points.append(team_points_column[game])

                # Rows from the current game to the end of the log. When more than
                # one of them is against this opponent the current game is left out;
                # otherwise it is kept so the averages below are not empty.
                past_games = teamLogs.tail(total_games - game)
                head_to_head = self._head_to_head(past_games, season_abbreviation, opponentAbbreviation)
                if len(head_to_head) > 1:
                    past_games = teamLogs.tail((total_games - game) - 1)
                    head_to_head = self._head_to_head(past_games, season_abbreviation, opponentAbbreviation)

                # NOTE(review): the point averages below include the current game's
                # own score (and index 0 uses the raw scores) - confirm this is intended.
                if game == 0:
                    opponent_point_average = opponent_season_points_api_data[game]
                    team_point_average = team_points_column[game]
                elif game < len(opponentLogs):
                    opponent_point_average = opponent_season_points_api_data.head(game).mean()
                    team_point_average = sum(team_season_points) / len(team_season_points)
                else:
                    opponent_point_average = opponent_season_points_api_data.mean()
                    team_point_average = sum(team_season_points) / len(team_season_points)

                record = {
                    "Points Scored": team_points_column[game],
                    "Team Point Average": team_point_average,
                    "Opponent Team Point Average": opponent_point_average,
                }
                for log_column, feature_name in self._SPECIFIC_FEATURES:
                    record[feature_name] = head_to_head[log_column].mean()
                for log_column, feature_name in self._OPPONENT_FEATURES:
                    record[feature_name] = opponentLogs[log_column].mean()
                records.append(record)

        print("compiled {} games".format(len(records)))

        # Fixed column order: label first, then the shared feature columns.
        team_dataFrame = pd.DataFrame(records, columns=["Points Scored"] + self._feature_columns())
        return team_dataFrame

    #Get the Team data for outside calls
    def get_active_team_data(self):
        """Public wrapper around ``compile_active_team_data``."""
        return self.compile_active_team_data()

    def compile_data_for_prediction(self, opponentAbbreviation_for_prediction):
        """Build the single-row feature table for a game against the given opponent.

        The season used is the one starting in ``LastYear - 1``. If the team has
        no game against the opponent in that season's logs, the head-to-head
        averages come from the regular-season logs of the season before.

        Args:
            opponentAbbreviation_for_prediction: opponent abbreviation such as ``"BOS"``.

        Returns:
            pandas.DataFrame with one row and the feature columns (no label).

        Raises:
            ValueError: if the opponent abbreviation is unknown.
        """
        # MATCHUP strings are upper case; normalise so user input still matches.
        opponent_abbreviation = str(opponentAbbreviation_for_prediction).strip().upper()
        opponentID = self._team_id_for(opponent_abbreviation)

        season = self._format_season(self.LastYear - 1)
        previous_season = self._format_season(self.LastYear - 2)

        teamLogs = self._fetch_team_logs(self.TeamID, season)
        opponentLogs = self._fetch_team_logs(opponentID, season)  # get Team logs for the opposing team

        head_to_head = self._head_to_head(teamLogs, self.team_abbreviation, opponent_abbreviation)
        if head_to_head.empty or math.isnan(head_to_head["FTM"].mean()):
            # No meeting yet this season: fall back to last season's regular-season games.
            previous_logs = self._fetch_team_logs(self.TeamID, previous_season, include_playoffs=False)
            head_to_head = self._head_to_head(previous_logs, self.team_abbreviation, opponent_abbreviation)

        prediction_row = {
            "Team Point Average": teamLogs["PTS"].mean(),
            "Opponent Team Point Average": opponentLogs["PTS"].mean(),
        }
        for log_column, feature_name in self._SPECIFIC_FEATURES:
            prediction_row[feature_name] = head_to_head[log_column].mean()
        for log_column, feature_name in self._OPPONENT_FEATURES:
            prediction_row[feature_name] = opponentLogs[log_column].mean()

        prediction_dataframe = pd.DataFrame([prediction_row], columns=self._feature_columns())
        print(prediction_dataframe)
        return prediction_dataframe

    def get_data_for_prediction(self, opponentAbbreviation_for_prediction):
        """Public wrapper around ``compile_data_for_prediction``."""
        return self.compile_data_for_prediction(opponentAbbreviation_for_prediction)
        
    



In [None]:
class Prediction(object):
    """Fits a one-unit dense (linear) Keras model on a team's game table and predicts points.

    ``data`` must contain a ``"Points Scored"`` column (the label); every other
    column is used as a feature.
    """

    def __init__(self, data):
        """Keep a private copy of ``data`` so the caller's table is never modified."""
        self.data = data.copy()

    #Normalize the data
    def sigmoid(self, dataframe):
        """Standardise ``dataframe`` with the training mean and standard deviation.

        Despite the method name this is a z-score, ``(x - mean) / std``, using
        ``self.train_stats`` (set by ``make_prediction``). A standard deviation
        that is zero or missing is treated as 1 so a constant feature maps to 0
        instead of NaN/inf.
        """
        feature_mean = self.train_stats['mean']
        feature_std = self.train_stats['std']
        # NaN > 0 is False, so missing deviations are replaced as well.
        safe_std = feature_std.where(feature_std > 0, 1.0)
        return (dataframe - feature_mean) / safe_std

    #Defining the model
    def build_model(self):
        """Return a compiled model: one input per training feature, one dense output unit."""
        feature_count = len(self.train_dataset.keys())

        input_layer = tf.keras.layers.Input(shape=(feature_count,))
        output = tf.keras.layers.Dense(units=1)(input_layer)
        model = tf.keras.Model(inputs=input_layer, outputs=output)

        model.compile(loss="mse", optimizer=tf.keras.optimizers.Adam(0.01), metrics=['mae', 'mse'])

        return model

    def make_prediction(self, prediction_data):
        """Train on ``self.data``, report test error, plot it and predict ``prediction_data``.

        Args:
            prediction_data: DataFrame holding the feature columns for the game(s)
                to predict; columns are matched to the training features by name.

        Returns:
            Tuple ``(game_prediction, mae)``: the flattened model output for
            ``prediction_data`` and the mean absolute error on the held-out rows.
        """
        #set up train and testing data (fixed random_state keeps the split repeatable)
        self.train_dataset = self.data.sample(frac=0.90, random_state=0)
        self.test_dataset = self.data.drop(self.train_dataset.index)

        self.train_labels = self.train_dataset.pop("Points Scored")
        self.test_labels = self.test_dataset.pop("Points Scored")

        self.train_stats = self.train_dataset.describe().transpose()

        #normalize the data
        normed_train_data = self.sigmoid(self.train_dataset)
        normed_test_data = self.sigmoid(self.test_dataset)

        #build the model
        model = self.build_model()
        model.summary()
        print((normed_train_data.keys()))

        EPOCHS = 1000
        # Stop once the training loss has not improved for 10 epochs.
        early_stop = keras.callbacks.EarlyStopping(monitor='loss', patience=10)
        model.fit(normed_train_data, self.train_labels, epochs=EPOCHS, callbacks=[early_stop])

        #get key metrics
        loss, mae, mse = model.evaluate(normed_test_data, self.test_labels, verbose=0)

        print("Testing set Mean Abs Error: {:5.2f} Points Scored".format(mae))

        #make predictions and then plot them in relation to the actual values
        test_predictions = model.predict(normed_test_data).flatten()

        # A fresh figure per call keeps plots from separate calls from overlaying.
        fig, ax = plt.subplots()
        ax.scatter(self.test_labels, test_predictions)
        ax.set_xlabel("True Values [Points Scored]")
        ax.set_ylabel("Predictions [Points Scored]")
        ax.axis('equal')
        ax.axis('square')
        ax.set_xlim([60, ax.get_ylim()[1]])
        ax.set_ylim([60, ax.get_ylim()[1]])
        ax.plot([-100, 200], [-100, 200])
        plt.show()

        #print tests for reference
        print(self.test_labels)
        print(test_predictions)

        #make prediction
        # Put the columns in the exact order the model was trained on.
        aligned_prediction_data = prediction_data.reindex(columns=self.train_dataset.columns)
        normed_prediction_data = self.sigmoid(aligned_prediction_data)
        game_prediction = model.predict(normed_prediction_data).flatten()

        return(game_prediction, mae) #return predicted values


In [None]:
# Ask which two teams are meeting; input() already returns a string.
team1_abbreviation = input("What is the abbreviation of the 1st team that is playing: ")
team2_abbreviation = input("What is the abbreviation of the 2nd team that is playing: ")

# Resolve both teams, then compile each team's historical game table
# (first team completely before the second).
team1, team2 = Team(team1_abbreviation), Team(team2_abbreviation)

team1_data = team1.get_active_team_data()
team2_data = team2.get_active_team_data()

# One predictor per team, each holding its own copy of that team's table.
team1_prediction_class, team2_prediction_class = Prediction(team1_data), Prediction(team2_data)


# Each side is predicted from its own features against the other side.
print("--------------------------Prediction for {}--------------------------".format(team1_abbreviation))
team1_prediction_data = team1.get_data_for_prediction(team2_abbreviation)
team1_prediction, team1_margin = team1_prediction_class.make_prediction(
    team1_prediction_data
)

print("--------------------------Prediction for {}--------------------------".format(team2_abbreviation))
team2_prediction_data = team2.get_data_for_prediction(team1_abbreviation)
team2_prediction, team2_margin = team2_prediction_class.make_prediction(
    team2_prediction_data
)

# Final summary: predicted score with the test-set mean absolute error as the margin.
print("-------------------------------------------------------------------------------------------------")
for abbreviation, predicted_points, margin in (
    (team1_abbreviation, team1_prediction, team1_margin),
    (team2_abbreviation, team2_prediction, team2_margin),
):
    print("{} is predicted to score: {} ± {}".format(abbreviation, predicted_points, margin))

print("-------------------------------------------------------------------------------------------------")

