## Import necessary libraries

In [1]:
import warnings
# Ignore every warning for the rest of the session. This also hides library
# diagnostics such as pandas' SettingWithCopyWarning, so problems reported by
# pandas or scikit-learn will not appear in the cell outputs below.
warnings.simplefilter('ignore')

In [2]:
from pathlib import Path

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sb

from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import MinMaxScaler

## Read and Display NBA data

In [3]:
# Machine-specific project root: change it here (once) when the project lives
# in another folder, instead of editing every individual file path.
PROJECT_DIR = Path(r'C:\Users\User\Downloads\nba_data_science_project')
DATA_DIR = PROJECT_DIR / 'data'

# One row per player / season type / team with RAPTOR ratings
raptordata = pd.read_csv(DATA_DIR / 'rawdata' / 'latest_RAPTOR_by_team.csv')
# One row per team and season with Elo rating, points, wins and losses
nbadf = pd.read_csv(DATA_DIR / 'transformed_data' / 'nba_16-23.csv')

In [5]:
# Preview the first five rows of the per-player RAPTOR table

raptordata.head(n=5)

Unnamed: 0,player_name,player_id,season,season_type,team,poss,mp,raptor_box_offense,raptor_box_defense,raptor_box_total,...,raptor_offense,raptor_defense,raptor_total,war_total,war_reg_season,war_playoffs,predator_offense,predator_defense,predator_total,pace_impact
0,Precious Achiuwa,achiupr01,2023,RS,TOR,2328,1140,-2.43626,0.613139,-1.823122,...,-1.764278,-0.275741,-2.040019,0.409029,0.409029,0.0,-1.850761,-0.17528,-2.026041,-0.957109
1,Steven Adams,adamsst01,2023,RS,MEM,2391,1133,-0.537262,4.171893,3.634631,...,0.418716,3.548031,3.966747,3.891812,3.891812,0.0,0.027877,3.566829,3.594706,0.237911
2,Bam Adebayo,adebaba01,2023,PO,MIA,1192,598,-1.468046,1.692868,0.224823,...,-1.176961,2.366698,1.189737,1.230109,0.0,1.230109,0.216186,1.569094,1.78528,-0.761953
3,Bam Adebayo,adebaba01,2023,RS,MIA,5252,2598,-1.856952,2.661994,0.805041,...,-1.150384,2.71708,1.566696,5.696162,5.696162,0.0,-0.958213,2.653257,1.695044,-0.451875
4,Ochai Agbaji,agbajoc01,2023,RS,UTA,2604,1209,-1.328228,-2.197352,-3.525581,...,-1.014981,-1.963656,-2.978636,-0.141332,-0.141332,0.0,-1.157265,-2.504082,-3.661347,0.215671


In [6]:
# List the distinct values that occur in the 'season_type' column

season_type_column = raptordata['season_type']
season_type_column.unique()

array(['RS', 'PO'], dtype=object)

In [7]:
# Keep only the Regular Season ('RS') rows; rows with any other season_type
# (e.g. 'PO') are left out

is_regular_season = raptordata['season_type'].eq('RS')
rs_raptordata = raptordata.loc[is_regular_season]

## Virtual Team

In [8]:
# Create a virtual team of 15 randomly selected regular-season player rows.
# A fixed random_state makes the draw repeatable, so the team (and every
# number derived from it below) is the same on each Restart & Run All.

virtualteamdf = rs_raptordata.sample(n=15, random_state=42)
virtualteamdf

Unnamed: 0,player_name,player_id,season,season_type,team,poss,mp,raptor_box_offense,raptor_box_defense,raptor_box_total,...,raptor_offense,raptor_defense,raptor_total,war_total,war_reg_season,war_playoffs,predator_offense,predator_defense,predator_total,pace_impact
805,Zion Williamson,willizi01,2023,RS,NOP,2035,956,3.330455,-0.088071,3.242384,...,3.171139,0.41073,3.581869,3.120498,3.120498,0.0,1.802325,-0.483475,1.31885,1.049764
718,Jayson Tatum,tatumja01,2023,RS,BOS,5678,2732,4.0808,-0.523465,3.557335,...,4.433547,-0.730195,3.703352,8.989684,8.989684,0.0,5.933758,-0.304206,5.629552,-0.060893
47,Will Barton,bartowi01,2023,RS,WAS,1637,782,-1.197453,-4.870875,-6.068328,...,-1.967554,-4.329319,-6.296874,-1.413247,-1.413247,0.0,-1.651205,-4.25785,-5.909055,-0.133489
624,Neemias Queta,quetane01,2023,RS,SAC,70,29,-2.489666,-3.38005,-5.869716,...,-6.363588,-3.350069,-9.713657,-0.104016,-0.104016,0.0,-6.146549,-3.006482,-9.153031,0.963404
80,Brandon Boston Jr.,bostobr01,2023,RS,LAC,529,248,1.229145,-1.068228,0.160918,...,0.494293,-1.557353,-1.063059,0.213268,0.213268,0.0,-0.82022,-1.566644,-2.386864,-0.085262
143,Brandon Clarke,clarkbr01,2023,RS,MEM,2362,1090,-0.284359,0.30222,0.01786,...,-0.771216,0.573387,-0.197829,1.421004,1.421004,0.0,-0.899255,0.415492,-0.483763,0.120606
778,Peyton Watson,watsope01,2023,RS,DEN,383,186,-3.283507,-3.426445,-6.709952,...,-4.033737,-3.186268,-7.220006,-0.423544,-0.423544,0.0,-3.176776,-4.485027,-7.661802,-0.154013
277,AJ Griffin,griffaj01,2023,RS,ATL,2997,1401,0.091348,-1.074463,-0.983115,...,0.367446,-0.591416,-0.22397,1.79974,1.79974,0.0,0.580532,-1.132491,-0.55196,-0.326546
99,Bruce Brown,brownbr01,2023,RS,DEN,4712,2280,-0.77278,0.666994,-0.105786,...,-1.748989,0.478078,-1.270911,1.704732,1.704732,0.0,-1.327603,0.4263,-0.901303,-0.928488
612,Micah Potter,pottemi01,2023,RS,UTA,117,52,1.64775,2.031994,3.679744,...,-0.482602,3.489006,3.006405,0.153374,0.153374,0.0,-0.650274,1.968116,1.317842,0.432139


In [9]:
# List every column of the RAPTOR table to decide which ones to keep
raptordata.columns

Index(['player_name', 'player_id', 'season', 'season_type', 'team', 'poss',
       'mp', 'raptor_box_offense', 'raptor_box_defense', 'raptor_box_total',
       'raptor_onoff_offense', 'raptor_onoff_defense', 'raptor_onoff_total',
       'raptor_offense', 'raptor_defense', 'raptor_total', 'war_total',
       'war_reg_season', 'war_playoffs', 'predator_offense',
       'predator_defense', 'predator_total', 'pace_impact'],
      dtype='object')

In [10]:
# Columns to remove from the virtual team, grouped by kind. Concatenated in
# this order they form the drop list; 'season', 'team' and 'raptor_total' are
# deliberately absent because they are the columns that are kept.
identity_cols = ['player_name', 'player_id', 'season_type']
usage_cols = ['poss', 'mp']
raptor_component_cols = [
    'raptor_box_offense', 'raptor_box_defense', 'raptor_box_total',
    'raptor_onoff_offense', 'raptor_onoff_defense', 'raptor_onoff_total',
    'raptor_offense', 'raptor_defense',
]
war_cols = ['war_total', 'war_reg_season', 'war_playoffs']
predator_cols = ['predator_offense', 'predator_defense', 'predator_total']

dropcols = (
    identity_cols
    + usage_cols
    + raptor_component_cols
    + war_cols
    + predator_cols
    + ['pace_impact']
)

In [11]:
# Drop every column listed in `dropcols` from the virtual team.
# errors='ignore' keeps the cell re-runnable: on a second execution the
# columns are already gone and a plain drop() would raise KeyError.

virtualteamdf = virtualteamdf.drop(columns=dropcols, errors='ignore')
virtualteamdf.head()

Unnamed: 0,season,team,raptor_total
805,2023,NOP,3.581869
718,2023,BOS,3.703352
47,2023,WAS,-6.296874
624,2023,SAC,-9.713657
80,2023,LAC,-1.063059


In [12]:
# Count the missing values in each column; every count should be zero

null_counts = virtualteamdf.isna().sum()
display('NULL VALUES; ', null_counts)

'NULL VALUES; '

season          0
team            0
raptor_total    0
dtype: int64

In [13]:
# Total RAPTOR rating of the virtual team, one row per season.
# Only 'raptor_total' is aggregated: 'team' holds strings, and summing it is
# either silently dropped or string-concatenated depending on the pandas
# version, so it is excluded explicitly.

groupsum = virtualteamdf.groupby('season', as_index=False)['raptor_total'].sum()
groupsum.head()

Unnamed: 0,season,raptor_total
0,2023,-47.125651


In [14]:
# Preview the first five rows of the 2016-2023 team season table

nbadf.head(n=5)

Unnamed: 0,season,team,elo_rating,points,L,W,W%,conf,playoffs_y_n
0,2016,GSW,1800.0,9421.0,9,73,89.02,west,1
1,2016,SAS,1745.0,8490.0,15,67,81.71,west,1
2,2016,OKC,1630.0,9038.0,27,55,67.07,west,1
3,2016,LAC,1626.0,8569.0,29,53,64.63,west,1
4,2016,POR,1534.0,8622.0,38,44,53.66,west,1


In [15]:
# Restrict the team table to the Eastern Conference in the 2023 season,
# then summarise its numeric columns

in_2023 = nbadf['season'] == 2023
in_east = nbadf['conf'] == 'east'
nba2023 = nbadf.loc[in_2023 & in_east]
nba2023.describe()

Unnamed: 0,season,elo_rating,points,L,W,W%,playoffs_y_n
count,15.0,15.0,15.0,15.0,15.0,15.0,15.0
mean,2023.0,1512.266667,9377.4,40.466667,41.933333,50.89,0.466667
std,0.0,83.362776,227.106583,11.224124,11.221578,13.677526,0.516398
min,2023.0,1346.0,9045.0,24.0,17.0,20.73,0.0
25%,2023.0,1453.0,9194.5,33.0,35.0,42.68,0.0
50%,2023.0,1527.0,9359.0,41.0,42.0,50.6,0.0
75%,2023.0,1563.0,9524.5,47.0,49.0,59.76,1.0
max,2023.0,1653.0,9827.0,65.0,58.0,70.73,1.0


In [16]:
# Draw the virtual team's season totals from normal distributions whose mean
# and standard deviation are those of the Eastern Conference 2023 teams.
# The parameters are read from `nba2023` instead of being copied by hand from
# the describe() output, and the generator is seeded so the draw is repeatable.
# NOTE(review): wins and losses are sampled independently of each other, so
# their sum is not tied to the number of games actually played in a season.

rng = np.random.default_rng(42)
season_stats = nba2023[['points', 'W', 'L']].agg(['mean', 'std'])

points = rng.normal(season_stats.loc['mean', 'points'], season_stats.loc['std', 'points'], 1)
wins = rng.normal(season_stats.loc['mean', 'W'], season_stats.loc['std', 'W'], 1)
losses = rng.normal(season_stats.loc['mean', 'L'], season_stats.loc['std', 'L'], 1)
points, wins, losses

(array([8933.03315256]), array([37.54883873]), array([44.76679701]))

## Virtual NBA 2023 Season

* A virtual team of 15 randomly selected players was created and added to the Eastern Conference for the 2023 season.
* The virtual team was named 'The Avengers', with the alias 'TAV'.
* Random column values were generated to give the team a position in the 2023 league table.

In [17]:
# Fill in the remaining league-table columns for the virtual team.
# NOTE(review): the summed RAPTOR rating is relabelled 'elo_rating' unchanged;
# it is on a very different scale from the real teams' Elo values
# (roughly 1350-1650 in the 2023 East table) - confirm this is intended.

groupsum = (
    groupsum
    .assign(
        team='TAV',
        points=points,
        L=losses,
        W=wins,
        **{
            # Win percentage from the sampled wins and losses, two decimals
            'W%': lambda table: (table['W'] / (table['W'] + table['L']) * 100).round(2),
            # Placeholder; the playoff flag is decided after ranking
            'playoffs_y_n': np.nan,
            'conf': 'east',
        },
    )
    .rename(columns={'raptor_total': 'elo_rating'})
)

In [18]:
# Append the virtual team to the Eastern Conference 2023 table, then rank all
# teams by win percentage (best first) to obtain the league positions

combined_table = pd.concat([nba2023, groupsum], axis=0, ignore_index=True)
virtualnba2023 = combined_table.sort_values('W%', ascending=False, ignore_index=True)
virtualnba2023

Unnamed: 0,season,team,elo_rating,points,L,W,W%,conf,playoffs_y_n
0,2023,MIL,1599.0,9589.0,24.0,58.0,70.73,east,1.0
1,2023,BOS,1653.0,9671.0,25.0,57.0,69.51,east,1.0
2,2023,PHI,1598.0,9448.0,28.0,54.0,65.85,east,1.0
3,2023,CLE,1577.0,9205.0,31.0,51.0,62.2,east,1.0
4,2023,NYK,1549.0,9514.0,35.0,47.0,57.32,east,1.0
5,2023,BRK,1541.0,9295.0,37.0,45.0,54.88,east,1.0
6,2023,MIA,1548.0,9184.0,39.0,45.0,53.57,east,1.0
7,2023,ATL,1518.0,9827.0,41.0,42.0,50.6,east,0.0
8,2023,TOR,1527.0,9359.0,42.0,41.0,49.4,east,0.0
9,2023,CHI,1493.0,9476.0,43.0,41.0,48.81,east,0.0


In [19]:
# Create a function to determine which teams make it to the playoffs

def playoffs(df, n_qualifiers=7):
    """Flag the teams that qualify for the playoffs.

    The dataframe is expected to be ordered best-first already. The first
    ``n_qualifiers`` rows get 1 in the 'playoffs_y_n' column and every
    remaining row gets 0.

    Qualification is decided by row position rather than by index label, and
    the flags are assigned positionally, so the result is also correct when
    ``df`` does not carry a default 0..n-1 index (a label-based comparison
    combined with index-aligned assignment yields wrong or NaN flags there).

    Parameters
    ----------
    df : pandas.DataFrame
        League table ordered from best to worst team. Modified in place.
    n_qualifiers : int, default 7
        Number of leading rows that receive the playoff flag.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object with 'playoffs_y_n' added or overwritten.
    """
    # A plain list is assigned by position, so no index alignment takes place.
    df['playoffs_y_n'] = [
        1 if position < n_qualifiers else 0 for position in range(len(df))
    ]
    return df

In [20]:
# Flag the playoff qualifiers in the ranked league table; playoffs() returns
# the dataframe it was given with the 'playoffs_y_n' column filled in

virtualnba2023 = playoffs(virtualnba2023)
virtualnba2023.head()

Unnamed: 0,season,team,elo_rating,points,L,W,W%,conf,playoffs_y_n
0,2023,MIL,1599.0,9589.0,24.0,58.0,70.73,east,1
1,2023,BOS,1653.0,9671.0,25.0,57.0,69.51,east,1
2,2023,PHI,1598.0,9448.0,28.0,54.0,65.85,east,1
3,2023,CLE,1577.0,9205.0,31.0,51.0,62.2,east,1
4,2023,NYK,1549.0,9514.0,35.0,47.0,57.32,east,1


## ML Predictions

* Predict the teams that make it to the 2023 playoffs using the previously trained models.

In [22]:
# Load the two previously trained models (the file names point to East
# logistic-regression and random-forest models).
# joblib.load unpickles the file contents, so only load files from a trusted
# source.

log_model_path = r'C:\Users\User\Downloads\nba_data_science_project\models\conference_qualifiers_models\east_log_model.h5'
rfc_model_path = r'C:\Users\User\Downloads\nba_data_science_project\models\conference_qualifiers_models\east_rfc_model.h5'

logreg_model = joblib.load(log_model_path)
rfc_model = joblib.load(rfc_model_path)

In [23]:
# Pick the feature columns that are scaled and then passed to the models

feature_cols = ['elo_rating', 'points', 'L', 'W', 'W%']
modeldata = virtualnba2023[feature_cols]

In [24]:
# Rescale every feature column to the [0, 1] range.
# NOTE(review): the scaler is fitted on this 2023 table only - confirm that
# this matches the scaling the loaded models were trained with.

scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(modeldata)
scaleddata = scaler.transform(modeldata)
scaleddf = pd.DataFrame(scaleddata)

In [25]:
# Predict, with both models, which teams make it to the playoffs and with
# what probability, and collect the results next to the rule-based
# 'playoffs_y_n' flag.

# .copy() gives an independent frame: adding columns to a bare column
# selection of virtualnba2023 triggers pandas' SettingWithCopyWarning.
predictions_2023 = virtualnba2023[["season", "team", "playoffs_y_n"]].copy()

# Column 1 of predict_proba is used as the probability of qualifying
# (assumes the models' classes are ordered [0, 1] - TODO confirm).
log_probability = logreg_model.predict_proba(scaleddf)[:, 1].tolist()
rfc_probability = rfc_model.predict_proba(scaleddf)[:, 1].tolist()

log_prediction = logreg_model.predict(scaleddf).tolist()
rfc_prediction = rfc_model.predict(scaleddf).tolist()

predictions_2023["prediction_log"] = log_prediction
predictions_2023["prob_log"] = log_probability
predictions_2023["prediction_rfc"] = rfc_prediction
predictions_2023["prob_rfc"] = rfc_probability

# Show the table ordered by the random-forest probability, with the
# logistic-regression probability breaking ties. A separate
# sort_values("prob_log") statement whose result is not used would have no
# effect on the output, so both keys are combined into one sort.
predictions_2023.sort_values(["prob_rfc", "prob_log"], ascending=False)

Unnamed: 0,season,team,playoffs_y_n,prediction_log,prob_log,prediction_rfc,prob_rfc
0,2023,MIL,1,1,0.953257,1,1.0
1,2023,BOS,1,1,0.948451,1,1.0
2,2023,PHI,1,1,0.911651,1,1.0
3,2023,CLE,1,1,0.855989,1,1.0
4,2023,NYK,1,1,0.760297,1,1.0
5,2023,BRK,1,1,0.686066,1,0.78
6,2023,MIA,1,1,0.645118,0,0.24
7,2023,ATL,0,1,0.568605,0,0.18
8,2023,TOR,0,1,0.510248,0,0.05
9,2023,CHI,0,0,0.487145,0,0.05
