In [492]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
from tensorflow.keras.models import Sequential
import tensorflow as tf
from google.colab import files
import numpy as np

In [493]:
#  Import nba data and clear out some columns
nba_gamestats_df = pd.read_csv("Basketball_Data/Curated_Game_Stats_V2_2019-2022.csv")
nba_gamestats_df.drop(['Rk'], axis=1, inplace=True)
print(nba_gamestats_df.head())

nba_advancedgame_df = pd.read_csv("Basketball_Data/Advanced_Game_Stats_V2.csv")
nba_advancedgame_df.drop(['Rk', 'Unnamed: 4', 'Result'], axis=1, inplace=True)
nba_advancedgame_df.head()

  Team        Date  PTS Unnamed: 4  Opp          Result   MP  FG  FGA    FG%  \
0  HOU  2019-10-30  159          @  WAS       W 159-158  240  55  103  0.534   
1  HOU  2019-11-30  158        NaN  ATL       W 158-111  240  52   89  0.584   
2  WAS  2019-10-30  158        NaN  HOU       L 158-159  240  57   91  0.626   
3  HOU  2020-07-31  153          @  DAL  W 153-149 (OT)  265  52  101  0.515   
4  ATL  2020-01-26  152        NaN  WAS       W 152-133  240  54   93  0.581   

   ...  2P.1  2PA.1  2P%.1  3P.1  3PA.1  3P%.1  FT.1  FTA.1  FT%.1  PTS.2  
0  ...    37     55  0.673    20     36  0.556    24     29  0.828    158  
1  ...    23     43  0.535    15     46  0.326    20     25  0.800    111  
2  ...    32     49  0.653    23     54  0.426    26     33  0.788    159  
3  ...    29     56  0.518    21     49  0.429    28     38  0.737    149  
4  ...    31     55  0.564    11     33  0.333    38     42  0.905    133  

[5 rows x 33 columns]


Unnamed: 0,Team,Date,eFG%,Opp,ORtg,FTr,3PAr,TS%,eFG%.1,FT/FGA,ORtg.1,FTr.1,3PAr.1,TS%.1,eFG%.2,FT/FGA.1
0,DAL,2021-12-01,0.795,NOP,152.3,0.108,0.41,0.799,0.795,0.084,117.2,0.18,0.416,0.557,0.522,0.157
1,DEN,2022-01-13,0.753,POR,140.7,0.241,0.506,0.763,0.753,0.181,108.5,0.326,0.465,0.549,0.529,0.198
2,BRK,2021-10-31,0.743,DET,123.0,0.222,0.347,0.74,0.743,0.139,95.6,0.278,0.418,0.513,0.481,0.19
3,BOS,2022-04-03,0.742,WAS,152.7,0.143,0.484,0.744,0.742,0.099,108.2,0.244,0.256,0.562,0.512,0.22
4,CHO,2022-01-05,0.741,DET,136.5,0.358,0.519,0.747,0.741,0.247,108.2,0.217,0.457,0.551,0.543,0.12


In [494]:
complete_stats_df = pd.merge(nba_gamestats_df, nba_advancedgame_df, on=["Team", "Date", "Opp"] , how='inner')
complete_stats_df

Unnamed: 0,Team,Date,PTS,Unnamed: 4,Opp,Result,MP,FG,FGA,FG%,...,3PAr,TS%,eFG%.1,FT/FGA,ORtg.1,FTr.1,3PAr.1,TS%.1,eFG%.2,FT/FGA.1
0,HOU,2019-10-30,159,@,WAS,W 159-158,240,55,103,0.534,...,0.524,0.676,0.646,0.252,144.1,0.319,0.396,0.761,0.736,0.264
1,HOU,2019-11-30,158,,ATL,W 158-111,240,52,89,0.584,...,0.573,0.760,0.725,0.326,108.2,0.281,0.517,0.555,0.511,0.225
2,WAS,2019-10-30,158,,HOU,L 158-159,240,57,91,0.626,...,0.396,0.761,0.736,0.264,145.0,0.320,0.524,0.676,0.646,0.252
3,HOU,2020-07-31,153,@,DAL,W 153-149 (OT),265,52,101,0.515,...,0.475,0.647,0.609,0.297,122.8,0.362,0.467,0.612,0.576,0.267
4,ATL,2020-01-26,152,,WAS,W 152-133,240,54,93,0.581,...,0.376,0.704,0.661,0.312,124.1,0.477,0.375,0.625,0.540,0.432
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4573,DAL,2022-03-09,77,,NYK,L 77-107,240,27,86,0.314,...,0.512,0.397,0.349,0.198,119.9,0.393,0.405,0.543,0.482,0.310
4574,CHI,2021-11-22,77,,IND,L 77-109,240,31,85,0.365,...,0.329,0.420,0.400,0.106,115.1,0.200,0.344,0.557,0.517,0.178
4575,SAC,2022-01-25,75,@,BOS,L 75-128,240,29,95,0.305,...,0.347,0.369,0.337,0.116,133.8,0.068,0.437,0.603,0.592,0.058
4576,NYK,2022-01-08,75,@,BOS,L 75-99,240,29,79,0.367,...,0.392,0.438,0.437,0.076,118.5,0.224,0.382,0.593,0.553,0.197


In [495]:
#Check types for nba game stats
complete_stats_df.dtypes

Team           object
Date           object
PTS             int64
Unnamed: 4     object
Opp            object
Result         object
MP              int64
FG              int64
FGA             int64
FG%           float64
2P              int64
2PA             int64
2P%           float64
3P              int64
3PA             int64
3P%           float64
FT              int64
FTA             int64
FT%           float64
PTS.1           int64
FG.1            int64
FGA.1           int64
FG%.1         float64
2P.1            int64
2PA.1           int64
2P%.1         float64
3P.1            int64
3PA.1           int64
3P%.1         float64
FT.1            int64
FTA.1           int64
FT%.1         float64
PTS.2           int64
eFG%          float64
ORtg          float64
FTr           float64
3PAr          float64
TS%           float64
eFG%.1        float64
FT/FGA        float64
ORtg.1        float64
FTr.1         float64
3PAr.1        float64
TS%.1         float64
eFG%.2        float64
FT/FGA.1  

In [496]:
#Drop columns not needed, rename column
complete_stats_df = complete_stats_df.drop(["Result", "Opp", "eFG%.1", "Date"], axis=1)

complete_stats_df.rename({
  "Unnamed: 4" : "Home/Away"
}, axis=1, inplace=True)

complete_stats_df['Home/Away'] = complete_stats_df['Home/Away'].replace(['@', ''], ['Away', 'Home'])
complete_stats_df['Home/Away'].fillna(value="Home", inplace=True)

# Loop through the column names and replace the .1's with Home/Visitor Score
new_column_names = {}
for col in complete_stats_df.columns:
    if col.endswith('.1'):
        new_column_names[col] = col.replace('.1', '_opponent')
    elif col.endswith('.2'):
        new_column_names[col] = col.replace('.2', '_opponent')

# Rename the columns
complete_stats_df.rename(columns=new_column_names, inplace=True)


complete_stats_df.head()

Unnamed: 0,Team,PTS,Home/Away,MP,FG,FGA,FG%,2P,2PA,2P%,...,FTr,3PAr,TS%,FT/FGA,ORtg_opponent,FTr_opponent,3PAr_opponent,TS%_opponent,eFG%_opponent,FT/FGA_opponent
0,HOU,159,Away,240,55,103,0.534,32,49,0.653,...,0.32,0.524,0.676,0.252,144.1,0.319,0.396,0.761,0.736,0.264
1,HOU,158,Home,240,52,89,0.584,27,38,0.711,...,0.382,0.573,0.76,0.326,108.2,0.281,0.517,0.555,0.511,0.225
2,WAS,158,Home,240,57,91,0.626,37,55,0.673,...,0.319,0.396,0.761,0.264,145.0,0.32,0.524,0.676,0.646,0.252
3,HOU,153,Away,265,52,101,0.515,33,53,0.623,...,0.386,0.475,0.647,0.297,122.8,0.362,0.467,0.612,0.576,0.267
4,ATL,152,Home,240,54,93,0.581,39,58,0.672,...,0.366,0.376,0.704,0.312,124.1,0.477,0.375,0.625,0.54,0.432


In [497]:
complete_stats_df = complete_stats_df.drop(["2P", "2P%", "2P%_opponent", "2PA_opponent", "2P_opponent", "3P%", "3P%_opponent", "3PA_opponent", "3P_opponent", "FG%", "FG%_opponent", "FGA_opponent", "FG_opponent", "FT", "FT%", "FT%_opponent", "FTA", "FTA_opponent", "FT_opponent", "MP", "PTS_opponent" ], axis=1)
complete_stats_df


Unnamed: 0,Team,PTS,Home/Away,FG,FGA,2PA,3P,3PA,eFG%,ORtg,FTr,3PAr,TS%,FT/FGA,ORtg_opponent,FTr_opponent,3PAr_opponent,TS%_opponent,eFG%_opponent,FT/FGA_opponent
0,HOU,159,Away,55,103,49,23,54,0.646,145.0,0.320,0.524,0.676,0.252,144.1,0.319,0.396,0.761,0.736,0.264
1,HOU,158,Home,52,89,38,25,51,0.725,154.0,0.382,0.573,0.760,0.326,108.2,0.281,0.517,0.555,0.511,0.225
2,WAS,158,Home,57,91,55,20,36,0.736,144.1,0.319,0.396,0.761,0.264,145.0,0.320,0.524,0.676,0.646,0.252
3,HOU,153,Away,52,101,53,19,48,0.609,126.1,0.386,0.475,0.647,0.297,122.8,0.362,0.467,0.612,0.576,0.267
4,ATL,152,Home,54,93,58,15,35,0.661,141.9,0.366,0.376,0.704,0.312,124.1,0.477,0.375,0.625,0.540,0.432
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4573,DAL,77,Home,27,86,42,6,44,0.349,86.3,0.291,0.512,0.397,0.198,119.9,0.393,0.405,0.543,0.482,0.310
4574,CHI,77,Home,31,85,57,6,28,0.400,81.3,0.176,0.329,0.420,0.106,115.1,0.200,0.344,0.557,0.517,0.178
4575,SAC,75,Away,29,95,62,6,33,0.337,78.4,0.158,0.347,0.369,0.116,133.8,0.068,0.437,0.603,0.592,0.058
4576,NYK,75,Away,29,79,48,11,31,0.437,89.7,0.190,0.392,0.438,0.076,118.5,0.224,0.382,0.593,0.553,0.197


In [498]:
#Transform Dataframe
complete_stats_df=complete_stats_df.astype({
  'PTS' : 'float',
  'Team': 'string',
  'Home/Away': 'string',
  'FG': 'float',
  'FGA': 'float',
  '2PA': 'float',
  '3P': 'float',
  '3PA': 'float',
  'Team': 'string',
  'ORtg_opponent': 'float',
  'FTr_opponent': 'float',
  '3PAr_opponent': 'float',
  'TS%_opponent': 'float',
  'eFG%_opponent': 'float',
  'FT/FGA_opponent': 'float',
  'ORtg': 'float',
  'FTr': 'float',
  '3PAr': 'float',
  'TS%': 'float',
  'FT/FGA': 'float',
})

complete_stats_df.dtypes

Team                string
PTS                float64
Home/Away           string
FG                 float64
FGA                float64
2PA                float64
3P                 float64
3PA                float64
eFG%               float64
ORtg               float64
FTr                float64
3PAr               float64
TS%                float64
FT/FGA             float64
ORtg_opponent      float64
FTr_opponent       float64
3PAr_opponent      float64
TS%_opponent       float64
eFG%_opponent      float64
FT/FGA_opponent    float64
dtype: object

In [499]:
# Groupby Teams and average stats to per/game data and round to two decimal places
season_team_averages = complete_stats_df.groupby('Team').mean().reset_index().round(2)

season_team_averages.head()

  season_team_averages = complete_stats_df.groupby('Team').mean().reset_index().round(2)


Unnamed: 0,Team,PTS,FG,FGA,2PA,3P,3PA,eFG%,ORtg,FTr,3PAr,TS%,FT/FGA,ORtg_opponent,FTr_opponent,3PAr_opponent,TS%_opponent,eFG%_opponent,FT/FGA_opponent
0,ATL,112.96,41.1,89.32,54.17,12.49,35.15,0.53,113.05,0.26,0.39,0.57,0.21,115.6,0.26,0.38,0.58,0.54,0.2
1,BOS,112.64,40.99,88.41,52.49,12.92,35.92,0.54,114.79,0.25,0.41,0.58,0.2,107.75,0.26,0.4,0.54,0.51,0.2
2,BRK,112.38,41.23,89.28,54.55,12.21,34.73,0.53,112.35,0.26,0.39,0.57,0.2,112.21,0.25,0.39,0.55,0.52,0.2
3,CHI,109.5,40.78,87.68,56.07,11.33,31.61,0.53,111.21,0.24,0.36,0.57,0.19,112.86,0.28,0.37,0.58,0.54,0.22
4,CHO,109.82,40.36,89.0,52.53,13.12,36.47,0.53,111.56,0.24,0.41,0.56,0.18,114.34,0.23,0.42,0.58,0.55,0.18


In [500]:
# Split our preprocessed data into our features and target arrays
y = complete_stats_df['PTS']
X = complete_stats_df.drop(['PTS', 'Team' ], axis=1)

In [501]:
#One Hot Encoding
X = pd.get_dummies(X, columns=["Home/Away"])

In [502]:
X.head()

Unnamed: 0,FG,FGA,2PA,3P,3PA,eFG%,ORtg,FTr,3PAr,TS%,FT/FGA,ORtg_opponent,FTr_opponent,3PAr_opponent,TS%_opponent,eFG%_opponent,FT/FGA_opponent,Home/Away_Away,Home/Away_Home
0,55.0,103.0,49.0,23.0,54.0,0.646,145.0,0.32,0.524,0.676,0.252,144.1,0.319,0.396,0.761,0.736,0.264,1,0
1,52.0,89.0,38.0,25.0,51.0,0.725,154.0,0.382,0.573,0.76,0.326,108.2,0.281,0.517,0.555,0.511,0.225,0,1
2,57.0,91.0,55.0,20.0,36.0,0.736,144.1,0.319,0.396,0.761,0.264,145.0,0.32,0.524,0.676,0.646,0.252,0,1
3,52.0,101.0,53.0,19.0,48.0,0.609,126.1,0.386,0.475,0.647,0.297,122.8,0.362,0.467,0.612,0.576,0.267,1,0
4,54.0,93.0,58.0,15.0,35.0,0.661,141.9,0.366,0.376,0.704,0.312,124.1,0.477,0.375,0.625,0.54,0.432,0,1


In [503]:
# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X,y, random_state=42)

In [504]:
# Create a StandardScaler instances
scaler = StandardScaler()


# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [505]:
#Instantiate Neural Network Model
nn_model = tf.keras.models.Sequential()

# unit = 2 * X_train_scaled.shape[1]

#Dense Layers
nn_model.add(tf.keras.layers.Dense(units=2, activation="relu", input_dim=19))

nn_model.add(tf.keras.layers.Dense(units=40, activation="relu"))
nn_model.add(tf.keras.layers.Dense(units=20, activation="relu"))


#Output Layer
nn_model.add(tf.keras.layers.Dense(units=1, activation="linear"))


In [506]:
#Summary of Neural Network
nn_model.summary()

Model: "sequential_14"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_56 (Dense)            (None, 2)                 40        
                                                                 
 dense_57 (Dense)            (None, 40)                120       
                                                                 
 dense_58 (Dense)            (None, 20)                820       
                                                                 
 dense_59 (Dense)            (None, 1)                 21        
                                                                 
Total params: 1001 (3.91 KB)
Trainable params: 1001 (3.91 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [507]:
#Compile the model
nn_model.compile(loss="mean_squared_error", optimizer="adam", metrics=["mse", "mae"])

#Train model
fit_model = nn_model.fit(X_train_scaled, y_train, epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [508]:
model_loss, model_mse, model_mae = nn_model.evaluate(X_test_scaled, y_test, verbose=2)
print(f"Loss (MSE): {model_loss}, MSE: {model_mse}, MAE: {model_mae}")

36/36 - 0s - loss: 0.4356 - mse: 0.4356 - mae: 0.5006 - 183ms/epoch - 5ms/step
Loss (MSE): 0.43560805916786194, MSE: 0.43560805916786194, MAE: 0.5005599856376648


In [509]:
model_filename = "NBA_Model_V2.h5"
nn_model.save(model_filename)
files.download('NBA_Model_V2.h5')

  saving_api.save_model(


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [510]:
Advanced_Game_Stats_2023 = pd.read_csv("Basketball_Data/Advanced_Game_Stats_2001-2003.csv")

print(Advanced_Game_Stats_2023)

Curated_Game_Stats_2023 = pd.read_csv("Basketball_Data/curated_game_stats.csv")

Curated_Game_Stats_2023.head()

        Rk Team        Date   eFG% Unnamed: 4  Opp     Result   ORtg    FTr  \
0        1  DAL  2021-12-01  0.795          @  NOP  W 139-107  152.3  0.108   
1        2  MIN  2022-12-18  0.787        NaN  CHI  W 150-126  147.9  0.184   
2        3  MIN  2022-10-26  0.775        NaN  SAS  W 134-122  131.1  0.200   
3        4  BRK  2022-12-21  0.768        NaN  GSW  W 143-113  139.8  0.190   
4        5  LAC  2023-03-29  0.756          @  MEM  W 141-132  137.1  0.397   
...    ...  ...         ...    ...        ...  ...        ...    ...    ...   
4915  4916  DAL  2021-10-29  0.346          @  DEN   L 75-106   75.7  0.346   
4916  4917  TOR  2021-10-20  0.345        NaN  WAS    L 83-98   80.0  0.227   
4917  4918  PHI  2022-02-15  0.338        NaN  BOS   L 87-135   94.8  0.488   
4918  4919  SAC  2022-01-25  0.337          @  BOS   L 75-128   78.4  0.158   
4919  4920  POR  2022-03-07  0.319          @  MIN   L 81-124   75.4  0.458   

       3PAr    TS%  eFG%.1  FT/FGA  ORtg.1  FTr.1  

Unnamed: 0.1,Unnamed: 0,Team,Date,PTS,Opp,FG,FGA,2PA,3P,3PA,Opponent_Points
0,0,SAC,2023-02-24,176,LAC,65,111,70,18,41,175
1,1,LAC,2023-02-24,175,SAC,59,98,53,26,45,176
2,2,CHO,2022-01-26,158,IND,54,93,48,24,45,126
3,3,GSW,2023-04-09,157,POR,58,96,47,27,49,101
4,4,SAS,2022-02-25,157,WAS,57,105,76,9,29,153


In [511]:
complete_stats_merge = pd.merge(Curated_Game_Stats_2023, Advanced_Game_Stats_2023, on=["Team", "Date", "Opp"] , how='inner')
complete_stats_merge.head()

Unnamed: 0.1,Unnamed: 0,Team,Date,PTS,Opp,FG,FGA,2PA,3P,3PA,...,3PAr,TS%,eFG%.1,FT/FGA,ORtg.1,FTr.1,3PAr.1,TS%.1,eFG%.2,FT/FGA.1
0,0,SAC,2023-02-24,176,LAC,65,111,70,18,41,...,0.369,0.696,0.667,0.252,136.1,0.367,0.459,0.769,0.735,0.316
1,1,LAC,2023-02-24,175,SAC,59,98,53,26,45,...,0.459,0.769,0.735,0.316,136.9,0.315,0.369,0.696,0.667,0.252
2,2,CHO,2022-01-26,158,IND,54,93,48,24,45,...,0.484,0.735,0.71,0.28,117.8,0.37,0.37,0.589,0.533,0.304
3,3,GSW,2023-04-09,157,POR,58,96,47,27,49,...,0.51,0.762,0.745,0.146,92.6,0.253,0.368,0.478,0.447,0.168
4,4,SAS,2022-02-25,157,WAS,57,105,76,9,29,...,0.276,0.633,0.586,0.324,127.3,0.222,0.352,0.645,0.62,0.176


In [512]:
# Convert the "Date" column to datetime
complete_stats_merge['Date'] = pd.to_datetime(complete_stats_merge['Date'])

# Filter rows with Date >= '2022-10-01'
complete_stats_merge = complete_stats_merge[complete_stats_merge['Date'] >= '2022-10-01']

search_date = '2022-03-05'

if search_date in complete_stats_merge['Date'].astype(str).values:
    print(f"The date {search_date} still exists in the DataFrame.")
else:
    print(f"The date {search_date} has been successfully removed from the DataFrame.")

The date 2022-03-05 has been successfully removed from the DataFrame.


In [513]:
#Check types for nba game stats
complete_stats_merge.dtypes

Unnamed: 0                  int64
Team                       object
Date               datetime64[ns]
PTS                        object
Opp                        object
FG                         object
FGA                        object
2PA                        object
3P                         object
3PA                        object
Opponent_Points            object
Rk                          int64
eFG%                      float64
Unnamed: 4                 object
Result                     object
ORtg                      float64
FTr                       float64
3PAr                      float64
TS%                       float64
eFG%.1                    float64
FT/FGA                    float64
ORtg.1                    float64
FTr.1                     float64
3PAr.1                    float64
TS%.1                     float64
eFG%.2                    float64
FT/FGA.1                  float64
dtype: object

In [514]:
#Drop columns not needed, rename column
complete_stats_merge = complete_stats_merge.drop(["Result", "Unnamed: 0", "Opp", "eFG%.1", "Date", "Opponent_Points", "Rk" ], axis=1)

# Rename column to "Home/Away"
complete_stats_merge.rename({
  "Unnamed: 4" : "Home/Away"
}, axis=1, inplace=True)

# Fill with NA Vals
complete_stats_merge['Home/Away'] = np.nan

# Loop through the column names and replace the .1's with Home/Visitor Score
new_column_names = {}
for col in complete_stats_merge.columns:
    if col.endswith('.1'):
        new_column_names[col] = col.replace('.1', '_opponent')
    elif col.endswith('.2'):
        new_column_names[col] = col.replace('.2', '_opponent')
    else:
        new_column_names[col] = col

# Rename the columns
complete_stats_merge.rename(columns=new_column_names, inplace=True)

complete_stats_merge

Unnamed: 0,Team,PTS,FG,FGA,2PA,3P,3PA,eFG%,Home/Away,ORtg,FTr,3PAr,TS%,FT/FGA,ORtg_opponent,FTr_opponent,3PAr_opponent,TS%_opponent,eFG%_opponent,FT/FGA_opponent
0,SAC,176,65,111,70,18,41,0.667,,136.9,0.315,0.369,0.696,0.252,136.1,0.367,0.459,0.769,0.735,0.316
1,LAC,175,59,98,53,26,45,0.735,,136.1,0.367,0.459,0.769,0.316,136.9,0.315,0.369,0.696,0.667,0.252
3,GSW,157,58,96,47,27,49,0.745,,143.9,0.167,0.510,0.762,0.146,92.6,0.253,0.368,0.478,0.447,0.168
5,OKC,153,56,97,51,21,46,0.686,,147.6,0.237,0.474,0.714,0.206,116.7,0.303,0.337,0.600,0.562,0.236
6,SAC,153,56,94,53,20,41,0.702,,140.9,0.266,0.436,0.729,0.223,111.4,0.407,0.346,0.633,0.562,0.370
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4892,IND,82,28,79,40,11,39,0.424,,88.5,0.253,0.494,0.467,0.190,93.9,0.217,0.410,0.478,0.440,0.169
4893,POR,82,30,79,64,7,15,0.424,,86.5,0.304,0.190,0.458,0.190,107.5,0.341,0.366,0.541,0.494,0.256
4900,DET,81,31,85,52,9,33,0.418,,84.1,0.153,0.388,0.446,0.118,107.0,0.232,0.316,0.492,0.453,0.179
4901,CLE,81,29,83,48,8,35,0.398,,82.7,0.241,0.422,0.441,0.181,94.0,0.244,0.337,0.483,0.436,0.198


In [515]:
#Transform Dataframe
complete_stats_merge=complete_stats_merge.astype({
  'PTS' : 'float',
  'Team': 'string',
  'FG': 'float',
  'FGA': 'float',
  '2PA': 'float',
  '3P': 'float',
  '3PA': 'float',
  'Team': 'string',
  'ORtg_opponent': 'float',
  'FTr_opponent': 'float',
  '3PAr_opponent': 'float',
  'TS%_opponent': 'float',
  'eFG%_opponent': 'float',
  'FT/FGA_opponent': 'float',
  'ORtg': 'float',
  'FTr': 'float',
  '3PAr': 'float',
  'TS%': 'float',
  'FT/FGA': 'float',
})

complete_stats_merge.dtypes

Team                string
PTS                float64
FG                 float64
FGA                float64
2PA                float64
3P                 float64
3PA                float64
eFG%               float64
Home/Away          float64
ORtg               float64
FTr                float64
3PAr               float64
TS%                float64
FT/FGA             float64
ORtg_opponent      float64
FTr_opponent       float64
3PAr_opponent      float64
TS%_opponent       float64
eFG%_opponent      float64
FT/FGA_opponent    float64
dtype: object

In [516]:
# Groupby Teams and average stats to per/game data and round to two decimal places
complete_stats_2023 = complete_stats_merge.groupby('Team').mean().reset_index().round(2)

# Turn the grouped data to csv for backend
complete_stats_2023.to_csv('season_total_averages_v2.csv', encoding = 'utf-8-sig')

# files.download('season_team_averages.csv')
complete_stats_2023.head()

Unnamed: 0,Team,PTS,FG,FGA,2PA,3P,3PA,eFG%,Home/Away,ORtg,FTr,3PAr,TS%,FT/FGA,ORtg_opponent,FTr_opponent,3PAr_opponent,TS%_opponent,eFG%_opponent,FT/FGA_opponent
0,ATL,118.43,44.61,92.37,61.82,10.76,30.55,0.54,,117.45,0.25,0.33,0.58,0.2,117.16,0.26,0.37,0.59,0.55,0.21
1,BOS,117.94,42.2,88.76,46.17,16.04,42.59,0.57,,118.98,0.25,0.48,0.6,0.2,112.28,0.24,0.37,0.56,0.53,0.18
2,BRK,113.35,41.45,85.1,51.3,12.78,33.79,0.56,,116.07,0.26,0.4,0.6,0.21,115.07,0.28,0.36,0.57,0.53,0.21
3,CHI,113.12,42.54,86.78,57.91,10.41,28.87,0.55,,114.52,0.25,0.33,0.59,0.2,113.16,0.25,0.43,0.58,0.55,0.2
4,CHO,110.95,41.28,90.4,57.85,10.74,32.55,0.52,,109.86,0.26,0.36,0.55,0.2,116.14,0.27,0.38,0.58,0.55,0.21


In [517]:
complete_stats_2023['Home/Away'] = np.nan

complete_stats_2023.head()

Unnamed: 0,Team,PTS,FG,FGA,2PA,3P,3PA,eFG%,Home/Away,ORtg,FTr,3PAr,TS%,FT/FGA,ORtg_opponent,FTr_opponent,3PAr_opponent,TS%_opponent,eFG%_opponent,FT/FGA_opponent
0,ATL,118.43,44.61,92.37,61.82,10.76,30.55,0.54,,117.45,0.25,0.33,0.58,0.2,117.16,0.26,0.37,0.59,0.55,0.21
1,BOS,117.94,42.2,88.76,46.17,16.04,42.59,0.57,,118.98,0.25,0.48,0.6,0.2,112.28,0.24,0.37,0.56,0.53,0.18
2,BRK,113.35,41.45,85.1,51.3,12.78,33.79,0.56,,116.07,0.26,0.4,0.6,0.21,115.07,0.28,0.36,0.57,0.53,0.21
3,CHI,113.12,42.54,86.78,57.91,10.41,28.87,0.55,,114.52,0.25,0.33,0.59,0.2,113.16,0.25,0.43,0.58,0.55,0.2
4,CHO,110.95,41.28,90.4,57.85,10.74,32.55,0.52,,109.86,0.26,0.36,0.55,0.2,116.14,0.27,0.38,0.58,0.55,0.21


In [518]:
# test_predict_data = complete_stats_2023[complete_stats_2023['Team'] == 'ATL']
test_predict_data = complete_stats_2023[complete_stats_2023['Team'].isin(['ATL', 'CHO'])]

# Set the first row's 'Home/Away' value to 'Home'
test_predict_data.iloc[0, test_predict_data.columns.get_loc('Home/Away')] = 'Home'

# Set the second row's 'Home/Away' value to 'Away'
test_predict_data.iloc[1, test_predict_data.columns.get_loc('Home/Away')] = 'Away'
test_predict_data

Unnamed: 0,Team,PTS,FG,FGA,2PA,3P,3PA,eFG%,Home/Away,ORtg,FTr,3PAr,TS%,FT/FGA,ORtg_opponent,FTr_opponent,3PAr_opponent,TS%_opponent,eFG%_opponent,FT/FGA_opponent
0,ATL,118.43,44.61,92.37,61.82,10.76,30.55,0.54,Home,117.45,0.25,0.33,0.58,0.2,117.16,0.26,0.37,0.59,0.55,0.21
4,CHO,110.95,41.28,90.4,57.85,10.74,32.55,0.52,Away,109.86,0.26,0.36,0.55,0.2,116.14,0.27,0.38,0.58,0.55,0.21


In [519]:
test_predict_data.drop(['Team', 'PTS'], axis=1, inplace=True)
test_predict_data

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_predict_data.drop(['Team', 'PTS'], axis=1, inplace=True)


Unnamed: 0,FG,FGA,2PA,3P,3PA,eFG%,Home/Away,ORtg,FTr,3PAr,TS%,FT/FGA,ORtg_opponent,FTr_opponent,3PAr_opponent,TS%_opponent,eFG%_opponent,FT/FGA_opponent
0,44.61,92.37,61.82,10.76,30.55,0.54,Home,117.45,0.25,0.33,0.58,0.2,117.16,0.26,0.37,0.59,0.55,0.21
4,41.28,90.4,57.85,10.74,32.55,0.52,Away,109.86,0.26,0.36,0.55,0.2,116.14,0.27,0.38,0.58,0.55,0.21


In [520]:
#One Hot Encoding
test_predict_data = pd.get_dummies(test_predict_data, columns=["Home/Away"])
test_predict_data

Unnamed: 0,FG,FGA,2PA,3P,3PA,eFG%,ORtg,FTr,3PAr,TS%,FT/FGA,ORtg_opponent,FTr_opponent,3PAr_opponent,TS%_opponent,eFG%_opponent,FT/FGA_opponent,Home/Away_Away,Home/Away_Home
0,44.61,92.37,61.82,10.76,30.55,0.54,117.45,0.25,0.33,0.58,0.2,117.16,0.26,0.37,0.59,0.55,0.21,0,1
4,41.28,90.4,57.85,10.74,32.55,0.52,109.86,0.26,0.36,0.55,0.2,116.14,0.27,0.38,0.58,0.55,0.21,1,0


In [521]:
# Scale this new data instance with  StandardScaler
test_predict_data_scaled = X_scaler.transform(test_predict_data)


In [522]:
# Use the model to predict a point total
predictions = nn_model.predict(test_predict_data_scaled)
predicted_scores_rounded = np.round(predictions)
predicted_scores_rounded





array([[118.],
       [111.]], dtype=float32)