**import dependencies**

In [30]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
import pandas as pd
import numpy as np
import tensorflow as tf

**import final 8 csv file**

In [2]:
final8_df = pd.read_csv('Final8_results.csv')

final8_df.head()

Unnamed: 0,Team1,Team1Score,Team2,Team2Score
0,Golden State Warriors,123,Utah Jazz,116
1,Miami Heat,113,Sacramento Kings,115
2,Phoenix Suns,133,Charlotte Hornets,99
3,Dallas Mavericks,95,Oklahoma City Thunder,86
4,Memphis Grizzlies,118,Brooklyn Nets,104


**import team player csv**

In [3]:
player_df = pd.read_csv('NBA_team_player_data.csv', skiprows = 1)

player_df.head()

Unnamed: 0,RANK,FULL NAME,TEAM,POS,AGE,GP,MPG,MIN\n,USG,TO,...,RPG,TRB,APG,AST%,SPG,BPG,TOPGT.,VI,ORTG,DRTG
0,,Jayson Tatum,Bos,F-G,24.2,11,41.2,85.9,31.1,14.0,...,5.6,7.7,6.1,25.8,1.27,1.0,4.0,9.6,109.7,102.4
1,,Jaylen Brown,Bos,G-F,25.56,11,37.7,78.6,25.7,13.0,...,6.5,9.6,3.8,17.2,1.55,0.55,2.82,8.7,111.0,102.7
2,,Marcus Smart,Bos,G,28.2,10,36.6,76.2,19.8,14.8,...,3.7,5.7,6.2,25.8,1.1,0.4,2.4,7.7,111.5,102.9
3,,Al Horford,Bos,C-F,35.95,11,36.4,75.9,13.3,7.5,...,9.4,14.5,3.4,13.9,1.09,1.36,0.82,8.2,134.5,97.7
4,,Grant Williams,Bos,F,23.46,11,30.9,64.3,14.7,8.1,...,4.4,8.0,0.8,3.9,0.27,1.09,0.82,4.4,117.7,100.6


**clean data**

In [4]:
final8_df.isnull().sum()

Team1         0
Team1Score    0
Team2         0
Team2Score    0
dtype: int64

In [5]:
player_df.isnull().sum()

RANK         83
FULL NAME     0
TEAM          0
POS           0
AGE           0
GP            0
MPG           0
MIN\n         0
USG           0
TO            0
FTA           0
FT%           0
2PA           0
2P%           0
3PA           0
3P%           0
eFG           0
TS            0
PPG           0
RPG           0
TRB           0
APG           0
AST%          0
SPG           0
BPG           0
TOPGT.        0
VI            0
ORTG          1
DRTG          1
dtype: int64

In [6]:
player_df = player_df.drop(columns = ['RANK'])

player_df.head()

Unnamed: 0,FULL NAME,TEAM,POS,AGE,GP,MPG,MIN\n,USG,TO,FTA,...,RPG,TRB,APG,AST%,SPG,BPG,TOPGT.,VI,ORTG,DRTG
0,Jayson Tatum,Bos,F-G,24.2,11,41.2,85.9,31.1,14.0,81,...,5.6,7.7,6.1,25.8,1.27,1.0,4.0,9.6,109.7,102.4
1,Jaylen Brown,Bos,G-F,25.56,11,37.7,78.6,25.7,13.0,56,...,6.5,9.6,3.8,17.2,1.55,0.55,2.82,8.7,111.0,102.7
2,Marcus Smart,Bos,G,28.2,10,36.6,76.2,19.8,14.8,29,...,3.7,5.7,6.2,25.8,1.1,0.4,2.4,7.7,111.5,102.9
3,Al Horford,Bos,C-F,35.95,11,36.4,75.9,13.3,7.5,12,...,9.4,14.5,3.4,13.9,1.09,1.36,0.82,8.2,134.5,97.7
4,Grant Williams,Bos,F,23.46,11,30.9,64.3,14.7,8.1,23,...,4.4,8.0,0.8,3.9,0.27,1.09,0.82,4.4,117.7,100.6


In [7]:
player_df['TEAM'].values

array(['Bos', 'Bos', 'Bos', 'Bos', 'Bos', 'Bos', 'Bos', 'Bos', 'Bos',
       'Bos', 'Dal', 'Dal', 'Dal', 'Dal', 'Dal', 'Dal', 'Dal', 'Dal',
       'Dal', 'Dal', 'Gol', 'Gol', 'Gol', 'Gol', 'Gol', 'Gol', 'Gol',
       'Gol', 'Gol', 'Gol', 'Gol', 'Mia', 'Mia', 'Mia', 'Mia', 'Mia',
       'Mia', 'Mia', 'Mia', 'Mia', 'Mia', 'Mia', 'Phi', 'Phi', 'Phi',
       'Phi', 'Phi', 'Phi', 'Phi', 'Phi', 'Phi', 'Phi', 'Mem', 'Mem',
       'Mem', 'Mem', 'Mem', 'Mem', 'Mem', 'Mem', 'Mem', 'Mem', 'Mem',
       'Pho', 'Pho', 'Pho', 'Pho', 'Pho', 'Pho', 'Pho', 'Pho', 'Pho',
       'Pho', 'Mil', 'Mil', 'Mil', 'Mil', 'Mil', 'Mil', 'Mil', 'Mil',
       'Mil', 'Mil'], dtype=object)

In [8]:
final8_df['Team1'].values

array(['Golden State Warriors', 'Miami Heat', 'Phoenix Suns',
       'Dallas Mavericks', 'Memphis Grizzlies', 'Miami Heat',
       'Memphis Grizzlies', 'Phoenix Suns', 'Philadelphia 76ers',
       'Golden State Warriors', 'Miami Heat', 'Boston Celtics',
       'Golden State Warriors', 'Milwaukee Bucks', 'Dallas Mavericks',
       'Memphis Grizzlies', 'Milwaukee Bucks', 'Miami Heat',
       'Memphis Grizzlies', 'Milwaukee Bucks', 'Philadelphia 76ers',
       'Phoenix Suns', 'Golden State Warriors', 'Boston Celtics',
       'Miami Heat', 'Dallas Mavericks', 'Golden State Warriors',
       'Phoenix Suns', 'Boston Celtics', 'Golden State Warriors',
       'Dallas Mavericks', 'Philadelphia 76ers', 'Phoenix Suns',
       'Golden State Warriors', 'Philadelphia 76ers', 'Milwaukee Bucks',
       'Phoenix Suns', 'Memphis Grizzlies', 'Phoenix Suns', 'Miami Heat',
       'Memphis Grizzlies', 'Boston Celtics', 'Philadelphia 76ers',
       'Memphis Grizzlies', 'Dallas Mavericks', 'Milwaukee Bucks',


In [9]:
player_df = player_df.replace({ 
    'TEAM': 
    {
    'Bos':'Boston Celtics',
    'Dal': 'Dallas Mavericks',
    'Gol':'Golden State Warriors',
    'Mia':'Miami Heat',
    'Phi':'Philadelphia 76ers',
    'Mem':'Memphis Grizzlies',
    'Pho':'Phoenix Suns',
    'Mil':'Milwaukee Bucks'
        
    }})

player_df.head()

Unnamed: 0,FULL NAME,TEAM,POS,AGE,GP,MPG,MIN\n,USG,TO,FTA,...,RPG,TRB,APG,AST%,SPG,BPG,TOPGT.,VI,ORTG,DRTG
0,Jayson Tatum,Boston Celtics,F-G,24.2,11,41.2,85.9,31.1,14.0,81,...,5.6,7.7,6.1,25.8,1.27,1.0,4.0,9.6,109.7,102.4
1,Jaylen Brown,Boston Celtics,G-F,25.56,11,37.7,78.6,25.7,13.0,56,...,6.5,9.6,3.8,17.2,1.55,0.55,2.82,8.7,111.0,102.7
2,Marcus Smart,Boston Celtics,G,28.2,10,36.6,76.2,19.8,14.8,29,...,3.7,5.7,6.2,25.8,1.1,0.4,2.4,7.7,111.5,102.9
3,Al Horford,Boston Celtics,C-F,35.95,11,36.4,75.9,13.3,7.5,12,...,9.4,14.5,3.4,13.9,1.09,1.36,0.82,8.2,134.5,97.7
4,Grant Williams,Boston Celtics,F,23.46,11,30.9,64.3,14.7,8.1,23,...,4.4,8.0,0.8,3.9,0.27,1.09,0.82,4.4,117.7,100.6


In [10]:
player_df['TEAM'].values

array(['Boston Celtics', 'Boston Celtics', 'Boston Celtics',
       'Boston Celtics', 'Boston Celtics', 'Boston Celtics',
       'Boston Celtics', 'Boston Celtics', 'Boston Celtics',
       'Boston Celtics', 'Dallas Mavericks', 'Dallas Mavericks',
       'Dallas Mavericks', 'Dallas Mavericks', 'Dallas Mavericks',
       'Dallas Mavericks', 'Dallas Mavericks', 'Dallas Mavericks',
       'Dallas Mavericks', 'Dallas Mavericks', 'Golden State Warriors',
       'Golden State Warriors', 'Golden State Warriors',
       'Golden State Warriors', 'Golden State Warriors',
       'Golden State Warriors', 'Golden State Warriors',
       'Golden State Warriors', 'Golden State Warriors',
       'Golden State Warriors', 'Golden State Warriors', 'Miami Heat',
       'Miami Heat', 'Miami Heat', 'Miami Heat', 'Miami Heat',
       'Miami Heat', 'Miami Heat', 'Miami Heat', 'Miami Heat',
       'Miami Heat', 'Miami Heat', 'Philadelphia 76ers',
       'Philadelphia 76ers', 'Philadelphia 76ers', 'Philadelphia

**adding win/loss column**

In [12]:
condlist = [
    final8_df['Team1Score'] > final8_df['Team2Score'],
    final8_df['Team1Score'] < final8_df['Team2Score']
]

win_loss = [1,0]

final8_df['Team1Win'] = np.select(condlist, win_loss, 1)

final8_df.head()

Unnamed: 0,Team1,Team1Score,Team2,Team2Score,Team1Win
0,Golden State Warriors,123,Utah Jazz,116,1
1,Miami Heat,113,Sacramento Kings,115,0
2,Phoenix Suns,133,Charlotte Hornets,99,1
3,Dallas Mavericks,95,Oklahoma City Thunder,86,1
4,Memphis Grizzlies,118,Brooklyn Nets,104,1


**merge the dataframes**

In [13]:
finals_df = final8_df.merge(player_df, left_on = 'Team1', right_on = 'TEAM')

finals_df.head()

Unnamed: 0,Team1,Team1Score,Team2,Team2Score,Team1Win,FULL NAME,TEAM,POS,AGE,GP,...,RPG,TRB,APG,AST%,SPG,BPG,TOPGT.,VI,ORTG,DRTG
0,Golden State Warriors,123,Utah Jazz,116,1,Klay Thompson,Golden State Warriors,G,32.27,11,...,4.5,7.0,2.0,8.8,1.18,0.91,1.64,6.5,105.5,106.1
1,Golden State Warriors,123,Utah Jazz,116,1,Stephen Curry,Golden State Warriors,G,34.17,11,...,4.2,6.9,5.6,27.9,1.09,0.45,2.64,10.4,117.2,105.4
2,Golden State Warriors,123,Utah Jazz,116,1,Andrew Wiggins,Golden State Warriors,F,27.23,11,...,6.9,12.0,1.2,5.4,0.91,1.09,1.36,6.2,112.3,104.0
3,Golden State Warriors,123,Utah Jazz,116,1,Jordan Poole,Golden State Warriors,G,22.91,11,...,3.4,6.0,4.8,23.7,0.73,0.55,2.82,8.8,116.8,104.5
4,Golden State Warriors,123,Utah Jazz,116,1,Draymond Green,Golden State Warriors,F,32.2,11,...,7.2,13.0,6.6,28.3,1.0,1.18,2.91,9.5,108.8,95.9


In [15]:
finals_df.sample(10)

Unnamed: 0,Team1,Team1Score,Team2,Team2Score,Team1Win,FULL NAME,TEAM,POS,AGE,GP,...,RPG,TRB,APG,AST%,SPG,BPG,TOPGT.,VI,ORTG,DRTG
731,Miami Heat,123,Phoenix Suns,100,1,Tyler Herro,Miami Heat,G,22.32,11,...,4.2,9.2,2.9,16.9,0.73,0.36,1.91,8.0,100.2,106.2
1207,Phoenix Suns,101,Dallas Mavericks,111,0,Chris Paul,Phoenix Suns,G,37.03,13,...,4.2,7.1,8.3,36.4,1.46,0.15,2.38,9.9,129.5,103.7
2526,Boston Celtics,132,Oklahoma City Thunder,123,1,Derrick White,Boston Celtics,G,27.87,11,...,4.0,9.5,2.4,14.1,0.82,0.27,0.73,6.8,106.7,101.5
1180,Phoenix Suns,111,Miami Heat,90,1,Cameron Johnson,Phoenix Suns,F,26.2,13,...,3.5,8.4,1.5,8.7,0.38,0.08,0.92,6.2,120.1,104.2
2834,Milwaukee Bucks,107,Phoenix Suns,131,0,Brook Lopez,Milwaukee Bucks,C,34.12,12,...,5.9,11.0,0.7,3.7,0.5,1.5,0.92,5.1,117.3,104.4
538,Miami Heat,112,San Antonio Spurs,95,1,Dewayne Dedmon,Miami Heat,C,32.76,10,...,3.3,20.2,0.4,6.1,0.1,0.2,0.5,6.9,106.7,82.4
690,Miami Heat,110,Atlanta Hawks,86,1,Caleb Martin,Miami Heat,F,26.63,11,...,1.5,8.4,0.3,3.7,0.82,0.18,0.55,4.1,73.7,100.5
241,Golden State Warriors,115,Washington Wizards,123,0,Damion Lee,Golden State Warriors,G-F,29.57,8,...,1.9,10.1,0.6,8.0,0.13,0.0,0.5,5.8,100.1,106.5
243,Golden State Warriors,95,Memphis Grizzlies,123,0,Stephen Curry,Golden State Warriors,G,34.17,11,...,4.2,6.9,5.6,27.9,1.09,0.45,2.64,10.4,117.2,105.4
2239,Philadelphia 76ers,132,Toronto Raptors,97,1,Paul Reed,Philadelphia 76ers,F,22.92,12,...,3.9,20.6,0.8,11.6,0.75,0.5,0.75,7.8,118.7,88.3


**clean new dataframe**

In [19]:
finals_cleaned_df = finals_df.drop(['Team1', 'Team1Score', 'Team2', 'Team2Score', 'FULL NAME', 'AGE', 'POS'], axis = 1)

finals_cleaned_df.sample(10)

Unnamed: 0,Team1Win,TEAM,GP,MPG,MIN\n,USG,TO,FTA,FT%,2PA,...,RPG,TRB,APG,AST%,SPG,BPG,TOPGT.,VI,ORTG,DRTG
1947,0,Memphis Grizzlies,11,30.5,63.5,25.1,10.7,25,0.64,100,...,2.7,4.7,2.7,13.8,1.0,0.27,2.0,6.2,91.3,104.2
2909,0,Milwaukee Bucks,11,11.4,23.8,8.9,23.2,2,1.0,12,...,1.5,6.5,0.9,10.9,0.73,0.0,0.55,5.0,107.0,102.5
874,1,Phoenix Suns,9,9.5,19.9,13.7,23.4,6,0.5,17,...,2.1,12.9,0.6,8.0,0.11,0.22,0.67,6.4,113.6,97.9
3085,0,Milwaukee Bucks,12,26.5,55.2,13.2,7.2,5,1.0,24,...,4.3,8.2,0.9,5.2,0.42,0.33,0.58,5.0,120.4,109.9
2020,1,Philadelphia 76ers,9,6.8,14.2,19.8,7.7,2,1.0,13,...,1.3,11.9,0.4,11.3,0.11,0.11,0.22,6.9,113.1,101.8
2289,0,Philadelphia 76ers,12,11.6,24.2,16.2,18.7,7,0.571,33,...,3.9,20.6,0.8,11.6,0.75,0.5,0.75,7.8,118.7,88.3
2384,1,Boston Celtics,11,36.4,75.9,13.3,7.5,12,0.833,54,...,9.4,14.5,3.4,13.9,1.09,1.36,0.82,8.2,134.5,97.7
890,1,Phoenix Suns,13,24.6,51.3,18.0,9.6,32,0.813,40,...,3.5,8.4,1.5,8.7,0.38,0.08,0.92,6.2,120.1,104.2
1038,1,Phoenix Suns,13,30.5,63.6,22.9,9.6,33,0.636,160,...,8.9,17.1,1.7,9.3,0.38,0.77,1.46,8.5,124.1,102.7
575,1,Miami Heat,11,29.4,61.3,10.8,7.7,4,1.0,39,...,5.6,11.6,2.0,9.8,0.55,0.27,0.55,6.3,141.1,102.0


In [21]:
finals_cleaned_df = finals_cleaned_df.rename(columns = {'Team1Win':'TeamWin'})

finals_cleaned_df.sample(5)

Unnamed: 0,TeamWin,TEAM,GP,MPG,MIN\n,USG,TO,FTA,FT%,2PA,...,RPG,TRB,APG,AST%,SPG,BPG,TOPGT.,VI,ORTG,DRTG
752,0,Miami Heat,5,28.9,60.1,14.6,16.9,5,0.8,13,...,3.6,7.6,5.2,24.3,1.0,0.6,1.6,6.8,95.0,104.0
2230,0,Philadelphia 76ers,9,6.8,14.2,19.8,7.7,2,1.0,13,...,1.3,11.9,0.4,11.3,0.11,0.11,0.22,6.9,113.1,101.8
2265,0,Philadelphia 76ers,12,26.6,55.5,15.1,12.7,1,0.0,13,...,3.1,7.1,0.8,4.9,0.92,0.33,1.08,4.2,103.9,108.1
32,0,Golden State Warriors,8,10.2,21.3,13.8,15.2,3,0.667,13,...,1.9,10.1,0.6,8.0,0.13,0.0,0.5,5.8,100.1,106.5
2470,1,Boston Celtics,3,2.0,4.1,22.8,0.0,0,0.0,2,...,0.7,19.1,0.0,0.0,0.0,0.0,0.0,0.0,,


In [34]:
finals_cleaned_df.dtypes

TeamWin      int64
TEAM        object
GP           int64
MPG        float64
MIN\n      float64
USG        float64
TO         float64
FTA          int64
FT%        float64
2PA          int64
2P%        float64
3PA          int64
3P%        float64
eFG        float64
TS         float64
PPG        float64
RPG        float64
TRB        float64
APG        float64
AST%       float64
SPG        float64
BPG        float64
TOPGT.     float64
VI         float64
ORTG       float64
DRTG       float64
dtype: object

**separate features from the target**

In [35]:
y = finals_cleaned_df.TeamWin.values
X = finals_cleaned_df.drop(columns = ['TeamWin', 'TEAM']).values

**split data into training and testing**

In [36]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 1)

**stadardize the features**

In [37]:
scaler = StandardScaler()

X_scaler = scaler.fit(X_train)

X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

**compile, train, and evaluate model**

In [41]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
number_input_features = len(X_train[0])
hidden_nodes_layer1 = 60
hidden_nodes_layer2 = 30

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(tf.keras.layers.Dense(units = hidden_nodes_layer1, activation = 'relu' , input_dim = number_input_features))

# Second hidden layer
nn.add(tf.keras.layers.Dense(units = hidden_nodes_layer2, activation = 'relu'))

# Output layer
nn.add(tf.keras.layers.Dense(units = 1, activation = 'sigmoid'))

# Check the structure of the model
nn.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 60)                1500      
                                                                 
 dense_1 (Dense)             (None, 30)                1830      
                                                                 
 dense_2 (Dense)             (None, 1)                 31        
                                                                 
Total params: 3,361
Trainable params: 3,361
Non-trainable params: 0
_________________________________________________________________


2022-05-23 01:37:10.409956: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [42]:
# Compile the model
nn.compile(loss = 'binary_crossentropy', optimizer = 'adam', metrics = ['accuracy']

SyntaxError: unexpected EOF while parsing (445428624.py, line 2)

**logistic regression model**

In [None]:
# instantiate logistic regression model
# model = LogisticRegression(random_state = 1)

# fit and train the model
# model.fit(X_train, y_train)

# make predictions
# pred = model.predict(X_test_scaled)

In [None]:
# accuracy score
# print(accuracy_score(y_test, pred))

In [None]:
# classification report
# print(classification_report(y_test, pred))