In [1]:
import numpy as np
import pandas as pd
from sklearn.svm import LinearSVC, SVC
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler

In [2]:
train_east_df = pd.read_csv('../CSV/NBA-teams-data-east.csv')
curr_east_df = pd.read_csv('../CSV/NBA-2022-team-data-east.csv')

train_west_df = pd.read_csv('../CSV/NBA-teams-data-west.csv')
curr_west_df = pd.read_csv('../CSV/NBA-2022-team-data-west.csv')

In [3]:
X_east = train_east_df.drop(['Team','Year','Conf','made_playoff'], axis = 1)
y_east = train_east_df['made_playoff']

X_west = train_west_df.drop(['Team','Year','Conf','made_playoff'], axis = 1)
y_west = train_west_df['made_playoff']

In [4]:
X_train_east, X_test_east, y_train_east, y_test_east = train_test_split(X_east, y_east, test_size = .30,random_state=42)
X_train_west, X_test_west, y_train_west, y_test_west = train_test_split(X_west, y_west,test_size = .30, random_state=42)

In [5]:
LinSVC_east = LinearSVC(max_iter = 10000, random_state = 42, C = 1)
LinSVC_east.fit(X_train_east, y_train_east)
LinSVC_west = LinearSVC(max_iter = 10000, random_state = 42, C = 1)
LinSVC_west.fit(X_train_west,y_train_west)

LinearSVC(C=1, max_iter=10000, random_state=42)

In [6]:
print("LinSVC East")
print(f"Training Data Score: {LinSVC_east.score(X_train_east, y_train_east)}")
print(f"Testing Data Score: {LinSVC_east.score(X_test_east, y_test_east)}")

print("LinSVC West")
print(f"Training Data Score: {LinSVC_west.score(X_train_west, y_train_west)}")
print(f"Testing Data Score: {LinSVC_west.score(X_test_west, y_test_west)}")

LinSVC East
Training Data Score: 0.9445843828715366
Testing Data Score: 0.9181286549707602
LinSVC West
Training Data Score: 0.9318181818181818
Testing Data Score: 0.9176470588235294


In [7]:
coeff_df = pd.DataFrame(train_east_df.columns.delete(0))
coeff_df.columns = ['Feature']
coeff_df["Correlation"] = pd.Series(LinSVC_east.coef_[0])

coeff_df.sort_values(by='Correlation', ascending=False)

Unnamed: 0,Feature,Correlation
20,top_PTS,2.468343
4,3P%,0.990451
0,FGA,0.913893
9,FTA,0.605837
14,STL,0.131236
12,DRB,0.122146
18,PTS,0.067584
21,top_5_PTS,0.060264
25,top_5_stats_sum,0.05807
16,TOV,0.055294


In [8]:
coeff_df = pd.DataFrame(train_west_df.columns.delete(0))
coeff_df.columns = ['Feature']
coeff_df["Correlation"] = pd.Series(LinSVC_west.coef_[0])
coeff_df.sort_values(by='Correlation', ascending=False)

Unnamed: 0,Feature,Correlation
20,top_PTS,1.723019
19,W%,0.536536
6,2PA,0.516352
3,3PA,0.505658
10,FT%,0.449534
8,FT,0.176705
25,top_5_stats_sum,0.16763
11,ORB,0.153284
14,STL,0.146034
15,BLK,0.090565


In [9]:
coeff_df['Feature'].tolist()

['FGA',
 'FG%',
 '3P',
 '3PA',
 '3P%',
 '2P',
 '2PA',
 '2P%',
 'FT',
 'FTA',
 'FT%',
 'ORB',
 'DRB',
 'AST',
 'STL',
 'BLK',
 'TOV',
 'PF',
 'PTS',
 'W%',
 'top_PTS',
 'top_5_PTS',
 'top_5_AST',
 'top_5_DRB',
 'top_5_ORB',
 'top_5_stats_sum',
 'Team',
 'Year',
 'Conf',
 'made_playoff']

In [10]:
drop_cols_east = ['DRB','top_5_PTS','top_5_stats_sum','TOV','ORB','2P','AST','Team','PF','top_5_DRB'
            ,'2PA','FT','top_5_ORB','3P','FG%','BLK','2P%','FT%','Year','Conf','made_playoff']
drop_cols_west = ['FT','top_5_stats_sum','ORB','STL','BLK','PTS','top_5_PTS','TOV',
                  'top_5_ORB','AST','top_5_DRB','2P%','PF','top_5_AST','Team','2P','DRB','FGA','FG%',
                  '3P%','FTA','3P','Year','Conf','made_playoff']

In [11]:
X_east = train_east_df.drop(drop_cols_east, axis = 1)
y_east = train_east_df['made_playoff']

X_west = train_west_df.drop(drop_cols_west, axis = 1)
y_west = train_west_df['made_playoff']

In [12]:
X_train_east, X_test_east, y_train_east, y_test_east = train_test_split(X_east, y_east, test_size = .30,random_state=42)
X_train_west, X_test_west, y_train_west, y_test_west = train_test_split(X_west, y_west,test_size = .30, random_state=42)

In [13]:
LinSVC_east = LinearSVC(max_iter = 10000, random_state = 42, C = 1)
LinSVC_east.fit(X_train_east, y_train_east)
LinSVC_west = LinearSVC(max_iter = 10000, random_state = 42, C = 1)
LinSVC_west.fit(X_train_west,y_train_west)

LinearSVC(C=1, max_iter=10000, random_state=42)

In [14]:
print("LinSVC East")
print(f"Training Data Score: {LinSVC_east.score(X_train_east, y_train_east)}")
print(f"Testing Data Score: {LinSVC_east.score(X_test_east, y_test_east)}")

print("LinSVC West")
print(f"Training Data Score: {LinSVC_west.score(X_train_west, y_train_west)}")
print(f"Testing Data Score: {LinSVC_west.score(X_test_west, y_test_west)}")

LinSVC East
Training Data Score: 0.9395465994962217
Testing Data Score: 0.9298245614035088
LinSVC West
Training Data Score: 0.9141414141414141
Testing Data Score: 0.9058823529411765


In [15]:
teams_east = curr_east_df['Team']
teams_west = curr_west_df['Team']
curr_east_df = curr_east_df.drop(drop_cols_east, axis = 1)
curr_west_df = curr_west_df.drop(drop_cols_west, axis = 1)

In [16]:
scaler = StandardScaler() 
curr_east_df = pd.DataFrame(scaler.fit_transform(curr_east_df), columns = curr_east_df.columns)

In [17]:
predictions_east = LinSVC_east.predict(curr_east_df)
results_east = pd.DataFrame()
results_east['Team'] = teams_east
results_east['predictions'] = predictions_east
print(results_east.sort_values('predictions', ascending = False))

                   Team  predictions
0         Chicago Bulls            1
3    Washington Wizards            1
5    Philadelphia 76ers            1
6       New York Knicks            1
8        Boston Celtics            1
9            Miami Heat            1
11        Brooklyn Nets            1
12        Atlanta Hawks            1
13      Milwaukee Bucks            1
14    Charlotte Hornets            1
1         Orlando Magic            0
2   Cleveland Cavaliers            0
4       Toronto Raptors            0
7        Indiana Pacers            0
10      Detroit Pistons            0


In [18]:
predictions_west = LinSVC_west.predict(curr_west_df)
results_west = pd.DataFrame()
results_west['Team'] = teams_west
results_west['predictions'] = predictions_west
print(results_west.sort_values('predictions', ascending = False))

                      Team  predictions
1                Utah Jazz            1
4           Denver Nuggets            1
5             Phoenix Suns            1
6         Dallas Mavericks            1
7     Los Angeles Clippers            1
8        Memphis Grizzlies            1
12      Los Angeles Lakers            1
13  Portland Trail Blazers            1
14   Golden State Warriors            1
0         Sacramento Kings            0
2          Houston Rockets            0
3     New Orleans Pelicans            0
9        San Antonio Spurs            0
10  Minnesota Timberwolves            0
11   Oklahoma City Thunder            0


In [19]:
rbf_SVC_east = SVC(gamma = 'scale')
rbf_SVC_east.fit(X_train_east,y_train_east)
rbf_SVC_west = SVC(gamma = 'scale')
rbf_SVC_west.fit(X_train_west,y_train_west)

SVC()

In [20]:
print("rbf_SVC East")
print(f"Training Data Score: {rbf_SVC_east.score(X_train_east, y_train_east)}")
print(f"Testing Data Score: {rbf_SVC_east.score(X_test_east, y_test_east)}")

print("rbf_SVC West")
print(f"Training Data Score: {rbf_SVC_west.score(X_train_west, y_train_west)}")
print(f"Testing Data Score: {rbf_SVC_west.score(X_test_west, y_test_west)}")

rbf_SVC East
Training Data Score: 0.9596977329974811
Testing Data Score: 0.8888888888888888
rbf_SVC West
Training Data Score: 0.9368686868686869
Testing Data Score: 0.9235294117647059


In [21]:
param_grid = {'C': [0.1, 1, 10, 100, 1000],
              'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
              'kernel': ['rbf'],
             'probability':[True]}
 
grid_east = GridSearchCV(SVC(), param_grid, refit = True, verbose = 3)
grid_east.fit(X_train_east,y_train_east)

Fitting 5 folds for each of 25 candidates, totalling 125 fits
[CV 1/5] END ...C=0.1, gamma=1, kernel=rbf, probability=True; total time=   0.0s
[CV 2/5] END ...C=0.1, gamma=1, kernel=rbf, probability=True; total time=   0.0s
[CV 3/5] END ...C=0.1, gamma=1, kernel=rbf, probability=True; total time=   0.0s
[CV 4/5] END ...C=0.1, gamma=1, kernel=rbf, probability=True; total time=   0.0s
[CV 5/5] END ...C=0.1, gamma=1, kernel=rbf, probability=True; total time=   0.0s
[CV 1/5] END .C=0.1, gamma=0.1, kernel=rbf, probability=True; total time=   0.0s
[CV 2/5] END .C=0.1, gamma=0.1, kernel=rbf, probability=True; total time=   0.0s
[CV 3/5] END .C=0.1, gamma=0.1, kernel=rbf, probability=True; total time=   0.0s
[CV 4/5] END .C=0.1, gamma=0.1, kernel=rbf, probability=True; total time=   0.0s
[CV 5/5] END .C=0.1, gamma=0.1, kernel=rbf, probability=True; total time=   0.0s
[CV 1/5] END C=0.1, gamma=0.01, kernel=rbf, probability=True; total time=   0.0s
[CV 2/5] END C=0.1, gamma=0.01, kernel=rbf, pro

[CV 1/5] END ..C=1000, gamma=1, kernel=rbf, probability=True; total time=   0.0s
[CV 2/5] END ..C=1000, gamma=1, kernel=rbf, probability=True; total time=   0.0s
[CV 3/5] END ..C=1000, gamma=1, kernel=rbf, probability=True; total time=   0.0s
[CV 4/5] END ..C=1000, gamma=1, kernel=rbf, probability=True; total time=   0.0s
[CV 5/5] END ..C=1000, gamma=1, kernel=rbf, probability=True; total time=   0.0s
[CV 1/5] END C=1000, gamma=0.1, kernel=rbf, probability=True; total time=   0.0s
[CV 2/5] END C=1000, gamma=0.1, kernel=rbf, probability=True; total time=   0.0s
[CV 3/5] END C=1000, gamma=0.1, kernel=rbf, probability=True; total time=   0.0s
[CV 4/5] END C=1000, gamma=0.1, kernel=rbf, probability=True; total time=   0.0s
[CV 5/5] END C=1000, gamma=0.1, kernel=rbf, probability=True; total time=   0.0s
[CV 1/5] END C=1000, gamma=0.01, kernel=rbf, probability=True; total time=   0.0s
[CV 2/5] END C=1000, gamma=0.01, kernel=rbf, probability=True; total time=   0.0s
[CV 3/5] END C=1000, gamma

GridSearchCV(estimator=SVC(),
             param_grid={'C': [0.1, 1, 10, 100, 1000],
                         'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
                         'kernel': ['rbf'], 'probability': [True]},
             verbose=3)

In [22]:
param_grid = {'C': [0.1, 1, 10, 100, 1000],
              'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
              'kernel': ['rbf'],
             'probability':[True]}
 
grid_west = GridSearchCV(SVC(), param_grid, refit = True, verbose = 3)
grid_west.fit(X_train_west,y_train_west)

Fitting 5 folds for each of 25 candidates, totalling 125 fits
[CV 1/5] END ...C=0.1, gamma=1, kernel=rbf, probability=True; total time=   0.0s
[CV 2/5] END ...C=0.1, gamma=1, kernel=rbf, probability=True; total time=   0.0s
[CV 3/5] END ...C=0.1, gamma=1, kernel=rbf, probability=True; total time=   0.0s
[CV 4/5] END ...C=0.1, gamma=1, kernel=rbf, probability=True; total time=   0.0s
[CV 5/5] END ...C=0.1, gamma=1, kernel=rbf, probability=True; total time=   0.0s
[CV 1/5] END .C=0.1, gamma=0.1, kernel=rbf, probability=True; total time=   0.0s
[CV 2/5] END .C=0.1, gamma=0.1, kernel=rbf, probability=True; total time=   0.0s
[CV 3/5] END .C=0.1, gamma=0.1, kernel=rbf, probability=True; total time=   0.0s
[CV 4/5] END .C=0.1, gamma=0.1, kernel=rbf, probability=True; total time=   0.0s
[CV 5/5] END .C=0.1, gamma=0.1, kernel=rbf, probability=True; total time=   0.0s
[CV 1/5] END C=0.1, gamma=0.01, kernel=rbf, probability=True; total time=   0.0s
[CV 2/5] END C=0.1, gamma=0.01, kernel=rbf, pro

[CV 3/5] END ..C=1000, gamma=1, kernel=rbf, probability=True; total time=   0.0s
[CV 4/5] END ..C=1000, gamma=1, kernel=rbf, probability=True; total time=   0.0s
[CV 5/5] END ..C=1000, gamma=1, kernel=rbf, probability=True; total time=   0.0s
[CV 1/5] END C=1000, gamma=0.1, kernel=rbf, probability=True; total time=   0.0s
[CV 2/5] END C=1000, gamma=0.1, kernel=rbf, probability=True; total time=   0.0s
[CV 3/5] END C=1000, gamma=0.1, kernel=rbf, probability=True; total time=   0.0s
[CV 4/5] END C=1000, gamma=0.1, kernel=rbf, probability=True; total time=   0.0s
[CV 5/5] END C=1000, gamma=0.1, kernel=rbf, probability=True; total time=   0.0s
[CV 1/5] END C=1000, gamma=0.01, kernel=rbf, probability=True; total time=   0.0s
[CV 2/5] END C=1000, gamma=0.01, kernel=rbf, probability=True; total time=   0.0s
[CV 3/5] END C=1000, gamma=0.01, kernel=rbf, probability=True; total time=   0.0s
[CV 4/5] END C=1000, gamma=0.01, kernel=rbf, probability=True; total time=   0.0s
[CV 5/5] END C=1000, gam

GridSearchCV(estimator=SVC(),
             param_grid={'C': [0.1, 1, 10, 100, 1000],
                         'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
                         'kernel': ['rbf'], 'probability': [True]},
             verbose=3)

In [23]:
print("grid East")
print(f"Training Data Score: {grid_east.score(X_train_east, y_train_east)}")
print(f"Testing Data Score: {grid_east.score(X_test_east, y_test_east)}")

print("grid West")
print(f"Training Data Score: {grid_west.score(X_train_west, y_train_west)}")
print(f"Testing Data Score: {grid_west.score(X_test_west, y_test_west)}")

grid East
Training Data Score: 0.9319899244332494
Testing Data Score: 0.9239766081871345
grid West
Training Data Score: 0.9191919191919192
Testing Data Score: 0.9235294117647059


In [24]:
predictions_east = grid_east.predict(curr_east_df)
predictions_east_prob = grid_east.predict_proba(curr_east_df)[:,1]
predictions_west = grid_west.predict(curr_west_df)
predictions_west_prob = grid_west.predict_proba(curr_west_df)[:,1]

In [25]:
results_east = pd.DataFrame()
results_east['Team'] = teams_east
results_east['predictions'] = predictions_east
results_east['prediction probability'] = predictions_east_prob
print(results_east.sort_values('prediction probability', ascending = False))

                   Team  predictions  prediction probability
11        Brooklyn Nets            1                1.000000
9            Miami Heat            1                0.999990
0         Chicago Bulls            1                0.996540
3    Washington Wizards            1                0.996281
13      Milwaukee Bucks            1                0.987168
14    Charlotte Hornets            1                0.981060
6       New York Knicks            1                0.937375
8        Boston Celtics            1                0.881454
5    Philadelphia 76ers            1                0.862537
12        Atlanta Hawks            1                0.816318
2   Cleveland Cavaliers            0                0.427048
4       Toronto Raptors            0                0.212694
7        Indiana Pacers            0                0.049706
10      Detroit Pistons            0                0.000015
1         Orlando Magic            0                0.000012


In [26]:
results_west = pd.DataFrame()
results_west['Team'] = teams_west
results_west['predictions'] = predictions_west
results_west['prediction probability'] = predictions_west_prob
print(results_west.sort_values('prediction probability', ascending = False))

                      Team  predictions  prediction probability
14   Golden State Warriors            1                1.000000
5             Phoenix Suns            1                1.000000
1                Utah Jazz            1                0.985643
6         Dallas Mavericks            1                0.890659
7     Los Angeles Clippers            1                0.868424
13  Portland Trail Blazers            1                0.715284
8        Memphis Grizzlies            1                0.676011
12      Los Angeles Lakers            1                0.646166
4           Denver Nuggets            1                0.584961
10  Minnesota Timberwolves            0                0.396653
0         Sacramento Kings            0                0.035531
11   Oklahoma City Thunder            0                0.005905
9        San Antonio Spurs            0                0.001720
3     New Orleans Pelicans            0                0.000580
2          Houston Rockets            0 

In [29]:
results_east['Conf'] = 'East'
results_west['Conf'] = 'West'

In [30]:
results = pd.concat([results_east,results_west])

In [31]:
results.to_csv('../Results/SVC-Results.csv')