In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split,GridSearchCV, RandomizedSearchCV

In [2]:
train_east_df = pd.read_csv('../CSV/NBA-teams-data-east.csv')
curr_east_df = pd.read_csv('../CSV/NBA-2022-team-data-east.csv')

train_west_df = pd.read_csv('../CSV/NBA-teams-data-west.csv')
curr_west_df = pd.read_csv('../CSV/NBA-2022-team-data-west.csv')

In [3]:
X_east = train_east_df.drop(['Team','Year','Conf','made_playoff'], axis = 1)
y_east = train_east_df['made_playoff']

X_west = train_west_df.drop(['Team','Year','Conf','made_playoff'], axis = 1)
y_west = train_west_df['made_playoff']

In [4]:
X_train_east, X_test_east, y_train_east, y_test_east = train_test_split(X_east, y_east, test_size = .30,random_state=42)
X_train_west, X_test_west, y_train_west, y_test_west = train_test_split(X_west, y_west,test_size = .30, random_state=42)

In [5]:
random_forest_east = RandomForestClassifier(oob_score = True)
random_forest_east.fit(X_train_east,y_train_east)

random_forest_west = RandomForestClassifier(oob_score = True)
random_forest_west.fit(X_train_west,y_train_west)

RandomForestClassifier(oob_score=True)

In [6]:
print("random_forest East")
print(f"Training Data Score: {random_forest_east.score(X_train_east,y_train_east)}")
print(f"Testing Data Score: {random_forest_east.score(X_test_east, y_test_east)}")

print("random_forest West")
print(f"Training Data Score: {random_forest_west.score(X_train_west, y_train_west)}")
print(f"Testing Data Score: {random_forest_west.score(X_test_west, y_test_west)}")

random_forest East
Training Data Score: 1.0
Testing Data Score: 0.9005847953216374
random_forest West
Training Data Score: 1.0
Testing Data Score: 0.9058823529411765


In [7]:
# Number of trees in random forest
n_estimators = [int(x) for x in np.linspace(start = 200, stop = 2000, num = 10)]
# Number of features to consider at every split
max_features = ['auto', 'sqrt']
# Maximum number of levels in tree
max_depth = [int(x) for x in np.linspace(10, 110, num = 11)]
max_depth.append(None)
# Minimum number of samples required to split a node
min_samples_split = [2, 5, 10]
# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 2, 4]
# Create the random grid
random_grid = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf}

In [8]:
print(random_grid)

{'n_estimators': [200, 400, 600, 800, 1000, 1200, 1400, 1600, 1800, 2000], 'max_features': ['auto', 'sqrt'], 'max_depth': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, None], 'min_samples_split': [2, 5, 10], 'min_samples_leaf': [1, 2, 4]}


In [9]:
RandomForest = RandomForestClassifier()

In [10]:
#Create a randomSearchCV to find better parameters
RF_random_search_east = RandomizedSearchCV(estimator = RandomForest, param_distributions = random_grid,n_iter = 20, 
                                        cv = 3, verbose=2, random_state=42, n_jobs = -1)
RF_random_search_west = RandomizedSearchCV(estimator = RandomForest, param_distributions = random_grid,n_iter = 20, 
                                           cv = 3, verbose=2, random_state=42, n_jobs = -1)

In [11]:
RF_random_search_east.fit(X_train_east,y_train_east)
RF_random_search_west.fit(X_train_west,y_train_west)

Fitting 3 folds for each of 20 candidates, totalling 60 fits
Fitting 3 folds for each of 20 candidates, totalling 60 fits


RandomizedSearchCV(cv=3, estimator=RandomForestClassifier(), n_iter=20,
                   n_jobs=-1,
                   param_distributions={'max_depth': [10, 20, 30, 40, 50, 60,
                                                      70, 80, 90, 100, 110,
                                                      None],
                                        'max_features': ['auto', 'sqrt'],
                                        'min_samples_leaf': [1, 2, 4],
                                        'min_samples_split': [2, 5, 10],
                                        'n_estimators': [200, 400, 600, 800,
                                                         1000, 1200, 1400, 1600,
                                                         1800, 2000]},
                   random_state=42, verbose=2)

In [12]:
print("RFrandomSearchCV East")
print(f"Training Data Score: {RF_random_search_east.score(X_train_east,y_train_east)}")
print(f"Testing Data Score: {RF_random_search_east.score(X_test_east, y_test_east)}")
print("RFrandomSearchCV West")
print(f"Training Data Score: {RF_random_search_west.score(X_train_west,y_train_west)}")
print(f"Testing Data Score: {RF_random_search_west.score(X_test_west, y_test_west)}")

RFrandomSearchCV East
Training Data Score: 1.0
Testing Data Score: 0.9122807017543859
RFrandomSearchCV West
Training Data Score: 0.9696969696969697
Testing Data Score: 0.9117647058823529


In [13]:
teams_east = curr_east_df['Team']
teams_west = curr_west_df['Team']
curr_east_df = curr_east_df.drop(['Team','Year','Conf','made_playoff'], axis = 1)
curr_west_df = curr_west_df.drop(['Team','Year','Conf','made_playoff'], axis = 1)

In [14]:
predictions_east = RF_random_search_east.predict(curr_east_df)
predictions_east_prob = RF_random_search_east.predict_proba(curr_east_df)[:,1]
predictions_west = RF_random_search_west.predict(curr_west_df)
predictions_west_prob = RF_random_search_west.predict_proba(curr_west_df)[:,1]

In [15]:
results_east = pd.DataFrame()
results_east['Team'] = teams_east
results_east['predictions'] = predictions_east
results_east['prediction probability'] = predictions_east_prob
print(results_east.sort_values('prediction probability', ascending = False))

                   Team  predictions  prediction probability
5    Philadelphia 76ers            1                0.957222
3    Washington Wizards            1                0.952416
13      Milwaukee Bucks            1                0.934442
8        Boston Celtics            1                0.925359
9            Miami Heat            1                0.883915
0         Chicago Bulls            1                0.880192
12        Atlanta Hawks            1                0.878946
14    Charlotte Hornets            1                0.866970
11        Brooklyn Nets            1                0.828180
6       New York Knicks            1                0.800701
2   Cleveland Cavaliers            1                0.523712
4       Toronto Raptors            0                0.441443
7        Indiana Pacers            0                0.248902
1         Orlando Magic            0                0.081640
10      Detroit Pistons            0                0.048379


In [16]:
results_west = pd.DataFrame()
results_west['Team'] = teams_west
results_west['predictions'] = predictions_west
results_west['prediction probability'] = predictions_west_prob
print(results_west.sort_values('prediction probability', ascending = False))

                      Team  predictions  prediction probability
5             Phoenix Suns            1                0.979040
1                Utah Jazz            1                0.951586
14   Golden State Warriors            1                0.936667
6         Dallas Mavericks            1                0.783607
7     Los Angeles Clippers            1                0.767407
13  Portland Trail Blazers            1                0.726201
8        Memphis Grizzlies            1                0.690869
12      Los Angeles Lakers            1                0.648654
4           Denver Nuggets            1                0.595147
10  Minnesota Timberwolves            0                0.464882
9        San Antonio Spurs            0                0.411515
0         Sacramento Kings            0                0.135367
11   Oklahoma City Thunder            0                0.111549
2          Houston Rockets            0                0.106882
3     New Orleans Pelicans            0 

In [17]:
results_east['Conf'] = 'East'
results_west['Conf'] = 'West'

In [18]:
results = pd.concat([results_east,results_west])

In [19]:
results.to_csv('../Results/Random-Forest-Results.csv')