In [7]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import pandas as pd

# NOTE: This file does Hyperparameter tuning for the different models, to find the most optimal parameters

# Read data from data cleaning
total_data_df = pd.read_csv('output.csv')
# Print the columns
print(total_data_df.columns)

# Select features and target
features =['Age' ,'G', 'GS', 'MP', 'FG', 
           'FG%', '3P', '3P%', 'eFG%', 'ORB', 
           'DRB', 'AST', 'PTS', 'PER', 'FTr', 
           'USG%', 'OWS', 'DWS', 'WS', 'OBPM', 
           'DBPM', 'VORP', 'Tm_Rcrd']
target = 'MVP_Shr'
season_list = [f"{year}_{year + 1 - 2000:02}" for year in range(2000, 2023)]
mse_list = []
r_squared_list = []

def model_testing(model):
    mvp_list = []
    mse_sum = 0
    r_squared_sum = 0
    for season in season_list:
        # Split data into training and testing sets
        X_train = total_data_df[total_data_df['Szn'] != season][features]
        X_test = total_data_df[total_data_df['Szn'] == season][features]
        y_train = total_data_df[total_data_df['Szn'] != season][target]
        y_test = total_data_df[total_data_df['Szn'] == season][target]

        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # add the top candidates to the prediction
        predictions = pd.DataFrame({'Player': total_data_df[total_data_df['Szn'] == season]['Plyr'], 'Predicted MVP Share': y_pred})
        mvp_list.append(predictions.sort_values(by='Predicted MVP Share', ascending=False) .head(1)['Player'].values[0])         # values[0] is to select the value since dataframes are by default indexed
        
        mse = mean_squared_error(y_test, y_pred)
        r_squared = r2_score(y_test, y_pred)
        mse_sum += mse
        r_squared_sum += r_squared
    
    mse_avg = round(mse_sum / len(season_list), 3)
    r_squared_avg = round(r_squared_sum / len(season_list), 3)

    print('AVG MSE and R-squared:', mse_avg, r_squared_avg)
    mse_list.append(mse_avg)
    r_squared_list.append(r_squared_avg)
    
    return mvp_list


Index(['Plyr', 'Age', 'Tm', 'G', 'GS', 'MP', 'FG', 'FGA', 'FG%', '3P', '3PA',
       '3P%', '2P', '2PA', '2P%', 'eFG%', 'FT', 'FTA', 'FT%', 'ORB', 'DRB',
       'TRB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS', 'id', 'Szn', 'PER',
       'TS%', '3PAr', 'FTr', 'ORB%', 'DRB%', 'TRB%', 'AST%', 'STL%', 'BLK%',
       'TOV%', 'USG%', 'OWS', 'DWS', 'WS', 'WS/48', 'OBPM', 'DBPM', 'BPM',
       'VORP', 'MVP_Rnk', 'MVP_Shr', 'Tm_Rcrd', 'Pos_C', 'Pos_PF', 'Pos_PG',
       'Pos_SF', 'Pos_SG', 'MVP_strk'],
      dtype='object')


### Bagging Regressor

In [None]:
from sklearn.ensemble import BaggingRegressor
results = []
for n in range(150,210,10):
    print(f'Testing Bagging Regressor n_estimators={n}')
    model_result = model_testing(BaggingRegressor(n_estimators=n))
    # append table dataframe to result
    results.append(model_result)

# Make a dataframe, where each column is a model, and each row is a season
results_df_bagging = pd.DataFrame()
mvp_winners = ['Allen Iverson', 'Tim Duncan', 'Tim Duncan', 'Kevin Garnett', 'Steve Nash', 
               'Steve Nash', 'Dirk Nowitzki', 'Kobe Bryant', 'LeBron James', 'LeBron James', 
               'Derick Rose', 'LeBron James', 'LeBron James', 'Kevin Durant', 'Stephen Curry', 
               'Stephen Curry', 'Russell Westbrook', 'James Harden', 'Giannis Antetokounmpo', 
               'Giannis Antetokounmpo', 'Nikola Jokić', 'Nikola Jokić', 'Joel Embiid']
results_df_bagging['Truth'] = mvp_winners

for i in range(len(results)):
    results_df_bagging[f'Testing Bagging Regressor n_estimators={50 + i * 10}'] = results[i]

results_df_bagging

### Random Forest Regressor

In [3]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import GridSearchCV
import numpy as np

results = []

for season in season_list:
    # Split data into training and testing sets
    X_train = total_data_df[total_data_df['Szn'] != season][features]
    X_test = total_data_df[total_data_df['Szn'] == season][features]
    y_train = total_data_df[total_data_df['Szn'] != season][target]
    y_test = total_data_df[total_data_df['Szn'] == season][target]

    rfr = RandomForestRegressor()

    params = {
        'n_estimators': np.arange(50, 110, 50),
        'max_features': np.arange(5, 20, 5)
    }

    grid_search = GridSearchCV(estimator=rfr, param_grid=params, cv=5, n_jobs=-1, verbose=1, scoring='neg_mean_absolute_error', return_train_score=True)
    grid_search.fit(X_train, y_train)

    best_params = grid_search.best_params_
    print("Best parameters:", best_params)

Fitting 5 folds for each of 6 candidates, totalling 30 fits
Best parameters: {'max_features': 15, 'n_estimators': 100}
Fitting 5 folds for each of 6 candidates, totalling 30 fits
Best parameters: {'max_features': 5, 'n_estimators': 50}
Fitting 5 folds for each of 6 candidates, totalling 30 fits
Best parameters: {'max_features': 5, 'n_estimators': 50}
Fitting 5 folds for each of 6 candidates, totalling 30 fits
Best parameters: {'max_features': 10, 'n_estimators': 100}
Fitting 5 folds for each of 6 candidates, totalling 30 fits
Best parameters: {'max_features': 5, 'n_estimators': 100}
Fitting 5 folds for each of 6 candidates, totalling 30 fits
Best parameters: {'max_features': 5, 'n_estimators': 100}
Fitting 5 folds for each of 6 candidates, totalling 30 fits
Best parameters: {'max_features': 15, 'n_estimators': 50}
Fitting 5 folds for each of 6 candidates, totalling 30 fits
Best parameters: {'max_features': 15, 'n_estimators': 100}
Fitting 5 folds for each of 6 candidates, totalling 30 

In [4]:
from sklearn.ensemble import RandomForestRegressor
results = []
for n in range(50,110,50):
    for i in range(5,20,5):
        print(f'Testing Random Forest Regressor n_estimators={n}, max_features={i}')
        model_result = model_testing(RandomForestRegressor(n_estimators=n, max_features=i))
        # append table dataframe to result
        results.append(model_result)


Testing Random Forest Regressor n_estimators=50, max_features=5
Testing Random Forest Regressor n_estimators=50, max_features=10
Testing Random Forest Regressor n_estimators=50, max_features=15
Testing Random Forest Regressor n_estimators=100, max_features=5
Testing Random Forest Regressor n_estimators=100, max_features=10
Testing Random Forest Regressor n_estimators=100, max_features=15


In [6]:
# Make a dataframe, where each column is a model, and each row is a season
results_df = pd.DataFrame()
mvp_winners = ['Allen Iverson', 'Tim Duncan', 'Tim Duncan', 'Kevin Garnett', 'Steve Nash', 
               'Steve Nash', 'Dirk Nowitzki', 'Kobe Bryant', 'LeBron James', 'LeBron James', 
               'Derick Rose', 'LeBron James', 'LeBron James', 'Kevin Durant', 'Stephen Curry', 
               'Stephen Curry', 'Russell Westbrook', 'James Harden', 'Giannis Antetokounmpo', 
               'Giannis Antetokounmpo', 'Nikola Jokić', 'Nikola Jokić', 'Joel Embiid']
results_df['Truth'] = mvp_winners

for i in range(2):
    for j in range(3):
        results_df[f'Testing Random Forest Regressor n_estimators={50 + i * 10}, max_features={(j+1)*5}'] = results[i+j]

results_df

Unnamed: 0,Truth,"Testing Random Forest Regressor n_estimators=50, max_features=5","Testing Random Forest Regressor n_estimators=50, max_features=10","Testing Random Forest Regressor n_estimators=50, max_features=15","Testing Random Forest Regressor n_estimators=60, max_features=5","Testing Random Forest Regressor n_estimators=60, max_features=10","Testing Random Forest Regressor n_estimators=60, max_features=15"
0,Allen Iverson,Shaquille O'Neal,Shaquille O'Neal,Shaquille O'Neal,Shaquille O'Neal,Shaquille O'Neal,Shaquille O'Neal
1,Tim Duncan,Tim Duncan,Tim Duncan,Tim Duncan,Tim Duncan,Tim Duncan,Tim Duncan
2,Tim Duncan,Tracy McGrady,Tracy McGrady,Tracy McGrady,Tracy McGrady,Tracy McGrady,Tracy McGrady
3,Kevin Garnett,Kevin Garnett,Kevin Garnett,Kevin Garnett,Kevin Garnett,Kevin Garnett,Kevin Garnett
4,Steve Nash,Kevin Garnett,Kevin Garnett,Kevin Garnett,Kevin Garnett,Kevin Garnett,Kevin Garnett
5,Steve Nash,Dirk Nowitzki,Dirk Nowitzki,Dirk Nowitzki,Dirk Nowitzki,Dirk Nowitzki,Dirk Nowitzki
6,Dirk Nowitzki,Dirk Nowitzki,Steve Nash,Steve Nash,Steve Nash,Steve Nash,Steve Nash
7,Kobe Bryant,LeBron James,LeBron James,Chris Paul,LeBron James,Chris Paul,LeBron James
8,LeBron James,LeBron James,LeBron James,LeBron James,LeBron James,LeBron James,LeBron James
9,LeBron James,LeBron James,LeBron James,LeBron James,LeBron James,LeBron James,LeBron James


In [7]:
# Accuracy Calculation (correct/23)
for i in range(len(results)):
    correct = 0
    for j in range(len(results[0])):
        if results[i][j] == mvp_winners[j]:
            correct += 1
    print(correct/23)

0.6521739130434783
0.5217391304347826
0.5652173913043478
0.6086956521739131
0.6086956521739131
0.5652173913043478


### Decision Tree Regressor

In [23]:
from sklearn.tree import DecisionTreeRegressor
results = []
for i in range(5,55,5):
    print(f'Testing Decision Tree Regressor max_depth={i}')
    model_result = model_testing(DecisionTreeRegressor(max_features=i))
    # append table dataframe to result
    results.append(model_result)

# Make a dataframe, where each column is a model, and each row is a season
results_df_decision = pd.DataFrame()

mvp_winners = ['Allen Iverson', 'Tim Duncan', 'Tim Duncan', 'Kevin Garnett', 'Steve Nash', 
               'Steve Nash', 'Dirk Nowitzki', 'Kobe Bryant', 'LeBron James', 'LeBron James', 
               'Derick Rose', 'LeBron James', 'LeBron James', 'Kevin Durant', 'Stephen Curry', 
               'Stephen Curry', 'Russell Westbrook', 'James Harden', 'Giannis Antetokounmpo', 
               'Giannis Antetokounmpo', 'Nikola Jokić', 'Nikola Jokić', 'Joel Embiid']
results_df_decision['Truth'] = mvp_winners


for i in range(len(results)):
    results_df_decision[f'Testing Random Forest Regressor max_depth={(i+1)*5}'] = results[i]

results_df_decision

Testing Decision Tree Regressor max_depth=5
Testing Decision Tree Regressor max_depth=10
Testing Decision Tree Regressor max_depth=15
Testing Decision Tree Regressor max_depth=20
Testing Decision Tree Regressor max_depth=25
Testing Decision Tree Regressor max_depth=30
Testing Decision Tree Regressor max_depth=35
Testing Decision Tree Regressor max_depth=40
Testing Decision Tree Regressor max_depth=45
Testing Decision Tree Regressor max_depth=50


Unnamed: 0,Truth,Testing Random Forest Regressor max_depth=5,Testing Random Forest Regressor max_depth=10,Testing Random Forest Regressor max_depth=15,Testing Random Forest Regressor max_depth=20,Testing Random Forest Regressor max_depth=25,Testing Random Forest Regressor max_depth=30,Testing Random Forest Regressor max_depth=35,Testing Random Forest Regressor max_depth=40,Testing Random Forest Regressor max_depth=45,Testing Random Forest Regressor max_depth=50
0,Allen Iverson,John Stockton,Allen Iverson,John Stockton,Allen Iverson,Chris Webber,John Stockton,Allen Iverson,John Stockton,John Stockton,John Stockton
1,Tim Duncan,Tim Duncan,Tim Duncan,Shaquille O'Neal,Shaquille O'Neal,Shaquille O'Neal,Shaquille O'Neal,Shaquille O'Neal,Shaquille O'Neal,Shaquille O'Neal,Shaquille O'Neal
2,Tim Duncan,Tracy McGrady,Jason Kidd,Dirk Nowitzki,Tim Duncan,Steve Nash,Steve Nash,Tim Duncan,Steve Nash,Tim Duncan,Tim Duncan
3,Kevin Garnett,Jermaine O'Neal,Kevin Garnett,Kevin Garnett,Kevin Garnett,Kevin Garnett,Kevin Garnett,Kevin Garnett,Kevin Garnett,Kevin Garnett,Kevin Garnett
4,Steve Nash,Amar'e Stoudemire,Tracy McGrady,Tracy McGrady,Shaquille O'Neal,Amar'e Stoudemire,Shaquille O'Neal,Shaquille O'Neal,Shaquille O'Neal,Shaquille O'Neal,Shaquille O'Neal
5,Steve Nash,Ray Allen,Chauncey Billups,Dirk Nowitzki,LeBron James,Dirk Nowitzki,Dirk Nowitzki,Dirk Nowitzki,Dirk Nowitzki,Dirk Nowitzki,Dirk Nowitzki
6,Dirk Nowitzki,Steve Nash,LeBron James,Steve Nash,Steve Nash,Steve Nash,Tim Duncan,Steve Nash,Steve Nash,Steve Nash,Tim Duncan
7,Kobe Bryant,Chris Paul,Chris Paul,Chris Paul,Chris Paul,Deron Williams,Deron Williams,Deron Williams,Deron Williams,Chris Paul,Deron Williams
8,LeBron James,Dwyane Wade,LeBron James,LeBron James,LeBron James,LeBron James,LeBron James,LeBron James,LeBron James,LeBron James,LeBron James
9,LeBron James,LeBron James,LeBron James,LeBron James,LeBron James,LeBron James,LeBron James,LeBron James,LeBron James,LeBron James,LeBron James


In [24]:
# Accuracy Calculation (correct/23)
for i in range(len(results)):
    correct = 0
    for j in range(len(results[0])):
        if results[i][j] == mvp_winners[j]:
            correct += 1
    print(correct/23)

0.34782608695652173
0.43478260869565216
0.391304347826087
0.43478260869565216
0.4782608695652174
0.4782608695652174
0.4782608695652174
0.4782608695652174
0.4782608695652174
0.43478260869565216


### Gradient Boosting

In [12]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import GridSearchCV
import numpy as np
results = []
for season in season_list:
    # Split data into training and testing sets
    X_train = total_data_df[total_data_df['Szn'] != season][features]
    X_test = total_data_df[total_data_df['Szn'] == season][features]
    y_train = total_data_df[total_data_df['Szn'] != season][target]
    y_test = total_data_df[total_data_df['Szn'] == season][target]

    gbr = GradientBoostingRegressor()

    params = {
        'max_depth': np.arange(5,30,5),
        'n_estimators': np.arange(50,210,50),
        'learning_rate': np.arange(0.01,0.31,0.1)
    }

    grid_search = GridSearchCV(estimator=gbr, param_grid=params, cv=5, n_jobs=-1, verbose=1, scoring='neg_mean_absolute_error', return_train_score=True)
    grid_search.fit(X_train, y_train)
    # Find best params for each season
    print(grid_search.best_params_)


Fitting 5 folds for each of 60 candidates, totalling 300 fits
{'learning_rate': 0.21000000000000002, 'max_depth': 5, 'n_estimators': 200}
Fitting 5 folds for each of 60 candidates, totalling 300 fits
{'learning_rate': 0.21000000000000002, 'max_depth': 10, 'n_estimators': 200}
Fitting 5 folds for each of 60 candidates, totalling 300 fits
{'learning_rate': 0.11, 'max_depth': 5, 'n_estimators': 50}
Fitting 5 folds for each of 60 candidates, totalling 300 fits
{'learning_rate': 0.21000000000000002, 'max_depth': 5, 'n_estimators': 100}
Fitting 5 folds for each of 60 candidates, totalling 300 fits
{'learning_rate': 0.11, 'max_depth': 5, 'n_estimators': 200}
Fitting 5 folds for each of 60 candidates, totalling 300 fits
{'learning_rate': 0.21000000000000002, 'max_depth': 5, 'n_estimators': 150}
Fitting 5 folds for each of 60 candidates, totalling 300 fits
{'learning_rate': 0.11, 'max_depth': 5, 'n_estimators': 100}
Fitting 5 folds for each of 60 candidates, totalling 300 fits
{'learning_rate':

In [21]:
# Apply the parameters that appeared most frequently above

# append table dataframe to result
for n in range(50,210,50):
    for i in range(3):
        print(f'Testing Gradient Boosting Regressor learning_rate: {0.01+i*0.1}, max_depth: 5, n_estimators: {n}')
        model_result = model_testing(GradientBoostingRegressor(max_depth=5, learning_rate=0.01+i*0.1, n_estimators=n))
        # append table dataframe to result
        results.append(model_result)

# Make a dataframe, where each column is a model, and each row is a season
results_df_gbr = pd.DataFrame()
mvp_winners = ['Allen Iverson', 'Tim Duncan', 'Tim Duncan', 'Kevin Garnett', 'Steve Nash', 
               'Steve Nash', 'Dirk Nowitzki', 'Kobe Bryant', 'LeBron James', 'LeBron James', 
               'Derick Rose', 'LeBron James', 'LeBron James', 'Kevin Durant', 'Stephen Curry', 
               'Stephen Curry', 'Russell Westbrook', 'James Harden', 'Giannis Antetokounmpo', 
               'Giannis Antetokounmpo', 'Nikola Jokić', 'Nikola Jokić', 'Joel Embiid']
results_df_gbr['Truth'] = mvp_winners
for i in range(len(results)):
    results_df_gbr[f'Gradient Boosting Regressor {i}'] = results[i]
    
results_df_gbr

Testing Gradient Boosting Regressor learning_rate: 0.01, max_depth: 5, n_estimators: 50
Testing Gradient Boosting Regressor learning_rate: 0.11, max_depth: 5, n_estimators: 50
Testing Gradient Boosting Regressor learning_rate: 0.21000000000000002, max_depth: 5, n_estimators: 50
Testing Gradient Boosting Regressor learning_rate: 0.01, max_depth: 5, n_estimators: 100
Testing Gradient Boosting Regressor learning_rate: 0.11, max_depth: 5, n_estimators: 100
Testing Gradient Boosting Regressor learning_rate: 0.21000000000000002, max_depth: 5, n_estimators: 100
Testing Gradient Boosting Regressor learning_rate: 0.01, max_depth: 5, n_estimators: 150
Testing Gradient Boosting Regressor learning_rate: 0.11, max_depth: 5, n_estimators: 150
Testing Gradient Boosting Regressor learning_rate: 0.21000000000000002, max_depth: 5, n_estimators: 150
Testing Gradient Boosting Regressor learning_rate: 0.01, max_depth: 5, n_estimators: 200
Testing Gradient Boosting Regressor learning_rate: 0.11, max_depth: 

Unnamed: 0,Truth,Gradient Boosting Regressor 0,Gradient Boosting Regressor 1,Gradient Boosting Regressor 2,Gradient Boosting Regressor 3,Gradient Boosting Regressor 4,Gradient Boosting Regressor 5,Gradient Boosting Regressor 6,Gradient Boosting Regressor 7,Gradient Boosting Regressor 8,...,Gradient Boosting Regressor 15,Gradient Boosting Regressor 16,Gradient Boosting Regressor 17,Gradient Boosting Regressor 18,Gradient Boosting Regressor 19,Gradient Boosting Regressor 20,Gradient Boosting Regressor 21,Gradient Boosting Regressor 22,Gradient Boosting Regressor 23,Gradient Boosting Regressor 24
0,Allen Iverson,Shaquille O'Neal,Shaquille O'Neal,Shaquille O'Neal,Shaquille O'Neal,Shaquille O'Neal,Shaquille O'Neal,Shaquille O'Neal,Shaquille O'Neal,Shaquille O'Neal,...,Shaquille O'Neal,Shaquille O'Neal,Shaquille O'Neal,Shaquille O'Neal,Shaquille O'Neal,Shaquille O'Neal,Shaquille O'Neal,Shaquille O'Neal,Shaquille O'Neal,Shaquille O'Neal
1,Tim Duncan,Shaquille O'Neal,Shaquille O'Neal,Shaquille O'Neal,Shaquille O'Neal,Shaquille O'Neal,Shaquille O'Neal,Tim Duncan,Shaquille O'Neal,Shaquille O'Neal,...,Shaquille O'Neal,Shaquille O'Neal,Shaquille O'Neal,Tim Duncan,Shaquille O'Neal,Shaquille O'Neal,Shaquille O'Neal,Shaquille O'Neal,Shaquille O'Neal,Tim Duncan
2,Tim Duncan,Tim Duncan,Tim Duncan,Tim Duncan,Kobe Bryant,Tim Duncan,Tim Duncan,Kobe Bryant,Tim Duncan,Tim Duncan,...,Tracy McGrady,Tim Duncan,Tim Duncan,Kobe Bryant,Kobe Bryant,Tim Duncan,Kobe Bryant,Kobe Bryant,Tim Duncan,Tracy McGrady
3,Kevin Garnett,Kevin Garnett,Kevin Garnett,Kevin Garnett,Kevin Garnett,Kevin Garnett,Kevin Garnett,Kevin Garnett,Kevin Garnett,Kevin Garnett,...,Kevin Garnett,Kevin Garnett,Kevin Garnett,Kevin Garnett,Kevin Garnett,Kevin Garnett,Kevin Garnett,Kevin Garnett,Kevin Garnett,Kevin Garnett
4,Steve Nash,Tracy McGrady,Tracy McGrady,Kevin Garnett,Kevin Garnett,Tracy McGrady,Kevin Garnett,Dirk Nowitzki,Kevin Garnett,Kevin Garnett,...,Stephon Marbury,Tracy McGrady,Kevin Garnett,Stephon Marbury,Kevin Garnett,Kevin Garnett,Kevin Garnett,Kevin Garnett,Kevin Garnett,Kevin Garnett
5,Steve Nash,LeBron James,LeBron James,Dirk Nowitzki,Dirk Nowitzki,Dirk Nowitzki,Dirk Nowitzki,Dirk Nowitzki,Dirk Nowitzki,Dirk Nowitzki,...,Dirk Nowitzki,Dirk Nowitzki,Dirk Nowitzki,Dirk Nowitzki,Dirk Nowitzki,Dirk Nowitzki,Dirk Nowitzki,Dirk Nowitzki,Dirk Nowitzki,Dirk Nowitzki
6,Dirk Nowitzki,LeBron James,LeBron James,Steve Nash,Steve Nash,Tim Duncan,Steve Nash,Steve Nash,Steve Nash,Steve Nash,...,Steve Nash,Tim Duncan,Steve Nash,Steve Nash,Steve Nash,Steve Nash,Steve Nash,Steve Nash,Steve Nash,Steve Nash
7,Kobe Bryant,Chris Paul,Chris Paul,Chris Paul,Chris Paul,Chris Paul,Chris Paul,Chris Paul,Chris Paul,Chris Paul,...,Chris Paul,Chris Paul,Chris Paul,Chris Paul,Chris Paul,Chris Paul,Chris Paul,Chris Paul,Chris Paul,Chris Paul
8,LeBron James,LeBron James,LeBron James,LeBron James,LeBron James,LeBron James,LeBron James,LeBron James,LeBron James,LeBron James,...,LeBron James,LeBron James,LeBron James,LeBron James,LeBron James,LeBron James,LeBron James,LeBron James,LeBron James,LeBron James
9,LeBron James,LeBron James,LeBron James,LeBron James,LeBron James,LeBron James,LeBron James,LeBron James,LeBron James,LeBron James,...,LeBron James,LeBron James,LeBron James,LeBron James,LeBron James,LeBron James,LeBron James,LeBron James,LeBron James,LeBron James


In [None]:
# Accuracy Calculation (correct/23)
for i in range(len(results)):
    correct = 0
    for j in range(len(results[0])):
        if results[i][j] == mvp_winners[j]:
            correct += 1
    print(correct/23)

### AdaBoost Regresssion

In [38]:
from sklearn.ensemble import AdaBoostRegressor
from sklearn.model_selection import GridSearchCV
results = []
for season in season_list:
    # Split data into training and testing sets
    X_train = total_data_df[total_data_df['Szn'] != season][features]
    X_test = total_data_df[total_data_df['Szn'] == season][features]
    y_train = total_data_df[total_data_df['Szn'] != season][target]
    y_test = total_data_df[total_data_df['Szn'] == season][target]

    ada = AdaBoostRegressor()

    params = {
        'n_estimators': np.arange(50,160,50),
        'learning_rate': np.arange(0.5,1.1,0.1)
    }

    grid_search = GridSearchCV(estimator=ada, param_grid=params, cv=5, n_jobs=-1, verbose=1, scoring='neg_mean_absolute_error', return_train_score=True)
    grid_search.fit(X_train, y_train)
    # Find best params for each season
    print(grid_search.best_params_)

Fitting 5 folds for each of 21 candidates, totalling 105 fits
{'learning_rate': 0.5, 'n_estimators': 50}
Fitting 5 folds for each of 21 candidates, totalling 105 fits
{'learning_rate': 0.5, 'n_estimators': 50}
Fitting 5 folds for each of 21 candidates, totalling 105 fits
{'learning_rate': 0.5, 'n_estimators': 50}
Fitting 5 folds for each of 21 candidates, totalling 105 fits
{'learning_rate': 0.5, 'n_estimators': 50}
Fitting 5 folds for each of 21 candidates, totalling 105 fits
{'learning_rate': 0.5, 'n_estimators': 50}
Fitting 5 folds for each of 21 candidates, totalling 105 fits
{'learning_rate': 0.5, 'n_estimators': 50}
Fitting 5 folds for each of 21 candidates, totalling 105 fits
{'learning_rate': 0.6, 'n_estimators': 50}
Fitting 5 folds for each of 21 candidates, totalling 105 fits
{'learning_rate': 0.5, 'n_estimators': 50}
Fitting 5 folds for each of 21 candidates, totalling 105 fits
{'learning_rate': 0.5, 'n_estimators': 50}
Fitting 5 folds for each of 21 candidates, totalling 10

In [39]:
# Apply the parameters that appeared most frequently above
print('Testing AdaBoost Regressor learning_rate: 0.5, n_estimators: 50')
model_result = model_testing(AdaBoostRegressor(learning_rate= 0.5, n_estimators=50))
# append table dataframe to result
results.append(model_result)

# Make a dataframe, where each column is a model, and each row is a season
results_df_ada = pd.DataFrame()
mvp_winners = ['Allen Iverson', 'Tim Duncan', 'Tim Duncan', 'Kevin Garnett', 'Steve Nash', 
               'Steve Nash', 'Dirk Nowitzki', 'Kobe Bryant', 'LeBron James', 'LeBron James', 
               'Derick Rose', 'LeBron James', 'LeBron James', 'Kevin Durant', 'Stephen Curry', 
               'Stephen Curry', 'Russell Westbrook', 'James Harden', 'Giannis Antetokounmpo', 
               'Giannis Antetokounmpo', 'Nikola Jokić', 'Nikola Jokić', 'Joel Embiid']
results_df_ada['Truth'] = mvp_winners
results_df_ada['AdaBoost Regressor'] = results[0]
results_df_ada

Testing AdaBoost Regressor learning_rate: 0.5, n_estimators: 50


Unnamed: 0,Truth,AdaBoost Regressor
0,Allen Iverson,Shaquille O'Neal
1,Tim Duncan,Tim Duncan
2,Tim Duncan,Tim Duncan
3,Kevin Garnett,Kevin Garnett
4,Steve Nash,Kevin Garnett
5,Steve Nash,Dirk Nowitzki
6,Dirk Nowitzki,Steve Nash
7,Kobe Bryant,Chris Paul
8,LeBron James,LeBron James
9,LeBron James,LeBron James


In [40]:
# Accuracy Calculation (correct/23)
for i in range(len(results)):
    correct = 0
    for j in range(len(results[0])):
        if results[i][j] == mvp_winners[j]:
            correct += 1
    print(correct/23)

0.6086956521739131


### SVM

In [8]:
from sklearn.svm import SVR

results = []
C = [0.001,0.01,0.1,0.5,1,2,5,10]
    
for c in C:
    print(f'Testing SVM with kernel=kbf C={c}')
    model_result = model_testing(SVR(C=c))
    # append table dataframe to result
    results.append(model_result)

# Make a dataframe, where each column is a model, and each row is a season
results_df_svm = pd.DataFrame()
mvp_winners = ['Allen Iverson', 'Tim Duncan', 'Tim Duncan', 'Kevin Garnett', 'Steve Nash', 
               'Steve Nash', 'Dirk Nowitzki', 'Kobe Bryant', 'LeBron James', 'LeBron James', 
               'Derick Rose', 'LeBron James', 'LeBron James', 'Kevin Durant', 'Stephen Curry', 
               'Stephen Curry', 'Russell Westbrook', 'James Harden', 'Giannis Antetokounmpo', 
               'Giannis Antetokounmpo', 'Nikola Jokić', 'Nikola Jokić', 'Joel Embiid']
results_df_svm['Truth'] = mvp_winners

for i in range(len(results)):
    results_df_svm[f'SVM with C={C[i]}'] = results[i]

results_df_svm

Testing SVM with kernel=kbf C=0.001
AVG MSE and R-squared: 0.026 -0.163
Testing SVM with kernel=kbf C=0.01
AVG MSE and R-squared: 0.023 -0.042
Testing SVM with kernel=kbf C=0.1
AVG MSE and R-squared: 0.014 0.341
Testing SVM with kernel=kbf C=0.5
AVG MSE and R-squared: 0.013 0.42
Testing SVM with kernel=kbf C=1
AVG MSE and R-squared: 0.012 0.461
Testing SVM with kernel=kbf C=2
AVG MSE and R-squared: 0.01 0.519
Testing SVM with kernel=kbf C=5
AVG MSE and R-squared: 0.01 0.542
Testing SVM with kernel=kbf C=10
AVG MSE and R-squared: 0.009 0.56


Unnamed: 0,Truth,SVM with C=0.001,SVM with C=0.01,SVM with C=0.1,SVM with C=0.5,SVM with C=1,SVM with C=2,SVM with C=5,SVM with C=10
0,Allen Iverson,Shaquille O'Neal,Shaquille O'Neal,Shaquille O'Neal,Shaquille O'Neal,Shaquille O'Neal,Shaquille O'Neal,Shaquille O'Neal,Shaquille O'Neal
1,Tim Duncan,Tim Duncan,Tim Duncan,Tim Duncan,Tim Duncan,Tim Duncan,Tim Duncan,Tim Duncan,Tim Duncan
2,Tim Duncan,Tracy McGrady,Tim Duncan,Tracy McGrady,Tim Duncan,Tim Duncan,Tim Duncan,Tim Duncan,Tim Duncan
3,Kevin Garnett,Kevin Garnett,Kevin Garnett,Kevin Garnett,Kevin Garnett,Kevin Garnett,Kevin Garnett,Kevin Garnett,Kevin Garnett
4,Steve Nash,Dirk Nowitzki,Dirk Nowitzki,Dirk Nowitzki,Dirk Nowitzki,Dirk Nowitzki,Dirk Nowitzki,Dirk Nowitzki,Dirk Nowitzki
5,Steve Nash,Dirk Nowitzki,Dirk Nowitzki,Dirk Nowitzki,Dirk Nowitzki,Dirk Nowitzki,Dirk Nowitzki,Dirk Nowitzki,Dirk Nowitzki
6,Dirk Nowitzki,Dirk Nowitzki,Dirk Nowitzki,Dirk Nowitzki,Dirk Nowitzki,Dirk Nowitzki,Dirk Nowitzki,Dirk Nowitzki,Dirk Nowitzki
7,Kobe Bryant,LeBron James,LeBron James,LeBron James,LeBron James,Chris Paul,Chris Paul,Chris Paul,Chris Paul
8,LeBron James,LeBron James,LeBron James,LeBron James,LeBron James,LeBron James,LeBron James,LeBron James,LeBron James
9,LeBron James,LeBron James,LeBron James,LeBron James,LeBron James,LeBron James,LeBron James,LeBron James,LeBron James


In [9]:
# Accuracy Calculation (correct/23)
final_metrics = pd.DataFrame()

for i in range(len(results)):
    correct = 0
    for j in range(len(results[0])):
        if results[i][j] == mvp_winners[j]:
            correct += 1
            
    metrics = {'C': [C[i]],
               'MSE': [mse_list[i]],
               'R2': [r_squared_list[i]],
                'Accuracy': [round((correct / 23), 3)]}
    
    model_data = pd.DataFrame(data=metrics)
    final_metrics = pd.concat([final_metrics, model_data], ignore_index=True)

final_metrics

Unnamed: 0,C,MSE,R2,Accuracy
0,0.001,0.026,-0.163,0.696
1,0.01,0.023,-0.042,0.739
2,0.1,0.014,0.341,0.696
3,0.5,0.013,0.42,0.739
4,1.0,0.012,0.461,0.739
5,2.0,0.01,0.519,0.739
6,5.0,0.01,0.542,0.739
7,10.0,0.009,0.56,0.739
