# Model Selection
### Imports

In [44]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import mean_squared_error, r2_score
import pandas as pd

### Import Data
Import the data generated by the data_clean.ipynb notebook

In [45]:
# Load the dataset written out by the data cleaning notebook
total_data_df = pd.read_csv(filepath_or_buffer='output.csv')

### Simple Linear Regression
We will eventually need to try many different models and tune their parameters, but for now we'll set up a simple Linear Regression model and evaluate it with K-Fold Cross Validation, where the fold we leave out (for validation) is the set of rows corresponding to one particular season, i.e. leave-one-season-out validation.

In [46]:
# Show every column available in the loaded dataset
print(total_data_df.columns)

# Column the models are trained to predict
target = 'MVP_Shr'

# Columns used as model inputs (order is kept stable so fitted coefficients line up)
features = [
    'Pos_PG', 'Pos_SG', 'Pos_SF', 'Pos_PF', 'Pos_C',
    'G', 'GS', 'MP',
    'FG', 'FG%', '3P', '3P%', '2P', '2P%', 'eFG%',
    'ORB', 'DRB', 'AST', 'BLK', 'PTS',
    'PER', '3PAr', 'FTr',
    'ORB%', 'DRB%', 'TRB%', 'AST%', 'STL%', 'BLK%', 'USG%',
    'OWS', 'DWS', 'WS', 'WS/48',
    'OBPM', 'DBPM', 'BPM', 'VORP',
    'Tm_Rcrd',
]

# Season labels from "2000_01" through "2022_23": start year, underscore,
# then the two-digit end year
season_list = [
    f"{start_year}_{(start_year + 1) % 100:02d}"
    for start_year in range(2000, 2023)
]

def model_testing(model):
    """Predict each season's MVP using leave-one-season-out validation.

    For every season in ``season_list`` the model is fit on the rows of
    ``total_data_df`` belonging to all *other* seasons, then used to predict
    ``target`` for the held-out season. The player with the highest predicted
    value is recorded as that season's predicted MVP.

    Parameters
    ----------
    model : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``. ``fit`` is called on
        this same object once per season, so after the call it holds the fit
        from the last fold. ``predict`` is expected to return a NumPy array
        (as scikit-learn regressors do).

    Returns
    -------
    list
        One ``'Plyr'`` value per entry of ``season_list``, in the same order.

    Raises
    ------
    ValueError
        If a season in ``season_list`` has no rows in ``total_data_df``.
    """
    mvp_list = []
    for season in season_list:
        # Build the hold-out mask once per fold instead of re-filtering the
        # whole frame for every train/test slice.
        is_held_out = total_data_df['Szn'] == season
        if not is_held_out.any():
            raise ValueError(
                f"No rows found for season {season!r}; cannot predict its MVP"
            )

        train_df = total_data_df[~is_held_out]
        test_df = total_data_df[is_held_out]

        model.fit(train_df[features], train_df[target])
        y_pred = model.predict(test_df[features])

        # Only the top candidate is needed, so take the arg-max rather than
        # sorting every prediction; ties resolve to the first row in the frame.
        top_position = int(y_pred.argmax())
        mvp_list.append(test_df['Plyr'].iloc[top_position])

    return mvp_list

Index(['Plyr', 'Age', 'Tm', 'G', 'GS', 'MP', 'FG', 'FGA', 'FG%', '3P', '3PA',
       '3P%', '2P', '2PA', '2P%', 'eFG%', 'FT', 'FTA', 'FT%', 'ORB', 'DRB',
       'TRB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS', 'id', 'Szn', 'PER',
       'TS%', '3PAr', 'FTr', 'ORB%', 'DRB%', 'TRB%', 'AST%', 'STL%', 'BLK%',
       'TOV%', 'USG%', 'OWS', 'DWS', 'WS', 'WS/48', 'OBPM', 'DBPM', 'BPM',
       'VORP', 'MVP_Rnk', 'MVP_Shr', 'Tm_Rcrd', 'Pos_C', 'Pos_PF', 'Pos_PG',
       'Pos_SF', 'Pos_SG'],
      dtype='object')


In [47]:
# Candidate regressors to compare; each is evaluated independently
models = [
    LinearRegression(),
    Lasso(alpha=0.1),
    Ridge(alpha=0.1)
]

results = []

# Run the per-season evaluation for every model and keep its list of picks
for model in models:
    print(f'Testing {model}')
    model_result = model_testing(model)
    results.append(model_result)

# Actual MVP winners, aligned by position with the per-season predictions
# returned by model_testing
mvp_winners = ['Allen Iverson', 'Tim Duncan', 'Tim Duncan', 'Kevin Garnett', 'Steve Nash', 'Steve Nash', 'Dirk Nowitzki', 'Kobe Bryant', 'LeBron James', 'LeBron James', 'Derrick Rose', 'LeBron James', 'LeBron James', 'Kevin Durant', 'Stephen Curry', 'Stephen Curry', 'Russell Westbrook', 'James Harden', 'Giannis Antetokounmpo', 'Giannis Antetokounmpo', 'Nikola Jokić', 'Nikola Jokić', 'Joel Embiid']

# Make a dataframe, where each column is a model, and each row is a season
results_df = pd.DataFrame()
results_df['Truth'] = mvp_winners

for model, model_result in zip(models, results):
    # Label the column with the estimator's string form rather than the
    # estimator object itself, so the column can be selected by a plain string
    # such as results_df['Ridge(alpha=0.1)'].
    results_df[str(model)] = model_result

results_df



Testing LinearRegression()
Testing Lasso(alpha=0.1)
Testing Ridge(alpha=0.1)


Unnamed: 0,Truth,LinearRegression(),Lasso(alpha=0.1),Ridge(alpha=0.1)
0,Allen Iverson,Shaquille O'Neal,Shaquille O'Neal,Shaquille O'Neal
1,Tim Duncan,Tim Duncan,Shaquille O'Neal,Tim Duncan
2,Tim Duncan,Tracy McGrady,Tracy McGrady,Tracy McGrady
3,Kevin Garnett,Kevin Garnett,Kevin Garnett,Kevin Garnett
4,Steve Nash,Kevin Garnett,Shaquille O'Neal,Kevin Garnett
5,Steve Nash,LeBron James,LeBron James,LeBron James
6,Dirk Nowitzki,LeBron James,Dwyane Wade,LeBron James
7,Kobe Bryant,LeBron James,Chris Paul,LeBron James
8,LeBron James,LeBron James,LeBron James,LeBron James
9,LeBron James,LeBron James,LeBron James,LeBron James
