In [1]:
from pscore_data_reader_preproc import read_and_process_data, model_evaluation
from sklearn.linear_model import Lasso
from sklearn.model_selection import GridSearchCV
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Load the train/test splits through the shared pre-processing helper.
# NOTE(review): both paths are absolute and tied to one machine's checkout —
# consider a configurable data directory so the notebook can run elsewhere.
x_train, y_train, x_test, y_test, train_team_values, test_team_values = read_and_process_data(
     r"E:\github_repos\Private_Projects\NCAA_FBS_AP_Ranking_Predictions\python_ap\scripts_and_data\data\score_pred_train_data.csv",
     r"E:\github_repos\Private_Projects\NCAA_FBS_AP_Ranking_Predictions\python_ap\scripts_and_data\data\score_pred_test_data.csv",
     True  # positional flag forwarded as-is; its meaning is defined by the helper — TODO confirm
)

# Sanity-check the shapes. A notebook cell only displays its final expression,
# so both shapes are emitted together as one tuple instead of two separate
# statements (the first of which would be silently discarded).
x_train.shape, x_test.shape

train data has shape: (14648, 490)
test data has shape: (132, 490)


(132, 490)

In [3]:
# Candidate regularisation strengths for the lasso grid search:
# 15 log-spaced values with exponents running from -4 to 1 (i.e. 1e-4 up to 10).
alpha_values = np.logspace(start=-4, stop=1, num=15)

# Parameter grid: alpha is the only hyper-parameter being searched
lasso_param_grid = dict(alpha=alpha_values)

# Base lasso estimator, with the iteration cap raised to 5000
lasso = Lasso(max_iter=5000)

# 5-fold cross-validated grid search scored on negative mean squared error
lasso_grid_search = GridSearchCV(
    estimator=lasso,
    param_grid=lasso_param_grid,
    cv=5,
    scoring='neg_mean_squared_error',
    verbose=1,
)

In [None]:
# Run the cross-validated grid search on the training features and targets
lasso_grid_search.fit(X=x_train, y=y_train)

Fitting 5 folds for each of 15 candidates, totalling 75 fits


In [None]:
# Plot the mean cross-validated error for every alpha tried by the grid search.
# The search was scored with 'neg_mean_squared_error', so cv_results_ holds
# negated MSE; flipping the sign gives plain MSE (lower is better), and the
# axis label below says so explicitly.
mean_cv_mse = -lasso_grid_search.cv_results_['mean_test_score']

fig, ax = plt.subplots()
ax.plot(alpha_values, mean_cv_mse, marker='o')
ax.set_xscale('log')  # alphas span several orders of magnitude
ax.set_xlabel('Alpha values')
ax.set_ylabel('Mean CV Test Score (MSE)')
# Score versus a hyper-parameter is a validation curve; a learning curve would
# plot score against training-set size.
ax.set_title('Validation Curve for Lasso Regression')
plt.show()