In [35]:
import pandas as pd
import numpy as np

from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import KFold, cross_val_score
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge

In [36]:
# Get stats from csv
games_df = pd.read_csv('summary_data.csv')

In [43]:
# Baseline model: Splitting testing and training data with FG3M as the output
X = games_df[['three_pm_avg_to_date']]
y = games_df['team_three_pm_next_game']

# Baseline + FT% model:
#X = games_df[['three_pm_avg_to_date', 'free_throw_percent_avg_to_date']]
#y = games_df['team_three_pm_next_game']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [38]:
# Initialize a RandomForestRegressor
rf_model = RandomForestRegressor(random_state=42)

# Define k-fold cross-validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Perform k-fold cross-validation on the training set
cv_r2_scores = cross_val_score(rf_model, X_train, y_train, cv=kf, scoring='r2')
average_cv_r2 = np.mean(cv_r2_scores)

cv_mse_scores = cross_val_score(rf_model, X_train, y_train, cv=kf, scoring='neg_mean_squared_error')
average_cv_mse = -np.mean(cv_mse_scores)

# Print the results
print(f'Average R-squared value (Cross-validation): {average_cv_r2}')
print(f'Average Mean Squared Error (Cross-validation): {average_cv_mse}')

Average R-squared value (Cross-validation): 0.1320383552833372
Average Mean Squared Error (Cross-validation): 10.335062906339108


In [45]:
# Initialize a Ridge model
ridge_model = Ridge(alpha=1.0)  # You can adjust the regularization strength (alpha) as needed

# Define k-fold cross-validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Perform k-fold cross-validation on the training set
cv_r2_scores = cross_val_score(ridge_model, X_train, y_train, cv=kf, scoring='r2')
average_cv_r2 = np.mean(cv_r2_scores)

cv_mse_scores = cross_val_score(ridge_model, X_train, y_train, cv=kf, scoring='neg_mean_squared_error')
average_cv_mse = -np.mean(cv_mse_scores)

# Print the results
print(f'Average R-squared value (Cross-validation): {average_cv_r2}')
print(f'Average Mean Squared Error (Cross-validation): {average_cv_mse}')

Average R-squared value (Cross-validation): 0.2948073393866424
Average Mean Squared Error (Cross-validation): 8.39769966168765


In [46]:
# Initialize a Lasso Regression model
lasso_model = Lasso(alpha=1.0)  # You can adjust the regularization strength (alpha) as needed

# Define k-fold cross-validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Perform k-fold cross-validation
cv_r2_scores = cross_val_score(lasso_model, X, y, cv=kf, scoring='r2')
average_cv_r2 = np.mean(cv_r2_scores)

cv_mse_scores = -cross_val_score(lasso_model, X, y, cv=kf, scoring='neg_mean_squared_error')
average_cv_mse = np.mean(cv_mse_scores)

# Print the results
print(f'Average R-squared value (Cross-validation): {average_cv_r2}')
print(f'Average Mean Squared Error (Cross-validation): {average_cv_mse}')

Average R-squared value (Cross-validation): 0.2752552435384342
Average Mean Squared Error (Cross-validation): 8.636009118409959
