# NBA Awards Prediction Models

In this notebook, we will load and preprocess NBA data, then train several models to predict different awards and statistical leaders using Random Forest, Gradient Boosting, and Support Vector Machine estimators from `scikit-learn`. The trained models will be saved using `joblib`.


In [None]:
# Import necessary libraries
import logging  # For logging information, warnings, and errors

import joblib  # For saving the trained models
import pandas as pd  # For data manipulation and analysis
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier, GradientBoostingRegressor, GradientBoostingClassifier  # For creating and training models
from sklearn.metrics import mean_squared_error, accuracy_score, confusion_matrix, classification_report  # For evaluating model performance
from sklearn.model_selection import train_test_split  # For splitting the data into training and testing sets
from sklearn.pipeline import make_pipeline  # For chaining feature scaling with an estimator
from sklearn.preprocessing import StandardScaler  # For standardizing features before SVM models
from sklearn.svm import SVR, SVC  # For Support Vector Machine models

# Configure logging
# Set the root logger to INFO so the logging.info(...) metric messages emitted
# by the model cells are shown.
logging.basicConfig(level=logging.INFO)


## Load and Preprocess Data

First, we will load the cleaned NBA data from a CSV file and define our feature and target variables.


In [None]:
# Columns used as model inputs
features = ['PTS', 'AST', 'REB', 'BLK', 'STL', 'PER']

# Read the cleaned NBA stats, then keep only the feature columns as the input matrix
data = pd.read_csv('cleaned_nba_stats.csv')
X = data[features]


## Train and Evaluate Models

We will train and evaluate three models (Random Forest, Gradient Boosting, and Support Vector Machine) for each of the following tasks:
1. MVP prediction
2. Defensive Player of the Year (DPOY) prediction
3. Points leader prediction
4. Assists leader prediction
5. Rebounds leader prediction

We will use appropriate evaluation metrics for each task.


In [None]:
# MVP target: the 'MVP_votes' column (assumed to be the target column for MVP)
y_mvp = data['MVP_votes']

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
(X_train_mvp, X_test_mvp, y_train_mvp, y_test_mvp) = train_test_split(
    X,
    y_mvp,
    test_size=0.2,
    random_state=42,
)



#### Random Forest Regressor


In [None]:
# Train a 100-tree random forest regressor on the MVP training split (seeded for reproducibility)
rf_mvp = RandomForestRegressor(n_estimators=100, random_state=42)
rf_mvp.fit(X_train_mvp, y_train_mvp)

# Make predictions on the held-out rows
y_pred_rf_mvp = rf_mvp.predict(X_test_mvp)

# Evaluate the model with mean squared error on the test split
mse_rf_mvp = mean_squared_error(y_test_mvp, y_pred_rf_mvp)
logging.info(f'Random Forest MVP MSE: {mse_rf_mvp}')

# Save the trained model (previously commented out, so the model was never persisted)
joblib.dump(rf_mvp, 'rf_mvp_model.pkl')


#### Gradient Boosting Regressor


In [None]:
# Train a 100-stage gradient boosting regressor on the MVP training split (seeded for reproducibility)
gb_mvp = GradientBoostingRegressor(n_estimators=100, random_state=42)
gb_mvp.fit(X_train_mvp, y_train_mvp)

# Make predictions on the held-out rows
y_pred_gb_mvp = gb_mvp.predict(X_test_mvp)

# Evaluate the model with mean squared error on the test split
mse_gb_mvp = mean_squared_error(y_test_mvp, y_pred_gb_mvp)
logging.info(f'Gradient Boosting MVP MSE: {mse_gb_mvp}')

# Save the trained model (previously commented out, so the model was never persisted)
joblib.dump(gb_mvp, 'gb_mvp_model.pkl')


#### Support Vector Regressor


In [None]:
# Train the model.
# Support vector machines are sensitive to feature scale, so the features are
# standardized inside a pipeline: the scaler is fit on the training rows only
# and automatically re-applied whenever the pipeline predicts.
svr_mvp = make_pipeline(StandardScaler(), SVR())
svr_mvp.fit(X_train_mvp, y_train_mvp)

# Make predictions on the held-out rows
y_pred_svr_mvp = svr_mvp.predict(X_test_mvp)

# Evaluate the model with mean squared error on the test split
mse_svr_mvp = mean_squared_error(y_test_mvp, y_pred_svr_mvp)
logging.info(f'Support Vector MVP MSE: {mse_svr_mvp}')

# Save the trained pipeline (scaler + SVR); previously commented out, so nothing was persisted
joblib.dump(svr_mvp, 'svr_mvp_model.pkl')


## Train Defensive Player of the Year (DPOY) Model

Next, we will train Random Forest, Gradient Boosting, and Support Vector classifiers to predict the Defensive Player of the Year (DPOY) votes.


In [None]:
# DPOY target: the 'DPOTY_votes' column (assumed to be the target column for Defensive Player of the Year)
# NOTE(review): the models fit on this target are classifiers, so every distinct
# value in this column is treated as a separate class - confirm the column holds
# class labels rather than raw vote counts.
y_dpoty = data['DPOTY_votes']

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
(X_train_dpoty, X_test_dpoty, y_train_dpoty, y_test_dpoty) = train_test_split(
    X,
    y_dpoty,
    test_size=0.2,
    random_state=42,
)



#### Random Forest Classifier


In [None]:
# Fit a 100-tree random forest classifier on the DPOY training split (seeded for reproducibility)
rf_dpoty = RandomForestClassifier(random_state=42, n_estimators=100).fit(
    X_train_dpoty, y_train_dpoty
)

# Predict labels for the held-out rows
y_pred_rf_dpoty = rf_dpoty.predict(X_test_dpoty)

# Score the predictions: overall accuracy plus the confusion matrix
accuracy_rf_dpoty = accuracy_score(y_test_dpoty, y_pred_rf_dpoty)
conf_matrix_rf_dpoty = confusion_matrix(y_test_dpoty, y_pred_rf_dpoty)
for report_line in (
    f'Random Forest DPOY Accuracy: {accuracy_rf_dpoty}',
    f'Random Forest DPOY Confusion Matrix: \n{conf_matrix_rf_dpoty}',
):
    logging.info(report_line)

# Persist the fitted classifier to disk
joblib.dump(rf_dpoty, 'rf_dpoty_model.pkl')


#### Gradient Boosting Classifier


In [None]:
# Fit a 100-stage gradient boosting classifier on the DPOY training split (seeded for reproducibility)
gb_dpoty = GradientBoostingClassifier(random_state=42, n_estimators=100).fit(
    X_train_dpoty, y_train_dpoty
)

# Predict labels for the held-out rows
y_pred_gb_dpoty = gb_dpoty.predict(X_test_dpoty)

# Score the predictions: overall accuracy plus the confusion matrix
accuracy_gb_dpoty = accuracy_score(y_test_dpoty, y_pred_gb_dpoty)
conf_matrix_gb_dpoty = confusion_matrix(y_test_dpoty, y_pred_gb_dpoty)
for report_line in (
    f'Gradient Boosting DPOY Accuracy: {accuracy_gb_dpoty}',
    f'Gradient Boosting DPOY Confusion Matrix: \n{conf_matrix_gb_dpoty}',
):
    logging.info(report_line)

# Persist the fitted classifier to disk
joblib.dump(gb_dpoty, 'gb_dpoty_model.pkl')


#### Support Vector Classifier


In [None]:
# Train the model.
# Support vector machines are sensitive to feature scale, so the features are
# standardized inside a pipeline: the scaler is fit on the training rows only
# and automatically re-applied whenever the pipeline predicts.
svc_dpoty = make_pipeline(StandardScaler(), SVC())
svc_dpoty.fit(X_train_dpoty, y_train_dpoty)

# Make predictions on the held-out rows
y_pred_svc_dpoty = svc_dpoty.predict(X_test_dpoty)

# Evaluate the model: overall accuracy plus the confusion matrix
accuracy_svc_dpoty = accuracy_score(y_test_dpoty, y_pred_svc_dpoty)
conf_matrix_svc_dpoty = confusion_matrix(y_test_dpoty, y_pred_svc_dpoty)
logging.info(f'Support Vector DPOY Accuracy: {accuracy_svc_dpoty}')
logging.info(f'Support Vector DPOY Confusion Matrix: \n{conf_matrix_svc_dpoty}')

# Save the trained pipeline (scaler + SVC)
joblib.dump(svc_dpoty, 'svc_dpoty_model.pkl')


## Train Points Leader Model

We will train Random Forest, Gradient Boosting, and Support Vector regressors to predict the points leader.


In [None]:
# Define the target variable for points leader
# NOTE(review): 'PTS' is also listed in `features`, so `X` contains this target
# column; it must be excluded from the model inputs to avoid target leakage.
y_points_leader = data['PTS']



#### Random Forest Regressor


In [None]:
# 'PTS' is both the target and one of the columns in X. Leaving it in the inputs
# lets the model simply copy the answer, so drop it before fitting.
X_points_leader = X.drop(columns=['PTS'])

# Hold out 20% of the rows so the reported MSE is measured on data the model has
# not seen; the fixed seed keeps the split reproducible across re-runs.
(X_train_points_leader, X_test_points_leader,
 y_train_points_leader, y_test_points_leader) = train_test_split(
    X_points_leader, y_points_leader, test_size=0.2, random_state=42
)

# Train the model
rf_points_leader = RandomForestRegressor(n_estimators=100, random_state=42)
rf_points_leader.fit(X_train_points_leader, y_train_points_leader)

# Make predictions on the held-out rows
y_pred_rf_points_leader = rf_points_leader.predict(X_test_points_leader)

# Evaluate the model
mse_rf_points_leader = mean_squared_error(y_test_points_leader, y_pred_rf_points_leader)
logging.info(f'Random Forest Points Leader MSE: {mse_rf_points_leader}')

# Save the trained model (expects the feature columns without 'PTS')
joblib.dump(rf_points_leader, 'rf_points_leader_model.pkl')


#### Gradient Boosting Regressor


In [None]:
# 'PTS' is both the target and one of the columns in X. Leaving it in the inputs
# lets the model simply copy the answer, so drop it before fitting.
X_points_leader = X.drop(columns=['PTS'])

# Hold out 20% of the rows so the reported MSE is measured on data the model has
# not seen; the fixed seed keeps the split reproducible across re-runs.
(X_train_points_leader, X_test_points_leader,
 y_train_points_leader, y_test_points_leader) = train_test_split(
    X_points_leader, y_points_leader, test_size=0.2, random_state=42
)

# Train the model
gb_points_leader = GradientBoostingRegressor(n_estimators=100, random_state=42)
gb_points_leader.fit(X_train_points_leader, y_train_points_leader)

# Make predictions on the held-out rows
y_pred_gb_points_leader = gb_points_leader.predict(X_test_points_leader)

# Evaluate the model
mse_gb_points_leader = mean_squared_error(y_test_points_leader, y_pred_gb_points_leader)
logging.info(f'Gradient Boosting Points Leader MSE: {mse_gb_points_leader}')

# Save the trained model (expects the feature columns without 'PTS')
joblib.dump(gb_points_leader, 'gb_points_leader_model.pkl')


#### Support Vector Regressor


In [None]:
# 'PTS' is both the target and one of the columns in X. Leaving it in the inputs
# lets the model simply copy the answer, so drop it before fitting.
X_points_leader = X.drop(columns=['PTS'])

# Hold out 20% of the rows so the reported MSE is measured on data the model has
# not seen; the fixed seed keeps the split reproducible across re-runs.
(X_train_points_leader, X_test_points_leader,
 y_train_points_leader, y_test_points_leader) = train_test_split(
    X_points_leader, y_points_leader, test_size=0.2, random_state=42
)

# Train the model.
# Support vector machines are sensitive to feature scale, so the features are
# standardized inside a pipeline (scaler fit on the training rows only).
svr_points_leader = make_pipeline(StandardScaler(), SVR())
svr_points_leader.fit(X_train_points_leader, y_train_points_leader)

# Make predictions on the held-out rows
y_pred_svr_points_leader = svr_points_leader.predict(X_test_points_leader)

# Evaluate the model
mse_svr_points_leader = mean_squared_error(y_test_points_leader, y_pred_svr_points_leader)
logging.info(f'Support Vector Points Leader MSE: {mse_svr_points_leader}')

# Save the trained pipeline (scaler + SVR; expects the feature columns without 'PTS')
joblib.dump(svr_points_leader, 'svr_points_leader_model.pkl')


## Train Assists Leader Model

We will train Random Forest, Gradient Boosting, and Support Vector regressors to predict the assists leader.


In [None]:
# Define the target variable for assists leader
# NOTE(review): 'AST' is also listed in `features`, so `X` contains this target
# column; it must be excluded from the model inputs to avoid target leakage.
y_assists_leader = data['AST']


#### Random Forest Regressor


In [None]:
# 'AST' is both the target and one of the columns in X. Leaving it in the inputs
# lets the model simply copy the answer, so drop it before fitting.
X_assists_leader = X.drop(columns=['AST'])

# Hold out 20% of the rows so the reported MSE is measured on data the model has
# not seen; the fixed seed keeps the split reproducible across re-runs.
(X_train_assists_leader, X_test_assists_leader,
 y_train_assists_leader, y_test_assists_leader) = train_test_split(
    X_assists_leader, y_assists_leader, test_size=0.2, random_state=42
)

# Train the model
rf_assists_leader = RandomForestRegressor(n_estimators=100, random_state=42)
rf_assists_leader.fit(X_train_assists_leader, y_train_assists_leader)

# Make predictions on the held-out rows
y_pred_rf_assists_leader = rf_assists_leader.predict(X_test_assists_leader)

# Evaluate the model
mse_rf_assists_leader = mean_squared_error(y_test_assists_leader, y_pred_rf_assists_leader)
logging.info(f'Random Forest Assists Leader MSE: {mse_rf_assists_leader}')

# Save the trained model (expects the feature columns without 'AST')
joblib.dump(rf_assists_leader, 'rf_assists_leader_model.pkl')


#### Gradient Boosting Regressor


In [None]:
# 'AST' is both the target and one of the columns in X. Leaving it in the inputs
# lets the model simply copy the answer, so drop it before fitting.
X_assists_leader = X.drop(columns=['AST'])

# Hold out 20% of the rows so the reported MSE is measured on data the model has
# not seen; the fixed seed keeps the split reproducible across re-runs.
(X_train_assists_leader, X_test_assists_leader,
 y_train_assists_leader, y_test_assists_leader) = train_test_split(
    X_assists_leader, y_assists_leader, test_size=0.2, random_state=42
)

# Train the model
gb_assists_leader = GradientBoostingRegressor(n_estimators=100, random_state=42)
gb_assists_leader.fit(X_train_assists_leader, y_train_assists_leader)

# Make predictions on the held-out rows
y_pred_gb_assists_leader = gb_assists_leader.predict(X_test_assists_leader)

# Evaluate the model
mse_gb_assists_leader = mean_squared_error(y_test_assists_leader, y_pred_gb_assists_leader)
logging.info(f'Gradient Boosting Assists Leader MSE: {mse_gb_assists_leader}')

# Save the trained model (expects the feature columns without 'AST')
joblib.dump(gb_assists_leader, 'gb_assists_leader_model.pkl')


#### Support Vector Regressor


In [None]:
# 'AST' is both the target and one of the columns in X. Leaving it in the inputs
# lets the model simply copy the answer, so drop it before fitting.
X_assists_leader = X.drop(columns=['AST'])

# Hold out 20% of the rows so the reported MSE is measured on data the model has
# not seen; the fixed seed keeps the split reproducible across re-runs.
(X_train_assists_leader, X_test_assists_leader,
 y_train_assists_leader, y_test_assists_leader) = train_test_split(
    X_assists_leader, y_assists_leader, test_size=0.2, random_state=42
)

# Train the model.
# Support vector machines are sensitive to feature scale, so the features are
# standardized inside a pipeline (scaler fit on the training rows only).
svr_assists_leader = make_pipeline(StandardScaler(), SVR())
svr_assists_leader.fit(X_train_assists_leader, y_train_assists_leader)

# Make predictions on the held-out rows
y_pred_svr_assists_leader = svr_assists_leader.predict(X_test_assists_leader)

# Evaluate the model
mse_svr_assists_leader = mean_squared_error(y_test_assists_leader, y_pred_svr_assists_leader)
logging.info(f'Support Vector Assists Leader MSE: {mse_svr_assists_leader}')

# Save the trained pipeline (scaler + SVR; expects the feature columns without 'AST')
joblib.dump(svr_assists_leader, 'svr_assists_leader_model.pkl')


## Train Rebounds Leader Model

We will train Random Forest, Gradient Boosting, and Support Vector regressors to predict the rebounds leader.


In [None]:
# Define the target variable for rebounds leader
# NOTE(review): 'REB' is also listed in `features`, so `X` contains this target
# column; it must be excluded from the model inputs to avoid target leakage.
y_rebounds_leader = data['REB']


#### Random Forest Regressor


In [None]:
# 'REB' is both the target and one of the columns in X. Leaving it in the inputs
# lets the model simply copy the answer, so drop it before fitting.
X_rebounds_leader = X.drop(columns=['REB'])

# Hold out 20% of the rows so the reported MSE is measured on data the model has
# not seen; the fixed seed keeps the split reproducible across re-runs.
(X_train_rebounds_leader, X_test_rebounds_leader,
 y_train_rebounds_leader, y_test_rebounds_leader) = train_test_split(
    X_rebounds_leader, y_rebounds_leader, test_size=0.2, random_state=42
)

# Train the model
rf_rebounds_leader = RandomForestRegressor(n_estimators=100, random_state=42)
rf_rebounds_leader.fit(X_train_rebounds_leader, y_train_rebounds_leader)

# Make predictions on the held-out rows
y_pred_rf_rebounds_leader = rf_rebounds_leader.predict(X_test_rebounds_leader)

# Evaluate the model
mse_rf_rebounds_leader = mean_squared_error(y_test_rebounds_leader, y_pred_rf_rebounds_leader)
logging.info(f'Random Forest Rebounds Leader MSE: {mse_rf_rebounds_leader}')

# Save the trained model (expects the feature columns without 'REB')
joblib.dump(rf_rebounds_leader, 'rf_rebounds_leader_model.pkl')


#### Gradient Boosting Regressor


In [None]:
# 'REB' is both the target and one of the columns in X. Leaving it in the inputs
# lets the model simply copy the answer, so drop it before fitting.
X_rebounds_leader = X.drop(columns=['REB'])

# Hold out 20% of the rows so the reported MSE is measured on data the model has
# not seen; the fixed seed keeps the split reproducible across re-runs.
(X_train_rebounds_leader, X_test_rebounds_leader,
 y_train_rebounds_leader, y_test_rebounds_leader) = train_test_split(
    X_rebounds_leader, y_rebounds_leader, test_size=0.2, random_state=42
)

# Train the model
gb_rebounds_leader = GradientBoostingRegressor(n_estimators=100, random_state=42)
gb_rebounds_leader.fit(X_train_rebounds_leader, y_train_rebounds_leader)

# Make predictions on the held-out rows
y_pred_gb_rebounds_leader = gb_rebounds_leader.predict(X_test_rebounds_leader)

# Evaluate the model
mse_gb_rebounds_leader = mean_squared_error(y_test_rebounds_leader, y_pred_gb_rebounds_leader)
logging.info(f'Gradient Boosting Rebounds Leader MSE: {mse_gb_rebounds_leader}')

# Save the trained model (expects the feature columns without 'REB')
joblib.dump(gb_rebounds_leader, 'gb_rebounds_leader_model.pkl')


#### Support Vector Regressor


In [None]:
# 'REB' is both the target and one of the columns in X. Leaving it in the inputs
# lets the model simply copy the answer, so drop it before fitting.
X_rebounds_leader = X.drop(columns=['REB'])

# Hold out 20% of the rows so the reported MSE is measured on data the model has
# not seen; the fixed seed keeps the split reproducible across re-runs.
(X_train_rebounds_leader, X_test_rebounds_leader,
 y_train_rebounds_leader, y_test_rebounds_leader) = train_test_split(
    X_rebounds_leader, y_rebounds_leader, test_size=0.2, random_state=42
)

# Train the model.
# Support vector machines are sensitive to feature scale, so the features are
# standardized inside a pipeline (scaler fit on the training rows only).
svr_rebounds_leader = make_pipeline(StandardScaler(), SVR())
svr_rebounds_leader.fit(X_train_rebounds_leader, y_train_rebounds_leader)

# Make predictions on the held-out rows
y_pred_svr_rebounds_leader = svr_rebounds_leader.predict(X_test_rebounds_leader)

# Evaluate the model
mse_svr_rebounds_leader = mean_squared_error(y_test_rebounds_leader, y_pred_svr_rebounds_leader)
logging.info(f'Support Vector Rebounds Leader MSE: {mse_svr_rebounds_leader}')

# Save the trained pipeline (scaler + SVR; expects the feature columns without 'REB')
joblib.dump(svr_rebounds_leader, 'svr_rebounds_leader_model.pkl')
