In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error


In [2]:
df_2022 = pd.read_csv('Resources/model_2022-2023.csv')

In [3]:
df_2022.head()

Unnamed: 0.1,Unnamed: 0,minutes/game,I_F_xOnGoal,I_F_xGoals,I_F_xRebounds,I_F_shotAttempts,I_F_points,I_F_rebounds,I_F_reboundGoals,I_F_takeaways,...,I_F_lowDangerShots,I_F_mediumDangerShots,I_F_highDangerShots,I_F_lowDangerxGoals,I_F_mediumDangerxGoals,I_F_highDangerxGoals,I_F_lowDangerGoals,I_F_mediumDangerGoals,I_F_highDangerGoals,I_F_unblockedShotAttempts
0,0,9.32,49.47,6.7,3.8,79,15,5,1,15,...,33,29,7,1.16,3.58,1.96,0,4,1,69
1,1,15.01,37.1,1.36,1.83,78,14,0,0,27,...,50,2,0,1.09,0.28,0.0,1,1,0,52
2,2,13.04,69.79,2.44,3.85,140,5,4,0,9,...,97,5,0,1.98,0.46,0.0,2,0,0,102
3,3,14.02,32.17,1.21,1.81,79,8,5,0,8,...,46,1,0,1.13,0.08,0.0,1,1,0,47
4,4,16.0,39.69,6.81,2.85,65,16,5,1,6,...,29,14,10,1.07,1.73,4.02,1,2,4,53


In [4]:
df_2022.drop('Unnamed: 0', axis='columns',inplace=True)
df_2022.head()

Unnamed: 0,minutes/game,I_F_xOnGoal,I_F_xGoals,I_F_xRebounds,I_F_shotAttempts,I_F_points,I_F_rebounds,I_F_reboundGoals,I_F_takeaways,I_F_giveaways,I_F_lowDangerShots,I_F_mediumDangerShots,I_F_highDangerShots,I_F_lowDangerxGoals,I_F_mediumDangerxGoals,I_F_highDangerxGoals,I_F_lowDangerGoals,I_F_mediumDangerGoals,I_F_highDangerGoals,I_F_unblockedShotAttempts
0,9.32,49.47,6.7,3.8,79,15,5,1,15,16,33,29,7,1.16,3.58,1.96,0,4,1,69
1,15.01,37.1,1.36,1.83,78,14,0,0,27,20,50,2,0,1.09,0.28,0.0,1,1,0,52
2,13.04,69.79,2.44,3.85,140,5,4,0,9,18,97,5,0,1.98,0.46,0.0,2,0,0,102
3,14.02,32.17,1.21,1.81,79,8,5,0,8,12,46,1,0,1.13,0.08,0.0,1,1,0,47
4,16.0,39.69,6.81,2.85,65,16,5,1,6,12,29,14,10,1.07,1.73,4.02,1,2,4,53


# Preparing the Data

In [5]:
#Splitting the data
x = df_2022.copy()
x.drop('I_F_points', axis=1, inplace=True)
X = x.values
y = df_2022['I_F_points']

In [6]:
#further splitting into test and train data
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [7]:
#creating the scaler variable
scaler = StandardScaler()

In [8]:
X_scaler = scaler.fit(X_train)

In [9]:
# Scaling data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Linear Regression Model

In [10]:
linear_model = LinearRegression()

In [11]:
# Fit the data into the model
linear_model.fit(X_train_scaled, y_train)

LinearRegression()

In [12]:
# Display the model's best fit line formula
print(f"Model's formula: y = {linear_model.intercept_} + {linear_model.coef_[0]}X")

Model's formula: y = 28.337931034482736 + 1.8408743574543023X


In [13]:
#Make predictions with the model using X_test_scaled
predicted_y_values = linear_model.predict(X_test_scaled)


In [15]:
# Compute metrics for the linear regression model: score, r2, mse, rmse, std
score = linear_model.score(X_test_scaled, y_test, sample_weight=None)
r2 = r2_score(y_test, predicted_y_values)
mse = mean_squared_error(y_test, predicted_y_values)
mae = mean_absolute_error(y_test, predicted_y_values)

rmse = np.sqrt(mse)
std = np.std(y_test)

# Print relevant metrics.
print(f"The r2 is {r2}.")
print(f"The mean squared error is {mse}.")
print(f"The mean absolute error is {mae}.")
print(f"The root mean squared error is {rmse}.")
print(f"The standard deviation is {std}.")

The r2 is 0.9353594536511055.
The mean squared error is 43.0821443878969.
The mean absolute error is 4.733909576384172.
The root mean squared error is 6.5636989866916435.
The standard deviation is 25.81642765917697.


# Random Forest

In [16]:
# Create a random forest classifier
rf_model = RandomForestRegressor(n_estimators=1000, random_state=42)

In [17]:
# Fitting the model
rf_model_fit = rf_model.fit(X_train_scaled, y_train)

In [18]:
# Making predictions using the testing data
predictions = rf_model_fit.predict(X_test_scaled)

In [19]:
# Compute metrics for the linear regression model: score, r2, mse, rmse, std
score = rf_model_fit.score(X_test_scaled, y_test, sample_weight=None)
r2 = r2_score(y_test, predicted_y_values)
mse = mean_squared_error(y_test, predicted_y_values)
mae = mean_absolute_error(y_test, predicted_y_values)

rmse = np.sqrt(mse)
std = np.std(y_test)

# Print relevant metrics.
print(f"The r2 is {r2}.")
print(f"The mean squared error is {mse}.")
print(f"The mean absolute error is {mae}.")
print(f"The root mean squared error is {rmse}.")
print(f"The standard deviation is {std}.")

The r2 is 0.9353594536511055.
The mean squared error is 43.0821443878969.
The mean absolute error is 4.733909576384172.
The root mean squared error is 6.5636989866916435.
The standard deviation is 25.81642765917697.


# Neural Network

In [20]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
number_input_features = len(X_train[0])

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(tf.keras.layers.Dense(units=5, activation="selu", input_dim=number_input_features))

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=5, activation="selu"))

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=5, activation="selu"))

# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="selu"))

# Check the structure of the model
nn.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 5)                 100       
                                                                 
 dense_1 (Dense)             (None, 5)                 30        
                                                                 
 dense_2 (Dense)             (None, 5)                 30        
                                                                 
 dense_3 (Dense)             (None, 1)                 6         
                                                                 
Total params: 166
Trainable params: 166
Non-trainable params: 0
_________________________________________________________________


In [21]:
# Compile the model
nn.compile(loss="MeanSquaredError", optimizer="adam", metrics=["MeanAbsoluteError"])

In [22]:
# Train the model
# fit_model = nn.fit(X_train_scaled, y_train, epochs=525)
fit_model = nn.fit(X_train, y_train, epochs=525)

Epoch 1/525
Epoch 2/525
Epoch 3/525
Epoch 4/525
Epoch 5/525
Epoch 6/525
Epoch 7/525
Epoch 8/525
Epoch 9/525
Epoch 10/525
Epoch 11/525
Epoch 12/525
Epoch 13/525
Epoch 14/525
Epoch 15/525
Epoch 16/525
Epoch 17/525
Epoch 18/525
Epoch 19/525
Epoch 20/525
Epoch 21/525
Epoch 22/525
Epoch 23/525
Epoch 24/525
Epoch 25/525
Epoch 26/525
Epoch 27/525
Epoch 28/525
Epoch 29/525
Epoch 30/525
Epoch 31/525
Epoch 32/525
Epoch 33/525
Epoch 34/525
Epoch 35/525
Epoch 36/525
Epoch 37/525
Epoch 38/525
Epoch 39/525
Epoch 40/525
Epoch 41/525
Epoch 42/525
Epoch 43/525
Epoch 44/525
Epoch 45/525
Epoch 46/525
Epoch 47/525
Epoch 48/525
Epoch 49/525
Epoch 50/525
Epoch 51/525
Epoch 52/525
Epoch 53/525
Epoch 54/525
Epoch 55/525
Epoch 56/525
Epoch 57/525
Epoch 58/525
Epoch 59/525
Epoch 60/525
Epoch 61/525
Epoch 62/525
Epoch 63/525
Epoch 64/525
Epoch 65/525
Epoch 66/525
Epoch 67/525
Epoch 68/525
Epoch 69/525
Epoch 70/525
Epoch 71/525
Epoch 72/525
Epoch 73/525
Epoch 74/525
Epoch 75/525
Epoch 76/525
Epoch 77/525
Epoch 78

In [23]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

7/7 - 0s - loss: 49.2674 - mean_absolute_error: 4.6724 - 267ms/epoch - 38ms/step
Loss: 49.26742172241211, Accuracy: 4.672400951385498
