# Artificial Neural Network (ANN)
By: Alexander Peltier, Parker Spaan and Matthew Powers

## Data Preprocessing

In [25]:
import pandas as pd
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Load the raw NBA stats from the CSV file
data_unclean = pd.read_csv('nba_stats.csv')

# Keep only the seasons from 2016-17 through 2021-22 (both ends inclusive).
# `between` compares the season labels as strings, so this relies on every
# label following the same 'YYYY-YY' format as the bounds below.
filtered_data = data_unclean[data_unclean['season'].between('2016-17', '2021-22')]

# Draw 100 random rows from the filtered data. The fixed seed makes the draw
# repeatable, so "Restart & Run All" reproduces the same sample; it matches the
# random_state=42 used for the train/test split and the MLP.
data = filtered_data.sample(n=100, random_state=42)

## Extract required columns

In [26]:
# Columns are picked by position, not by name, so these depend on the CSV layout.
PLAYER_NAME_COL = 1
STAT_COLS = slice(12, 15)  # positions 12, 13 and 14

# Expected to be points, rebounds, assists (in that order) — verify against the CSV header
stats = data.iloc[:, STAT_COLS]
# Player names
players = data.iloc[:, PLAYER_NAME_COL]

## Define "Optimal Team"

#### We have defined our optimal team as the top 5 players of this player_values column
#### The ANN will be predicting these player_values and then we will take the top 5

#####           Note: we added this column to give the ANN something to predict that we could evaluate

##### We are using the players' points, rebounds, and assists for this metric, with points weighted most heavily at 0.5, followed by rebounds at 0.3 and then assists at 0.2

#### The goal is for the ANN to learn this weighted sum (the dot product of the stats and the weights) from the data

In [27]:
# Player value is the weighted sum of the three stat columns: each weight
# multiplies the stat column in the same position (0.5, then 0.3, then 0.2).
weights = [0.5, 0.3, 0.2]
# `@` is the matrix-product operator; on a DataFrame it delegates to .dot()
player_values = stats @ weights

## Build a MLP

In [28]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    stats,
    player_values,
    test_size=0.2,
    random_state=42,
)

# Standardise the features: the scaler is fitted on the training rows only,
# then that same fitted transform is applied to both the train and test rows
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Build the MLP regressor (hidden layers of 100, 50 and 25 units) and fit it
# to the scaled training features
mlp = MLPRegressor(
    hidden_layer_sizes=(100, 50, 25),
    max_iter=1000,
    random_state=42,
)
mlp.fit(X_train, y_train)

## Get The Results

In [29]:
# Score every sampled player with the trained model; the raw stats go through
# the same fitted scaler before prediction
scaled_stats = scaler.transform(stats)
predicted_values = mlp.predict(scaled_stats)

BANNER = "#############################################"

print(BANNER)
# Positions of the five highest predicted values, best first
top_players_indices_predicted = predicted_values.argsort()[:-6:-1]
top_players_predicted = players.iloc[top_players_indices_predicted]

print("Optimal Team Predicted:")
print(top_players_predicted)

print(BANNER)

# Same ranking, this time using the actual weighted-sum values
top_players_indices_actual = player_values.argsort().iloc[:-6:-1]
top_players_actual = players.iloc[top_players_indices_actual]

print("Optimal Team Actual:")
print(top_players_actual)
print(BANNER)

#############################################
Optimal Team Predicted:
12253         Nikola Jokic
10005         LeBron James
10284         Devin Booker
9662         DeMar DeRozan
12011    Russell Westbrook
Name: player_name, dtype: object
#############################################
Optimal Team Actual:
12253         Nikola Jokic
10005         LeBron James
10284         Devin Booker
9662         DeMar DeRozan
12011    Russell Westbrook
Name: player_name, dtype: object
#############################################


##### In Summary, the optimal predicted team based on the ANN player values is correct 

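##### The reason we know this is that the players' true values also agree with this conclusion, as shown above when displaying the predicted and actual optimal teams

##### Note: the ranking above is computed over all sampled players, including the 80% used to train the ANN, so this agreement is not a purely held-out evaluation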