# Machine Learning - Predicting NBA positions

Open this notebook in [Callysto](https://hub.callysto.ca/jupyter/hub/user-redirect/git-pull?repo=https://github.com/pbeens/Data-Dunkers&branch=main&subPath=ArtificialIntelligence/predicting-nba.ipynb&depth=1) | [Colab](https://githubtocolab.com/pbeens/Data-Dunkers/blob/main/ArtificialIntelligence/predicting-nba.ipynb).

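# Introduction

This notebook loads a table of NBA player statistics (the `20232024nbaplayerstatsreg.csv` file), keeps one row per player, removes players whose `PTS` value is 10 or lower, and merges the positions `PG`/`SG` into `G` and `PF`/`SF` into `F`. It then trains three models to predict a player's position from their statistics: a random forest, a random forest tuned with a grid search, and a small neural network.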

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import GridSearchCV

In [None]:
# Location of the player statistics CSV (semicolon-separated, read as Latin-1).
nba_player_stats_url = 'https://raw.githubusercontent.com/Data-Dunkers/data-dunkers-modules/main/data-dunkers/Data/20232024nbaplayerstatsreg.csv'

# Read the file, then flip the row order so the last row of the file comes
# first. The original index labels are kept; only the order changes.
raw_player_rows = pd.read_csv(nba_player_stats_url, sep=';', encoding='latin1')
nba_player_stats = raw_player_rows[::-1]
display(nba_player_stats)

In [None]:
# Show the column labels of the loaded table (the features used for
# modelling later in the notebook are picked from these names).
nba_player_stats.columns

In [None]:
# Pull every row for one named player so that repeated entries, if any,
# are visible in the output.
check_for_duplicate = 'Precious Achiuwa'
player_matches = nba_player_stats["Player"].eq(check_for_duplicate)
results = nba_player_stats[player_matches]
results

In [None]:
# Keep a single row per player: the row holding that player's highest PTS
# value. `idxmax` returns the index label of that row for each player.
top_pts_row_labels = nba_player_stats.groupby('Player')['PTS'].idxmax()

# Select those rows and renumber the index from 0.
nba_player_stats = nba_player_stats.loc[top_pts_row_labels].reset_index(drop=True)
display(nba_player_stats)

In [None]:
# Look the same player up again; the output shows how many rows the table
# currently holds for that name.
check_for_duplicate = 'Precious Achiuwa'
is_checked_player = nba_player_stats["Player"].eq(check_for_duplicate)
results = nba_player_stats[is_checked_player]
results

In [None]:
# Remove every player whose PTS value is 10 or lower.
low_scorers = nba_player_stats['PTS'] <= 10

# Negating the mask (instead of testing PTS > 10) keeps any row with a
# missing PTS value, which is what dropping the matching index labels did.
# `reset_index` returns a new frame, so its result must be assigned;
# otherwise the table keeps the gapped index while the output shows a
# renumbered one.
nba_player_stats = nba_player_stats.loc[~low_scorers].reset_index(drop=True)
nba_player_stats

In [None]:
# Look up one player after the PTS <= 10 rows were removed; an empty result
# means that player is no longer in the table.
check_for_over_10 = 'Precious Achiuwa'
still_present = nba_player_stats["Player"].eq(check_for_over_10)
results = nba_player_stats[still_present]
results

In [None]:
# Collapse the listed positions into two broader labels. Any 'Pos' value
# that is not a key of this mapping is left exactly as it is.
position_mapping = {
    'PG': 'G',
    'SG': 'G',
    'PF': 'F',
    'SF': 'F',
}

nba_player_stats = (
    nba_player_stats
    # `map` yields NaN for unmapped values; `fillna` restores the original label.
    .assign(Pos=lambda stats: stats['Pos'].map(position_mapping).fillna(stats['Pos']))
    .reset_index(drop=True)
)
display(nba_player_stats)

In [None]:
# Columns used as model inputs, and the column the model should predict.
features = [
    'FG%', '3P', '3PA', '3P%', '2P%', 'FT%',
    'ORB', 'DRB', 'TRB',
    'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS',
]
target = 'Pos'

X = nba_player_stats.loc[:, features]
y = nba_player_stats.loc[:, target]

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Train a random forest with default settings (`fit` returns the fitted estimator).
model = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Score the held-out rows.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:}")

# Per-class precision, recall and F1.
print(classification_report(y_test, y_pred))

In [None]:
# Hyperparameter values to try; every combination is evaluated.
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
}

# 5-fold cross-validated search on the training split, ranked by accuracy.
model = RandomForestClassifier(random_state=42)
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
).fit(X_train, y_train)

print("Best parameters:", grid_search.best_params_)

# Evaluate the best estimator found by the search on the held-out test split.
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Best model accuracy: {accuracy:}")

In [None]:
# Seed Python, NumPy and TensorFlow so that weight initialisation and batch
# shuffling are repeatable, in line with the fixed random_state=42 used for
# the train/test split.
tf.keras.utils.set_random_seed(42)

# Turn the position labels into integer class ids, as required by the
# sparse categorical cross-entropy loss used below. Encoding from the
# original column (rather than from `y`, which this cell overwrites) keeps
# the cell idempotent: re-running it leaves `encoder.classes_` as the
# position labels instead of already-encoded integers.
encoder = LabelEncoder()
y = encoder.fit_transform(nba_player_stats[target])

# Same split parameters as before, now with the encoded labels.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Two hidden ReLU layers followed by a softmax with one output per class.
input_shape = X_train.shape[1]
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(input_shape,)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(len(encoder.classes_), activation='softmax'),
])

model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# 20% of the training rows are set aside for validation during training.
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2)

# `evaluate` returns the loss followed by the compiled metrics; `temp` holds
# the test loss and is not used further in this cell.
temp, accuracy = model.evaluate(X_test, y_test)
print(f"Accuracy of the neural network: {accuracy:}")