In [30]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
import pandas as pd
import tensorflow as tf
from sqlalchemy import create_engine
import psycopg2
from config import db_password
from path import Path
from sklearn.ensemble import RandomForestClassifier
from sklearn import tree
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

### Establishing connection to the database

In [31]:
# Build the connection URL for the local `nba_data` PostgreSQL database.
# The dialect name must be "postgresql": SQLAlchemy 1.4+ removed the legacy
# "postgres://" alias, so the old URL fails in create_engine() with
# NoSuchModuleError before any connection is attempted.
db_string = f"postgresql://postgres:{db_password}@127.0.0.1:5432/nba_data"

# SQLAlchemy engine used for every database round-trip below.
conn = create_engine(db_string)

In [32]:
# Load the per-player statistics CSV (ISO-8859-1 encoded) and preview it.
df = pd.read_csv(
    "individual_player_stats.csv",
    encoding="ISO-8859-1",
)
df.head()

# ETL (extract / transform / load) steps that are currently disabled:
# df.columns = [i.replace("%", "") for i in df.columns]
# df.drop("Unnamed: 0", inplace =True, axis =1)


Unnamed: 0,player,Pos,Age,Team,Game,GameStarted,MinutesPlayed,FieldGoalMade,FieldGoalAttempt,FieldGoalPercentage,...,FreeThrowPercentage,OffensiveRebound,DeffensiveRebound,TotalRebound,Assist,SteaL,Block,Turnover,PersonalFoul,Points
0,Precious Achiuwa,PF,21,MIA,29,2,420,72,126,0.571,...,0.561,37,79,116,18,12,17,30,54,181
1,Jaylen Adams,PG,24,MIL,7,0,18,1,8,0.125,...,,0,3,3,2,0,0,0,1,2
2,Steven Adams,C,27,NOP,27,27,760,94,156,0.603,...,0.468,116,123,239,58,26,15,46,51,217
3,Bam Adebayo,C,23,MIA,27,27,908,198,347,0.571,...,0.845,53,199,252,149,25,27,82,69,534
4,LaMarcus Aldridge,C,35,SAS,18,18,480,107,225,0.476,...,0.762,15,63,78,35,7,16,16,27,254


### Exporting data to database

In [33]:
# Write the player stats to the `individual_player` table (test round-trip).
# An existing table of that name is replaced and the DataFrame index is not
# written as a column.
#df = pd.read_csv("basketball_data.csv")
df.to_sql(
    name="individual_player",
    con=conn,
    if_exists="replace",
    index=False,
)

### Importing data from database

In [34]:
# Read the `individual_player` table back from the database (test round-trip).
from_sql_df = pd.read_sql(sql="individual_player", con=conn)

In [35]:
# Preview the first 10 rows of the table that was read back from the database.
from_sql_df.head(10)

Unnamed: 0,player,Pos,Age,Team,Game,GameStarted,MinutesPlayed,FieldGoalMade,FieldGoalAttempt,FieldGoalPercentage,...,FreeThrowPercentage,OffensiveRebound,DeffensiveRebound,TotalRebound,Assist,SteaL,Block,Turnover,PersonalFoul,Points
0,Precious Achiuwa,PF,21,MIA,29,2,420,72,126,0.571,...,0.561,37,79,116,18,12,17,30,54,181
1,Jaylen Adams,PG,24,MIL,7,0,18,1,8,0.125,...,,0,3,3,2,0,0,0,1,2
2,Steven Adams,C,27,NOP,27,27,760,94,156,0.603,...,0.468,116,123,239,58,26,15,46,51,217
3,Bam Adebayo,C,23,MIA,27,27,908,198,347,0.571,...,0.845,53,199,252,149,25,27,82,69,534
4,LaMarcus Aldridge,C,35,SAS,18,18,480,107,225,0.476,...,0.762,15,63,78,35,7,16,16,27,254
5,Ty-Shon Alexander,SG,22,PHO,3,0,8,0,3,0.0,...,,0,1,1,1,0,0,0,1,0
6,Nickeil Alexander-Walker,SG,22,NOP,23,3,441,77,188,0.41,...,0.781,5,56,61,46,25,8,30,40,203
7,Grayson Allen,SG,25,MEM,19,8,454,60,140,0.429,...,0.892,7,48,55,39,19,3,20,24,197
8,Jarrett Allen,C,22,TOT,28,10,734,122,190,0.642,...,0.758,82,170,252,45,13,46,43,44,345
9,Jarrett Allen,C,22,BRK,12,5,320,44,65,0.677,...,0.754,38,87,125,20,7,19,22,21,134


In [36]:
# Inspect the dtype pandas inferred for each column of the CSV.
df.dtypes

player                    object
Pos                       object
Age                        int64
Team                      object
Game                       int64
GameStarted                int64
MinutesPlayed              int64
FieldGoalMade              int64
FieldGoalAttempt           int64
FieldGoalPercentage      float64
ThreePointMade             int64
ThreePA                    int64
ThreePointAttempt        float64
TwoPointMade               int64
TwoPointAttempt            int64
TwoPointPercentage       float64
efficencyFGPercentage    float64
FreeThrowsMade             int64
FreeThrowAttempt           int64
FreeThrowPercentage      float64
OffensiveRebound           int64
DeffensiveRebound          int64
TotalRebound               int64
Assist                     int64
SteaL                      int64
Block                      int64
Turnover                   int64
PersonalFoul               int64
Points                     int64
dtype: object

In [37]:
# Drop the columns treated as noise for this analysis.
# `errors="ignore"` keeps the cell idempotent: `df` is reassigned here, so
# re-running the cell without reloading the CSV would otherwise raise KeyError
# because the columns have already been removed.
df = df.drop(
    columns=["player", "Pos", "Age", "GameStarted", "MinutesPlayed", "Game"],
    errors="ignore",
)

df.head(10)

Unnamed: 0,Team,FieldGoalMade,FieldGoalAttempt,FieldGoalPercentage,ThreePointMade,ThreePA,ThreePointAttempt,TwoPointMade,TwoPointAttempt,TwoPointPercentage,...,FreeThrowPercentage,OffensiveRebound,DeffensiveRebound,TotalRebound,Assist,SteaL,Block,Turnover,PersonalFoul,Points
0,MIA,72,126,0.571,0,0,,72,126,0.571,...,0.561,37,79,116,18,12,17,30,54,181
1,MIL,1,8,0.125,0,2,0.0,1,6,0.167,...,,0,3,3,2,0,0,0,1,2
2,NOP,94,156,0.603,0,1,0.0,94,155,0.606,...,0.468,116,123,239,58,26,15,46,51,217
3,MIA,198,347,0.571,2,5,0.4,196,342,0.573,...,0.845,53,199,252,149,25,27,82,69,534
4,SAS,107,225,0.476,24,67,0.358,83,158,0.525,...,0.762,15,63,78,35,7,16,16,27,254
5,PHO,0,3,0.0,0,1,0.0,0,2,0.0,...,,0,1,1,1,0,0,0,1,0
6,NOP,77,188,0.41,24,87,0.276,53,101,0.525,...,0.781,5,56,61,46,25,8,30,40,203
7,MEM,60,140,0.429,44,101,0.436,16,39,0.41,...,0.892,7,48,55,39,19,3,20,24,197
8,TOT,122,190,0.642,1,4,0.25,121,186,0.651,...,0.758,82,170,252,45,13,46,43,44,345
9,BRK,44,65,0.677,0,0,,44,65,0.677,...,0.754,38,87,125,20,7,19,22,21,134


In [38]:
# Narrow the frame to the team code plus the counting stats of interest.
new_df = df[
    [
        "Team",
        "Points",
        "TotalRebound",
        "Assist",
        "Block",
        "SteaL",
        "Turnover",
        "OffensiveRebound",
        "DeffensiveRebound",
    ]
]

In [39]:
# Preview the first 10 rows of the reduced frame.
new_df.head(10)

Unnamed: 0,Team,Points,TotalRebound,Assist,Block,SteaL,Turnover,OffensiveRebound,DeffensiveRebound
0,MIA,181,116,18,17,12,30,37,79
1,MIL,2,3,2,0,0,0,0,3
2,NOP,217,239,58,15,26,46,116,123
3,MIA,534,252,149,27,25,82,53,199
4,SAS,254,78,35,16,7,16,15,63
5,PHO,0,1,1,0,0,0,0,1
6,NOP,203,61,46,8,25,30,5,56
7,MEM,197,55,39,3,19,20,7,48
8,TOT,345,252,45,46,13,43,82,170
9,BRK,134,125,20,19,7,22,38,87


In [40]:
# Group the reduced frame by team.
# NOTE: `df2` is a GroupBy object here, not a DataFrame, and `.head(10)` on it
# returns up to the first 10 rows of *each* team in their original order —
# nothing is aggregated (no per-team sum or mean is computed).
df2 = new_df.groupby(by="Team")
df2.head(10)

Unnamed: 0,Team,Points,TotalRebound,Assist,Block,SteaL,Turnover,OffensiveRebound,DeffensiveRebound
0,MIA,181,116,18,17,12,30,37,79
1,MIL,2,3,2,0,0,0,0,3
2,NOP,217,239,58,15,26,46,116,123
3,MIA,534,252,149,27,25,82,53,199
4,SAS,254,78,35,16,7,16,15,63
...,...,...,...,...,...,...,...,...,...
404,TOT,269,43,80,7,23,37,7,36
425,BOS,222,47,103,9,31,30,7,40
426,TOT,32,8,11,1,3,5,2,6
439,BOS,594,161,108,9,29,56,13,148


In [None]:
# Reload the raw CSV so the modelling section starts from every original
# column again (`df` had columns dropped in an earlier cell).
df = pd.read_csv(
    "individual_player_stats.csv",
    encoding="ISO-8859-1",
)
df.head()

In [None]:
from sklearn.preprocessing import LabelEncoder

# Work on a copy so the raw `df` keeps its string team codes.
df2 = df.copy()

# Replace each team abbreviation with an integer label. The fitted encoder
# stays available as `le`.
le = LabelEncoder()
df2["Team"] = le.fit_transform(df2["Team"])

df2.head(10)

In [None]:
# One-hot encode the remaining text columns.
# `Pos` has to be encoded together with `player`: it is an object (string)
# column, and the scaler / estimators fitted further down cannot consume
# string features, so leaving it raw makes every later `fit` call fail.
# NOTE(review): one dummy column per player name is close to a row id —
# confirm it is really wanted as a feature.
# NOTE(review): the percentage columns contain NaN (visible in the previews
# above); impute or drop them before fitting models that reject NaN.
df2 = pd.get_dummies(df2, columns=["player", "Pos"])
df2.head()

### Creating a Support-vector machine

In [None]:
# Features are every column except `RK`; `RK` itself is the target.
# NOTE(review): `RK` is not among the columns listed by `df.dtypes` earlier in
# this notebook — confirm the CSV really has it, otherwise both lines raise
# KeyError.
X = df2.drop(columns=["RK"])
y = df2["RK"]

### Split the dataset into Training and Testing sets:

In [1]:
from sklearn.model_selection import train_test_split

# Hold out 33% of the rows for testing; the fixed seed makes the split
# repeatable. Stratified alternative, currently disabled:
# X_train, X_test, y_train, y_test = train_test_split(X,
#    y, random_state=1, stratify=y)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)

NameError: name 'X' is not defined

In [None]:
from sklearn.svm import SVC

# Support-vector classifier with a linear kernel; all other settings default.
model = SVC(kernel="linear")

In [None]:
# Train the SVM on the (unscaled) training split.
model.fit(X_train, y_train)

In [None]:
# Predict on the held-out rows and place predictions next to the true values.
y_pred = model.predict(X_test)

results = pd.DataFrame({"Prediction": y_pred, "Actual": y_test})
# Discard the original row labels carried over from `y_test`.
results = results.reset_index(drop=True)

results.head()

In [None]:
from sklearn.metrics import accuracy_score

# Fraction of held-out rows the SVM classified correctly.
accuracy_score(y_true=y_test, y_pred=y_pred)

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# Confusion matrix and per-class precision / recall / F1 for the SVM.
# Both are printed explicitly: a notebook cell only echoes its *last*
# expression, so a bare `confusion_matrix(...)` call in the middle of the cell
# is computed and then silently discarded.
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

In [None]:
# Feature matrix for the decision tree: every column of `df2` except `RK`.
# `drop` returns a new frame, so `df2` itself is left unchanged.
X = df2.drop(columns="RK")
X.head()

In [None]:
# Define the target set: the `RK` column as a plain array.
y = df2["RK"].values
# Peek at the first five target values.
y[:5]

In [None]:
# Split into train / test sets using train_test_split's default test size;
# the fixed seed makes the split repeatable. This overwrites the split
# variables created for the SVM above.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=5)

In [None]:
# Report the shape of each piece of the split, in the order
# X_train, X_test, y_train, y_test.
for split_part in (X_train, X_test, y_train, y_test):
    print(split_part.shape)

In [None]:
# Alternative 80/20 split, stored under separate `*2` names so the split
# above is left untouched.
X_train2, X_test2, y_train2, y_test2 = train_test_split(
    X,
    y,
    train_size=0.80,
    random_state=78,
)

In [None]:
# Report the shape of each piece of the 80/20 split, in the order
# X_train2, X_test2, y_train2, y_test2.
for split_part in (X_train2, X_test2, y_train2, y_test2):
    print(split_part.shape)

In [None]:
# Standardise the features. The scaler is fitted on the training split only,
# and both splits are then transformed with those training statistics.
scaler = StandardScaler()
scaler.fit(X_train)
X_scaler = scaler  # same fitted object, kept under the name used below

X_train_scaled, X_test_scaled = (
    X_scaler.transform(X_train),
    X_scaler.transform(X_test),
)

In [None]:
# Build a decision tree classifier with default settings and fit it on the
# scaled training data (`fit` returns the fitted estimator itself).
# This reuses the name `model`, replacing the SVM defined earlier.
model = tree.DecisionTreeClassifier().fit(X_train_scaled, y_train)

In [None]:
# Predict the target for the scaled test rows with the fitted decision tree.
predictions = model.predict(X_test_scaled)

In [None]:
# Display the decision tree's predictions for the test set.
predictions

In [None]:
# Confusion matrix for the decision tree.
# The class labels are derived from the data instead of being hard-coded as
# 0/1: with a fixed 2x2 set of row/column names the DataFrame constructor
# raises a shape error as soon as the target has any other number of classes.
# Sorted unique labels match the order confusion_matrix uses by default.
class_labels = sorted(set(y_test) | set(predictions))
cm = confusion_matrix(y_test, predictions, labels=class_labels)

# Rows are the true classes, columns the predicted classes.
cm_df = pd.DataFrame(
    cm,
    index=[f"Actual {label}" for label in class_labels],
    columns=[f"Predicted {label}" for label in class_labels],
)

cm_df

In [None]:
# Random Forest
# Feature matrix: every column of `df2` except `RK`. `drop` returns a new
# frame, so `df2` itself is left unchanged.
X = df2.drop(columns="RK")
X.head()

In [None]:
# Define the target set as a 1-D NumPy array.
# `Series.ravel()` is deprecated in recent pandas (a Series is already 1-D);
# `to_numpy()` is the supported way to get the same array.
y = df2["RK"].to_numpy()
# Peek at the first five target values.
y[:5]

In [None]:
# Splitting into Train and Test sets using train_test_split's default test
# size; the fixed seed makes the split repeatable. This overwrites the split
# variables used for the decision tree.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=10)

In [None]:
# Standardise the features for the random forest. The scaler is fitted on the
# new training split only, and both splits are transformed with it.
scaler = StandardScaler()
scaler.fit(X_train)
X_scaler = scaler  # same fitted object, kept under the name used below

X_train_scaled, X_test_scaled = (
    X_scaler.transform(X_train),
    X_scaler.transform(X_test),
)

In [None]:
# Random forest of 128 trees; the fixed seed makes training repeatable.
rf_model = RandomForestClassifier(
    n_estimators=128,
    random_state=78,
)

In [None]:
# Fit the random forest on the scaled training data; the result of `fit` is
# assigned back to the same name.
rf_model = rf_model.fit(X_train_scaled, y_train)

In [None]:
# Predict the target for the scaled test rows with the fitted random forest.
# This overwrites the decision tree's `predictions`.
predictions = rf_model.predict(X_test_scaled)

In [None]:
# Confusion matrix for the random forest.
# The class labels are derived from the data instead of being hard-coded as
# 0/1: with a fixed 2x2 set of row/column names the DataFrame constructor
# raises a shape error as soon as the target has any other number of classes.
# Sorted unique labels match the order confusion_matrix uses by default.
class_labels = sorted(set(y_test) | set(predictions))
cm = confusion_matrix(y_test, predictions, labels=class_labels)

# Rows are the true classes, columns the predicted classes.
cm_df = pd.DataFrame(
    cm,
    index=[f"Actual {label}" for label in class_labels],
    columns=[f"Predicted {label}" for label in class_labels],
)

cm_df