### 1) Importing Dependencies

In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
import pandas as pd
import tensorflow as tf
from sqlalchemy import create_engine
import psycopg2
from config import db_password2
from path import Path
from sklearn.ensemble import RandomForestClassifier
from sklearn import tree
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

### 2) Connecting to the Google Cloud Services Database

In [2]:
# Importing dataframe from SQL (test)
db_string = f"postgres://postgres:{db_password2}@34.94.69.49/NBA_Database"
conn = create_engine(db_string)

### 3) Team Abbreviations Datafile
###### NOTE: The Team Abbreviation csv file was manually exported to postgreSQL

In [91]:
# Importing dataframe from SQL to verify the data can be taken in.
team_abrv = pd.read_sql("team_abbreviations", con = conn)

In [92]:
# Inspecting columns
team_abrv.columns

Index(['team', 'franchise'], dtype='object')

In [93]:
# Reading the dataframe
team_abrv.head(10)

Unnamed: 0,team,franchise
0,ATL,Atlanta Hawks
1,BRK,Brooklyn Nets
2,BOS,Boston Celtics
3,CHA,Charlotte Hornets
4,CHI,Chicago Bulls
5,CLE,Cleveland Cavaliers
6,DAL,Dallas Mavericks
7,DEN,Denver Nuggets
8,DET,Detroit Pistons
9,GSW,Golden State Warriors


### 4) Individual Player Stats Datafile

In [76]:
# Read in dataframe
ind_plr_stats = pd.read_csv("./Resources/individual_player_stats2.csv", encoding='ISO-8859-1')

In [77]:
# Inspecting columns
ind_plr_stats.columns

Index(['player', 'pos', 'age', 'team', 'game', 'gamestarted', 'minutesplayed',
       'fieldgoalmade', 'fieldgoalattempt', 'fieldgoalpercentage',
       'threepointmade', 'Threepointattempt', 'Threepointpercentage',
       'twopointmade', 'twopointattempt', 'twopointpercentage',
       'efficencyfgpercentage', 'freethrowsmade', 'freethrowattempt',
       'freethrowpercentage', 'offensiverebound', 'defensiverebound',
       'totalrebound', 'assist', 'steal', 'block', 'turnover', 'personalfoul',
       'points'],
      dtype='object')

In [78]:
# Reading the dataframe
ind_plr_stats.head(10)

Unnamed: 0,player,pos,age,team,game,gamestarted,minutesplayed,fieldgoalmade,fieldgoalattempt,fieldgoalpercentage,...,freethrowpercentage,offensiverebound,defensiverebound,totalrebound,assist,steal,block,turnover,personalfoul,points
0,Steven Adams,C,27,NOP,27,27,760,94,156,0.603,...,0.468,116,123,239,58,26,15,46,51,217
1,Bam Adebayo,C,23,MIA,27,27,908,198,347,0.571,...,0.845,53,199,252,149,25,27,82,69,534
2,LaMarcus Aldridge,C,35,SAS,18,18,480,107,225,0.476,...,0.762,15,63,78,35,7,16,16,27,254
3,Nickeil Alexander-Walker,SG,22,NOP,23,3,441,77,188,0.41,...,0.781,5,56,61,46,25,8,30,40,203
4,Grayson Allen,SG,25,MEM,19,8,454,60,140,0.429,...,0.892,7,48,55,39,19,3,20,24,197
5,Jarrett Allen,C,22,TOT,28,10,734,122,190,0.642,...,0.758,82,170,252,45,13,46,43,44,345
6,Jarrett Allen,C,22,CLE,16,5,414,78,125,0.624,...,0.761,44,83,127,25,6,27,21,23,211
7,Kyle Anderson,PF,27,MEM,24,24,675,120,257,0.467,...,0.781,22,128,150,92,27,18,34,43,330
8,Giannis Antetokounmpo,PF,26,MIL,28,28,944,287,516,0.556,...,0.637,48,272,320,165,36,36,104,89,784
9,Thanasis Antetokounmpo,SF,28,MIL,19,0,157,19,32,0.594,...,0.556,16,18,34,15,5,3,15,27,45


In [79]:
# Changing column names to lowercase
ind_plr_stats.columns = [i.lower() for i in ind_plr_stats.columns]

In [80]:
# Exporting dataframe to SQL 
ind_plr_stats.to_sql("individual_player_stats", con = conn, index=False, if_exists="append")

In [26]:
# Importing dataframe from SQL to verify the data can be taken in.
ind_plr_stats = pd.read_sql("individual_player_stats", con = conn)

In [27]:
# Reading the dataframe
ind_plr_stats.head(10)

Unnamed: 0,player,pos,age,team,game,gamestarted,minutesplayed,fieldgoalmade,fieldgoalattempt,fieldgoalpercentage,...,freethrowpercentage,offensiverebound,defensiverebound,totalrebound,assist,steal,block,turnover,personalfoul,points
0,Steven Adams,C,27,NOP,27,27,760,94,156,1,...,0,116,123,239,58,26,15,46,51,217
1,Bam Adebayo,C,23,MIA,27,27,908,198,347,1,...,1,53,199,252,149,25,27,82,69,534
2,LaMarcus Aldridge,C,35,SAS,18,18,480,107,225,0,...,1,15,63,78,35,7,16,16,27,254
3,Nickeil Alexander-Walker,SG,22,NOP,23,3,441,77,188,0,...,1,5,56,61,46,25,8,30,40,203
4,Grayson Allen,SG,25,MEM,19,8,454,60,140,0,...,1,7,48,55,39,19,3,20,24,197
5,Jarrett Allen,C,22,TOT,28,10,734,122,190,1,...,1,82,170,252,45,13,46,43,44,345
6,Jarrett Allen,C,22,CLE,16,5,414,78,125,1,...,1,44,83,127,25,6,27,21,23,211
7,Kyle Anderson,PF,27,MEM,24,24,675,120,257,0,...,1,22,128,150,92,27,18,34,43,330
8,Giannis Antetokounmpo,PF,26,MIL,28,28,944,287,516,1,...,1,48,272,320,165,36,36,104,89,784
9,Thanasis Antetokounmpo,SF,28,MIL,19,0,157,19,32,1,...,1,16,18,34,15,5,3,15,27,45


In [28]:
# Verifying the data types
ind_plr_stats.dtypes

player                   object
pos                      object
age                       int64
team                     object
game                      int64
gamestarted               int64
minutesplayed             int64
fieldgoalmade             int64
fieldgoalattempt          int64
fieldgoalpercentage       int64
threepointmade            int64
threepointattempt         int64
threepointpercentage      int64
twopointmade              int64
twopointattempt           int64
twopointpercentage        int64
efficencyfgpercentage     int64
freethrowsmade            int64
freethrowattempt          int64
freethrowpercentage       int64
offensiverebound          int64
defensiverebound          int64
totalrebound              int64
assist                    int64
steal                     int64
block                     int64
turnover                  int64
personalfoul              int64
points                    int64
dtype: object

### 5) NBA Season Stats Datafile

In [53]:
# Read in dataframe
season_stats = pd.read_csv("./Resources/nba_season_stats2.csv", encoding='ISO-8859-1')
season_stats.head()

Unnamed: 0,Franchise,Conference,Win,Lost,WinPercentage,GamesBehind,ConferenceRecord,DivisionRecord,HomeRecord,RoadRecord,OverTimeRecord,Last10Record,Streak
0,Atlanta Hawks,Eastern,29,53,0.354,31.0,16-36,6-10,17-24,12-29,3-1,5-5,L 3
1,Atlanta Hawks,Eastern,20,47,0.299,33.0,11-32,6-7,14-20,6-27,2-4,4-6,L 1
2,Atlanta Hawks,Eastern,24,58,0.293,35.0,12-40,5-11,16-25,8-33,0-1,3-7,L 1
3,Atlanta Hawks,Eastern,48,34,0.585,9.0,29-23,8-8,27-14,21-20,0-5,6-4,L 2
4,Atlanta Hawks,Eastern,43,39,0.524,10.0,30-22,6-10,23-18,20-21,5-0,6-4,L 1


In [54]:
# Inspecting columns.
season_stats.columns

Index(['Franchise', 'Conference', 'Win', 'Lost', 'WinPercentage',
       'GamesBehind', 'ConferenceRecord', 'DivisionRecord', 'HomeRecord',
       'RoadRecord', 'OverTimeRecord', 'Last10Record', 'Streak'],
      dtype='object')

#### Cleaning the Dataset

In [81]:
# Change the column name
season_stats.rename(columns={season_stats.columns[0]:"Franchise"}, inplace=True)

In [82]:
season_stats.head(10)

Unnamed: 0,Franchise,conference,win,lost,winpercentage,gamesbehind,conferencerecord,divisionrecord,homerecord,roadrecord,overtimerecord,last10record,streak
0,Atlanta Hawks,Eastern,29,53,0.354,31.0,16-36,6-10,17-24,12-29,3-1,5-5,L 3
1,Atlanta Hawks,Eastern,20,47,0.299,33.0,11-32,6-7,14-20,6-27,2-4,4-6,L 1
2,Atlanta Hawks,Eastern,24,58,0.293,35.0,12-40,5-11,16-25,8-33,0-1,3-7,L 1
3,Atlanta Hawks,Eastern,48,34,0.585,9.0,29-23,8-8,27-14,21-20,0-5,6-4,L 2
4,Atlanta Hawks,Eastern,43,39,0.524,10.0,30-22,6-10,23-18,20-21,5-0,6-4,L 1
5,Atlanta Hawks,Eastern,12,16,0.429,6.5,8-9,1-2,6-9,6-7,1-1,3-7,W 1
6,Boston Celtics,Eastern,53,29,0.646,0.0,36-16,11-5,30-11,23-18,2-1,7-3,W 3
7,Boston Celtics,Eastern,55,27,0.671,4.0,33-19,12-4,27-14,28-13,2-2,6-4,W 1
8,Boston Celtics,Eastern,48,24,0.667,7.5,30-13,9-6,26-10,22-14,2-3,6-4,L 1
9,Boston Celtics,Eastern,49,33,0.598,11.0,35-17,10-6,28-13,21-20,4-0,6-4,W 1


In [83]:
# Changing column names to lowercase
season_stats.columns = [i.lower() for i in season_stats.columns]

In [84]:
# Exporting dataframe to SQL 
season_stats.to_sql("nba_season_stats", con = conn, index=False, if_exists="append")

In [85]:
# Importing dataframe from SQL to verify the data can be taken in.
season_stats = pd.read_sql("nba_season_stats", con = conn)

In [86]:
# Reading the dataframe
season_stats.head(10)

Unnamed: 0,franchise,conference,win,lost,winpercentage,gamesbehind,conferencerecord,divisionrecord,homerecord,roadrecord,overtimerecord,last10record,streak
0,Atlanta Hawks,Eastern,29,53,0.354,31.0,16-36,6-10,17-24,12-29,3-1,5-5,L 3
1,Atlanta Hawks,Eastern,20,47,0.299,33.0,11-32,6-7,14-20,6-27,2-4,4-6,L 1
2,Atlanta Hawks,Eastern,24,58,0.293,35.0,12-40,5-11,16-25,8-33,0-1,3-7,L 1
3,Atlanta Hawks,Eastern,48,34,0.585,9.0,29-23,8-8,27-14,21-20,0-5,6-4,L 2
4,Atlanta Hawks,Eastern,43,39,0.524,10.0,30-22,6-10,23-18,20-21,5-0,6-4,L 1
5,Atlanta Hawks,Eastern,12,16,0.429,6.5,8-9,1-2,6-9,6-7,1-1,3-7,W 1
6,Boston Celtics,Eastern,53,29,0.646,0.0,36-16,11-5,30-11,23-18,2-1,7-3,W 3
7,Boston Celtics,Eastern,55,27,0.671,4.0,33-19,12-4,27-14,28-13,2-2,6-4,W 1
8,Boston Celtics,Eastern,48,24,0.667,7.5,30-13,9-6,26-10,22-14,2-3,6-4,L 1
9,Boston Celtics,Eastern,49,33,0.598,11.0,35-17,10-6,28-13,21-20,4-0,6-4,W 1
