### Machine Learning Deployment

In [21]:
# imports
import psycopg2
import pandas as pd
import numpy as np
import pickle
from sqlalchemy import create_engine

import config as c

# import for multiple output per cell
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

### Load data from database

In [22]:
# connection to database
connection = psycopg2.connect(
    host = c.host,
    port = c.port,
    user = c.user,
    password = c.password,
    database = c.database
    )
cursor=connection.cursor()

In [28]:
data_sql = """
SELECT *
FROM comprehensive_dataset;
"""

#  load in tables as dataframes
data_df = pd.read_sql(data_sql, con=connection)
data_df.head()

Unnamed: 0,GUID,Name,HallOfFameStatus,HallofFameClass,YearDrafted,TO_YEAR,Years_Played,HOF_Elgibility_Year,Pick,Team,...,OREB,DREB,REB,AST,STL,BLK,TOV,EFF,AGE_ROOKIE_SEASON,numberRound
0,MIKENILES1980,Mike Niles,Not Inducted,,1980,1980,1,1985,,,...,0.6,0.7,1.3,0.3,0.2,0.0,0.6,1.4,,
1,JAREDSULLINGER2012,Jared Sullinger,Not Inducted,,2012,2016,5,2021,21.0,BOS,...,2.0,3.8,5.9,0.8,0.5,0.5,0.6,10.1,20.0,1.0
2,ARNETTMOULTRIE2012,Arnett Moultrie,Not Inducted,,2012,2013,2,2018,27.0,MIA,...,1.6,1.5,3.1,0.2,0.4,0.2,0.4,5.8,21.0,1.0
3,PERRYJONESIII2012,Perry Jones III,Not Inducted,,2012,2014,3,2019,,,...,0.3,1.3,1.6,0.3,0.1,0.2,0.4,2.4,21.0,1.0
4,FESTUSEZELI2012,Festus Ezeli,Not Inducted,,2012,2016,5,2021,30.0,GSW,...,1.8,2.2,4.0,0.3,0.3,0.9,0.8,5.5,22.0,1.0


### Input Prep

In [29]:
# drop features
data_df.drop(['GUID', 'Name', 'HallOfFameStatus', 'Team', 'College', # dropping object columns  
              
              'HallofFameClass', 'YearDrafted', 'TO_YEAR', 'Years_Played', 
              'HOF_Elgibility_Year', 'GP', 'MIN', 'AGE_ROOKIE_SEASON' # non performance stats
                ], axis=1, inplace=True)

# fill NaNs with 0
data_df = data_df.fillna(0)

data_df.head()
print(data_df.shape)

Unnamed: 0,Pick,PTS,FGM,FGA,FG%,3P_Made,3PA,3P%,FTM,FTA,FT%,OREB,DREB,REB,AST,STL,BLK,TOV,EFF,numberRound
0,0.0,2.6,1.1,3.1,34.8,0.0,0.1,50.0,0.4,0.8,45.9,0.6,0.7,1.3,0.3,0.2,0.0,0.6,1.4,0.0
1,21.0,6.0,2.5,5.0,49.3,0.0,0.1,20.0,1.0,1.4,74.6,2.0,3.8,5.9,0.8,0.5,0.5,0.6,10.1,1.0
2,27.0,3.7,1.7,2.9,58.2,0.0,0.0,0.0,0.4,0.6,64.3,1.6,1.5,3.1,0.2,0.4,0.2,0.4,5.8,1.0
3,0.0,2.3,1.0,2.6,39.4,0.0,0.1,0.0,0.3,0.4,66.7,0.3,1.3,1.6,0.3,0.1,0.2,0.4,2.4,1.0
4,30.0,2.4,0.9,2.2,43.8,0.0,0.0,0.0,0.6,1.0,53.1,1.8,2.2,4.0,0.3,0.3,0.9,0.8,5.5,1.0


(1217, 20)


## Deploy Model

In [16]:
# open file and load model
with open('ml_model.sav', 'rb') as machine_learning:
    model = pickle.load(machine_learning)

In [17]:
# generate predictions on ml_input_df
predictions = model.predict(ml_input_df)
ml_output = np.array(predictions)

In [18]:
data_df['ML_Prediction'] = ml_output.tolist()
data_df.head()

Unnamed: 0,GUID,Name,HallOfFameStatus,HallofFameClass,YearDrafted,TO_YEAR,Years_Played,HOF_Elgibility_Year,Pick,Team,...,FT%,OREB,DREB,REB,AST,STL,BLK,TOV,EFF,ML_Prediction
0,MIKENILES1980,Mike Niles,Not Inducted,,1980,1980,1,1985,,,...,45.9,0.6,0.7,1.3,0.3,0.2,0.0,0.6,1.4,0
1,WAYNEROBINSON1980,Wayne Robinson,Not Inducted,,1980,1980,1,1985,31.0,LAL,...,72.9,1.4,2.2,3.6,1.4,0.6,0.3,1.8,7.8,0
2,BILLYREID1980,Billy Reid,Not Inducted,,1980,1980,1,1985,182.0,GSW,...,56.4,0.5,0.6,1.0,1.2,0.6,0.1,1.3,2.8,0
3,ALEXBRADLEY1981,Alex Bradley,Not Inducted,,1981,1981,1,1986,86.0,NYK,...,60.4,0.8,0.9,1.7,0.3,0.3,0.1,0.7,3.4,0
4,GARRYWITTS1981,Garry Witts,Not Inducted,,1981,1981,1,1986,103.0,WSB,...,82.5,0.6,0.7,1.3,0.8,0.4,0.1,0.8,3.8,0


## Write to Databse as a Table

In [20]:
# create engine to connect to db
engine = create_engine(c.engine)

# load df into db
data_df.to_sql('HOF_Predictions', engine)

ValueError: Table 'HOF_Predictions' already exists.