In [1]:
# imports
import psycopg2
import pandas as pd
import numpy as np
from sqlalchemy import create_engine

import config as c

# Show the output of every top-level expression in a cell, not only the last one.
# Later cells rely on this to display a value from a line that is not the cell's last.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

### Load data from database

In [2]:
# Open a PostgreSQL connection; every setting is read from the local `config` module (imported as `c`)
connection = psycopg2.connect(
    host=c.host,
    port=c.port,
    user=c.user,
    password=c.password,
    database=c.database,
)

# Cursor bound to the connection opened above
cursor = connection.cursor()

In [3]:
# SQL query: every row and column of the final_dataset table
data_sql = """
SELECT *
FROM final_dataset;
"""

# Load the query result into a DataFrame, then always release the connection.
# psycopg2 opens a transaction implicitly on the first statement; leaving the
# connection open keeps that transaction (and its lock on final_dataset) alive,
# so the ALTER TABLE issued later from a separate connection would wait on it.
# Because the connection is closed here, re-run the connection cell before
# re-running this one.
try:
    data_df = pd.read_sql(data_sql, con=connection)
finally:
    cursor.close()
    connection.close()

### Input Preparation

In [4]:
# Drop the columns that are not passed to the model
columns_to_drop = [
    'GUID',
    'Name',
    'HallofFameClass',
    'YearDrafted',
    'TO_YEAR',
    'Years_Played',
    'HOF_Elgibility_Year',
    'Team',
    'College',
    'GP',
    'MIN',
    'idPlayer',
    'BIRTHDATE',
    'POSITION',
    'AGE_ROOKIE_SEASON',
    'Draft_Decade',
    'numBallotsBeforeInduct',
    'HallOfFameStatus',
]

input_df = (
    data_df
    .drop(columns=columns_to_drop)
    # final_dataset gains an 'ML_Prediction' column once this notebook has
    # written its predictions back, and SELECT * then returns it here.
    # Remove it when present so an earlier prediction is never used as a
    # feature; errors='ignore' keeps the first run (column absent) working.
    .drop(columns=['ML_Prediction'], errors='ignore')
)
input_df.head()
print(input_df.shape)

Unnamed: 0,Pick,PTS,FGM,FGA,FG%,3P_Made,3PA,3P%,FTM,FTA,FT%,OREB,DREB,REB,AST,STL,BLK,TOV,EFF,numberRound
0,21,6.3,2.8,5.2,53.3,0.0,0.1,0.0,0.8,1.3,62.5,1.7,3.5,5.2,0.4,0.5,0.5,1.0,9.0,1
1,26,5.6,1.9,6.0,32.3,1.1,3.6,30.1,0.7,0.8,86.1,0.5,1.6,2.0,0.5,0.5,0.3,0.5,4.3,1
2,32,7.9,2.8,7.0,39.6,0.7,2.4,28.7,1.6,2.1,75.2,1.0,2.2,3.2,1.2,0.7,1.1,1.7,7.6,2
3,34,5.4,1.9,5.5,35.5,0.6,2.2,26.2,0.9,1.2,75.0,0.6,1.9,2.5,0.9,0.6,0.3,1.0,4.8,2
4,60,3.7,1.5,3.3,44.9,0.0,0.3,13.3,0.6,1.1,57.4,0.9,2.0,2.9,0.3,0.2,0.4,0.4,4.8,2


(1217, 20)


### Deploy Model on Input Data

In [5]:
# pickle is needed to deserialize the saved model
import pickle

# Read the serialized model from 'ml_model.sav' in the working directory.
# NOTE(review): pickle.load can execute arbitrary code from the file, so only
# load a model file that comes from a trusted source.
with open('ml_model.sav', 'rb') as model_file:
    model = pickle.load(model_file)

In [6]:
# Run the loaded model on the prepared input columns
predictions = model.predict(input_df)
print(predictions)

# Keep the predictions as a NumPy array for the next step
ml_output = np.array(predictions)

[0 0 0 ... 0 0 0]


In [7]:
# Attach the predictions to the original dataset as an 'ML_Prediction' column
data_df = data_df.assign(ML_Prediction=ml_output.tolist())
data_df.head()

Unnamed: 0,GUID,Name,HallOfFameStatus,HallofFameClass,YearDrafted,TO_YEAR,Years_Played,HOF_Elgibility_Year,Pick,Team,...,TOV,EFF,idPlayer,numberRound,BIRTHDATE,POSITION,AGE_ROOKIE_SEASON,Draft_Decade,numBallotsBeforeInduct,ML_Prediction
0,MITCHMCGARY2014,Mitch McGary,Not Inducted,,2014,2015,2,2020,21,OKC,...,1.0,9.0,203956.0,1,1992-06-06,Forward,22.36,2010s,,0
1,PJHAIRSTON2014,PJ Hairston,Not Inducted,,2014,2015,2,2020,26,MIA,...,0.5,4.3,203798.0,1,1992-12-24,Forward,21.81,2010s,,0
2,KJMCDANIELS2014,KJ McDaniels,Not Inducted,,2014,2016,3,2021,32,PHI,...,1.7,7.6,203909.0,2,1993-02-09,Guard,21.68,2010s,,0
3,CLEANTHONYEARLY2014,Cleanthony Early,Not Inducted,,2014,2015,2,2020,34,NYK,...,1.0,4.8,203921.0,2,1991-04-17,Forward,23.5,2010s,,0
4,CORYJEFFERSON2014,Cory Jefferson,Not Inducted,,2014,2015,2,2020,60,SAS,...,0.4,4.8,203928.0,2,1990-12-26,Forward,23.8,2010s,,0


### Append Table in Database, Add Predictions

In [None]:
# connection to database
connection = psycopg2.connect(
    host = c.host,
    port = c.port,
    user = c.user,
    password = c.password,
    database = c.database
    )
cur=connection.cursor()

# Run the whole write-back inside try/finally so the cursor and connection are
# released even when a statement fails. Closing a connection without commit
# discards the pending transaction, so a failure leaves final_dataset unchanged.
try:
    # (GUID, prediction) pairs used to match each prediction to its row
    rows = zip(data_df.GUID, data_df.ML_Prediction)

    # SQL query, create temp staging table (dropped automatically on commit)
    cur.execute("""CREATE TEMP TABLE "bdb_Draft_test" ("GUID" text, "ML_Prediction" INTEGER) ON COMMIT DROP""")

    # SQL query, fill the staging table with the pairs
    cur.executemany("""INSERT INTO "bdb_Draft_test" ("GUID", "ML_Prediction") VALUES(%s, %s)""", rows)

    # SQL query, recreate the ML_Prediction column on final_dataset and fill it
    # from the staging table by matching on GUID
    cur.execute("""
    ALTER TABLE final_dataset
    DROP COLUMN IF EXISTS "ML_Prediction";
    ALTER TABLE final_dataset
    ADD COLUMN "ML_Prediction" integer;
    UPDATE final_dataset
    SET "ML_Prediction" = "bdb_Draft_test"."ML_Prediction"
    FROM "bdb_Draft_test"
    WHERE final_dataset."GUID" = "bdb_Draft_test"."GUID";
    """)

    # capture the row count before the cursor is closed, then commit queries
    updated_rows = cur.rowcount
    connection.commit()
finally:
    # close connection
    cur.close()
    connection.close()

# shown as cell output; kept at top level because expressions nested inside
# the try block are not displayed
updated_rows

### Confirm Appended Table

In [None]:
# connection to database
connection = psycopg2.connect(
    host = c.host,
    port = c.port,
    user = c.user,
    password = c.password,
    database = c.database
    )
cur=connection.cursor()

# SQL query: read the table back in full
test_sql = """
SELECT *
FROM final_dataset;
"""

# Load the table as a DataFrame, then always release the cursor and connection
# so no open transaction is left holding a lock on final_dataset.
try:
    test_df = pd.read_sql(test_sql, con=connection)
finally:
    cur.close()
    connection.close()

test_df.head()