In [3]:
# load in libraries
import json
import os
import pickle
from io import StringIO

import pandas as pd
import psycopg2
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

from pandas import json_normalize 

In [74]:
# load in env variables
db_host = os.environ['HEROKU_DB_HOST']
db_name = os.environ['HEROKU_DB']
db_user = os.environ['HEROKU_DB_USER']
db_password = os.environ['HEROKU_DB_PASSWORD']

# connect to database
conn = psycopg2.connect(database=db_name, user=db_user, password=db_password, host=db_host, port="5432")
cur = conn.cursor()

In [8]:
# import model
with open('../models/nfl_predictor_rf.pkl', 'rb') as f:
    model = pickle.load(f)

In [9]:
# import gamedata csv
games_dataframe_data = {}
seasons = [2017,2018,2019,2020]
# loop through directory 
for season in seasons:
    # append regular saeason data
    df_reg_season = pd.read_csv('./data/custom_games_by_season/{0}_data.csv'.format(season)) 
    games_dataframe_data[season] = df_reg_season
    

In [10]:
# let model loose to make predictions on all games for each season 
selected_features = ['AwayAverage','AwayFirstDowns', 'AwayTime', 'AwayThirdDowns', 'HomeAverage','HomeFirstDowns', 'HomeTime', 'HomeThirdDowns']

In [13]:
for season, data in games_dataframe_data.items():
    # get values we want
    values = data[selected_features].values
    
    # make predictions
    predictions = model.predict(values)
    
    for index, row in data.iterrows():
        data.at[index, 'PredictHomeTeamWin'] = predictions[index]
        
    # save updated data in csv
    data.to_csv('./data/custom_games_by_season/{0}_data.csv'.format(season), header=True,  encoding='utf-8', index=False) 

    games_dataframe_data[season] = data
    
    

In [58]:
# insert each dataframe into database
for season in seasons:
    data = games_dataframe_data[season].to_json(orient="index")
    
    # create the SQL string
    sql_string = 'INSERT INTO games_data(year, games) VALUES (%s, %s)'
    cur.execute(sql_string, (season,json.dumps(data)))
    conn.commit()

conn.close()

In [144]:
# try fetching data
sql_string = 'select games from games_data where year = 2020'
cur.execute(sql_string)
response = cur.fetchall()[1]

In [145]:
df = pd.read_json(response[0]).T

In [146]:
df

Unnamed: 0,HomeTeam,AwayTeam,HomeScore,AwayScore,Week,HomeResult,AwayResult,AwayAverage,HomeAverage,AwayFirstDowns,HomeFirstDowns,AwayTime,HomeTime,AwayThirdDowns,HomeThirdDowns,PredictHomeTeamWin
0,KC,HOU,34.0,20.0,1,1,0,23.625,28.1875,21.625,21.875,30.02,29.27,43.5,47.6,1
1,ATL,SEA,25.0,38.0,1,0,1,25.3125,23.8125,21.3125,23.9375,31.26,31.56,39.5,42.0,0
2,BAL,CLE,38.0,6.0,1,1,0,20.9375,33.1875,19.0625,24.125,30.42,35.47,36.2,47.1,1
3,BUF,NYJ,27.0,17.0,1,1,0,17.25,19.625,15.8125,19.625,29.53,30.59,30.7,35.8,1
4,CAR,LV,30.0,34.0,1,0,1,19.5625,21.25,19.6875,20.9375,31.01,29.55,43.8,31.9,0
5,DET,CHI,23.0,27.0,1,0,1,17.5,21.3125,18.5625,19.5625,30.16,29.09,35.6,40.9,0
6,JAX,IND,27.0,20.0,1,1,0,22.5625,18.75,21.25,18.625,30.11,30.29,41.5,34.5,1
7,MIN,GB,34.0,43.0,1,0,1,23.5,25.4375,20.0,19.625,31.28,29.5,36.0,42.8,0
8,NE,MIA,21.0,11.0,1,1,0,19.125,26.25,19.6875,21.125,29.55,32.42,34.3,38.3,1
9,WAS,PHI,27.0,17.0,1,1,0,24.0625,16.625,22.125,15.5,33.06,27.12,45.4,29.1,1


In [147]:
df = df[df.Week == 1]

In [151]:
for row in df.itertuples():
    print(row.Week)

1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
