In [2]:
# load libraries
import os
import json
import psycopg2
import pandas as pd
import pickle
import base64
import csv
from github import Github
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import requests

In [3]:
# get api key
# Credentials are read from the process environment; os.environ[...] raises
# KeyError when a variable is unset, so a missing key fails in this cell.
env = os.environ
api_key = env['SPORTS_DATA_IO_API']
api_key_2 = env['SPORTS_DATA_IO_API_2']
github_token = env['GITHUB_TOKEN']

In [85]:
# import model
# NOTE: pickle.load can execute arbitrary code while deserialising, so this
# file must only ever be a model artefact produced by this project.
model_path = '../models/nfl_predictor_rf.pkl'
with open(model_path, 'rb') as model_file:
    model = pickle.load(model_file)

In [4]:
# load in env variables
# (os.environ[...] raises KeyError if any of these is unset)
db_host = os.environ['HEROKU_DB_HOST']
db_name = os.environ['HEROKU_DB']
db_user = os.environ['HEROKU_DB_USER']
db_password = os.environ['HEROKU_DB_PASSWORD']

# connect to database
connection_settings = {
    'database': db_name,
    'user': db_user,
    'password': db_password,
    'host': db_host,
    'port': "5432",
}
conn = psycopg2.connect(**connection_settings)

# single cursor shared by the cells below
cur = conn.cursor()

In [87]:
# get the current week plus both dataframes we need from api
# Every request gets a timeout (requests waits indefinitely by default) and
# raise_for_status(), so a hung connection or an HTTP error (bad key, quota,
# outage) stops the notebook here instead of feeding an error payload into
# a DataFrame.
request_timeout = 30  # seconds

# current week, used in the weekly URL below and in per-week column names
response = requests.get('https://api.sportsdata.io/v3/nfl/scores/json/CurrentWeek?key={0}'.format(api_key_2), timeout=request_timeout)
response.raise_for_status()
week = response.json()

# games (scores / odds) for the current week
response = requests.get('https://api.sportsdata.io/api/nfl/odds/json/ScoresByWeek/2020REG/{0}?key={1}'.format(week, api_key), timeout=request_timeout)
response.raise_for_status()
current_games = pd.DataFrame.from_dict(response.json())

# season stats per team
response = requests.get('https://api.sportsdata.io/api/nfl/odds/json/TeamSeasonStats/2020REG?key={0}'.format(api_key), timeout=request_timeout)
response.raise_for_status()
team_stats_2020 = pd.DataFrame.from_dict(response.json())

In [120]:
# bring in the database dataframes
# Each query returns one row whose first column is a JSON document. The
# frames are saved elsewhere in this notebook with to_json(orient="index"),
# while read_json parses with its default column orientation, so the parsed
# frame is transposed (.T) to get rows back as rows.

# games data for the season
cur.execute('SELECT games from games_data where year = 2020')
games_record = cur.fetchone()
db_games_data = pd.read_json(games_record[0]).T

# team stats data for the season
cur.execute('SELECT stats from team_stats where year = 2020')
stats_record = cur.fetchone()
db_team_stats = pd.read_json(stats_record[0]).T


In [89]:
# update year team stats
# Column names for this week's running stats; identical for every team, so
# they are built once up front.
avg_column = 'avg_up_to_week_{0}'.format(week)
first_downs_column = 'first_downs_up_to_week_{0}'.format(week)
third_down_column = 'third_down_percentage_up_to_week_{0}'.format(week)
possession_column = 'time_of_possession_up_to_week_{0}'.format(week)

# Rows are matched by index label between the two frames.
# NOTE(review): this assumes team_stats_2020 and db_team_stats list the teams
# under the same index labels -- confirm.
for team_label, team_row in team_stats_2020.iterrows():
    db_team_stats.at[team_label, avg_column] = team_row.Score / team_row.Games
    db_team_stats.at[team_label, first_downs_column] = team_row.FirstDowns / team_row.Games
    db_team_stats.at[team_label, third_down_column] = team_row.ThirdDownPercentage

    # convert string time of possession to float: "A:B" becomes the float A.B
    # NOTE(review): this is not a decimal-minutes conversion ("30:30" -> 30.3);
    # presumably the model was trained on the same encoding -- confirm before
    # changing it.
    possession_parts = team_row.TimeOfPossession.split(':')
    db_team_stats.at[team_label, possession_column] = float('{0}.{1}'.format(*possession_parts))

# update database for team stats
# NOTE(review): to_json already returns a JSON string, so json.dumps stores it
# as a JSON-encoded string; presumably the loading cell relies on that shape --
# confirm before changing.
stats_payload = json.dumps(db_team_stats.to_json(orient="index"))
cur.execute('UPDATE team_stats SET stats = %s WHERE year = 2020', (stats_payload,))
conn.commit()


In [78]:
# update this weeks games with stats
# .copy() gives an independent frame, so the column assignments below never
# write into a slice of the raw API response.
current_games = current_games[['Week', 'AwayTeam', 'HomeTeam', 'AwayScore', 'HomeScore']].copy()

# Look stats up by team. This is bound to a NEW name on purpose: rebinding
# db_team_stats would drop its 'Team' column, so re-running this cell would
# raise KeyError and re-running the team-stats cell would write to the wrong
# rows.
team_stats_by_team = db_team_stats.set_index('Team')

# feature-name suffix -> per-team stat column for the current week
stat_columns = {
    'Average': 'avg_up_to_week_{0}'.format(week),
    'FirstDowns': 'first_downs_up_to_week_{0}'.format(week),
    'Time': 'time_of_possession_up_to_week_{0}'.format(week),
    'ThirdDowns': 'third_down_percentage_up_to_week_{0}'.format(week),
}

for game_label, game in current_games.iterrows():
    # home columns are created before away columns, so the resulting column
    # order (and therefore the CSV / JSON layout) is unchanged
    for side, team in (('Home', game.HomeTeam), ('Away', game.AwayTeam)):
        for suffix, stat_column in stat_columns.items():
            current_games.at[game_label, side + suffix] = team_stats_by_team.loc[team, stat_column]

# let model make predictions on this weeks games
selected_features = ['AwayAverage','AwayFirstDowns', 'AwayTime', 'AwayThirdDowns', 'HomeAverage','HomeFirstDowns', 'HomeTime', 'HomeThirdDowns']
# get values we want
values = current_games[selected_features].values

# make predictions
predictions = model.predict(values)

# predictions[i] belongs to the i-th row of `values`; pair by position rather
# than using the index label as an array offset
for position, game_label in enumerate(current_games.index):
    current_games.at[game_label, 'PredictHomeTeamWin'] = predictions[position]

# fill scores as games arent played yet
current_games = current_games.fillna(-1)

# append to all games
# (DataFrame.append was removed in pandas 2.0; pd.concat is its replacement)
# NOTE(review): re-running this cell appends the same week's games again.
db_games_data = pd.concat([db_games_data, current_games], ignore_index=True)

db_games_data.to_csv('./2020_data.csv', header=True,  encoding='utf-8', index=False)


# update database for season games
# NOTE(review): to_json already returns a JSON string, so json.dumps stores it
# as a JSON-encoded string; presumably the loading cell relies on that shape --
# confirm before changing.
data = db_games_data.to_json(orient="index")
sql_string = 'UPDATE games_data SET games = %s WHERE year = 2020'
cur.execute(sql_string, (json.dumps(data),))
conn.commit()

In [79]:
# write csv to github for the sake of it
g = Github(github_token)

# get csv
# The file is written with encoding='utf-8', so read it back the same way
# instead of relying on the platform default encoding. newline='' keeps the
# line endings exactly as written.
with open('./2020_data.csv', newline='', encoding='utf-8') as csvfile:
    data = csvfile.read()

# get project and update file
repo = g.get_repo('Sports-Outcome-Analyzer/nfl_sports_analyzer')

# the current file's sha is passed to update_file below
contents = repo.get_contents("custom_games_by_season/2020_data.csv", ref="development")

repo.update_file(contents.path, "update csv file with new games", data, contents.sha, branch="development")



{'commit': Commit(sha="204c60b4fef08f2646d23999f2ddef9b7174de71"),
 'content': ContentFile(path="custom_games_by_season/2020_data.csv")}

In [5]:
# update games score on friday,monday,tuesday
# Every request gets a timeout (requests waits indefinitely by default) and
# raise_for_status(), so a hung connection or an HTTP error stops the
# notebook here instead of feeding an error payload into a DataFrame.
request_timeout = 30  # seconds

# current week, used in the weekly URL below
response = requests.get('https://api.sportsdata.io/v3/nfl/scores/json/CurrentWeek?key={0}'.format(api_key_2), timeout=request_timeout)
response.raise_for_status()
week = response.json()

# games (scores / odds) for the current week
response = requests.get('https://api.sportsdata.io/api/nfl/odds/json/ScoresByWeek/2020REG/{0}?key={1}'.format(week, api_key), timeout=request_timeout)
response.raise_for_status()
current_games = pd.DataFrame.from_dict(response.json())




In [6]:
# display the games frame fetched from the API in the previous cell
current_games

Unnamed: 0,GameKey,SeasonType,Season,Week,Date,AwayTeam,HomeTeam,AwayScore,HomeScore,PointSpread,OverUnder,AwayScoreQuarter1,AwayScoreQuarter2,AwayScoreQuarter3,AwayScoreQuarter4,AwayScoreOvertime,HomeScoreQuarter1,HomeScoreQuarter2,HomeScoreQuarter3,HomeScoreQuarter4,HomeScoreOvertime,StadiumID,ForecastTempLow,ForecastTempHigh,ForecastDescription,ForecastWindChill,ForecastWindSpeed,AwayTeamMoneyLine,HomeTeamMoneyLine,AwayTeamID,HomeTeamID,ScoreID,Status,HomeRotationNumber,AwayRotationNumber,NeutralVenue,StadiumDetails
0,202011303,1,2020,13,2020-12-08T20:05:00,DAL,BAL,,,-7.5,45.0,,,,,,,,,,,7,38,38,Clear Sky,29,14,271,-341,9,3,17440,Scheduled,484,483,False,"{'StadiumID': 7, 'Name': 'M&T Bank Stadium', '..."
1,202011302,1,2020,13,2020-12-06T13:00:00,NO,ATL,21.0,16.0,3.0,46.0,7.0,7.0,7.0,0.0,0.0,3.0,6.0,0.0,7.0,0.0,45,49,50,Overcast Clouds,45,9,-153,130,22,2,17441,Final,464,463,False,"{'StadiumID': 45, 'Name': 'Mercedes-Benz Stadi..."
2,202011306,1,2020,13,2020-12-06T13:00:00,DET,CHI,34.0,30.0,-3.0,44.0,6.0,7.0,7.0,14.0,0.0,9.0,14.0,0.0,7.0,0.0,20,35,35,Light Snow,35,3,133,-158,11,6,17442,Final,452,451,False,"{'StadiumID': 20, 'Name': 'Soldier Field', 'Ci..."
3,202011334,1,2020,13,2020-12-06T13:00:00,CLE,TEN,41.0,35.0,-4.0,54.0,10.0,28.0,3.0,0.0,0.0,0.0,7.0,14.0,14.0,0.0,12,41,42,Overcast Clouds,36,9,176,-212,8,34,17443,Final,466,465,False,"{'StadiumID': 12, 'Name': 'Nissan Stadium', 'C..."
4,202011319,1,2020,13,2020-12-06T13:00:00,CIN,MIA,7.0,19.0,-10.5,43.0,7.0,0.0,0.0,0.0,0.0,0.0,6.0,10.0,3.0,0.0,2,75,76,Light Rain,75,9,411,-540,7,19,17444,Final,454,453,False,"{'StadiumID': 2, 'Name': 'Hard Rock Stadium', ..."
5,202011320,1,2020,13,2020-12-06T13:00:00,JAX,MIN,24.0,27.0,-10.0,51.0,9.0,0.0,7.0,8.0,0.0,0.0,6.0,13.0,5.0,3.0,42,32,33,Scattered Clouds,33,2,366,-470,15,20,17445,F/OT,458,457,False,"{'StadiumID': 42, 'Name': 'U.S. Bank Stadium',..."
6,202011324,1,2020,13,2020-12-06T13:00:00,LV,NYJ,31.0,28.0,7.0,48.0,7.0,10.0,7.0,7.0,0.0,7.0,6.0,0.0,15.0,0.0,3,32,33,Clear Sky,24,10,-370,295,25,24,17446,Final,462,461,False,"{'StadiumID': 3, 'Name': 'MetLife Stadium', 'C..."
7,202011328,1,2020,13,2020-12-07T17:00:00,WAS,PIT,23.0,17.0,-6.0,43.5,0.0,3.0,7.0,13.0,0.0,0.0,14.0,0.0,3.0,0.0,8,29,29,Overcast Clouds,22,8,233,-282,35,28,17447,Final,488,487,False,"{'StadiumID': 8, 'Name': 'Heinz Field', 'City'..."
8,202011313,1,2020,13,2020-12-06T13:00:00,IND,HOU,26.0,20.0,3.0,50.0,14.0,10.0,0.0,2.0,0.0,10.0,10.0,0.0,0.0,0.0,11,53,54,Clear Sky,53,8,-179,150,14,13,17448,Final,456,455,False,"{'StadiumID': 11, 'Name': 'NRG Stadium', 'City..."
9,202011301,1,2020,13,2020-12-06T16:05:00,LAR,ARI,38.0,28.0,3.0,49.0,0.0,14.0,3.0,21.0,0.0,7.0,0.0,7.0,14.0,0.0,29,56,61,Few Clouds,56,2,-152,129,32,1,17449,Final,470,469,False,"{'StadiumID': 29, 'Name': 'State Farm Stadium'..."


In [9]:
# reload the season's games data from the database
# The query returns one row whose first column is a JSON document. Frames are
# saved elsewhere in this notebook with to_json(orient="index"), while
# read_json parses with its default column orientation, so the parsed frame
# is transposed (.T) to get rows back as rows.
cur.execute('SELECT games from games_data where year = 2020')
games_record = cur.fetchone()
db_games_data = pd.read_json(games_record[0]).T