In [None]:
# This notebook cleans the data that was gathered from LTHOI player data and NFL game data.

In [None]:
import pandas as pd  #For Bringing in the data and manipulating it
import boto3 #For storing to and retreiving from s3
import numpy as np #For using mathmatical functions to create the target fields

In [None]:
# Load the gathered data from the local CSV file.
# If you are not in the same workspace that was used to create the data,
# the CSV has to be fetched from S3 first.
GATHERED_DATA_CSV = 'gathered_data.csv'
game_data = pd.read_csv(GATHERED_DATA_CSV)

In [None]:
# The start-time columns carry no signal for the bets; they were only kept
# in the data to assist with querying the database, so remove them here.
game_data = game_data.drop(columns=['originalStartTime', 'startTime'])

In [None]:
# Remove the 'week' bookkeeping column, then discard every column whose name
# contains 'Unnamed' (presumably index columns left over from earlier CSV
# round-trips -- confirm against the data-gathering notebook).
game_data = game_data.drop(columns='week')
unnamed_column_mask = game_data.columns.str.contains('Unnamed')
game_data = game_data.loc[:, ~unnamed_column_mask]

In [None]:
# Create separate DataFrames for over bets, under bets, home bets, and away bets.
# Each one must be an independent copy: plain assignment would only bind four
# names to the SAME DataFrame, so every later `bet_is_smart` assignment would
# overwrite the previous one and all four data sets would end up with the
# target that was written last.
over_bet_inputs = game_data.copy()
under_bet_inputs = game_data.copy()
home_bet_inputs = game_data.copy()
away_bet_inputs = game_data.copy()

In [None]:
# Target for over bets: 1 when the combined score beats the over/under line
# by more than one point (i.e. the over bet would have won comfortably), else 0.
over_total_points = over_bet_inputs['awayScore'] + over_bet_inputs['homeScore']
over_threshold = over_bet_inputs['over_under'] + 1
over_bet_inputs['bet_is_smart'] = np.where(over_total_points > over_threshold, 1, 0)

In [None]:
# Target for under bets: 1 when the combined score falls short of the
# over/under line by more than one point, else 0.
under_total_points = under_bet_inputs['awayScore'] + under_bet_inputs['homeScore']
under_threshold = under_bet_inputs['over_under'] - 1
under_bet_inputs['bet_is_smart'] = np.where(under_total_points < under_threshold, 1, 0)

In [None]:
# Target for home bets: 1 when the home score, adjusted by the home line and
# reduced by a one-point margin, is still greater than the away score, else 0.
home_adjusted_score = home_bet_inputs['homeScore'] + home_bet_inputs['home_line'] - 1
home_bet_inputs['bet_is_smart'] = np.where(
    home_adjusted_score > home_bet_inputs['awayScore'], 1, 0
)

In [None]:
# Target for away bets: 1 when the home score, adjusted by the home line and
# increased by a one-point margin, is still less than the away score, else 0.
away_adjusted_home_score = away_bet_inputs['homeScore'] + away_bet_inputs['home_line'] + 1
away_bet_inputs['bet_is_smart'] = np.where(
    away_adjusted_home_score < away_bet_inputs['awayScore'], 1, 0
)

In [None]:
# Triplicate the data (the original note says this is needed to meet a
# minimum data-size requirement).
def _repeat_rows(frame, times=3):
    """Return a new DataFrame in which every row of `frame` appears `times` times in a row.

    Rows are selected by position rather than by label, so the result is
    exactly `times * len(frame)` rows even when the index holds duplicate
    labels. The original index labels are kept on the repeated rows.
    """
    row_positions = np.repeat(np.arange(len(frame)), times)
    return frame.iloc[row_positions]


# Each frame is expanded from ITSELF exactly once. Previously the over frame
# was rebuilt from the home frame, the home frame was expanded twice, and the
# under frame was never expanded at all.
home_bet_inputs = _repeat_rows(home_bet_inputs)
away_bet_inputs = _repeat_rows(away_bet_inputs)
over_bet_inputs = _repeat_rows(over_bet_inputs)
under_bet_inputs = _repeat_rows(under_bet_inputs)

In [None]:
# Write each data set to a local CSV file and upload that file to S3.
bucketname = "burgherjon-football-data"
home_key = "input_data/home_bet_inputs.csv"
away_key = "input_data/away_bet_inputs.csv"
over_key = "input_data/over_bet_inputs.csv"
under_key = "input_data/under_bet_inputs.csv"

s3 = boto3.resource('s3')

# (local file name, DataFrame to export, destination key in the bucket)
export_jobs = (
    ('over_bet_inputs.csv', over_bet_inputs, over_key),
    ('under_bet_inputs.csv', under_bet_inputs, under_key),
    ('away_bet_inputs.csv', away_bet_inputs, away_key),
    ('home_bet_inputs.csv', home_bet_inputs, home_key),
)

for local_csv_name, bet_frame, s3_key in export_jobs:
    bet_frame.to_csv(local_csv_name)
    s3.meta.client.upload_file(local_csv_name, bucketname, s3_key)
