In [None]:
#This workbook is about cleaning the data that was gathered from LTHOI player data and NFL game data.

In [14]:
import pandas as pd  #For Bringing in the data and manipulating it
import boto3 #For storing to and retreiving from s3
import numpy as np #For using mathmatical functions to create the target fields

In [35]:
#Pull in the data... If you're not in the same workspace that you're using for creating the data, you'll need to get the CSV from S3.
game_data = pd.read_csv('gathered_data.csv')

In [36]:
#Trim the start times because they don't matter.  They were only in the data to assist with querying the database.
game_data.drop(labels=['originalStartTime', 'startTime'], axis=1, inplace=True)

In [37]:
#trim the extra column with the two columns that kept track of week and game in week
game_data.drop(labels=['week'], axis=1, inplace=True)
game_data = game_data.loc[:, ~game_data.columns.str.contains('Unnamed')]

In [38]:
#Create seperate Pandas for over bets, under bets, home bets, and away bets
over_bet_inputs = game_data
under_bet_inputs = game_data
home_bet_inputs = game_data
away_bet_inputs = game_data

In [39]:
#For over_bet_inputs create a target field that shows whether an over bet would have been smart (would have won by more than a point)
over_bet_inputs['bet_is_smart'] = np.where((over_bet_inputs['awayScore']+over_bet_inputs['homeScore'])>(over_bet_inputs['over_under']+1), 1, 0)

In [40]:
#For under_bet_inputs create a target field that shows whether an under bet would have been smart (would have won by more than a point)
under_bet_inputs['bet_is_smart'] = np.where((under_bet_inputs['awayScore']+under_bet_inputs['homeScore'])<(under_bet_inputs['over_under']-1), 1, 0)

In [41]:
#For home_bet_inputs create a target field that shows whether a home bet would have been smart (would have won by more than a point)
home_bet_inputs['bet_is_smart'] = np.where((home_bet_inputs['homeScore'] + home_bet_inputs['home_line'] - 1) > home_bet_inputs['awayScore'], 1, 0)

In [42]:
#For away_bets_inputs create a target field that shows whether an away bet would have been smart (would have won by more than a point)
away_bet_inputs['bet_is_smart'] = np.where((away_bet_inputs['homeScore'] + away_bet_inputs['home_line'] + 1) < away_bet_inputs['awayScore'], 1, 0)

In [49]:
#Triplicate the data so that we meet the minimum number of fields
home_bet_inputs = home_bet_inputs.loc[np.repeat(home_bet_inputs.index.values, 3)]   
away_bet_inputs = away_bet_inputs.loc[np.repeat(away_bet_inputs.index.values, 3)]  
over_bet_inputs = home_bet_inputs.loc[np.repeat(home_bet_inputs.index.values, 3)]  
home_bet_inputs = home_bet_inputs.loc[np.repeat(home_bet_inputs.index.values, 3)]  

In [50]:
#Store the data to CSV
bucketname = "burgherjon-football-data"
home_key = "input_data/home_bet_inputs.csv"
away_key = "input_data/away_bet_inputs.csv"
over_key = "input_data/over_bet_inputs.csv"
under_key = "input_data/under_bet_inputs.csv"

s3 = boto3.resource('s3')

over_bet_inputs.to_csv('over_bet_inputs.csv')
s3.meta.client.upload_file('over_bet_inputs.csv', bucketname, over_key)

under_bet_inputs.to_csv('under_bet_inputs.csv')
s3.meta.client.upload_file('under_bet_inputs.csv', bucketname, under_key)

away_bet_inputs.to_csv('away_bet_inputs.csv')
s3.meta.client.upload_file('away_bet_inputs.csv', bucketname, away_key)

home_bet_inputs.to_csv('home_bet_inputs.csv')
s3.meta.client.upload_file('home_bet_inputs.csv', bucketname, home_key)


In [45]:
home_bet_inputs.head()

Unnamed: 0,game_id,awayTeam_ID,homeTeam_ID,venueAliegiance,awayScore,homeScore,over_under,home_line,home_team_average_for,home_team_average_against,away_team_average_for,away_team_average_against,over_bets,under_bets,home_bets,away_bets,final_line,final_over_under,bet_is_smart
0,64927,69,64,HOME,24,9,43.0,8.0,29.0,26.0,22.5,10.5,1,1,0,1,9.0,43.5,1
1,64931,65,67,HOME,16,25,47.0,-6.0,23.0,34.0,20.0,27.5,0,0,0,1,-4.5,47.0,0
2,64932,68,53,HOME,17,14,47.5,-3.0,21.0,28.5,15.5,40.0,0,0,2,1,-3.0,47.5,1
3,64935,75,73,HOME,30,24,54.5,-7.0,34.0,32.5,18.5,18.0,2,0,0,1,-7.0,55.0,1
4,64936,57,59,HOME,24,10,43.5,-2.5,20.0,21.0,22.0,22.0,0,0,1,1,-3.0,42.0,1


In [33]:
away_bet_inputs.describe()

Unnamed: 0,game_id,awayTeam_ID,homeTeam_ID,awayScore,homeScore,over_under,home_line,home_team_average_for,home_team_average_against,away_team_average_for,away_team_average_against,over_bets,under_bets,home_bets,away_bets,final_line,final_over_under,over_bet_is_smart,under_bet_is_smart,bet_is_smart
count,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0,208.0
mean,65032.375,63.447115,63.552885,22.033654,23.264423,47.810096,-1.822115,25.6875,24.519231,22.293269,23.987981,0.504808,0.269231,0.625,1.076923,-1.550481,47.824519,0.413462,0.504808,0.471154
std,64.376262,9.234837,9.27555,10.174507,10.548818,4.515004,5.806634,4.610525,8.006164,8.36591,7.178443,0.621651,0.44463,0.744545,1.122388,5.839736,4.745629,0.493642,0.501183,0.500371
min,64927.0,48.0,48.0,0.0,0.0,42.0,-10.5,17.0,11.5,8.5,10.5,0.0,0.0,0.0,0.0,-10.0,41.5,0.0,0.0,0.0
25%,64978.75,55.0,56.0,14.0,16.0,43.5,-7.0,21.0,19.0,17.0,22.0,0.0,0.0,0.0,0.0,-7.0,43.5,0.0,0.0,0.0
50%,65031.5,63.0,64.0,22.0,22.0,47.0,-3.0,25.5,26.0,22.0,24.5,0.0,0.0,0.0,1.0,-3.0,47.0,0.0,1.0,0.0
75%,65084.25,71.0,72.0,30.0,30.0,51.5,2.0,29.5,30.5,29.0,27.0,1.0,1.0,1.0,1.0,2.5,51.5,1.0,1.0,1.0
max,65346.0,79.0,79.0,45.0,56.0,55.0,8.0,34.0,38.0,39.5,40.0,2.0,1.0,2.0,4.0,9.0,55.5,1.0,1.0,1.0
