# Variable Engineering

In [78]:
## Reading the csv file

import pandas as pd
import numpy as np

## Lift pandas' display limits so that no rows or columns are truncated in cell output
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

## Load the game data from the CSV file and preview the first rows
nfl = pd.read_csv(filepath_or_buffer='aggregated_cleaned_spreadspoke_scores.csv')

nfl.head(n=5)

Unnamed: 0,team_home,team_home_name,team_home_id,team_away,team_away_name,team_away_id,schedule_date,schedule_season,schedule_week,schedule_playoff,score_home,score_away,team_favorite_id,spread_favorite,over_under_line,stadium,stadium_neutral,team_home_ppg,team_home_oppg,team_home_ypg,team_home_oypg,team_home_fdpg,team_home_ofdpg,team_home_tapg,team_home_tpg,team_home_pypg,team_home_opypg,team_home_asm,team_away_ppg,team_away_oppg,team_away_ypg,team_away_oypg,team_away_fdpg,team_away_ofdpg,team_away_tapg,team_away_tpg,team_away_pypg,team_away_opypg,team_away_asm
0,Arizona Cardinals,Arizona,ARI,Dallas Cowboys,Dallas,DAL,12/25/2010,2010,16,0,27,26,DAL,-7.5,45.0,University of Phoenix Stadium,0,18.1,27.1,269.3,373.6,15.4,21.7,1.9,2.2,54.4,55.9,-9.1,24.6,27.2,364.1,351.8,20.2,19.2,1.9,1.9,53.9,40.1,-2.6
1,Arizona Cardinals,Arizona,ARI,Denver Broncos,Denver,DEN,12/12/2010,2010,14,0,43,13,DEN,-4.0,44.0,University of Phoenix Stadium,0,18.1,27.1,269.3,373.6,15.4,21.7,1.9,2.2,54.4,55.9,-9.1,21.5,29.4,348.9,390.8,19.2,20.7,1.1,1.7,59.0,57.6,-7.9
2,Arizona Cardinals,Arizona,ARI,Las Vegas Raiders,Las Vegas,LVR,9/26/2010,2010,3,0,24,23,ARI,-5.5,39.0,University of Phoenix Stadium,0,18.1,27.1,269.3,373.6,15.4,21.7,1.9,2.2,54.4,55.9,-9.1,25.6,23.2,354.6,322.9,19.0,18.1,1.5,1.6,79.8,72.6,2.4
3,Arizona Cardinals,Arizona,ARI,Los Angeles Rams,LA Rams,LAR,12/5/2010,2010,13,0,6,19,LAR,-3.5,44.0,University of Phoenix Stadium,0,18.1,27.1,269.3,373.6,15.4,21.7,1.9,2.2,54.4,55.9,-9.1,18.1,20.5,302.9,336.8,18.0,18.8,1.6,1.3,54.8,63.0,-2.4
4,Arizona Cardinals,Arizona,ARI,New Orleans Saints,New Orleans,NO,10/10/2010,2010,5,0,30,20,NO,-7.0,45.5,University of Phoenix Stadium,0,18.1,27.1,269.3,373.6,15.4,21.7,1.9,2.2,54.4,55.9,-9.1,24.7,20.5,378.5,312.6,22.5,17.2,1.5,1.9,54.3,43.5,4.2


In [79]:
## Subsetting the data to remove all observations without a favored team
## (rows whose team_favorite_id is 'PICK').
## The explicit .copy() makes the filtered result an independent DataFrame, so the
## column assignments performed in the following cells do not trigger pandas'
## SettingWithCopyWarning.
## NOTE(review): rows whose team_favorite_id is missing (NaN) pass this filter,
## because NaN != 'PICK' evaluates to True -- confirm the input has no such rows.

nfl = nfl[nfl['team_favorite_id'] != 'PICK'].copy()

### Real Over/Under Line

In [80]:
## real_over_under_line is the combined number of points scored by the home and away teams.
## over_under_line is converted to a numeric dtype in the same step.

nfl = nfl.assign(
    over_under_line=pd.to_numeric(nfl['over_under_line']),
    real_over_under_line=nfl['score_home'].add(nfl['score_away']),
)

In [81]:
## over_under: 1 when the real combined score is above the projected line, 0 otherwise
## over_under_push: 1 when the real combined score equals the projected line exactly, 0 otherwise

real_total = nfl['real_over_under_line']
projected_total = nfl['over_under_line']

nfl['over_under'] = np.where(real_total.gt(projected_total), 1, 0)
nfl['over_under_push'] = np.where(real_total.eq(projected_total), 1, 0)

### Money Line

In [82]:
## Label the favorite and the underdog of every game and attach their scores.
## The home team is the favorite when team_favorite_id equals team_home_id;
## in every other case the away team is labelled the favorite.
home_id_is_favorite = nfl['team_favorite_id'].eq(nfl['team_home_id'])

nfl['favorite_team'] = np.where(home_id_is_favorite, nfl['team_home'], nfl['team_away'])

## Scores are picked by comparing the favorite's name with the home team's name
favorite_is_home = nfl['favorite_team'].eq(nfl['team_home'])

nfl['favorite_team_score'] = np.where(favorite_is_home, nfl['score_home'], nfl['score_away'])

nfl['underdog_team'] = np.where(home_id_is_favorite, nfl['team_away'], nfl['team_home'])

nfl['underdog_team_score'] = np.where(favorite_is_home, nfl['score_away'], nfl['score_home'])

## favorite_team_win is 1 only when the favorite outscores the underdog; a tie yields 0
favorite_outscored_underdog = nfl['favorite_team_score'].gt(nfl['underdog_team_score'])

nfl['favorite_team_win'] = np.where(favorite_outscored_underdog, 1, 0)



### Favorite Spread

In [83]:
## favorite_team_cover is 1 when the favorite's score plus spread_favorite is strictly
## greater than the underdog's score; an exact tie after the adjustment yields 0.
spread_adjusted_score = nfl['favorite_team_score'] + nfl['spread_favorite']
nfl['favorite_team_cover'] = np.where(spread_adjusted_score > nfl['underdog_team_score'], 1, 0)

### Dropping unnecessary columns following variable engineering

In [84]:
## Identifier and short-name columns that are removed once the engineered variables exist
columns_to_remove = [
    'team_favorite_id',
    'team_home_name',
    'team_home_id',
    'team_away_name',
    'team_away_id',
]
nfl = nfl.drop(columns=columns_to_remove)

In [86]:
## Keep only the columns listed below, in this order: schedule and game info,
## home-team statistics, away-team statistics, over/under variables, then
## favorite/underdog variables.
schedule_and_game_columns = [
    'schedule_date', 'schedule_season', 'schedule_week', 'schedule_playoff',
    'team_home', 'team_away', 'score_home', 'score_away',
    'stadium', 'stadium_neutral',
]
home_stat_columns = [
    'team_home_ppg', 'team_home_oppg', 'team_home_ypg', 'team_home_oypg',
    'team_home_fdpg', 'team_home_ofdpg', 'team_home_tapg', 'team_home_tpg',
    'team_home_pypg', 'team_home_opypg', 'team_home_asm',
]
away_stat_columns = [
    'team_away_ppg', 'team_away_oppg', 'team_away_ypg', 'team_away_oypg',
    'team_away_fdpg', 'team_away_ofdpg', 'team_away_tapg', 'team_away_tpg',
    'team_away_pypg', 'team_away_opypg', 'team_away_asm',
]
over_under_columns = [
    'over_under_line', 'real_over_under_line', 'over_under', 'over_under_push',
]
favorite_columns = [
    'favorite_team', 'favorite_team_score', 'underdog_team', 'underdog_team_score',
    'spread_favorite', 'favorite_team_win', 'favorite_team_cover',
]

final_column_order = (
    schedule_and_game_columns
    + home_stat_columns
    + away_stat_columns
    + over_under_columns
    + favorite_columns
)
nfl = nfl[final_column_order]


In [87]:
## Preview the first five rows of the reordered frame
nfl.head(n=5)

Unnamed: 0,schedule_date,schedule_season,schedule_week,schedule_playoff,team_home,team_away,score_home,score_away,stadium,stadium_neutral,team_home_ppg,team_home_oppg,team_home_ypg,team_home_oypg,team_home_fdpg,team_home_ofdpg,team_home_tapg,team_home_tpg,team_home_pypg,team_home_opypg,team_home_asm,team_away_ppg,team_away_oppg,team_away_ypg,team_away_oypg,team_away_fdpg,team_away_ofdpg,team_away_tapg,team_away_tpg,team_away_pypg,team_away_opypg,team_away_asm,over_under_line,real_over_under_line,over_under,over_under_push,favorite_team,favorite_team_score,underdog_team,underdog_team_score,spread_favorite,favorite_team_win,favorite_team_cover
0,12/25/2010,2010,16,0,Arizona Cardinals,Dallas Cowboys,27,26,University of Phoenix Stadium,0,18.1,27.1,269.3,373.6,15.4,21.7,1.9,2.2,54.4,55.9,-9.1,24.6,27.2,364.1,351.8,20.2,19.2,1.9,1.9,53.9,40.1,-2.6,45.0,53,1,0,Dallas Cowboys,26,Arizona Cardinals,27,-7.5,0,0
1,12/12/2010,2010,14,0,Arizona Cardinals,Denver Broncos,43,13,University of Phoenix Stadium,0,18.1,27.1,269.3,373.6,15.4,21.7,1.9,2.2,54.4,55.9,-9.1,21.5,29.4,348.9,390.8,19.2,20.7,1.1,1.7,59.0,57.6,-7.9,44.0,56,1,0,Denver Broncos,13,Arizona Cardinals,43,-4.0,0,0
2,9/26/2010,2010,3,0,Arizona Cardinals,Las Vegas Raiders,24,23,University of Phoenix Stadium,0,18.1,27.1,269.3,373.6,15.4,21.7,1.9,2.2,54.4,55.9,-9.1,25.6,23.2,354.6,322.9,19.0,18.1,1.5,1.6,79.8,72.6,2.4,39.0,47,1,0,Arizona Cardinals,24,Las Vegas Raiders,23,-5.5,1,0
3,12/5/2010,2010,13,0,Arizona Cardinals,Los Angeles Rams,6,19,University of Phoenix Stadium,0,18.1,27.1,269.3,373.6,15.4,21.7,1.9,2.2,54.4,55.9,-9.1,18.1,20.5,302.9,336.8,18.0,18.8,1.6,1.3,54.8,63.0,-2.4,44.0,25,0,0,Los Angeles Rams,19,Arizona Cardinals,6,-3.5,1,1
4,10/10/2010,2010,5,0,Arizona Cardinals,New Orleans Saints,30,20,University of Phoenix Stadium,0,18.1,27.1,269.3,373.6,15.4,21.7,1.9,2.2,54.4,55.9,-9.1,24.7,20.5,378.5,312.6,22.5,17.2,1.5,1.9,54.3,43.5,4.2,45.5,50,1,0,New Orleans Saints,20,Arizona Cardinals,30,-7.0,0,0


### Exporting NFL as final data set for analysis

In [89]:
## Write the frame to CSV with a header row and without the index column
nfl.to_csv(path_or_buf='final_spreadspoke_scores.csv', header=True, index=False)