# 03 Data Prep

In [1]:
# Enable IPython's autoreload extension in mode 2 so imported modules (e.g. the
# `src.data_preparation` helpers used below) are reloaded before each cell runs.
%load_ext autoreload
%autoreload 2

In [2]:
# common imports
import numpy as np
import pandas as pd
from typing import List
# import matplotlib.pyplot as plt 
# import seaborn as sns
# import sys
# from datetime import datetime
# import sklearn

# Render up to 500 columns whenever a DataFrame is displayed from here on.
pd.options.display.max_columns = 500

In [4]:
# Load the scraped game data from `scraped_data.csv` into `data`.
# NOTE(review): the directory the file is read from is decided inside the helper — confirm.
from src.data_preparation import load_scraped_data_from_disk
data = load_scraped_data_from_disk(file_name='scraped_data.csv')

## Data Transformations

In [5]:
# Clean up opponent names.
# NOTE(review): presumably this rewrites the `opp` column — confirm in src.data_preparation.
from src.data_preparation import fix_opponent_names
data = fix_opponent_names(data)

In [6]:
# Map team abbreviations to full team names (later outputs show values such as
# 'Kansas City Chiefs' in `team`).
# NOTE(review): which columns are mapped is decided inside the helper — confirm.
from src.data_preparation import map_team_abbreviations_to_names
data = map_team_abbreviations_to_names(data)

In [7]:
# Add the `home_or_away` column. In the preview that follows, rows with '@' in the
# `@` column are labelled AWAY and rows with an empty `@` are labelled HOME.
from src.data_preparation import add_home_or_away_column
data = add_home_or_away_column(data)

In [8]:
# Spot-check one team's rows to confirm that `home_or_away` is populated as expected.
data.loc[data['team'] == 'Kansas City Chiefs'].head(21)
# Known issue (fixed 04/30/2023): Super Bowls carry an 'N' in the `@` column, so both
# teams were being labelled as the HOME team. That was incorrect and caused Super Bowls
# to be removed from the final dataframe.

Unnamed: 0,season,team,week,day,date,time,result,ot,record,@,opp,points_scored,points_allowed,1st_downs,totyd,passyd,rushyd,to,1st_downs_allowed,totyd_allowed,passyd_allowed,rushyd_allowed,to_forced,off_exp_pts,def_exp_pts,sts_exp_pts,home_or_away
211,2022,Kansas City Chiefs,1,Sun,September 11,4:25PM ET,W,,1-0,@,Arizona Cardinals,44.0,21.0,33.0,488.0,360.0,128.0,1.0,18.0,282.0,179.0,103.0,,33.41,-2.29,-6.88,AWAY
212,2022,Kansas City Chiefs,2,Thu,September 15,8:15PM ET,W,,2-0,,Los Angeles Chargers,27.0,24.0,15.0,319.0,226.0,93.0,,21.0,401.0,326.0,75.0,1.0,5.54,-4.12,3.72,HOME
213,2022,Kansas City Chiefs,3,Sun,September 25,1:00PM ET,L,,2-1,@,Indianapolis Colts,17.0,20.0,20.0,315.0,257.0,58.0,2.0,19.0,259.0,177.0,82.0,1.0,1.78,7.1,-13.31,AWAY
214,2022,Kansas City Chiefs,4,Sun,October 2,8:20PM ET,W,,3-1,@,Tampa Bay Buccaneers,41.0,31.0,27.0,417.0,228.0,189.0,1.0,27.0,376.0,373.0,3.0,2.0,17.12,-14.15,5.33,AWAY
215,2022,Kansas City Chiefs,5,Mon,October 10,8:15PM ET,W,,4-1,,Las Vegas Raiders,30.0,29.0,29.0,368.0,265.0,103.0,,18.0,378.0,223.0,155.0,,14.21,-11.65,0.35,HOME
216,2022,Kansas City Chiefs,6,Sun,October 16,4:25PM ET,L,,4-2,,Buffalo Bills,20.0,24.0,23.0,387.0,319.0,68.0,2.0,26.0,443.0,318.0,125.0,1.0,6.45,-11.13,-0.56,HOME
217,2022,Kansas City Chiefs,7,Sun,October 23,4:25PM ET,W,,5-2,@,San Francisco 49ers,44.0,23.0,24.0,529.0,417.0,112.0,2.0,25.0,444.0,343.0,101.0,3.0,30.58,-3.59,-8.5,AWAY
218,2022,Kansas City Chiefs,9,Sun,November 6,8:20PM ET,W,OT,6-2,,Tennessee Titans,20.0,17.0,29.0,499.0,422.0,77.0,1.0,9.0,229.0,57.0,172.0,,5.59,3.85,-5.59,HOME
219,2022,Kansas City Chiefs,10,Sun,November 13,1:00PM ET,W,,7-2,,Jacksonville Jaguars,27.0,17.0,26.0,486.0,331.0,155.0,3.0,17.0,315.0,240.0,75.0,,23.27,-3.86,-7.32,HOME
220,2022,Kansas City Chiefs,11,Sun,November 20,8:20PM ET,W,,8-2,@,Los Angeles Chargers,30.0,27.0,23.0,485.0,322.0,163.0,1.0,22.0,365.0,250.0,115.0,2.0,19.64,-8.61,-2.61,AWAY


In [9]:
# Add date/time columns. The next outputs show `month`, `year`, `hour` and `date_time`
# appended to the frame; `day` also appears as a day-of-month number afterwards.
from src.data_preparation import add_datetime_column
data = add_datetime_column(data)
# Known issue (fixed 04/25/23): treating `season` as the calendar year was wrong for games
# played in January/February; the fix adds 1 to the year for those games.

In [10]:
# Sanity check: this selection is expected to be empty.
# One game was canceled last year, and its row is now dropped in the scraping.py file.
# While present, it prevented engineering additional features from previous-game stats.
# Earlier in-notebook workaround, kept for reference only:
#data.drop(data[data['passyd'] == 'Canceled'].index, inplace = True)
data.loc[data['passyd'] == 'Canceled']

Unnamed: 0,season,team,week,day,date,time,result,ot,record,@,opp,points_scored,points_allowed,1st_downs,totyd,passyd,rushyd,to,1st_downs_allowed,totyd_allowed,passyd_allowed,rushyd_allowed,to_forced,off_exp_pts,def_exp_pts,sts_exp_pts,home_or_away,month,year,hour,date_time


In [11]:
# Preview the frame after the transformation steps (pandas truncates the repr to the
# first and last rows).
data

Unnamed: 0,season,team,week,day,date,time,result,ot,record,@,opp,points_scored,points_allowed,1st_downs,totyd,passyd,rushyd,to,1st_downs_allowed,totyd_allowed,passyd_allowed,rushyd_allowed,to_forced,off_exp_pts,def_exp_pts,sts_exp_pts,home_or_away,month,year,hour,date_time
0,2022,Buffalo Bills,1,8,September 8,8:20PM ET,W,,1-0,@,Los Angeles Rams,31.0,10.0,23.0,413.0,292.0,121.0,4.0,19.0,243.0,191.0,52.0,3.0,13.89,10.29,-3.96,AWAY,9,2022,20,2022-09-08 20:00:00
1,2022,Buffalo Bills,2,19,September 19,7:15PM ET,W,,2-0,,Tennessee Titans,41.0,7.0,23.0,414.0,313.0,101.0,,12.0,187.0,107.0,80.0,4.0,17.69,18.01,1.55,HOME,9,2022,19,2022-09-19 19:00:00
2,2022,Buffalo Bills,3,25,September 25,1:00PM ET,L,,2-1,@,Miami Dolphins,19.0,21.0,31.0,497.0,382.0,115.0,1.0,15.0,212.0,171.0,41.0,,15.88,-7.45,-4.86,AWAY,9,2022,13,2022-09-25 13:00:00
3,2022,Buffalo Bills,4,2,October 2,1:00PM ET,W,,3-1,@,Baltimore Ravens,23.0,20.0,22.0,326.0,201.0,125.0,2.0,22.0,296.0,134.0,162.0,2.0,2.10,2.66,-1.69,AWAY,10,2022,13,2022-10-02 13:00:00
4,2022,Buffalo Bills,5,9,October 9,1:00PM ET,W,,4-1,,Pittsburgh Steelers,38.0,3.0,21.0,552.0,432.0,120.0,2.0,23.0,364.0,310.0,54.0,2.0,20.66,9.42,3.54,HOME,10,2022,13,2022-10-09 13:00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15315,1994,Los Angeles Rams,13,27,November 27,4:00PM ET,L,,4-8,@,Los Angeles Chargers,17.0,31.0,17.0,326.0,278.0,48.0,5.0,16.0,243.0,129.0,114.0,1.0,-15.08,12.27,-6.03,AWAY,11,1994,16,1994-11-27 16:00:00
15316,1994,Los Angeles Rams,14,4,December 4,4:00PM ET,L,,4-9,,New Orleans Saints,15.0,31.0,20.0,333.0,258.0,75.0,4.0,20.0,328.0,191.0,137.0,1.0,-1.17,-3.52,-13.22,HOME,12,1994,16,1994-12-04 16:00:00
15317,1994,Los Angeles Rams,15,11,December 11,1:00PM ET,L,,4-10,@,Tampa Bay Buccaneers,14.0,24.0,19.0,261.0,198.0,63.0,2.0,17.0,355.0,230.0,125.0,,-11.84,-12.38,5.25,AWAY,12,1994,13,1994-12-11 13:00:00
15318,1994,Los Angeles Rams,16,18,December 18,1:00PM ET,L,,4-11,@,Chicago Bears,13.0,27.0,13.0,243.0,206.0,37.0,1.0,19.0,298.0,135.0,163.0,,-1.42,-6.17,-8.99,AWAY,12,1994,13,1994-12-18 13:00:00


## Feature Engineering

In [12]:
from src.data_preparation import (
    add_win_rates_last_n_games,
    add_passing_rates_last_n_games,
    add_rushing_rates_last_n_games,
    add_passing_allowed_rates_last_n_games,
    add_rushing_allowed_rates_last_n_games,
    add_ot_rates_last_n_games,
    add_to_rates_last_n_games,
    add_to_forced_rates_last_n_games,
    add_points_scored_rates_last_n_games,
    add_points_allowed_rates_last_n_games,
    add_1st_down_rates_last_n_games,
    add_1st_down_allowed_rates_last_n_games,
)

# Look-back windows (in games) shared by every rolling feature. Each helper adds one
# `<stat>_rate_last_{n}_games` column per window; the column selection further down
# relies on these exact names, so keep the two in sync.
ROLLING_WINDOWS = [1, 4, 8]

# Feature-engineering steps, applied in this order. Each step receives the frame
# returned by the previous one.
# NOTE(review): in the preview that follows, each rate equals the mean of the statistic
# over the preceding rows only (the current game is excluded) — confirm that the helpers
# compute this per team.
rolling_feature_steps = [
    add_win_rates_last_n_games,
    add_passing_rates_last_n_games,
    add_rushing_rates_last_n_games,
    add_passing_allowed_rates_last_n_games,
    add_rushing_allowed_rates_last_n_games,
    add_ot_rates_last_n_games,
    add_to_rates_last_n_games,
    add_to_forced_rates_last_n_games,
    add_points_scored_rates_last_n_games,
    add_points_allowed_rates_last_n_games,
    add_1st_down_rates_last_n_games,
    add_1st_down_allowed_rates_last_n_games,
]

for add_rolling_feature in rolling_feature_steps:
    # Pass a fresh list on every call so a helper cannot alter the windows seen by the next.
    data = add_rolling_feature(data, n_games=list(ROLLING_WINDOWS))

In [24]:
# Preview the frame after feature engineering. In the output, the very first row has NaN
# for every rolling feature, and the rows appear ordered by team and then by date.
data

Unnamed: 0,season,team,week,day,date,time,result,ot,record,@,opp,points_scored,points_allowed,1st_downs,totyd,passyd,rushyd,to,1st_downs_allowed,totyd_allowed,passyd_allowed,rushyd_allowed,to_forced,off_exp_pts,def_exp_pts,sts_exp_pts,home_or_away,month,year,hour,date_time,win,win_rate_last_1_games,win_rate_last_4_games,win_rate_last_8_games,pass_rate_last_1_games,pass_rate_last_4_games,pass_rate_last_8_games,rush_rate_last_1_games,rush_rate_last_4_games,rush_rate_last_8_games,pass_allowed_rate_last_1_games,pass_allowed_rate_last_4_games,pass_allowed_rate_last_8_games,rush_allowed_rate_last_1_games,rush_allowed_rate_last_4_games,rush_allowed_rate_last_8_games,ot_rate_last_1_games,ot_rate_last_4_games,ot_rate_last_8_games,to_rate_last_1_games,to_rate_last_4_games,to_rate_last_8_games,to_forced_rate_last_1_games,to_forced_rate_last_4_games,to_forced_rate_last_8_games,points_scored_rate_last_1_games,points_scored_rate_last_4_games,points_scored_rate_last_8_games,points_allowed_rate_last_1_games,points_allowed_rate_last_4_games,points_allowed_rate_last_8_games,1st_down_rate_last_1_games,1st_down_rate_last_4_games,1st_down_rate_last_8_games,1st_down_allowed_rate_last_1_games,1st_down_allowed_rate_last_4_games,1st_down_allowed_rate_last_8_games
0,1994,Arizona Cardinals,1,4,September 4,4:00PM ET,L,0,0-1,@,Los Angeles Rams,12.0,14.0,23.0,234.0,128.0,106.0,3.0,9.0,152.0,102.0,50.0,2.0,-15.09,17.92,1.36,AWAY,9,1994,16,1994-09-04 16:00:00,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,1994,Arizona Cardinals,2,11,September 11,8:00PM ET,L,0,0-2,,New York Giants,17.0,20.0,11.0,174.0,135.0,39.0,3.0,19.0,206.0,88.0,118.0,2.0,-17.99,1.70,6.10,HOME,9,1994,20,1994-09-11 20:00:00,0,0.0,0.00,0.000,128.0,128.000000,128.000000,106.0,106.000000,106.000000,102.0,102.000000,102.000000,50.0,50.000000,50.000000,0.0,0.00,0.000,3.0,3.00,3.000,2.0,2.00,2.000,12.0,12.000000,12.000000,14.0,14.00,14.000,23.0,23.000000,23.000000,9.0,9.00,9.000
2,1994,Arizona Cardinals,3,18,September 18,1:00PM ET,L,0,0-3,@,Cleveland Browns,0.0,32.0,21.0,318.0,255.0,63.0,3.0,17.0,322.0,243.0,79.0,2.0,-23.88,-2.64,1.52,AWAY,9,1994,13,1994-09-18 13:00:00,0,0.0,0.00,0.000,135.0,131.500000,131.500000,39.0,72.500000,72.500000,88.0,95.000000,95.000000,118.0,84.000000,84.000000,0.0,0.00,0.000,3.0,3.00,3.000,2.0,2.00,2.000,17.0,14.500000,14.500000,20.0,17.00,17.000,11.0,17.000000,17.000000,19.0,14.00,14.000
3,1994,Arizona Cardinals,5,2,October 2,4:00PM ET,W,0,1-3,,Minnesota Vikings,17.0,7.0,21.0,309.0,200.0,109.0,2.0,19.0,358.0,340.0,18.0,4.0,0.47,13.72,2.86,HOME,10,1994,16,1994-10-02 16:00:00,1,0.0,0.00,0.000,255.0,172.666667,172.666667,63.0,69.333333,69.333333,243.0,144.333333,144.333333,79.0,82.333333,82.333333,0.0,0.00,0.000,3.0,3.00,3.000,2.0,2.00,2.000,0.0,9.666667,9.666667,32.0,22.00,22.000,21.0,18.333333,18.333333,17.0,15.00,15.000
4,1994,Arizona Cardinals,6,9,October 9,4:00PM ET,L,0,1-4,@,Dallas Cowboys,3.0,38.0,10.0,221.0,168.0,53.0,5.0,22.0,351.0,273.0,78.0,0.0,-26.39,-11.70,5.29,AWAY,10,1994,16,1994-10-09 16:00:00,0,1.0,0.25,0.250,200.0,179.500000,179.500000,109.0,79.250000,79.250000,340.0,193.250000,193.250000,18.0,66.250000,66.250000,0.0,0.00,0.000,2.0,2.75,2.750,4.0,2.50,2.500,17.0,11.500000,11.500000,7.0,18.25,18.250,21.0,19.000000,19.000000,19.0,16.00,16.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15315,2022,Washington Commanders,13,4,December 4,1:00PM ET,T,1,7-5-1,@,New York Giants,20.0,20.0,25.0,411.0,246.0,165.0,1.0,20.0,316.0,182.0,134.0,1.0,2.87,2.71,-5.65,AWAY,12,2022,13,2022-12-04 13:00:00,0,1.0,0.75,0.750,138.0,158.250000,190.625000,176.0,154.500000,131.375000,165.0,176.750000,172.500000,167.0,84.500000,106.625000,0.0,0.00,0.000,1.0,1.00,0.875,1.0,2.00,1.625,19.0,22.750000,20.000000,13.0,16.00,16.125,20.0,20.750000,19.500000,18.0,16.00,16.250
15316,2022,Washington Commanders,15,18,December 18,8:15PM ET,L,0,7-6-1,,New York Giants,12.0,20.0,20.0,387.0,228.0,159.0,2.0,19.0,288.0,160.0,128.0,0.0,-3.08,-0.95,-2.25,HOME,12,2022,20,2022-12-18 20:00:00,0,0.0,0.75,0.750,246.0,188.250000,178.625000,165.0,161.500000,146.625000,182.0,161.000000,178.250000,134.0,104.000000,110.250000,1.0,0.25,0.125,1.0,1.00,0.875,1.0,2.00,1.750,20.0,23.500000,20.375000,20.0,16.00,16.000,25.0,22.500000,20.500000,20.0,16.75,16.875
15317,2022,Washington Commanders,16,24,December 24,4:05PM ET,L,0,7-7-1,@,San Francisco 49ers,20.0,37.0,21.0,349.0,270.0,79.0,2.0,14.0,371.0,218.0,153.0,1.0,-2.45,-9.41,-3.02,AWAY,12,2022,16,2022-12-24 16:00:00,0,0.0,0.50,0.625,228.0,200.750000,196.375000,159.0,163.250000,150.500000,160.0,158.500000,179.000000,128.0,112.500000,96.625000,0.0,0.25,0.125,2.0,1.00,1.125,0.0,1.00,1.500,12.0,18.500000,20.375000,20.0,15.75,17.625,20.0,21.250000,21.250000,19.0,17.00,16.750
15318,2022,Washington Commanders,17,1,January 1,1:00PM ET,L,0,7-8-1,,Cleveland Browns,10.0,24.0,17.0,260.0,124.0,136.0,3.0,16.0,301.0,155.0,146.0,0.0,-7.50,-7.26,0.54,HOME,1,2023,13,2023-01-01 13:00:00,0,0.0,0.25,0.500,270.0,220.500000,205.375000,79.0,144.750000,139.625000,218.0,181.250000,182.000000,153.0,145.500000,111.000000,0.0,0.25,0.125,2.0,1.50,1.250,1.0,0.75,1.500,20.0,17.750000,20.000000,37.0,22.50,19.625,21.0,21.500000,21.125000,14.0,17.75,16.500


In [25]:
# Spot-check the most recent rows of one team to inspect the rolling features.
data.loc[data['team'] == 'Philadelphia Eagles'].tail(20)
# TODO: decide whether the engineered features (rolling averages) should reset after each
# season. They currently carry over: the 2022 week-1 row in the output already has
# populated 4- and 8-game rates.

Unnamed: 0,season,team,week,day,date,time,result,ot,record,@,opp,points_scored,points_allowed,1st_downs,totyd,passyd,rushyd,to,1st_downs_allowed,totyd_allowed,passyd_allowed,rushyd_allowed,to_forced,off_exp_pts,def_exp_pts,sts_exp_pts,home_or_away,month,year,hour,date_time,win,win_rate_last_1_games,win_rate_last_4_games,win_rate_last_8_games,pass_rate_last_1_games,pass_rate_last_4_games,pass_rate_last_8_games,rush_rate_last_1_games,rush_rate_last_4_games,rush_rate_last_8_games,pass_allowed_rate_last_1_games,pass_allowed_rate_last_4_games,pass_allowed_rate_last_8_games,rush_allowed_rate_last_1_games,rush_allowed_rate_last_4_games,rush_allowed_rate_last_8_games,ot_rate_last_1_games,ot_rate_last_4_games,ot_rate_last_8_games,to_rate_last_1_games,to_rate_last_4_games,to_rate_last_8_games,to_forced_rate_last_1_games,to_forced_rate_last_4_games,to_forced_rate_last_8_games,points_scored_rate_last_1_games,points_scored_rate_last_4_games,points_scored_rate_last_8_games,points_allowed_rate_last_1_games,points_allowed_rate_last_4_games,points_allowed_rate_last_8_games,1st_down_rate_last_1_games,1st_down_rate_last_4_games,1st_down_rate_last_8_games,1st_down_allowed_rate_last_1_games,1st_down_allowed_rate_last_4_games,1st_down_allowed_rate_last_8_games
12365,2022,Philadelphia Eagles,1,11,September 11,1:00PM ET,W,0,1-0,@,Detroit Lions,38.0,35.0,27.0,455.0,239.0,216.0,0.0,23.0,386.0,205.0,181.0,1.0,21.15,-15.67,-0.78,AWAY,9,2022,13,2022-09-11 13:00:00,1,0.0,0.5,0.625,244.0,204.0,199.0,95.0,123.0,170.625,243.0,218.25,208.25,106.0,113.75,95.875,0.0,0.0,0.0,3.0,1.0,1.375,0.0,0.75,0.875,15.0,23.75,25.25,31.0,27.0,23.125,14.0,17.75,20.0,23.0,21.0,19.25
12366,2022,Philadelphia Eagles,2,19,September 19,8:30PM ET,W,0,2-0,,Minnesota Vikings,24.0,7.0,25.0,486.0,323.0,163.0,1.0,20.0,264.0,202.0,62.0,3.0,14.26,7.86,-7.37,HOME,9,2022,20,2022-09-19 20:00:00,1,1.0,0.5,0.625,239.0,215.25,211.625,216.0,144.5,167.375,205.0,242.5,207.125,181.0,138.0,104.875,0.0,0.0,0.0,0.0,1.0,1.25,1.0,0.5,0.625,38.0,24.75,25.0,35.0,33.25,23.875,27.0,20.25,20.75,23.0,23.0,19.875
12367,2022,Philadelphia Eagles,3,25,September 25,1:00PM ET,W,0,3-0,@,Washington Commanders,24.0,8.0,21.0,400.0,328.0,72.0,0.0,20.0,240.0,153.0,87.0,1.0,9.65,18.29,-8.92,AWAY,9,2022,13,2022-09-25 13:00:00,1,1.0,0.5,0.75,323.0,243.0,236.5,163.0,155.75,161.75,202.0,238.5,208.125,62.0,130.0,103.875,0.0,0.0,0.0,1.0,1.25,0.875,3.0,1.0,1.0,24.0,25.75,27.125,7.0,31.0,23.125,25.0,22.0,21.5,20.0,22.0,20.25
12368,2022,Philadelphia Eagles,4,2,October 2,1:00PM ET,W,0,4-0,,Jacksonville Jaguars,29.0,21.0,25.0,401.0,191.0,210.0,1.0,13.0,219.0,148.0,71.0,5.0,-0.18,11.06,-1.97,HOME,10,2022,13,2022-10-02 13:00:00,1,1.0,0.75,0.75,328.0,283.5,248.375,72.0,136.5,147.625,153.0,200.75,200.875,87.0,109.0,106.0,0.0,0.0,0.0,0.0,1.0,0.875,1.0,1.25,1.0,24.0,25.25,26.0,8.0,20.25,21.875,21.0,21.75,20.875,20.0,21.5,20.125
12369,2022,Philadelphia Eagles,5,9,October 9,4:25PM ET,W,0,5-0,@,Arizona Cardinals,20.0,17.0,24.0,357.0,218.0,139.0,0.0,23.0,363.0,239.0,124.0,1.0,10.63,-12.33,5.16,AWAY,10,2022,16,2022-10-09 16:00:00,1,1.0,1.0,0.75,191.0,270.25,237.125,210.0,165.25,144.125,148.0,177.0,197.625,71.0,100.25,107.0,0.0,0.0,0.0,1.0,0.5,0.75,5.0,2.5,1.625,29.0,28.75,26.25,21.0,17.75,22.375,25.0,24.5,21.125,13.0,19.0,20.0
12370,2022,Philadelphia Eagles,6,16,October 16,8:20PM ET,W,0,6-0,,Dallas Cowboys,26.0,17.0,22.0,268.0,132.0,136.0,0.0,21.0,315.0,181.0,134.0,3.0,7.95,0.04,3.76,HOME,10,2022,20,2022-10-16 20:00:00,1,1.0,1.0,0.75,218.0,265.0,240.125,139.0,146.0,145.25,239.0,185.5,214.0,124.0,86.0,112.0,0.0,0.0,0.0,0.0,0.5,0.75,1.0,2.5,1.5,20.0,24.25,24.5,17.0,13.25,23.25,24.0,23.75,22.0,23.0,19.0,21.0
12371,2022,Philadelphia Eagles,8,30,October 30,1:00PM ET,W,0,7-0,,Pittsburgh Steelers,35.0,13.0,20.0,401.0,290.0,111.0,0.0,21.0,302.0,158.0,144.0,2.0,23.31,4.96,-2.01,HOME,10,2022,13,2022-10-30 13:00:00,1,1.0,1.0,0.75,132.0,217.25,230.125,136.0,139.25,147.5,181.0,180.25,209.375,134.0,104.0,117.0,0.0,0.0,0.0,0.0,0.25,0.75,3.0,2.5,1.75,26.0,24.75,25.25,17.0,15.75,23.375,22.0,23.0,22.5,21.0,19.25,20.625
12372,2022,Philadelphia Eagles,9,3,November 3,8:15PM ET,W,0,8-0,@,Houston Texans,29.0,17.0,24.0,360.0,217.0,143.0,1.0,20.0,303.0,135.0,168.0,2.0,17.84,-1.89,-2.38,AWAY,11,2022,20,2022-11-03 20:00:00,1,1.0,1.0,0.875,290.0,207.75,245.625,111.0,149.0,142.75,158.0,181.5,191.125,144.0,118.25,113.625,0.0,0.0,0.0,0.0,0.25,0.625,2.0,2.75,2.0,35.0,27.5,26.375,13.0,17.0,18.625,20.0,22.75,22.25,21.0,19.5,20.5
12373,2022,Philadelphia Eagles,10,14,November 14,8:15PM ET,L,0,8-1,,Washington Commanders,21.0,32.0,18.0,264.0,170.0,94.0,4.0,25.0,330.0,178.0,152.0,2.0,-1.89,-4.13,-5.98,HOME,11,2022,20,2022-11-14 20:00:00,0,1.0,1.0,1.0,217.0,214.25,242.25,143.0,132.25,148.75,135.0,178.25,177.625,168.0,142.5,121.375,0.0,0.0,0.0,1.0,0.25,0.375,2.0,2.0,2.25,29.0,27.5,28.125,17.0,16.0,16.875,24.0,22.5,23.5,20.0,21.25,20.125
12374,2022,Philadelphia Eagles,11,20,November 20,1:00PM ET,W,0,9-1,@,Indianapolis Colts,17.0,16.0,18.0,314.0,173.0,141.0,2.0,14.0,284.0,185.0,99.0,1.0,-3.73,9.33,-1.97,AWAY,11,2022,13,2022-11-20 13:00:00,1,0.0,0.75,0.875,170.0,202.25,233.625,94.0,121.0,133.5,178.0,163.0,174.25,152.0,149.5,117.75,0.0,0.0,0.0,4.0,1.25,0.875,2.0,2.25,2.375,21.0,27.75,26.0,32.0,19.75,16.5,18.0,21.0,22.375,25.0,21.75,20.375


### Reducing the number of rows per game from two to one

In [28]:
# Row count before reshaping: `data` holds two rows per game (one per team).
print(f'{len(data)=}')

len(data)=15320


In [26]:
# Identifier columns: required to pair each game's home row with its away row.
id_columns = ['season', 'week', 'team', 'opp', 'date_time']

# Features, i.e. the information we can use to predict the target: one rolling-rate
# column per statistic and look-back window, ordered by statistic and then by window.
rolling_stats = [
    'win', 'pass', 'rush', 'pass_allowed', 'rush_allowed', 'ot',
    'to', 'to_forced', 'points_scored', 'points_allowed',
    '1st_down', '1st_down_allowed',
]
feature_columns = [
    f'{stat}_rate_last_{window}_games'
    for stat in rolling_stats
    for window in (1, 4, 8)
]

# Target, i.e. what we want to predict.
target_column = 'win'

columns_to_keep = [*id_columns, *feature_columns, target_column]

# Split the team-level rows by venue, keeping only the selected columns.
is_home_row = data['home_or_away'] == 'HOME'
is_away_row = data['home_or_away'] == 'AWAY'
home_team_data = data.loc[is_home_row, columns_to_keep]
away_team_data = data.loc[is_away_row, columns_to_keep]

# Pair the two rows of each game: the home row's opponent must equal the away row's team,
# at the same kick-off time. The right join keeps every away row, even without a home match.
game_level_data = pd.merge(
    home_team_data,
    away_team_data,
    how='right',
    left_on=['opp', 'date_time'],
    right_on=['team', 'date_time'],
    suffixes=('_home', '_away'),
)

In [27]:
# Number of away-team rows, i.e. the number of games the merged frame should contain.
len(away_team_data)

7660

In [29]:
# Sanity check: `data` holds two rows per game, so we expect 15320 / 2 = 7660 rows here.
# History: before the fix on 04/30/2023 this printed 7631. The 29 missing rows were the
# Super Bowls (one per season in this dataset), whose `home_or_away` value was not
# properly defined.
# Also note that one regular-season game was canceled last year (Bills & Bengals, 2022 season).
print(f'{len(game_level_data)=}')

len(game_level_data)=7660


In [30]:
# Order the games chronologically and renumber the index from 0.
game_level_data = game_level_data.sort_values('date_time', ignore_index=True)

In [32]:
# Remove columns that carry no extra information once both teams share one row:
#   - `win_away` describes the outcome of the same game as `win_home` (the target), so
#     keeping it would cause data leakage;
#   - `opp_home` / `opp_away` repeat `team_away` / `team_home`;
#   - `season_away` / `week_away` should repeat `season_home` / `week_home`.
redundant_columns = ['win_away', 'opp_home', 'opp_away', 'season_away', 'week_away']
game_level_data = game_level_data.drop(columns=redundant_columns)

In [33]:
# List the remaining columns to confirm the redundant ones are gone and the
# `_home` / `_away` suffixes were applied.
game_level_data.columns

Index(['season_home', 'week_home', 'team_home', 'date_time',
       'win_rate_last_1_games_home', 'win_rate_last_4_games_home',
       'win_rate_last_8_games_home', 'pass_rate_last_1_games_home',
       'pass_rate_last_4_games_home', 'pass_rate_last_8_games_home',
       'rush_rate_last_1_games_home', 'rush_rate_last_4_games_home',
       'rush_rate_last_8_games_home', 'pass_allowed_rate_last_1_games_home',
       'pass_allowed_rate_last_4_games_home',
       'pass_allowed_rate_last_8_games_home',
       'rush_allowed_rate_last_1_games_home',
       'rush_allowed_rate_last_4_games_home',
       'rush_allowed_rate_last_8_games_home', 'ot_rate_last_1_games_home',
       'ot_rate_last_4_games_home', 'ot_rate_last_8_games_home',
       'to_rate_last_1_games_home', 'to_rate_last_4_games_home',
       'to_rate_last_8_games_home', 'to_forced_rate_last_1_games_home',
       'to_forced_rate_last_4_games_home', 'to_forced_rate_last_8_games_home',
       'points_scored_rate_last_1_games_home',
  

In [34]:
# Preview the game-level frame: one row per game, home-team columns followed by away-team columns.
# NOTE(review): in the output, the 1994 week-1 rows have NaN 1-game rates but populated 4- and
# 8-game rates (e.g. 0.333333 and 0.142857), while the team-level preview earlier shows every
# rolling feature as NaN for a 1994 week-1 row. 1994 appears to be the first season in the
# dataset, so verify that the rolling helpers do not look ahead, and that these outputs come
# from the same run (the execution counts in this notebook are out of order).
game_level_data

Unnamed: 0,season_home,week_home,team_home,date_time,win_rate_last_1_games_home,win_rate_last_4_games_home,win_rate_last_8_games_home,pass_rate_last_1_games_home,pass_rate_last_4_games_home,pass_rate_last_8_games_home,rush_rate_last_1_games_home,rush_rate_last_4_games_home,rush_rate_last_8_games_home,pass_allowed_rate_last_1_games_home,pass_allowed_rate_last_4_games_home,pass_allowed_rate_last_8_games_home,rush_allowed_rate_last_1_games_home,rush_allowed_rate_last_4_games_home,rush_allowed_rate_last_8_games_home,ot_rate_last_1_games_home,ot_rate_last_4_games_home,ot_rate_last_8_games_home,to_rate_last_1_games_home,to_rate_last_4_games_home,to_rate_last_8_games_home,to_forced_rate_last_1_games_home,to_forced_rate_last_4_games_home,to_forced_rate_last_8_games_home,points_scored_rate_last_1_games_home,points_scored_rate_last_4_games_home,points_scored_rate_last_8_games_home,points_allowed_rate_last_1_games_home,points_allowed_rate_last_4_games_home,points_allowed_rate_last_8_games_home,1st_down_rate_last_1_games_home,1st_down_rate_last_4_games_home,1st_down_rate_last_8_games_home,1st_down_allowed_rate_last_1_games_home,1st_down_allowed_rate_last_4_games_home,1st_down_allowed_rate_last_8_games_home,win_home,team_away,win_rate_last_1_games_away,win_rate_last_4_games_away,win_rate_last_8_games_away,pass_rate_last_1_games_away,pass_rate_last_4_games_away,pass_rate_last_8_games_away,rush_rate_last_1_games_away,rush_rate_last_4_games_away,rush_rate_last_8_games_away,pass_allowed_rate_last_1_games_away,pass_allowed_rate_last_4_games_away,pass_allowed_rate_last_8_games_away,rush_allowed_rate_last_1_games_away,rush_allowed_rate_last_4_games_away,rush_allowed_rate_last_8_games_away,ot_rate_last_1_games_away,ot_rate_last_4_games_away,ot_rate_last_8_games_away,to_rate_last_1_games_away,to_rate_last_4_games_away,to_rate_last_8_games_away,to_forced_rate_last_1_games_away,to_forced_rate_last_4_games_away,to_forced_rate_last_8_games_away,points_scored_rate_last_1_games_away,points_sco
red_rate_last_4_games_away,points_scored_rate_last_8_games_away,points_allowed_rate_last_1_games_away,points_allowed_rate_last_4_games_away,points_allowed_rate_last_8_games_away,1st_down_rate_last_1_games_away,1st_down_rate_last_4_games_away,1st_down_rate_last_8_games_away,1st_down_allowed_rate_last_1_games_away,1st_down_allowed_rate_last_4_games_away,1st_down_allowed_rate_last_8_games_away
0,1994,1,Indianapolis Colts,1994-09-04 13:00:00,,0.333333,0.142857,,177.666667,178.285714,,82.666667,71.571429,,189.666667,205.714286,,180.666667,151.714286,,0.333333,0.142857,,1.000000,2.000000,,2.333333,1.857143,,15.333333,15.428571,,25.000000,26.000000,,15.333333,14.714286,,23.000000,21.285714,1,Tennessee Titans,,0.666667,0.428571,,276.000000,269.714286,,89.333333,85.857143,,246.000000,218.142857,,123.000000,126.571429,,0.333333,0.285714,,1.333333,1.857143,,2.333333,1.428571,,22.000000,18.571429,,23.333333,25.428571,,19.333333,20.285714,,20.000000,20.000000
1,1994,1,Detroit Lions,1994-09-04 13:00:00,,0.333333,0.142857,,188.333333,187.714286,,129.666667,114.000000,,241.666667,244.428571,,92.333333,109.000000,,0.000000,0.142857,,2.666667,1.714286,,1.666667,1.571429,,20.666667,17.857143,,31.000000,26.000000,,20.666667,17.714286,,20.333333,20.000000,1,Atlanta Falcons,,0.000000,0.142857,,194.666667,206.571429,,106.666667,108.428571,,201.000000,213.142857,,138.333333,115.428571,,0.333333,0.142857,,2.000000,1.714286,,1.666667,1.142857,,16.666667,17.714286,,21.000000,24.285714,,17.000000,18.142857,,21.333333,20.428571
2,1994,1,Chicago Bears,1994-09-04 13:00:00,,0.333333,0.571429,,254.666667,186.142857,,138.333333,155.857143,,305.333333,232.571429,,89.333333,98.285714,,0.000000,0.000000,,1.000000,1.000000,,0.666667,1.142857,,25.666667,22.571429,,25.666667,19.857143,,21.333333,19.714286,,23.000000,19.714286,1,Tampa Bay Buccaneers,,0.666667,0.428571,,184.333333,236.571429,,176.000000,114.142857,,182.333333,186.285714,,99.333333,163.571429,,0.333333,0.285714,,1.000000,1.428571,,1.333333,1.142857,,17.333333,21.428571,,15.333333,22.857143,,20.333333,20.000000,,16.000000,19.142857
3,1994,1,Cincinnati Bengals,1994-09-04 13:00:00,,0.000000,0.000000,,83.333333,135.285714,,145.666667,162.428571,,240.000000,224.142857,,210.333333,172.571429,,0.000000,0.000000,,1.666667,1.571429,,2.000000,1.142857,,14.333333,18.000000,,33.666667,31.142857,,11.666667,14.714286,,22.000000,21.714286,0,Cleveland Browns,,1.000000,1.000000,,208.333333,240.571429,,92.666667,98.857143,,248.666667,251.142857,,109.666667,98.714286,,0.000000,0.000000,,0.666667,1.000000,,2.333333,2.000000,,26.000000,26.285714,,14.333333,16.857143,,21.000000,22.857143,,20.666667,20.000000
4,1994,1,New Orleans Saints,1994-09-04 13:00:00,,0.333333,0.428571,,169.666667,211.142857,,114.333333,92.857143,,275.666667,229.285714,,85.333333,88.142857,,0.000000,0.000000,,0.666667,0.428571,,2.000000,1.428571,,21.666667,19.714286,,24.333333,20.857143,,16.000000,15.714286,,22.333333,19.571429,0,Kansas City Chiefs,,1.000000,0.857143,,214.666667,244.571429,,101.666667,114.142857,,203.666667,225.571429,,99.333333,107.857143,,0.000000,0.142857,,2.666667,1.857143,,1.000000,1.571429,,27.333333,27.285714,,16.333333,21.142857,,19.000000,19.714286,,16.666667,19.571429
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7655,2022,Division,Buffalo Bills,2023-01-22 15:00:00,1.0,1.000000,1.000000,316.0,255.250000,224.625000,107.0,150.250000,146.250000,189.0,192.250000,216.250000,42.0,104.250000,91.125000,0.0,0.000000,0.000000,3.0,2.500000,1.500000,2.0,1.750000,1.500000,34.0,34.000000,29.875000,31.0,24.000000,20.750000,25.0,23.000000,22.250000,16.0,16.750000,18.625000,0,Cincinnati Bengals,1.0,1.000000,1.000000,183.0,234.500000,257.000000,51.0,58.000000,86.250000,209.0,253.250000,253.375000,155.0,104.500000,99.000000,0.0,0.000000,0.000000,1.0,1.500000,1.125000,2.0,2.750000,1.625000,24.0,26.750000,26.750000,17.0,18.500000,19.250000,18.0,20.250000,21.625000,23.0,20.750000,19.125000
7656,2022,Division,San Francisco 49ers,2023-01-22 18:00:00,1.0,1.000000,1.000000,324.0,242.000000,228.125000,181.0,168.250000,158.625000,228.0,264.250000,248.625000,104.0,94.750000,76.750000,0.0,0.250000,0.125000,0.0,0.500000,0.500000,2.0,2.500000,2.500000,41.0,38.250000,31.875000,23.0,22.500000,15.875000,24.0,21.500000,21.250000,22.0,20.750000,18.125000,1,Dallas Cowboys,1.0,0.750000,0.750000,297.0,248.250000,242.375000,128.0,98.500000,133.000000,334.0,266.750000,250.500000,52.0,96.750000,111.125000,0.0,0.000000,0.125000,0.0,1.500000,1.750000,1.0,2.000000,2.250000,31.0,26.000000,30.875000,14.0,21.750000,23.625000,26.0,21.750000,22.625000,24.0,20.500000,20.375000
7657,2022,Conf. Champ.,Philadelphia Eagles,2023-01-29 15:00:00,1.0,0.500000,0.750000,148.0,239.000000,246.500000,268.0,139.250000,169.000000,109.0,188.000000,172.750000,118.0,122.750000,120.500000,0.0,0.000000,0.000000,0.0,1.500000,1.250000,1.0,0.750000,0.875000,38.0,26.000000,31.500000,7.0,20.750000,21.000000,26.0,21.250000,23.375000,13.0,18.750000,17.250000,1,San Francisco 49ers,1.0,1.000000,1.000000,199.0,237.250000,225.375000,113.0,158.250000,160.750000,206.0,248.250000,249.750000,76.0,94.000000,78.375000,0.0,0.250000,0.125000,1.0,0.500000,0.625000,2.0,2.500000,2.500000,19.0,33.750000,32.625000,12.0,20.500000,17.375000,21.0,23.250000,21.250000,15.0,19.250000,18.250000
7658,2022,Conf. Champ.,Kansas City Chiefs,2023-01-29 18:00:00,1.0,1.000000,0.875000,218.0,236.750000,266.625000,144.0,108.750000,121.000000,205.0,193.750000,184.375000,144.0,123.250000,120.125000,0.0,0.000000,0.125000,0.0,0.500000,1.250000,2.0,1.750000,1.500000,27.0,27.250000,27.875000,20.0,16.750000,19.500000,23.0,20.000000,22.750000,20.0,20.250000,19.375000,1,Cincinnati Bengals,1.0,1.000000,1.000000,240.0,248.500000,243.750000,172.0,87.750000,100.000000,261.0,242.500000,254.875000,64.0,97.500000,94.250000,0.0,0.000000,0.000000,0.0,1.250000,0.875000,1.0,2.000000,1.750000,27.0,25.000000,25.500000,10.0,15.250000,16.750000,30.0,22.750000,22.625000,19.0,19.250000,19.375000


In [42]:
# Verify the data types in the game-level frame.
# Note that `week_home` is `object`: playoff rounds are stored as strings such as
# 'Division' and 'Conf. Champ.' alongside the numbered regular-season weeks.
game_level_data.dtypes

season_home                                         int64
week_home                                          object
team_home                                          object
date_time                                  datetime64[ns]
win_rate_last_1_games_home                        float64
                                                ...      
1st_down_rate_last_4_games_away                   float64
1st_down_rate_last_8_games_away                   float64
1st_down_allowed_rate_last_1_games_away           float64
1st_down_allowed_rate_last_4_games_away           float64
1st_down_allowed_rate_last_8_games_away           float64
Length: 78, dtype: object

In [37]:
# Spot-check the last rows (up to 21) in which Kansas City is the home team.
game_level_data.loc[game_level_data['team_home'] == 'Kansas City Chiefs'].tail(21)

Unnamed: 0,season_home,week_home,team_home,date_time,win_rate_last_1_games_home,win_rate_last_4_games_home,win_rate_last_8_games_home,pass_rate_last_1_games_home,pass_rate_last_4_games_home,pass_rate_last_8_games_home,rush_rate_last_1_games_home,rush_rate_last_4_games_home,rush_rate_last_8_games_home,pass_allowed_rate_last_1_games_home,pass_allowed_rate_last_4_games_home,pass_allowed_rate_last_8_games_home,rush_allowed_rate_last_1_games_home,rush_allowed_rate_last_4_games_home,rush_allowed_rate_last_8_games_home,ot_rate_last_1_games_home,ot_rate_last_4_games_home,ot_rate_last_8_games_home,to_rate_last_1_games_home,to_rate_last_4_games_home,to_rate_last_8_games_home,to_forced_rate_last_1_games_home,to_forced_rate_last_4_games_home,to_forced_rate_last_8_games_home,points_scored_rate_last_1_games_home,points_scored_rate_last_4_games_home,points_scored_rate_last_8_games_home,points_allowed_rate_last_1_games_home,points_allowed_rate_last_4_games_home,points_allowed_rate_last_8_games_home,1st_down_rate_last_1_games_home,1st_down_rate_last_4_games_home,1st_down_rate_last_8_games_home,1st_down_allowed_rate_last_1_games_home,1st_down_allowed_rate_last_4_games_home,1st_down_allowed_rate_last_8_games_home,win_home,team_away,win_rate_last_1_games_away,win_rate_last_4_games_away,win_rate_last_8_games_away,pass_rate_last_1_games_away,pass_rate_last_4_games_away,pass_rate_last_8_games_away,rush_rate_last_1_games_away,rush_rate_last_4_games_away,rush_rate_last_8_games_away,pass_allowed_rate_last_1_games_away,pass_allowed_rate_last_4_games_away,pass_allowed_rate_last_8_games_away,rush_allowed_rate_last_1_games_away,rush_allowed_rate_last_4_games_away,rush_allowed_rate_last_8_games_away,ot_rate_last_1_games_away,ot_rate_last_4_games_away,ot_rate_last_8_games_away,to_rate_last_1_games_away,to_rate_last_4_games_away,to_rate_last_8_games_away,to_forced_rate_last_1_games_away,to_forced_rate_last_4_games_away,to_forced_rate_last_8_games_away,points_scored_rate_last_1_games_away,points_sco
red_rate_last_4_games_away,points_scored_rate_last_8_games_away,points_allowed_rate_last_1_games_away,points_allowed_rate_last_4_games_away,points_allowed_rate_last_8_games_away,1st_down_rate_last_1_games_away,1st_down_rate_last_4_games_away,1st_down_rate_last_8_games_away,1st_down_allowed_rate_last_1_games_away,1st_down_allowed_rate_last_4_games_away,1st_down_allowed_rate_last_8_games_away
7169,2021,5,Kansas City Chiefs,2021-10-10 20:00:00,1.0,0.5,0.5,271.0,297.25,286.125,200.0,130.25,114.5,358.0,291.75,259.125,103.0,146.0,138.125,0.0,0.0,0.0,1.0,1.75,1.5,0.0,1.0,0.875,42.0,33.5,28.0,30.0,31.25,29.375,31.0,26.75,25.0,30.0,26.5,25.75,0,Buffalo Bills,1.0,0.75,0.75,251.0,258.75,266.875,199.0,145.25,114.5,61.0,148.75,225.375,48.0,68.0,96.125,0.0,0.0,0.0,1.0,1.0,0.875,5.0,2.75,2.125,40.0,33.5,32.25,0.0,11.0,16.875,26.0,24.5,23.125,6.0,12.0,18.75
7212,2021,8,Kansas City Chiefs,2021-11-01 20:00:00,0.0,0.5,0.375,257.0,297.5,293.875,77.0,126.5,116.75,266.0,280.25,265.625,103.0,105.25,130.875,0.0,0.0,0.0,3.0,2.75,2.375,1.0,0.75,0.875,3.0,24.0,24.625,27.0,27.0,29.25,22.0,27.75,26.125,24.0,22.25,23.875,1,New York Giants,1.0,0.5,0.375,199.0,274.0,254.375,103.0,79.75,95.875,117.0,225.0,237.625,56.0,139.5,123.375,0.0,0.25,0.125,0.0,1.75,1.375,1.0,1.5,1.25,25.0,20.75,20.25,3.0,26.5,24.875,21.0,20.75,20.75,11.0,20.0,21.625
7224,2021,9,Kansas City Chiefs,2021-11-07 16:00:00,1.0,0.5,0.5,261.0,295.0,296.125,107.0,103.25,116.75,228.0,247.75,269.75,72.0,97.5,121.75,0.0,0.0,0.0,2.0,3.0,2.375,1.0,1.0,1.0,20.0,18.5,26.0,17.0,23.75,27.5,29.0,27.25,27.0,18.0,19.25,22.875,1,Green Bay Packers,1.0,1.0,0.875,184.0,233.25,229.375,151.0,123.75,108.125,260.0,224.0,216.75,74.0,128.0,115.0,0.0,0.25,0.125,0.0,0.5,0.75,3.0,2.0,1.75,24.0,24.25,24.0,21.0,16.75,20.875,24.0,21.5,21.125,22.0,21.5,21.25
7251,2021,11,Kansas City Chiefs,2021-11-21 16:00:00,1.0,0.75,0.625,422.0,275.0,285.5,94.0,88.75,121.25,249.0,230.5,256.5,50.0,86.75,92.75,0.0,0.0,0.0,1.0,1.5,2.25,2.0,1.5,1.0,41.0,19.25,24.25,14.0,16.25,22.0,29.0,23.5,27.0,15.0,19.0,20.25,1,Dallas Cowboys,1.0,0.75,0.875,317.0,328.75,282.25,114.0,98.0,149.5,111.0,180.0,237.0,103.0,128.5,107.375,0.0,0.25,0.125,1.0,1.75,1.375,3.0,1.25,1.625,43.0,28.5,31.875,3.0,19.5,20.5,22.0,22.75,24.125,11.0,17.0,18.5
7283,2021,13,Kansas City Chiefs,2021-12-05 20:00:00,1.0,1.0,0.75,244.0,271.75,284.625,126.0,101.0,113.75,194.0,212.5,246.375,82.0,81.5,93.375,0.0,0.0,0.0,2.0,1.25,2.0,3.0,2.0,1.375,19.0,23.25,23.625,9.0,11.75,19.375,22.0,23.5,25.625,16.0,17.0,19.625,1,Denver Broncos,1.0,0.75,0.375,155.0,193.5,210.125,147.0,129.0,110.125,285.0,224.25,247.375,72.0,119.5,124.375,0.0,0.0,0.0,1.0,0.75,1.25,2.0,1.75,1.0,28.0,22.0,19.0,13.0,17.25,21.25,23.0,20.0,19.0,20.0,19.25,19.375
7291,2021,14,Kansas City Chiefs,2021-12-12 13:00:00,1.0,1.0,0.75,178.0,251.0,273.0,89.0,96.5,99.875,250.0,218.0,232.875,154.0,102.0,99.75,0.0,0.0,0.0,1.0,1.0,2.0,3.0,2.5,1.75,22.0,23.75,21.125,9.0,9.75,16.75,15.0,20.0,23.625,22.0,18.0,18.625,1,Las Vegas Raiders,0.0,0.25,0.375,234.0,263.75,274.0,76.0,85.25,91.75,186.0,277.5,231.125,112.0,107.25,121.0,0.0,0.25,0.125,0.0,1.0,1.125,1.0,0.75,1.25,15.0,19.5,21.25,17.0,30.75,26.5,22.0,19.5,19.625,23.0,24.5,22.5
7326,2021,16,Kansas City Chiefs,2021-12-26 16:00:00,1.0,1.0,0.875,410.0,268.0,271.5,86.0,108.25,98.5,236.0,231.5,231.0,192.0,118.0,102.375,1.0,0.25,0.125,2.0,1.25,1.375,2.0,3.25,2.375,34.0,30.75,25.0,28.0,13.75,15.0,26.0,21.0,22.25,28.0,21.25,20.125,1,Pittsburgh Steelers,1.0,0.5,0.5,133.0,222.0,225.625,35.0,69.25,87.125,117.0,181.0,207.875,201.0,187.0,171.0,0.0,0.0,0.125,0.0,1.0,1.0,4.0,2.0,1.5,19.0,19.25,21.75,13.0,27.25,25.375,12.0,16.75,19.0,22.0,22.75,22.5
7367,2021,Wild Card,Kansas City Chiefs,2022-01-16 20:00:00,1.0,0.75,0.875,255.0,294.5,282.75,135.0,125.75,118.0,173.0,249.25,242.0,191.0,143.25,112.875,0.0,0.25,0.125,0.0,0.5,0.75,1.0,1.5,2.375,28.0,32.25,32.375,24.0,24.0,17.125,28.0,25.5,23.625,18.0,22.25,20.125,1,Pittsburgh Steelers,1.0,0.75,0.5,235.0,162.5,206.25,79.0,108.5,91.375,132.0,160.5,202.875,249.0,167.5,172.0,1.0,0.25,0.125,1.0,1.25,1.125,3.0,2.25,1.75,16.0,17.75,20.75,13.0,19.0,26.625,19.0,17.75,18.5,20.0,20.5,23.0
7372,2021,Division,Kansas City Chiefs,2022-01-23 18:00:00,1.0,0.75,0.875,372.0,285.0,276.5,106.0,130.75,119.5,201.0,240.5,236.0,56.0,109.25,113.625,0.0,0.0,0.125,2.0,0.5,0.875,1.0,1.25,2.25,42.0,34.25,32.5,21.0,22.25,18.0,26.0,25.5,23.25,19.0,20.0,20.625,1,Buffalo Bills,1.0,1.0,0.75,308.0,248.5,232.375,174.0,172.75,149.375,216.0,132.25,146.125,89.0,95.5,117.0,0.0,0.0,0.125,0.0,0.75,1.0,2.0,1.25,1.0,47.0,34.0,29.375,17.0,15.75,16.25,29.0,27.5,24.375,20.0,15.5,16.25
7373,2021,Conf. Champ.,Kansas City Chiefs,2022-01-30 15:00:00,1.0,0.75,0.875,370.0,314.0,292.25,182.0,144.5,126.5,313.0,275.5,250.875,109.0,104.0,117.0,1.0,0.25,0.25,0.0,0.5,0.625,0.0,0.5,1.875,42.0,35.75,35.375,36.0,28.75,21.375,30.0,26.75,24.25,23.0,20.75,21.5,0,Cincinnati Bengals,1.0,0.75,0.625,280.0,255.75,278.5,65.0,71.75,82.375,213.0,231.25,239.75,140.0,150.75,119.25,0.0,0.0,0.125,1.0,0.25,0.875,3.0,1.75,1.5,19.0,23.75,24.5,16.0,21.75,23.125,17.0,17.25,19.625,16.0,21.5,20.625


In [None]:
# Super Bowl games were not being included in the final df because both teams were being designated as the home team.
# This happened because we used '@' to determine away teams, with everything else considered to be the home team,
# which doesn't account for 'N' values in the @ column, which designate a neutral field.
# Even season yr = AFC team is the home team for the SB; odd season yr = NFC team is the home team for the SB.
# The game is played on neutral ground, where the location (typically a new stadium) is determined years in advance.
# Issue fixed 04/30/2023: utilized np.where to create the conditions.

In [None]:
# Questions - 


## should engineered features (rolling averages) reset after each season?

## do you think this dataset is ready to be prepared for modeling? 
## do you have any suggestions on how to convert (encode) the categorical columns, e.g. team_home, team_away and week_home?

##


In [39]:
# Final step of this notebook: hand the game-level frame to the project's export helper.
# NOTE(review): judging by its name the helper writes a CSV; the output path/filename is
# decided inside src.data_preparation and is not visible here — confirm before relying on it.
from src.data_preparation import export_transformed_data_to_csv
export_transformed_data_to_csv(game_level_data)