# 02 Data Prep

In [77]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [78]:
# common imports
import numpy as np
import pandas as pd
# import matplotlib.pyplot as plt 
# import seaborn as sns
# import sys
# from datetime import datetime
# import sklearn

# will display all the columns in the df moving forward
pd.set_option('display.max_columns', 500)

In [79]:
from src.data_preparation import load_scrapped_data_from_disk
data = load_scrapped_data_from_disk(file_name='scraped_data.csv')

In [80]:
from src.data_preparation import fix_opponent_names
data = fix_opponent_names(data)

In [81]:
from src.data_preparation import map_team_abbreviations_to_names
data = map_team_abbreviations_to_names(data)

In [82]:
from src.data_preparation import add_home_or_away_column
data = add_home_or_away_column(data)

In [83]:
from src.data_preparation import add_datetime_column
data = add_datetime_column(data)

## Feature engineering using lagged statistics

In [88]:
from src.data_preparation import add_win_rates_last_n_matches
data = add_win_rates_last_n_matches(data, n_matches=[1, 5, 10])
data.head(10)

Unnamed: 0,season,team,week,day,date,time,result,ot,record,@,opp,points_scored,points_allowed,1st_downs,totyd,passyd,rushyd,to,1st_downs_allowed,totyd_allowed,passyd_allowed,rushyd_allowed,to_forced,off_exp_pts,def_exp_pts,sts_exp_pts,home_or_away,year,month,hour,date_time,win,win_rate_last_1_matches,win_rate_last_5_matches,win_rate_last_10_matches
0,1994,Arizona Cardinals,1,4,September 4,4:00PM ET,L,,0-1,@,Los Angeles Rams,12.0,14.0,23.0,234.0,128.0,106.0,3.0,9.0,152.0,102.0,50.0,2.0,-15.09,17.92,1.36,AWAY,1994,9,16,1994-09-04 16:00:00,0,,,
1,1994,Arizona Cardinals,2,11,September 11,8:00PM ET,L,,0-2,,New York Giants,17.0,20.0,11.0,174.0,135.0,39.0,3.0,19.0,206.0,88.0,118.0,2.0,-17.99,1.7,6.1,HOME,1994,9,20,1994-09-11 20:00:00,0,0.0,0.0,0.0
2,1994,Arizona Cardinals,3,18,September 18,1:00PM ET,L,,0-3,@,Cleveland Browns,0.0,32.0,21.0,318.0,255.0,63.0,3.0,17.0,322.0,243.0,79.0,2.0,-23.88,-2.64,1.52,AWAY,1994,9,13,1994-09-18 13:00:00,0,0.0,0.0,0.0
3,1994,Arizona Cardinals,5,2,October 2,4:00PM ET,W,,1-3,,Minnesota Vikings,17.0,7.0,21.0,309.0,200.0,109.0,2.0,19.0,358.0,340.0,18.0,4.0,0.47,13.72,2.86,HOME,1994,10,16,1994-10-02 16:00:00,1,0.0,0.0,0.0
4,1994,Arizona Cardinals,6,9,October 9,4:00PM ET,L,,1-4,@,Dallas Cowboys,3.0,38.0,10.0,221.0,168.0,53.0,5.0,22.0,351.0,273.0,78.0,,-26.39,-11.7,5.29,AWAY,1994,10,16,1994-10-09 16:00:00,0,1.0,0.25,0.25
5,1994,Arizona Cardinals,7,16,October 16,1:00PM ET,W,OT,2-4,@,Washington Commanders,19.0,16.0,19.0,324.0,173.0,151.0,3.0,11.0,234.0,149.0,85.0,5.0,-20.42,30.17,-2.17,AWAY,1994,10,13,1994-10-16 13:00:00,1,0.0,0.2,0.2
6,1994,Arizona Cardinals,8,23,October 23,4:00PM ET,L,,2-5,,Dallas Cowboys,21.0,28.0,22.0,315.0,208.0,107.0,,14.0,312.0,237.0,75.0,,3.98,-10.71,3.24,HOME,1994,10,16,1994-10-23 16:00:00,0,1.0,0.4,0.333333
7,1994,Arizona Cardinals,9,30,October 30,8:00PM ET,W,OT,3-5,,Pittsburgh Steelers,20.0,17.0,16.0,335.0,236.0,99.0,1.0,12.0,317.0,232.0,85.0,3.0,2.29,6.86,-3.33,HOME,1994,10,20,1994-10-30 20:00:00,1,0.0,0.4,0.285714
8,1994,Arizona Cardinals,10,6,November 6,4:00PM ET,L,,3-6,@,Philadelphia Eagles,7.0,17.0,21.0,254.0,181.0,73.0,2.0,18.0,322.0,172.0,150.0,1.0,-6.64,-1.13,-2.76,AWAY,1994,11,16,1994-11-06 16:00:00,0,1.0,0.6,0.375
9,1994,Arizona Cardinals,11,13,November 13,1:00PM ET,W,,4-6,@,New York Giants,10.0,9.0,18.0,239.0,173.0,66.0,1.0,17.0,231.0,81.0,150.0,1.0,-10.2,5.81,5.56,AWAY,1994,11,13,1994-11-13 13:00:00,1,0.0,0.4,0.333333


In [101]:
print(f'{len(data)=}')

len(data)=15080


### Reduce the number of rows per game from two to one

In [103]:
columns_to_keep = [
    # these are basically ids
    # we need these columns to join the dataframes for home and away teams
    'team',
    'opp',
    'date_time',

    # features, aka info we can use to predict the target
    'win_rate_last_1_matches',
    'win_rate_last_5_matches',
    'win_rate_last_10_matches',
    
    # target, aka what we want to predict
    'win',
]

home_team_data = data[data['home_or_away'] == 'HOME'][columns_to_keep]
away_team_data = data[data['home_or_away'] == 'AWAY'][columns_to_keep]

match_level_data = home_team_data.merge(
    away_team_data,
    how='right',
    left_on=['opp', 'date_time'],
    right_on=['team', 'date_time'],
    suffixes=('_home', '_away')
)

# we should have 15080/2 = 7540 rows in the match_level_data
# we don't, probably because of SuperBowl matches, where the `home_or_away` column
# is not properly defined
print(f'{len(match_level_data)=}')

match_level_data.head(10)

len(match_level_data)=7512


Unnamed: 0,team_home,opp_home,date_time,win_rate_last_1_matches_home,win_rate_last_5_matches_home,win_rate_last_10_matches_home,win_home,team_away,opp_away,win_rate_last_1_matches_away,win_rate_last_5_matches_away,win_rate_last_10_matches_away,win_away
0,Los Angeles Rams,Arizona Cardinals,1994-09-04 16:00:00,,0.5,0.555556,1,Arizona Cardinals,Los Angeles Rams,,,,0
1,Cleveland Browns,Arizona Cardinals,1994-09-18 13:00:00,0.0,0.5,0.555556,1,Arizona Cardinals,Cleveland Browns,0.0,0.0,0.0,0
2,Dallas Cowboys,Arizona Cardinals,1994-10-09 16:00:00,1.0,0.6,0.555556,1,Arizona Cardinals,Dallas Cowboys,1.0,0.25,0.25,0
3,Washington Commanders,Arizona Cardinals,1994-10-16 13:00:00,0.0,0.2,0.333333,0,Arizona Cardinals,Washington Commanders,0.0,0.2,0.2,1
4,Philadelphia Eagles,Arizona Cardinals,1994-11-06 16:00:00,1.0,0.8,0.777778,1,Arizona Cardinals,Philadelphia Eagles,1.0,0.6,0.375,0
5,New York Giants,Arizona Cardinals,1994-11-13 13:00:00,0.0,0.0,0.333333,0,Arizona Cardinals,New York Giants,0.0,0.4,0.333333,1
6,Tennessee Titans,Arizona Cardinals,1994-12-04 16:00:00,0.0,0.0,0.1,0,Arizona Cardinals,Tennessee Titans,0.0,0.6,0.5,1
7,Atlanta Falcons,Arizona Cardinals,1994-12-24 13:00:00,0.0,0.2,0.3,1,Arizona Cardinals,Atlanta Falcons,1.0,0.8,0.7,0
8,Washington Commanders,Arizona Cardinals,1995-09-03 16:00:00,1.0,0.2,0.2,1,Arizona Cardinals,Washington Commanders,0.0,0.6,0.6,0
9,Detroit Lions,Arizona Cardinals,1995-09-17 13:00:00,0.0,0.2,0.5,0,Arizona Cardinals,Detroit Lions,0.0,0.4,0.5,1


In [105]:
# drop the `win_away` columnn, because it's the same as `win_home`, and would
# cause data leakage
match_level_data.drop(columns=['win_away'], inplace=True)

In [106]:
match_level_data.columns

Index(['team_home', 'opp_home', 'date_time', 'win_rate_last_1_matches_home',
       'win_rate_last_5_matches_home', 'win_rate_last_10_matches_home',
       'win_home', 'team_away', 'opp_away', 'win_rate_last_1_matches_away',
       'win_rate_last_5_matches_away', 'win_rate_last_10_matches_away'],
      dtype='object')

## Exercise

Generating good features is about leveraging expert knowledge, to build metrics
that carry predictive power.

To practice feature engineering, I suggest you create a few more features, so
you really understand the code I wrote.

For example:
- Can you engineer a few more features, like average `passyd` in the last 3 matches
(if that even makes sense? I am no NFL expert here ;-))


In [107]:
# verifying the data types in the df
match_level_data.dtypes

team_home                                object
opp_home                                 object
date_time                        datetime64[ns]
win_rate_last_1_matches_home            float64
win_rate_last_5_matches_home            float64
win_rate_last_10_matches_home           float64
win_home                                  int64
team_away                                object
opp_away                                 object
win_rate_last_1_matches_away            float64
win_rate_last_5_matches_away            float64
win_rate_last_10_matches_away           float64
dtype: object

### Converting Date and Time columns

In [None]:
# "day" of the week needs to be converted into an integer.
day_dict = {"Tue":1, "Wed":2, "Thu":3, "Fri":4, "Sat":5, "Sun":6, "Mon":7}

# using the manually created day_dictionary to replace the day of the week.
df["day_of_week"] = df["day"].map(day_dict)

In [None]:
df

Unnamed: 0,season,team,week,day,date,time,result,ot,record,@,opp,points_scored,points_allowed,1st_downs,totyd,passyd,rushyd,to,1st_downs_allowed,totyd_allowed,passyd_allowed,rushyd_allowed,to_forced,off_exp_pts,def_exp_pts,sts_exp_pts,away_t,home_t,day_of_week
0,2022,MIA,1,Sun,September 11,1:00PM ET,W,0,1-0,1,New England Patriots,20.0,7.0,18.0,307.0,242.0,65.0,0.0,17.0,271.0,193.0,78.0,3.0,6.08,8.28,1.98,New England Patriots,MIA,6
10,2022,BUF,1,Thu,September 8,8:20PM ET,W,0,1-0,0,Los Angeles Rams,31.0,10.0,23.0,413.0,292.0,121.0,4.0,19.0,243.0,191.0,52.0,3.0,13.89,10.29,-3.96,BUF,Los Angeles Rams,3
20,2022,NE,1,Sun,September 11,1:00PM ET,L,0,0-1,0,Miami Dolphins,7.0,20.0,17.0,271.0,193.0,78.0,3.0,18.0,307.0,242.0,65.0,0.0,-8.28,-6.08,-1.98,NE,Miami Dolphins,6
30,2022,NYJ,1,Sun,September 11,1:00PM ET,L,0,0-1,1,Baltimore Ravens,9.0,24.0,24.0,380.0,297.0,83.0,2.0,13.0,274.0,211.0,63.0,1.0,-8.07,-3.65,-5.04,Baltimore Ravens,NYJ,6
40,2022,BAL,1,Sun,September 11,1:00PM ET,W,0,1-0,0,New York Jets,24.0,9.0,13.0,274.0,211.0,63.0,1.0,24.0,380.0,297.0,83.0,2.0,3.65,8.07,5.04,BAL,New York Jets,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14796,1994,KC,Wild Card,Sat,December 31,4:00PM ET,L,0,9-8,0,Miami Dolphins,17.0,27.0,24.0,414.0,314.0,100.0,2.0,22.0,381.0,249.0,132.0,0.0,8.31,-15.96,-1.16,KC,Miami Dolphins,5
14943,1994,MIN,Wild Card,Sun,January 1,4:00PM ET,L,0,10-7,1,Chicago Bears,18.0,35.0,22.0,389.0,340.0,49.0,4.0,18.0,308.0,214.0,94.0,2.0,-9.54,-6.07,0.65,Chicago Bears,MIN,6
14960,1994,GB,Wild Card,Sat,December 31,12:30PM ET,W,0,10-7,1,Detroit Lions,16.0,12.0,18.0,336.0,255.0,81.0,0.0,9.0,171.0,175.0,-4.0,0.0,-2.93,11.53,0.69,Detroit Lions,GB,5
14978,1994,DET,Wild Card,Sat,December 31,12:30PM ET,L,0,9-8,0,Green Bay Packers,12.0,16.0,9.0,171.0,175.0,-4.0,0.0,18.0,336.0,255.0,81.0,0.0,-11.53,2.93,-0.69,DET,Green Bay Packers,5


In [None]:
# creating a year column out of the season column for future date transformations
df['year'] = df['season'] 

In [None]:
# Split the 'date' column into two new columns: 'month' and 'day'
df[['month', 'day']] = df['date'].str.split(' ', expand=True)

# Convert the 'day' column to integer type
df['day'] = df['day'].astype(int)

In [None]:
df

Unnamed: 0,season,team,week,day,date,time,result,ot,record,@,opp,points_scored,points_allowed,1st_downs,totyd,passyd,rushyd,to,1st_downs_allowed,totyd_allowed,passyd_allowed,rushyd_allowed,to_forced,off_exp_pts,def_exp_pts,sts_exp_pts,away_t,home_t,day_of_week,year,month
0,2022,MIA,1,11,September 11,1:00PM ET,W,0,1-0,1,New England Patriots,20.0,7.0,18.0,307.0,242.0,65.0,0.0,17.0,271.0,193.0,78.0,3.0,6.08,8.28,1.98,New England Patriots,MIA,6,2022,September
10,2022,BUF,1,8,September 8,8:20PM ET,W,0,1-0,0,Los Angeles Rams,31.0,10.0,23.0,413.0,292.0,121.0,4.0,19.0,243.0,191.0,52.0,3.0,13.89,10.29,-3.96,BUF,Los Angeles Rams,3,2022,September
20,2022,NE,1,11,September 11,1:00PM ET,L,0,0-1,0,Miami Dolphins,7.0,20.0,17.0,271.0,193.0,78.0,3.0,18.0,307.0,242.0,65.0,0.0,-8.28,-6.08,-1.98,NE,Miami Dolphins,6,2022,September
30,2022,NYJ,1,11,September 11,1:00PM ET,L,0,0-1,1,Baltimore Ravens,9.0,24.0,24.0,380.0,297.0,83.0,2.0,13.0,274.0,211.0,63.0,1.0,-8.07,-3.65,-5.04,Baltimore Ravens,NYJ,6,2022,September
40,2022,BAL,1,11,September 11,1:00PM ET,W,0,1-0,0,New York Jets,24.0,9.0,13.0,274.0,211.0,63.0,1.0,24.0,380.0,297.0,83.0,2.0,3.65,8.07,5.04,BAL,New York Jets,6,2022,September
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14796,1994,KC,Wild Card,31,December 31,4:00PM ET,L,0,9-8,0,Miami Dolphins,17.0,27.0,24.0,414.0,314.0,100.0,2.0,22.0,381.0,249.0,132.0,0.0,8.31,-15.96,-1.16,KC,Miami Dolphins,5,1994,December
14943,1994,MIN,Wild Card,1,January 1,4:00PM ET,L,0,10-7,1,Chicago Bears,18.0,35.0,22.0,389.0,340.0,49.0,4.0,18.0,308.0,214.0,94.0,2.0,-9.54,-6.07,0.65,Chicago Bears,MIN,6,1994,January
14960,1994,GB,Wild Card,31,December 31,12:30PM ET,W,0,10-7,1,Detroit Lions,16.0,12.0,18.0,336.0,255.0,81.0,0.0,9.0,171.0,175.0,-4.0,0.0,-2.93,11.53,0.69,Detroit Lions,GB,5,1994,December
14978,1994,DET,Wild Card,31,December 31,12:30PM ET,L,0,9-8,0,Green Bay Packers,12.0,16.0,9.0,171.0,175.0,-4.0,0.0,18.0,336.0,255.0,81.0,0.0,-11.53,2.93,-0.69,DET,Green Bay Packers,5,1994,December


In [None]:
# converted to vectorized solution instead ^
# date need to be converted into a numerical value (month and day_of_month)
###c = []
###v = []
###for i in df['date']:
###    c.append((i.split(' ')[0]))
###    v.append(int(i.split(' ')[1]))
###df = df.assign(month = c)
###df = df.assign(day = v)

In [None]:
# "month of the year needs to be converted into an integer. NFL games start in August (preseason) and end in February (super bowl).
month_dict = {"January":1, "February":2, "March":3, "April":4, "May":5, "June":6, 
            "July":7, "August":8, "September":9, "October":10, "November":11, "December":12}

# using the manually created day_dictionary to replace the day of the week.
df = df.replace({"month": month_dict}) 

In [None]:
# we can drop the date column after converting it
df = df.drop('date', axis=1)

In [None]:
# Extract the hour from the 'time' column
df['hour'] = df['time'].str[0].astype(int)

# Add 12 to the 'hour' column for times in the PM
df['hour'] += df['time'].str.contains('PM').astype(int) * 12

In [None]:
# converted into vectorized solution ^
# time needs to be converted into a numerical value
###b = []
###for i in df['time']:
###    if 'PM' in i:
###        b.append(int(i[0]) + 12)
###    else:
###        b.append(int(i[0]))
###df = df.assign(hour = b)

In [None]:
df

Unnamed: 0,season,team,week,day,time,result,ot,record,@,opp,points_scored,points_allowed,1st_downs,totyd,passyd,rushyd,to,1st_downs_allowed,totyd_allowed,passyd_allowed,rushyd_allowed,to_forced,off_exp_pts,def_exp_pts,sts_exp_pts,away_t,home_t,day_of_week,year,month,hour
0,2022,MIA,1,11,1:00PM ET,W,0,1-0,1,New England Patriots,20.0,7.0,18.0,307.0,242.0,65.0,0.0,17.0,271.0,193.0,78.0,3.0,6.08,8.28,1.98,New England Patriots,MIA,6,2022,9,13
10,2022,BUF,1,8,8:20PM ET,W,0,1-0,0,Los Angeles Rams,31.0,10.0,23.0,413.0,292.0,121.0,4.0,19.0,243.0,191.0,52.0,3.0,13.89,10.29,-3.96,BUF,Los Angeles Rams,3,2022,9,20
20,2022,NE,1,11,1:00PM ET,L,0,0-1,0,Miami Dolphins,7.0,20.0,17.0,271.0,193.0,78.0,3.0,18.0,307.0,242.0,65.0,0.0,-8.28,-6.08,-1.98,NE,Miami Dolphins,6,2022,9,13
30,2022,NYJ,1,11,1:00PM ET,L,0,0-1,1,Baltimore Ravens,9.0,24.0,24.0,380.0,297.0,83.0,2.0,13.0,274.0,211.0,63.0,1.0,-8.07,-3.65,-5.04,Baltimore Ravens,NYJ,6,2022,9,13
40,2022,BAL,1,11,1:00PM ET,W,0,1-0,0,New York Jets,24.0,9.0,13.0,274.0,211.0,63.0,1.0,24.0,380.0,297.0,83.0,2.0,3.65,8.07,5.04,BAL,New York Jets,6,2022,9,13
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14796,1994,KC,Wild Card,31,4:00PM ET,L,0,9-8,0,Miami Dolphins,17.0,27.0,24.0,414.0,314.0,100.0,2.0,22.0,381.0,249.0,132.0,0.0,8.31,-15.96,-1.16,KC,Miami Dolphins,5,1994,12,16
14943,1994,MIN,Wild Card,1,4:00PM ET,L,0,10-7,1,Chicago Bears,18.0,35.0,22.0,389.0,340.0,49.0,4.0,18.0,308.0,214.0,94.0,2.0,-9.54,-6.07,0.65,Chicago Bears,MIN,6,1994,1,16
14960,1994,GB,Wild Card,31,12:30PM ET,W,0,10-7,1,Detroit Lions,16.0,12.0,18.0,336.0,255.0,81.0,0.0,9.0,171.0,175.0,-4.0,0.0,-2.93,11.53,0.69,Detroit Lions,GB,5,1994,12,13
14978,1994,DET,Wild Card,31,12:30PM ET,L,0,9-8,0,Green Bay Packers,12.0,16.0,9.0,171.0,175.0,-4.0,0.0,18.0,336.0,255.0,81.0,0.0,-11.53,2.93,-0.69,DET,Green Bay Packers,5,1994,12,13


In [None]:
# we can drop the time column after converting it
df = df.drop('time', axis=1)

In [None]:
# creating an a datetime type column
df['date'] = pd.to_datetime(df[['year', 'month', 'day', 'hour']])

In [None]:
df

Unnamed: 0,season,team,week,day,result,ot,record,@,opp,points_scored,points_allowed,1st_downs,totyd,passyd,rushyd,to,1st_downs_allowed,totyd_allowed,passyd_allowed,rushyd_allowed,to_forced,off_exp_pts,def_exp_pts,sts_exp_pts,away_t,home_t,day_of_week,year,month,hour,date
0,2022,MIA,1,11,W,0,1-0,1,New England Patriots,20.0,7.0,18.0,307.0,242.0,65.0,0.0,17.0,271.0,193.0,78.0,3.0,6.08,8.28,1.98,New England Patriots,MIA,6,2022,9,13,2022-09-11 13:00:00
10,2022,BUF,1,8,W,0,1-0,0,Los Angeles Rams,31.0,10.0,23.0,413.0,292.0,121.0,4.0,19.0,243.0,191.0,52.0,3.0,13.89,10.29,-3.96,BUF,Los Angeles Rams,3,2022,9,20,2022-09-08 20:00:00
20,2022,NE,1,11,L,0,0-1,0,Miami Dolphins,7.0,20.0,17.0,271.0,193.0,78.0,3.0,18.0,307.0,242.0,65.0,0.0,-8.28,-6.08,-1.98,NE,Miami Dolphins,6,2022,9,13,2022-09-11 13:00:00
30,2022,NYJ,1,11,L,0,0-1,1,Baltimore Ravens,9.0,24.0,24.0,380.0,297.0,83.0,2.0,13.0,274.0,211.0,63.0,1.0,-8.07,-3.65,-5.04,Baltimore Ravens,NYJ,6,2022,9,13,2022-09-11 13:00:00
40,2022,BAL,1,11,W,0,1-0,0,New York Jets,24.0,9.0,13.0,274.0,211.0,63.0,1.0,24.0,380.0,297.0,83.0,2.0,3.65,8.07,5.04,BAL,New York Jets,6,2022,9,13,2022-09-11 13:00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14796,1994,KC,Wild Card,31,L,0,9-8,0,Miami Dolphins,17.0,27.0,24.0,414.0,314.0,100.0,2.0,22.0,381.0,249.0,132.0,0.0,8.31,-15.96,-1.16,KC,Miami Dolphins,5,1994,12,16,1994-12-31 16:00:00
14943,1994,MIN,Wild Card,1,L,0,10-7,1,Chicago Bears,18.0,35.0,22.0,389.0,340.0,49.0,4.0,18.0,308.0,214.0,94.0,2.0,-9.54,-6.07,0.65,Chicago Bears,MIN,6,1994,1,16,1994-01-01 16:00:00
14960,1994,GB,Wild Card,31,W,0,10-7,1,Detroit Lions,16.0,12.0,18.0,336.0,255.0,81.0,0.0,9.0,171.0,175.0,-4.0,0.0,-2.93,11.53,0.69,Detroit Lions,GB,5,1994,12,13,1994-12-31 13:00:00
14978,1994,DET,Wild Card,31,L,0,9-8,0,Green Bay Packers,12.0,16.0,9.0,171.0,175.0,-4.0,0.0,18.0,336.0,255.0,81.0,0.0,-11.53,2.93,-0.69,DET,Green Bay Packers,5,1994,12,13,1994-12-31 13:00:00


In [None]:
# verifying the data types in the df
df.dtypes

season                        int64
team                         object
week                         object
day                           int64
result                       object
ot                            int64
record                       object
@                             int64
opp                          object
points_scored               float64
points_allowed              float64
1st_downs                   float64
totyd                       float64
passyd                      float64
rushyd                      float64
to                          float64
1st_downs_allowed           float64
totyd_allowed               float64
passyd_allowed              float64
rushyd_allowed              float64
to_forced                   float64
off_exp_pts                 float64
def_exp_pts                 float64
sts_exp_pts                 float64
away_t                       object
home_t                       object
day_of_week                   int64
year                        

In [None]:
# we now have to create columns so that it is home_team_pts_scored, home_team_pts_allowed, etc.
# since we created home/away team columns, the stats for those games need to be converted accordingly
# those stats were based on the initial data source where we had two records for each game
# creating columns containing game stats from the home_team's point of view
ht_pts_scored = []
ht_pts_allowed = [] 
ht_1st_downs = []
ht_1st_downs_allowed = []
ht_totyd = []
ht_passyd = []
ht_rushyd = [] 
ht_to = []
ht_totyd_allowed = []
ht_passyd_allowed = []
ht_rushyd_allowed = []
ht_to_forced = []

counter = 0

for i in df['team']:
    if df['team'].iloc[counter] == df['home_t'].iloc[counter]:
        ht_pts_scored.append(df['points_scored'].iloc[counter])
        ht_pts_allowed.append(df['points_allowed'].iloc[counter])
        ht_1st_downs.append(df['1st_downs'].iloc[counter])
        ht_1st_downs_allowed.append(df['1st_downs_allowed'].iloc[counter])
        ht_totyd.append(df['totyd'].iloc[counter])
        ht_totyd_allowed.append(df['totyd_allowed'].iloc[counter])
        ht_passyd.append(df['passyd'].iloc[counter])
        ht_passyd_allowed.append(df['passyd_allowed'].iloc[counter])
        ht_rushyd.append(df['rushyd'].iloc[counter])
        ht_rushyd_allowed.append(df['rushyd_allowed'].iloc[counter])
        ht_to.append(df['to'].iloc[counter])
        ht_to_forced.append(df['to_forced'].iloc[counter])
    else:
        ht_pts_scored.append(df['points_allowed'].iloc[counter])
        ht_pts_allowed.append(df['points_scored'].iloc[counter])
        ht_1st_downs.append(df['1st_downs_allowed'].iloc[counter])
        ht_1st_downs_allowed.append(df['1st_downs'].iloc[counter])
        ht_totyd.append(df['totyd_allowed'].iloc[counter])
        ht_totyd_allowed.append(df['totyd'].iloc[counter])
        ht_passyd.append(df['passyd_allowed'].iloc[counter])
        ht_passyd_allowed.append(df['passyd'].iloc[counter])
        ht_rushyd.append(df['rushyd_allowed'].iloc[counter])
        ht_rushyd_allowed.append(df['rushyd'].iloc[counter])
        ht_to.append(df['to_forced'].iloc[counter])
        ht_to_forced.append(df['to'].iloc[counter])
    counter += 1

In [None]:
df

Unnamed: 0,season,team,week,day,result,ot,record,@,opp,points_scored,points_allowed,1st_downs,totyd,passyd,rushyd,to,1st_downs_allowed,totyd_allowed,passyd_allowed,rushyd_allowed,to_forced,off_exp_pts,def_exp_pts,sts_exp_pts,away_t,home_t,day_of_week,year,month,hour,date
0,2022,MIA,1,11,W,0,1-0,1,New England Patriots,20.0,7.0,18.0,307.0,242.0,65.0,0.0,17.0,271.0,193.0,78.0,3.0,6.08,8.28,1.98,New England Patriots,MIA,6,2022,9,13,2022-09-11 13:00:00
10,2022,BUF,1,8,W,0,1-0,0,Los Angeles Rams,31.0,10.0,23.0,413.0,292.0,121.0,4.0,19.0,243.0,191.0,52.0,3.0,13.89,10.29,-3.96,BUF,Los Angeles Rams,3,2022,9,20,2022-09-08 20:00:00
20,2022,NE,1,11,L,0,0-1,0,Miami Dolphins,7.0,20.0,17.0,271.0,193.0,78.0,3.0,18.0,307.0,242.0,65.0,0.0,-8.28,-6.08,-1.98,NE,Miami Dolphins,6,2022,9,13,2022-09-11 13:00:00
30,2022,NYJ,1,11,L,0,0-1,1,Baltimore Ravens,9.0,24.0,24.0,380.0,297.0,83.0,2.0,13.0,274.0,211.0,63.0,1.0,-8.07,-3.65,-5.04,Baltimore Ravens,NYJ,6,2022,9,13,2022-09-11 13:00:00
40,2022,BAL,1,11,W,0,1-0,0,New York Jets,24.0,9.0,13.0,274.0,211.0,63.0,1.0,24.0,380.0,297.0,83.0,2.0,3.65,8.07,5.04,BAL,New York Jets,6,2022,9,13,2022-09-11 13:00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14796,1994,KC,Wild Card,31,L,0,9-8,0,Miami Dolphins,17.0,27.0,24.0,414.0,314.0,100.0,2.0,22.0,381.0,249.0,132.0,0.0,8.31,-15.96,-1.16,KC,Miami Dolphins,5,1994,12,16,1994-12-31 16:00:00
14943,1994,MIN,Wild Card,1,L,0,10-7,1,Chicago Bears,18.0,35.0,22.0,389.0,340.0,49.0,4.0,18.0,308.0,214.0,94.0,2.0,-9.54,-6.07,0.65,Chicago Bears,MIN,6,1994,1,16,1994-01-01 16:00:00
14960,1994,GB,Wild Card,31,W,0,10-7,1,Detroit Lions,16.0,12.0,18.0,336.0,255.0,81.0,0.0,9.0,171.0,175.0,-4.0,0.0,-2.93,11.53,0.69,Detroit Lions,GB,5,1994,12,13,1994-12-31 13:00:00
14978,1994,DET,Wild Card,31,L,0,9-8,0,Green Bay Packers,12.0,16.0,9.0,171.0,175.0,-4.0,0.0,18.0,336.0,255.0,81.0,0.0,-11.53,2.93,-0.69,DET,Green Bay Packers,5,1994,12,13,1994-12-31 13:00:00


In [None]:
ht_pts_scored[:5]

[20.0, 10.0, 20.0, 9.0, 9.0]

In [None]:
df = df.assign(ht_pts_scored = ht_pts_scored)
df = df.assign(ht_pts_allowed = ht_pts_allowed)
df = df.assign(ht_1st_downs = ht_1st_downs)
df = df.assign(ht_totyd = ht_totyd)
df = df.assign(ht_passyd = ht_passyd)
df = df.assign(ht_rushyd = ht_rushyd)
df = df.assign(ht_to = ht_to)
df = df.assign(ht_1st_downs_allowed = ht_1st_downs_allowed)
df = df.assign(ht_totyd_allowed = ht_totyd_allowed)
df = df.assign(ht_passyd_allowed = ht_passyd_allowed)
df = df.assign(ht_rushyd_allowed = ht_rushyd_allowed)
df = df.assign(ht_to_forced = ht_to_forced)

In [None]:
df

Unnamed: 0,season,team,week,day,result,ot,record,@,opp,points_scored,points_allowed,1st_downs,totyd,passyd,rushyd,to,1st_downs_allowed,totyd_allowed,passyd_allowed,rushyd_allowed,to_forced,off_exp_pts,def_exp_pts,sts_exp_pts,away_t,home_t,day_of_week,year,month,hour,date,ht_pts_scored,ht_pts_allowed,ht_1st_downs,ht_totyd,ht_passyd,ht_rushyd,ht_to,ht_1st_downs_allowed,ht_totyd_allowed,ht_passyd_allowed,ht_rushyd_allowed,ht_to_forced
0,2022,MIA,1,11,W,0,1-0,1,New England Patriots,20.0,7.0,18.0,307.0,242.0,65.0,0.0,17.0,271.0,193.0,78.0,3.0,6.08,8.28,1.98,New England Patriots,MIA,6,2022,9,13,2022-09-11 13:00:00,20.0,7.0,18.0,307.0,242.0,65.0,0.0,17.0,271.0,193.0,78.0,3.0
10,2022,BUF,1,8,W,0,1-0,0,Los Angeles Rams,31.0,10.0,23.0,413.0,292.0,121.0,4.0,19.0,243.0,191.0,52.0,3.0,13.89,10.29,-3.96,BUF,Los Angeles Rams,3,2022,9,20,2022-09-08 20:00:00,10.0,31.0,19.0,243.0,191.0,52.0,3.0,23.0,413.0,292.0,121.0,4.0
20,2022,NE,1,11,L,0,0-1,0,Miami Dolphins,7.0,20.0,17.0,271.0,193.0,78.0,3.0,18.0,307.0,242.0,65.0,0.0,-8.28,-6.08,-1.98,NE,Miami Dolphins,6,2022,9,13,2022-09-11 13:00:00,20.0,7.0,18.0,307.0,242.0,65.0,0.0,17.0,271.0,193.0,78.0,3.0
30,2022,NYJ,1,11,L,0,0-1,1,Baltimore Ravens,9.0,24.0,24.0,380.0,297.0,83.0,2.0,13.0,274.0,211.0,63.0,1.0,-8.07,-3.65,-5.04,Baltimore Ravens,NYJ,6,2022,9,13,2022-09-11 13:00:00,9.0,24.0,24.0,380.0,297.0,83.0,2.0,13.0,274.0,211.0,63.0,1.0
40,2022,BAL,1,11,W,0,1-0,0,New York Jets,24.0,9.0,13.0,274.0,211.0,63.0,1.0,24.0,380.0,297.0,83.0,2.0,3.65,8.07,5.04,BAL,New York Jets,6,2022,9,13,2022-09-11 13:00:00,9.0,24.0,24.0,380.0,297.0,83.0,2.0,13.0,274.0,211.0,63.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14796,1994,KC,Wild Card,31,L,0,9-8,0,Miami Dolphins,17.0,27.0,24.0,414.0,314.0,100.0,2.0,22.0,381.0,249.0,132.0,0.0,8.31,-15.96,-1.16,KC,Miami Dolphins,5,1994,12,16,1994-12-31 16:00:00,27.0,17.0,22.0,381.0,249.0,132.0,0.0,24.0,414.0,314.0,100.0,2.0
14943,1994,MIN,Wild Card,1,L,0,10-7,1,Chicago Bears,18.0,35.0,22.0,389.0,340.0,49.0,4.0,18.0,308.0,214.0,94.0,2.0,-9.54,-6.07,0.65,Chicago Bears,MIN,6,1994,1,16,1994-01-01 16:00:00,18.0,35.0,22.0,389.0,340.0,49.0,4.0,18.0,308.0,214.0,94.0,2.0
14960,1994,GB,Wild Card,31,W,0,10-7,1,Detroit Lions,16.0,12.0,18.0,336.0,255.0,81.0,0.0,9.0,171.0,175.0,-4.0,0.0,-2.93,11.53,0.69,Detroit Lions,GB,5,1994,12,13,1994-12-31 13:00:00,16.0,12.0,18.0,336.0,255.0,81.0,0.0,9.0,171.0,175.0,-4.0,0.0
14978,1994,DET,Wild Card,31,L,0,9-8,0,Green Bay Packers,12.0,16.0,9.0,171.0,175.0,-4.0,0.0,18.0,336.0,255.0,81.0,0.0,-11.53,2.93,-0.69,DET,Green Bay Packers,5,1994,12,13,1994-12-31 13:00:00,16.0,12.0,18.0,336.0,255.0,81.0,0.0,9.0,171.0,175.0,-4.0,0.0


In [None]:
# we probably need to develop 'home_team_record' and 'away_team_record' columns, but it might be too difficult to engineer


### Creating the Target feature

In [None]:
# Create the conditions
conditions = [
    (df['team'] == df['home_t']) & (df['result'] == 'W'),
    (df['team'] != df['home_t']) & (df['result'] == 'L')
]

# Create the choices
choices = [1, 0]

# Use np.select to create the 'home_team_wins' column
df['home_team_wins'] = np.select(conditions, choices)

In [None]:
# created a vectorized solution instead ^
# we have to create a target feature 'home_team_wins', where 0 = home team did not win, 1 = home team won
###ht_wins = []
###counter = 0
###
###for i in df['result']:
###    if df['team'].iloc[counter] == df['home_t'].iloc[counter]:
###        if i == 'W':
###            ht_wins.append(1)
###        else:
###            ht_wins.append(0)
###        counter += 1
###    else:
###        if i == 'W':
###            ht_wins.append(0)
###        else:
###            ht_wins.append(1)
###        counter += 1   

###df = df.assign(ht_wins = ht_wins)

In [None]:
df

Unnamed: 0,season,team,week,day,result,ot,record,@,opp,points_scored,points_allowed,1st_downs,totyd,passyd,rushyd,to,1st_downs_allowed,totyd_allowed,passyd_allowed,rushyd_allowed,to_forced,off_exp_pts,def_exp_pts,sts_exp_pts,away_t,home_t,day_of_week,year,month,hour,date,ht_pts_scored,ht_pts_allowed,ht_1st_downs,ht_totyd,ht_passyd,ht_rushyd,ht_to,ht_1st_downs_allowed,ht_totyd_allowed,ht_passyd_allowed,ht_rushyd_allowed,ht_to_forced,home_team_wins
0,2022,MIA,1,11,W,0,1-0,1,New England Patriots,20.0,7.0,18.0,307.0,242.0,65.0,0.0,17.0,271.0,193.0,78.0,3.0,6.08,8.28,1.98,New England Patriots,MIA,6,2022,9,13,2022-09-11 13:00:00,20.0,7.0,18.0,307.0,242.0,65.0,0.0,17.0,271.0,193.0,78.0,3.0,1
10,2022,BUF,1,8,W,0,1-0,0,Los Angeles Rams,31.0,10.0,23.0,413.0,292.0,121.0,4.0,19.0,243.0,191.0,52.0,3.0,13.89,10.29,-3.96,BUF,Los Angeles Rams,3,2022,9,20,2022-09-08 20:00:00,10.0,31.0,19.0,243.0,191.0,52.0,3.0,23.0,413.0,292.0,121.0,4.0,0
20,2022,NE,1,11,L,0,0-1,0,Miami Dolphins,7.0,20.0,17.0,271.0,193.0,78.0,3.0,18.0,307.0,242.0,65.0,0.0,-8.28,-6.08,-1.98,NE,Miami Dolphins,6,2022,9,13,2022-09-11 13:00:00,20.0,7.0,18.0,307.0,242.0,65.0,0.0,17.0,271.0,193.0,78.0,3.0,0
30,2022,NYJ,1,11,L,0,0-1,1,Baltimore Ravens,9.0,24.0,24.0,380.0,297.0,83.0,2.0,13.0,274.0,211.0,63.0,1.0,-8.07,-3.65,-5.04,Baltimore Ravens,NYJ,6,2022,9,13,2022-09-11 13:00:00,9.0,24.0,24.0,380.0,297.0,83.0,2.0,13.0,274.0,211.0,63.0,1.0,0
40,2022,BAL,1,11,W,0,1-0,0,New York Jets,24.0,9.0,13.0,274.0,211.0,63.0,1.0,24.0,380.0,297.0,83.0,2.0,3.65,8.07,5.04,BAL,New York Jets,6,2022,9,13,2022-09-11 13:00:00,9.0,24.0,24.0,380.0,297.0,83.0,2.0,13.0,274.0,211.0,63.0,1.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14796,1994,KC,Wild Card,31,L,0,9-8,0,Miami Dolphins,17.0,27.0,24.0,414.0,314.0,100.0,2.0,22.0,381.0,249.0,132.0,0.0,8.31,-15.96,-1.16,KC,Miami Dolphins,5,1994,12,16,1994-12-31 16:00:00,27.0,17.0,22.0,381.0,249.0,132.0,0.0,24.0,414.0,314.0,100.0,2.0,0
14943,1994,MIN,Wild Card,1,L,0,10-7,1,Chicago Bears,18.0,35.0,22.0,389.0,340.0,49.0,4.0,18.0,308.0,214.0,94.0,2.0,-9.54,-6.07,0.65,Chicago Bears,MIN,6,1994,1,16,1994-01-01 16:00:00,18.0,35.0,22.0,389.0,340.0,49.0,4.0,18.0,308.0,214.0,94.0,2.0,0
14960,1994,GB,Wild Card,31,W,0,10-7,1,Detroit Lions,16.0,12.0,18.0,336.0,255.0,81.0,0.0,9.0,171.0,175.0,-4.0,0.0,-2.93,11.53,0.69,Detroit Lions,GB,5,1994,12,13,1994-12-31 13:00:00,16.0,12.0,18.0,336.0,255.0,81.0,0.0,9.0,171.0,175.0,-4.0,0.0,1
14978,1994,DET,Wild Card,31,L,0,9-8,0,Green Bay Packers,12.0,16.0,9.0,171.0,175.0,-4.0,0.0,18.0,336.0,255.0,81.0,0.0,-11.53,2.93,-0.69,DET,Green Bay Packers,5,1994,12,13,1994-12-31 13:00:00,16.0,12.0,18.0,336.0,255.0,81.0,0.0,9.0,171.0,175.0,-4.0,0.0,0


In [None]:
df.dtypes

season                           int64
team                            object
week                            object
day                              int64
result                          object
ot                               int64
record                          object
@                                int64
opp                             object
points_scored                  float64
points_allowed                 float64
1st_downs                      float64
totyd                          float64
passyd                         float64
rushyd                         float64
to                             float64
1st_downs_allowed              float64
totyd_allowed                  float64
passyd_allowed                 float64
rushyd_allowed                 float64
to_forced                      float64
off_exp_pts                    float64
def_exp_pts                    float64
sts_exp_pts                    float64
away_t                          object
home_t                   

### Dropping unnecessary columns 

In [None]:
df = df.drop(['record', 'team', 'result', '@', 'opp', 'points_scored', 'points_allowed', '1st_downs', 
              'totyd', 'passyd', 'rushyd', 'to', '1st_downs_allowed', 'totyd_allowed', 'passyd_allowed', 
              'rushyd_allowed', 'to_forced', 'off_exp_pts', 'def_exp_pts', 'sts_exp_pts', 'year'], axis=1)

In [None]:
df.dtypes

season                           int64
week                            object
day                              int64
ot                               int64
away_t                          object
home_t                          object
day_of_week                      int64
month                            int64
hour                             int64
date                    datetime64[ns]
ht_pts_scored                  float64
ht_pts_allowed                 float64
ht_1st_downs                   float64
ht_totyd                       float64
ht_passyd                      float64
ht_rushyd                      float64
ht_to                          float64
ht_1st_downs_allowed           float64
ht_totyd_allowed               float64
ht_passyd_allowed              float64
ht_rushyd_allowed              float64
ht_to_forced                   float64
home_team_wins                   int64
dtype: object

### Feature Engineering

In [None]:
# the goal is to obtain a new column that contains rolling_averages for a team over the last four games.
# should the end of a season reset the rolling percentage??? Yes
# creating a df that is organized at the team and season level
grouped_teams = df.groupby("home_t")

In [None]:
# viewing one instance
group_arz = grouped_teams.get_group("Arizona Cardinals")
group_arz

Unnamed: 0,season,week,day,ot,away_t,home_t,day_of_week,month,hour,date,ht_pts_scored,ht_pts_allowed,ht_1st_downs,ht_totyd,ht_passyd,ht_rushyd,ht_to,ht_1st_downs_allowed,ht_totyd_allowed,ht_passyd_allowed,ht_rushyd_allowed,ht_to_forced,home_team_wins
121,2022,1,11,0,KC,Arizona Cardinals,6,9,16,2022-09-11 16:00:00,21.0,44.0,18.0,282.0,179.0,103.0,0.0,33.0,488.0,360.0,128.0,1.0,0
296,2022,11,21,0,SF,Arizona Cardinals,7,11,20,2022-11-21 20:00:00,10.0,38.0,19.0,314.0,247.0,67.0,2.0,21.0,387.0,228.0,159.0,0.0,0
320,2022,3,25,0,LAR,Arizona Cardinals,6,9,16,2022-09-25 16:00:00,12.0,20.0,23.0,365.0,295.0,70.0,0.0,15.0,339.0,239.0,100.0,1.0,0
165,2022,5,9,0,PHI,Arizona Cardinals,6,10,16,2022-10-09 16:00:00,17.0,20.0,23.0,363.0,239.0,124.0,1.0,24.0,357.0,218.0,139.0,0.0,0
271,2022,7,20,0,NO,Arizona Cardinals,3,10,20,2022-10-20 20:00:00,42.0,34.0,21.0,326.0,189.0,137.0,0.0,25.0,494.0,409.0,85.0,3.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14743,1994,16,18,0,CIN,Arizona Cardinals,6,12,16,1994-12-18 16:00:00,28.0,7.0,24.0,364.0,212.0,152.0,0.0,12.0,189.0,125.0,64.0,3.0,0
14864,1994,2,11,0,NYG,Arizona Cardinals,6,9,20,1994-09-11 20:00:00,17.0,20.0,11.0,174.0,135.0,39.0,3.0,19.0,206.0,88.0,118.0,2.0,0
14931,1994,5,2,0,MIN,Arizona Cardinals,6,10,16,1994-10-02 16:00:00,17.0,7.0,21.0,309.0,200.0,109.0,2.0,19.0,358.0,340.0,18.0,4.0,0
14851,1994,8,23,0,DAL,Arizona Cardinals,6,10,16,1994-10-23 16:00:00,21.0,28.0,22.0,315.0,208.0,107.0,0.0,14.0,312.0,237.0,75.0,0.0,0


def rolling_averages(group, cols, new_cols):
    group = group.sort_values("date")
    rolling_stats = group[cols].rolling(4).mean() # closed='left' inside rolling parameters
    # the problem is that it is using the fourth games' info to predict the fourth game and beyond.
    # closed='left' would solve this problem, but that requires datetime. How do I resolve this???
    group[new_cols] = rolling_stats
    group = group.dropna(subset=new_cols)
    return group

In [None]:
def rolling_averages(group, cols, new_cols):
    """
    Compute rolling averages of the given columns for a pandas DataFrame grouped by some categorical variable.

    Parameters:
    group (pandas.DataFrame): The DataFrame to group by.
    cols (list of str): The names of the columns to compute rolling averages for.
    new_cols (list of str): The names of the new columns to create with the rolling averages.

    Returns:
    pandas.DataFrame: The original DataFrame with the new columns added, and rows with missing values dropped.
    """
    group = group.sort_values("date")
    rolling_stats = group[cols].rolling(4).mean() # Use the last 4 rows of each group to compute the rolling average.
    # Note that by default, rolling() uses a "right closed" window, meaning that the last value in the window is included.
    # We want to use a "left closed" window, meaning that the first value in the window is included, so we pass the parameter
    # closed='left' to rolling(). However, this requires that the 'date' column be of type datetime, so make sure to convert
    # it beforehand if it isn't already.
    group[new_cols] = rolling_stats
    group = group.dropna(subset=new_cols) # Drop rows with missing values in the new columns.
    return group

In [None]:
# creating a list of the columns I plan on finding the rolling averages for
cols = ["home_team_wins", "ht_pts_scored", "ht_pts_allowed",
        "ht_totyd", "ht_to", "ht_totyd_allowed", "ht_to_forced",
        "ht_passyd", "ht_rushyd", "ht_passyd_allowed", "ht_rushyd_allowed",
        "ht_1st_downs", "ht_1st_downs_allowed", "ot"]

new_cols = [f"{c}_rolling" for c in cols]

In [None]:
# first four weeks are being dropped because of na values from rolling(4)
# should we bring datetime back??
# testing the function on one group (arz)
rolling_averages(group_arz, cols, new_cols)

Unnamed: 0,season,week,day,ot,away_t,home_t,day_of_week,month,hour,date,ht_pts_scored,ht_pts_allowed,ht_1st_downs,ht_totyd,ht_passyd,ht_rushyd,ht_to,ht_1st_downs_allowed,ht_totyd_allowed,ht_passyd_allowed,ht_rushyd_allowed,ht_to_forced,home_team_wins,home_team_wins_rolling,ht_pts_scored_rolling,ht_pts_allowed_rolling,ht_totyd_rolling,ht_to_rolling,ht_totyd_allowed_rolling,ht_to_forced_rolling,ht_passyd_rolling,ht_rushyd_rolling,ht_passyd_allowed_rolling,ht_rushyd_allowed_rolling,ht_1st_downs_rolling,ht_1st_downs_allowed_rolling,ot_rolling
14700,1994,9,30,1,PIT,Arizona Cardinals,6,10,20,1994-10-30 20:00:00,20.0,17.0,16.0,335.0,236.0,99.0,1.0,12.0,317.0,232.0,85.0,3.0,0,0.0,18.75,18.00,283.25,1.50,298.25,2.25,194.75,88.50,224.25,74.00,17.50,16.00,0.25
14905,1994,12,20,0,PHI,Arizona Cardinals,6,11,16,1994-11-20 16:00:00,12.0,6.0,16.0,281.0,123.0,158.0,1.0,14.0,185.0,110.0,75.0,2.0,0,0.0,17.50,14.50,310.00,1.00,293.00,2.25,191.75,118.25,229.75,63.25,18.75,14.75,0.25
14990,1994,13,27,1,CHI,Arizona Cardinals,6,11,16,1994-11-27 16:00:00,16.0,19.0,15.0,244.0,177.0,67.0,1.0,20.0,318.0,186.0,132.0,2.0,0,0.0,17.25,17.50,293.75,0.75,283.00,1.75,186.00,107.75,191.25,91.75,17.25,15.00,0.50
14924,1994,15,11,0,WAS,Arizona Cardinals,6,12,16,1994-12-11 16:00:00,17.0,15.0,14.0,278.0,194.0,84.0,1.0,19.0,406.0,283.0,123.0,2.0,0,0.0,16.25,14.25,284.50,1.00,306.50,2.25,182.50,102.00,202.75,103.75,15.25,16.25,0.50
14743,1994,16,18,0,CIN,Arizona Cardinals,6,12,16,1994-12-18 16:00:00,28.0,7.0,24.0,364.0,212.0,152.0,0.0,12.0,189.0,125.0,64.0,3.0,0,0.0,18.25,11.75,291.75,0.75,274.50,2.25,176.50,115.25,176.00,98.50,17.25,16.25,0.25
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
320,2022,3,25,0,LAR,Arizona Cardinals,6,9,16,2022-09-25 16:00:00,12.0,20.0,23.0,365.0,295.0,70.0,0.0,15.0,339.0,239.0,100.0,1.0,0,0.0,18.00,29.00,368.00,0.50,382.25,0.50,265.75,102.25,271.50,110.75,20.75,21.50,0.00
165,2022,5,9,0,PHI,Arizona Cardinals,6,10,16,2022-10-09 16:00:00,17.0,20.0,23.0,363.0,239.0,124.0,1.0,24.0,357.0,218.0,139.0,0.0,0,0.0,16.50,26.50,347.00,0.25,382.50,0.50,239.50,107.50,259.25,123.25,21.00,23.00,0.00
271,2022,7,20,0,NO,Arizona Cardinals,3,10,20,2022-10-20 20:00:00,42.0,34.0,21.0,326.0,189.0,137.0,0.0,25.0,494.0,409.0,85.0,3.0,0,0.0,23.00,29.50,334.00,0.25,419.50,1.25,225.50,108.50,306.50,113.00,21.25,24.25,0.00
305,2022,9,6,0,SEA,Arizona Cardinals,6,11,16,2022-11-06 16:00:00,21.0,31.0,15.0,262.0,140.0,122.0,1.0,27.0,421.0,263.0,158.0,1.0,0,0.0,23.00,26.25,329.00,0.50,402.75,1.25,215.75,113.25,282.25,120.50,20.50,22.75,0.00


In [None]:
#apply the new rolling_averages columns to each group
df_rolling = df.groupby(["season", "week"]).apply(lambda x: rolling_averages(x, cols, new_cols))

In [None]:
df_rolling

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,season,week,day,ot,away_t,home_t,day_of_week,month,hour,date,ht_pts_scored,ht_pts_allowed,ht_1st_downs,ht_totyd,ht_passyd,ht_rushyd,ht_to,ht_1st_downs_allowed,ht_totyd_allowed,ht_passyd_allowed,ht_rushyd_allowed,ht_to_forced,home_team_wins,home_team_wins_rolling,ht_pts_scored_rolling,ht_pts_allowed_rolling,ht_totyd_rolling,ht_to_rolling,ht_totyd_allowed_rolling,ht_to_forced_rolling,ht_passyd_rolling,ht_rushyd_rolling,ht_passyd_allowed_rolling,ht_rushyd_allowed_rolling,ht_1st_downs_rolling,ht_1st_downs_allowed_rolling,ot_rolling
season,week,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1
1994,1,14997,1994,1,4,0,TB,Chicago Bears,6,9,13,1994-09-04 13:00:00,21.0,9.0,17.0,270.0,204.0,66.0,0.0,17.0,304.0,181.0,123.0,0.0,0,0.25,22.50,22.00,291.25,2.25,374.50,0.75,211.50,79.75,240.00,134.50,18.75,23.50,0.00
1994,1,14979,1994,1,4,0,Tampa Bay Buccaneers,CHI,6,9,13,1994-09-04 13:00:00,21.0,9.0,17.0,270.0,204.0,66.0,0.0,17.0,304.0,181.0,123.0,0.0,1,0.50,26.00,17.25,294.25,1.50,360.50,0.75,206.50,87.75,241.25,119.25,18.75,22.00,0.00
1994,1,14962,1994,1,4,1,Atlanta Falcons,DET,6,9,13,1994-09-04 13:00:00,31.0,28.0,21.0,352.0,203.0,149.0,1.0,22.0,389.0,271.0,118.0,0.0,1,0.75,29.50,16.75,295.00,0.75,342.75,0.75,179.25,115.75,232.00,110.75,18.50,20.50,0.25
1994,1,14711,1994,1,4,0,CLE,Cincinnati Bengals,6,9,13,1994-09-04 13:00:00,20.0,28.0,23.0,331.0,213.0,118.0,2.0,16.0,256.0,149.0,107.0,2.0,0,0.50,23.25,18.50,305.75,0.75,313.25,0.50,206.00,99.75,195.50,117.75,19.50,18.00,0.25
1994,1,14729,1994,1,4,0,Cleveland Browns,CIN,6,9,13,1994-09-04 13:00:00,20.0,28.0,23.0,331.0,213.0,118.0,2.0,16.0,256.0,149.0,107.0,2.0,0,0.50,23.00,23.25,321.00,1.25,301.25,1.00,208.25,112.75,187.50,113.75,21.00,17.75,0.25
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022,9,325,2022,9,6,0,LAR,Tampa Bay Buccaneers,6,11,16,2022-11-06 16:00:00,16.0,13.0,18.0,323.0,272.0,51.0,0.0,9.0,206.0,138.0,68.0,0.0,0,0.25,18.50,22.00,292.50,0.50,313.50,0.50,206.00,86.50,200.50,113.00,16.50,18.00,0.00
2022,9,87,2022,9,6,1,TEN,Kansas City Chiefs,6,11,20,2022-11-06 20:00:00,20.0,17.0,29.0,499.0,422.0,77.0,1.0,9.0,229.0,57.0,172.0,0.0,0,0.25,18.25,18.50,351.75,0.50,265.50,0.25,276.50,75.25,149.00,116.50,20.00,13.50,0.25
2022,9,128,2022,9,6,1,Tennessee Titans,KC,6,11,20,2022-11-06 20:00:00,20.0,17.0,29.0,499.0,422.0,77.0,1.0,9.0,229.0,57.0,172.0,0.0,1,0.25,19.25,19.50,395.75,0.75,271.25,0.25,314.00,81.75,128.75,142.50,22.75,13.50,0.50
2022,9,48,2022,9,7,0,BAL,New Orleans Saints,7,11,20,2022-11-07 20:00:00,13.0,27.0,13.0,243.0,195.0,48.0,1.0,23.0,319.0,131.0,188.0,0.0,0,0.25,17.25,18.50,391.00,0.75,245.75,0.00,327.75,63.25,95.75,150.00,22.25,12.50,0.50


In [None]:
# dropping extra index levels
df_rolling = df_rolling.droplevel('season')
df_rolling = df_rolling.droplevel('week')
df_rolling

Unnamed: 0,season,week,day,ot,away_t,home_t,day_of_week,month,hour,date,ht_pts_scored,ht_pts_allowed,ht_1st_downs,ht_totyd,ht_passyd,ht_rushyd,ht_to,ht_1st_downs_allowed,ht_totyd_allowed,ht_passyd_allowed,ht_rushyd_allowed,ht_to_forced,home_team_wins,home_team_wins_rolling,ht_pts_scored_rolling,ht_pts_allowed_rolling,ht_totyd_rolling,ht_to_rolling,ht_totyd_allowed_rolling,ht_to_forced_rolling,ht_passyd_rolling,ht_rushyd_rolling,ht_passyd_allowed_rolling,ht_rushyd_allowed_rolling,ht_1st_downs_rolling,ht_1st_downs_allowed_rolling,ot_rolling
14997,1994,1,4,0,TB,Chicago Bears,6,9,13,1994-09-04 13:00:00,21.0,9.0,17.0,270.0,204.0,66.0,0.0,17.0,304.0,181.0,123.0,0.0,0,0.25,22.50,22.00,291.25,2.25,374.50,0.75,211.50,79.75,240.00,134.50,18.75,23.50,0.00
14979,1994,1,4,0,Tampa Bay Buccaneers,CHI,6,9,13,1994-09-04 13:00:00,21.0,9.0,17.0,270.0,204.0,66.0,0.0,17.0,304.0,181.0,123.0,0.0,1,0.50,26.00,17.25,294.25,1.50,360.50,0.75,206.50,87.75,241.25,119.25,18.75,22.00,0.00
14962,1994,1,4,1,Atlanta Falcons,DET,6,9,13,1994-09-04 13:00:00,31.0,28.0,21.0,352.0,203.0,149.0,1.0,22.0,389.0,271.0,118.0,0.0,1,0.75,29.50,16.75,295.00,0.75,342.75,0.75,179.25,115.75,232.00,110.75,18.50,20.50,0.25
14711,1994,1,4,0,CLE,Cincinnati Bengals,6,9,13,1994-09-04 13:00:00,20.0,28.0,23.0,331.0,213.0,118.0,2.0,16.0,256.0,149.0,107.0,2.0,0,0.50,23.25,18.50,305.75,0.75,313.25,0.50,206.00,99.75,195.50,117.75,19.50,18.00,0.25
14729,1994,1,4,0,Cleveland Browns,CIN,6,9,13,1994-09-04 13:00:00,20.0,28.0,23.0,331.0,213.0,118.0,2.0,16.0,256.0,149.0,107.0,2.0,0,0.50,23.00,23.25,321.00,1.25,301.25,1.00,208.25,112.75,187.50,113.75,21.00,17.75,0.25
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
325,2022,9,6,0,LAR,Tampa Bay Buccaneers,6,11,16,2022-11-06 16:00:00,16.0,13.0,18.0,323.0,272.0,51.0,0.0,9.0,206.0,138.0,68.0,0.0,0,0.25,18.50,22.00,292.50,0.50,313.50,0.50,206.00,86.50,200.50,113.00,16.50,18.00,0.00
87,2022,9,6,1,TEN,Kansas City Chiefs,6,11,20,2022-11-06 20:00:00,20.0,17.0,29.0,499.0,422.0,77.0,1.0,9.0,229.0,57.0,172.0,0.0,0,0.25,18.25,18.50,351.75,0.50,265.50,0.25,276.50,75.25,149.00,116.50,20.00,13.50,0.25
128,2022,9,6,1,Tennessee Titans,KC,6,11,20,2022-11-06 20:00:00,20.0,17.0,29.0,499.0,422.0,77.0,1.0,9.0,229.0,57.0,172.0,0.0,1,0.25,19.25,19.50,395.75,0.75,271.25,0.25,314.00,81.75,128.75,142.50,22.75,13.50,0.50
48,2022,9,7,0,BAL,New Orleans Saints,7,11,20,2022-11-07 20:00:00,13.0,27.0,13.0,243.0,195.0,48.0,1.0,23.0,319.0,131.0,188.0,0.0,0,0.25,17.25,18.50,391.00,0.75,245.75,0.00,327.75,63.25,95.75,150.00,22.25,12.50,0.50


In [None]:
df_rolling.dtypes

season                                   int64
week                                    object
day                                      int64
ot                                       int64
away_t                                  object
home_t                                  object
day_of_week                              int64
month                                    int64
hour                                     int64
date                            datetime64[ns]
ht_pts_scored                          float64
ht_pts_allowed                         float64
ht_1st_downs                           float64
ht_totyd                               float64
ht_passyd                              float64
ht_rushyd                              float64
ht_to                                  float64
ht_1st_downs_allowed                   float64
ht_totyd_allowed                       float64
ht_passyd_allowed                      float64
ht_rushyd_allowed                      float64
ht_to_forced 

In [None]:
from sklearn.preprocessing import OneHotEncoder

#creating instance of one-hot-encoder
encoder = OneHotEncoder(handle_unknown='ignore')

#perform one-hot encoding on 'team' column 
encoder_df = pd.DataFrame(encoder.fit_transform(df_rolling[['away_t', 'home_t']]).toarray())

#merge one-hot encoded columns back with original DataFrame
a_df = df_rolling.join(encoder_df)

#view final df
a_df

#### Getting "NaN" values because the index doesnt match (i think)

Unnamed: 0,season,week,day,ot,away_t,home_t,day_of_week,month,hour,date,ht_pts_scored,ht_pts_allowed,ht_1st_downs,ht_totyd,ht_passyd,ht_rushyd,ht_to,ht_1st_downs_allowed,ht_totyd_allowed,ht_passyd_allowed,ht_rushyd_allowed,ht_to_forced,home_team_wins,home_team_wins_rolling,ht_pts_scored_rolling,ht_pts_allowed_rolling,ht_totyd_rolling,ht_to_rolling,ht_totyd_allowed_rolling,ht_to_forced_rolling,ht_passyd_rolling,ht_rushyd_rolling,ht_passyd_allowed_rolling,ht_rushyd_allowed_rolling,ht_1st_downs_rolling,ht_1st_downs_allowed_rolling,ot_rolling,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143
14997,1994,1,4,0,TB,Chicago Bears,6,9,13,1994-09-04 13:00:00,21.0,9.0,17.0,270.0,204.0,66.0,0.0,17.0,304.0,181.0,123.0,0.0,0,0.25,22.50,22.00,291.25,2.25,374.50,0.75,211.50,79.75,240.00,134.50,18.75,23.50,0.00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
14979,1994,1,4,0,Tampa Bay Buccaneers,CHI,6,9,13,1994-09-04 13:00:00,21.0,9.0,17.0,270.0,204.0,66.0,0.0,17.0,304.0,181.0,123.0,0.0,1,0.50,26.00,17.25,294.25,1.50,360.50,0.75,206.50,87.75,241.25,119.25,18.75,22.00,0.00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
14962,1994,1,4,1,Atlanta Falcons,DET,6,9,13,1994-09-04 13:00:00,31.0,28.0,21.0,352.0,203.0,149.0,1.0,22.0,389.0,271.0,118.0,0.0,1,0.75,29.50,16.75,295.00,0.75,342.75,0.75,179.25,115.75,232.00,110.75,18.50,20.50,0.25,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
14711,1994,1,4,0,CLE,Cincinnati Bengals,6,9,13,1994-09-04 13:00:00,20.0,28.0,23.0,331.0,213.0,118.0,2.0,16.0,256.0,149.0,107.0,2.0,0,0.50,23.25,18.50,305.75,0.75,313.25,0.50,206.00,99.75,195.50,117.75,19.50,18.00,0.25,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
14729,1994,1,4,0,Cleveland Browns,CIN,6,9,13,1994-09-04 13:00:00,20.0,28.0,23.0,331.0,213.0,118.0,2.0,16.0,256.0,149.0,107.0,2.0,0,0.50,23.00,23.25,321.00,1.25,301.25,1.00,208.25,112.75,187.50,113.75,21.00,17.75,0.25,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
325,2022,9,6,0,LAR,Tampa Bay Buccaneers,6,11,16,2022-11-06 16:00:00,16.0,13.0,18.0,323.0,272.0,51.0,0.0,9.0,206.0,138.0,68.0,0.0,0,0.25,18.50,22.00,292.50,0.50,313.50,0.50,206.00,86.50,200.50,113.00,16.50,18.00,0.00,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
87,2022,9,6,1,TEN,Kansas City Chiefs,6,11,20,2022-11-06 20:00:00,20.0,17.0,29.0,499.0,422.0,77.0,1.0,9.0,229.0,57.0,172.0,0.0,0,0.25,18.25,18.50,351.75,0.50,265.50,0.25,276.50,75.25,149.00,116.50,20.00,13.50,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
128,2022,9,6,1,Tennessee Titans,KC,6,11,20,2022-11-06 20:00:00,20.0,17.0,29.0,499.0,422.0,77.0,1.0,9.0,229.0,57.0,172.0,0.0,1,0.25,19.25,19.50,395.75,0.75,271.25,0.25,314.00,81.75,128.75,142.50,22.75,13.50,0.50,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
48,2022,9,7,0,BAL,New Orleans Saints,7,11,20,2022-11-07 20:00:00,13.0,27.0,13.0,243.0,195.0,48.0,1.0,23.0,319.0,131.0,188.0,0.0,0,0.25,17.25,18.50,391.00,0.75,245.75,0.00,327.75,63.25,95.75,150.00,22.25,12.50,0.50,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
df_rolling

Unnamed: 0,season,week,day,ot,away_t,home_t,day_of_week,month,hour,date,ht_pts_scored,ht_pts_allowed,ht_1st_downs,ht_totyd,ht_passyd,ht_rushyd,ht_to,ht_1st_downs_allowed,ht_totyd_allowed,ht_passyd_allowed,ht_rushyd_allowed,ht_to_forced,home_team_wins,home_team_wins_rolling,ht_pts_scored_rolling,ht_pts_allowed_rolling,ht_totyd_rolling,ht_to_rolling,ht_totyd_allowed_rolling,ht_to_forced_rolling,ht_passyd_rolling,ht_rushyd_rolling,ht_passyd_allowed_rolling,ht_rushyd_allowed_rolling,ht_1st_downs_rolling,ht_1st_downs_allowed_rolling,ot_rolling
14997,1994,1,4,0,TB,Chicago Bears,6,9,13,1994-09-04 13:00:00,21.0,9.0,17.0,270.0,204.0,66.0,0.0,17.0,304.0,181.0,123.0,0.0,0,0.25,22.50,22.00,291.25,2.25,374.50,0.75,211.50,79.75,240.00,134.50,18.75,23.50,0.00
14979,1994,1,4,0,Tampa Bay Buccaneers,CHI,6,9,13,1994-09-04 13:00:00,21.0,9.0,17.0,270.0,204.0,66.0,0.0,17.0,304.0,181.0,123.0,0.0,1,0.50,26.00,17.25,294.25,1.50,360.50,0.75,206.50,87.75,241.25,119.25,18.75,22.00,0.00
14962,1994,1,4,1,Atlanta Falcons,DET,6,9,13,1994-09-04 13:00:00,31.0,28.0,21.0,352.0,203.0,149.0,1.0,22.0,389.0,271.0,118.0,0.0,1,0.75,29.50,16.75,295.00,0.75,342.75,0.75,179.25,115.75,232.00,110.75,18.50,20.50,0.25
14711,1994,1,4,0,CLE,Cincinnati Bengals,6,9,13,1994-09-04 13:00:00,20.0,28.0,23.0,331.0,213.0,118.0,2.0,16.0,256.0,149.0,107.0,2.0,0,0.50,23.25,18.50,305.75,0.75,313.25,0.50,206.00,99.75,195.50,117.75,19.50,18.00,0.25
14729,1994,1,4,0,Cleveland Browns,CIN,6,9,13,1994-09-04 13:00:00,20.0,28.0,23.0,331.0,213.0,118.0,2.0,16.0,256.0,149.0,107.0,2.0,0,0.50,23.00,23.25,321.00,1.25,301.25,1.00,208.25,112.75,187.50,113.75,21.00,17.75,0.25
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
325,2022,9,6,0,LAR,Tampa Bay Buccaneers,6,11,16,2022-11-06 16:00:00,16.0,13.0,18.0,323.0,272.0,51.0,0.0,9.0,206.0,138.0,68.0,0.0,0,0.25,18.50,22.00,292.50,0.50,313.50,0.50,206.00,86.50,200.50,113.00,16.50,18.00,0.00
87,2022,9,6,1,TEN,Kansas City Chiefs,6,11,20,2022-11-06 20:00:00,20.0,17.0,29.0,499.0,422.0,77.0,1.0,9.0,229.0,57.0,172.0,0.0,0,0.25,18.25,18.50,351.75,0.50,265.50,0.25,276.50,75.25,149.00,116.50,20.00,13.50,0.25
128,2022,9,6,1,Tennessee Titans,KC,6,11,20,2022-11-06 20:00:00,20.0,17.0,29.0,499.0,422.0,77.0,1.0,9.0,229.0,57.0,172.0,0.0,1,0.25,19.25,19.50,395.75,0.75,271.25,0.25,314.00,81.75,128.75,142.50,22.75,13.50,0.50
48,2022,9,7,0,BAL,New Orleans Saints,7,11,20,2022-11-07 20:00:00,13.0,27.0,13.0,243.0,195.0,48.0,1.0,23.0,319.0,131.0,188.0,0.0,0,0.25,17.25,18.50,391.00,0.75,245.75,0.00,327.75,63.25,95.75,150.00,22.25,12.50,0.50


In [None]:
# using pandas to convert the prepared dataframe into a csv file that is model ready.
df_rolling.to_csv("Data/transformed.csv", index=False)