In [1]:
# import libraries
import pandas as pd
import numpy as np
import datetime

In [2]:
# Notebook-wide pandas options: show every column when a frame is displayed and
# switch off the chained-assignment warning.
for option_name in ('display.max_columns', 'mode.chained_assignment'):
    pd.set_option(option_name, None)

# Read the two inputs. sheet_name=None makes read_excel load every worksheet
# and return them as a dict of DataFrames keyed by sheet name.
nfl_run_pass = pd.read_csv('../Data/NFL_Run_Pass_Data_Cleaned.csv')
sheet_to_df_map = pd.read_excel('../Data/NFL_Master_Data.xlsx', sheet_name=None)

# Stack all worksheets row-wise into one frame with a fresh 0..n-1 index
nfl = pd.concat(sheet_to_df_map, ignore_index=True)

# quick inspection
nfl.head()

Unnamed: 0,Season,Week,Game,Day,Date,Time,Off Bye,Result,OT,Wins,Loses,H_or_A,Team,Opp,Team_Score,Opp_Score,Team_Off_1stDwn,Team_Off_TotYd,Team_Off_PassY,Team_Off_RushY,Team_Off_TO,Team_Def_1stD,Team_Def_TotYd,Team_Def_PassY,Team_Def_RushY,Team_Def_TO,Offense,Defense,Sp. Tms,Spread,Spread_Outcome,Over/Under,OU Result,Unnamed: 33,Unnamed: 34,Unnamed: 35,Unnamed: 36,vs. Line,Over/Under.1,OU Result.1
0,2010,1,1.0,Sun,2020-09-12,1:02PM ET,N,L,,,,,Buffalo Bills,Miami Dolphins,10.0,15.0,9.0,166.0,116.0,50.0,,19.0,296.0,164.0,132.0,,-10.47,1.95,5.53,3.0,not covered,39.0,under,,,,,,,
1,2010,2,2.0,Sun,2020-09-19,1:02PM ET,N,L,,,,@,Buffalo Bills,Green Bay Packers,7.0,34.0,14.0,186.0,62.0,124.0,2.0,18.0,346.0,255.0,91.0,,-15.65,-14.91,1.26,13.0,not covered,43.0,under,,,,,,,
2,2010,3,3.0,Sun,2020-09-26,1:02PM ET,N,L,,,,@,Buffalo Bills,New England Patriots,30.0,38.0,19.0,374.0,240.0,134.0,2.0,25.0,445.0,245.0,200.0,1.0,7.72,-21.52,4.21,14.5,covered,43.0,over,,,,,,,
3,2010,4,4.0,Sun,2020-10-03,1:02PM ET,N,L,,,,,Buffalo Bills,New York Jets,14.0,38.0,12.0,223.0,109.0,114.0,2.0,25.0,444.0,171.0,273.0,,-9.04,-18.28,2.8,6.0,not covered,37.0,over,,,,,,,
4,2010,5,5.0,Sun,2020-10-10,1:02PM ET,N,L,,,,,Buffalo Bills,Jacksonville Jaguars,26.0,36.0,17.0,306.0,196.0,110.0,,21.0,381.0,165.0,216.0,3.0,-1.92,-6.06,-4.76,-2.5,not covered,41.5,over,,,,,,,


In [3]:
nfl_run_pass.head()

Unnamed: 0,team,season,date,week,game,day,team_cmp,team_p_att,team_cmp%,team_p_yards,team_p_td,int,team_sacked,team_qbr,team_r_att,team_r_yards,team_r_yrd_att,team_r_td
0,Arizona Cardinals,2010,2010-09-12,1,1,0,22,41,0.537,266,1,0,2,85.1,21,112,5.333,1
1,Arizona Cardinals,2010,2010-09-19,2,2,0,18,33,0.545,149,0,3,2,30.4,13,118,9.077,1
2,Arizona Cardinals,2010,2010-09-26,3,3,0,12,26,0.462,108,2,1,2,69.7,26,119,4.577,0
3,Arizona Cardinals,2010,2010-10-03,4,4,0,15,28,0.536,87,0,2,9,38.7,14,47,3.357,0
4,Arizona Cardinals,2010,2010-10-10,5,5,0,18,29,0.621,153,0,1,4,64.4,24,41,1.708,0


In [4]:
# focus on main nfl df
# list all columns
nfl.columns

Index(['Season', 'Week', 'Game', 'Day', 'Date', 'Time', 'Off Bye', 'Result',
       'OT', 'Wins', 'Loses', 'H_or_A', 'Team', 'Opp', 'Team_Score',
       'Opp_Score', 'Team_Off_1stDwn', 'Team_Off_TotYd', 'Team_Off_PassY',
       'Team_Off_RushY', 'Team_Off_TO', 'Team_Def_1stD', 'Team_Def_TotYd',
       'Team_Def_PassY', 'Team_Def_RushY', 'Team_Def_TO', 'Offense', 'Defense',
       'Sp. Tms', 'Spread', 'Spread_Outcome', 'Over/Under', 'OU Result',
       'Unnamed: 33', 'Unnamed: 34', 'Unnamed: 35', 'Unnamed: 36', 'vs. Line',
       'Over/Under.1', 'OU Result.1'],
      dtype='object')

In [5]:
row_count = len(nfl)
print('Length:', row_count)

# Count the missing entries in 'Unnamed: 35'; when the count equals the row
# count above, the column holds no data at all
empty_field_count = nfl['Unnamed: 35'].isnull().sum()
print('Fields with no values:', empty_field_count)

Length: 5984
Fields with no values: 5984


In [6]:
# Unnecessary columns that are treated as all-NaN
# (NOTE: only 'Unnamed: 35' was actually checked in the previous cell)
empty_columns = ['Unnamed: 33', 'Unnamed: 34', 'Unnamed: 35', 'Unnamed: 36',
                 'vs. Line', 'Over/Under.1', 'OU Result.1']

# Offense, Defense and Sp. Tms hold predicted values and are not needed
predicted_columns = ['Offense', 'Defense', 'Sp. Tms']

# Remove both groups in a single pass
nfl = nfl.drop(columns=empty_columns + predicted_columns)

# look at new columns
nfl.columns

Index(['Season', 'Week', 'Game', 'Day', 'Date', 'Time', 'Off Bye', 'Result',
       'OT', 'Wins', 'Loses', 'H_or_A', 'Team', 'Opp', 'Team_Score',
       'Opp_Score', 'Team_Off_1stDwn', 'Team_Off_TotYd', 'Team_Off_PassY',
       'Team_Off_RushY', 'Team_Off_TO', 'Team_Def_1stD', 'Team_Def_TotYd',
       'Team_Def_PassY', 'Team_Def_RushY', 'Team_Def_TO', 'Spread',
       'Spread_Outcome', 'Over/Under', 'OU Result'],
      dtype='object')

In [7]:
# Uniform headers: spaces become underscores and every name is lower-cased
nfl.columns = [header.replace(' ', '_').lower() for header in nfl.columns]

In [8]:
# One rename pass: prefix the bye flag with 'tm_' and correct the 'loses' spelling
nfl = nfl.rename(columns={'off_bye': 'tm_off_bye', 'loses': 'losses'})

In [9]:
# Sorted distinct names of both franchise columns, used to spot misspellings
# or inconsistent names by eye
opp_names = nfl['opp'].sort_values().unique()
team_names = nfl['team'].sort_values().unique()

# Opp column first, a blank separator line, then the Team column
print(opp_names)
print('')
print(team_names)

['Arizona Cardinals' 'Atlanta Falcons' 'Baltimore Ravens' 'Buffalo Bills'
 'Bye Week' 'Carolina Panthers' 'Chicago Bears' 'Cincinnati Bengals'
 'Cleveland Browns' 'Dallas Cowboys' 'Denver Broncos' 'Detroit Lions'
 'Green Bay Packers' 'Houston Texans' 'Indianapolis Colts'
 'Jacksonville Jaguars' 'Kansas City Chiefs' 'Las Vegas Raiders'
 'Los Angeles Chargers' 'Los Angeles Rams' 'Miami Dolphins'
 'Minnesota Vikings' 'New England Patriots' 'New Orleans Saints'
 'New York Giants' 'New York Jets' 'Oakland Raiders' 'Philadelphia Eagles'
 'Pittsburgh Steelers' 'San Diego Chargers' 'San Francisco 49ers'
 'Seattle Seahawks' 'St. Louis Rams' 'Tampa Bay Buccaneers'
 'Tennessee Titans' 'Washington Football Team' 'Washington Redskins']

['Arizona Cardinals' 'Atlanta Falcons' 'Baltimore Ravens' 'Buffalo Bills'
 'Carolina Panthers' 'Chicago Bears' 'Cincinnati Bengals'
 'Cleveland Browns' 'Dallas Cowboys' 'Denver Broncos' 'Detroit Lions'
 'Green Bay Packers' 'Houston Texans' 'Indianapolis Colts'
 'Jac

In [10]:
# Former franchise names mapped to the name used for them in this notebook
# (team relocations, plus one name replaced because it was offensive)
current_names = {
    'St. Louis Rams': 'Los Angeles Rams',
    'San Diego Chargers': 'Los Angeles Chargers',
    'Washington Redskins': 'Washington Football Team',
    'Oakland Raiders': 'Las Vegas Raiders',
}

# Keep only rows that are real games (opponent is not the 'Bye Week'
# placeholder), then normalise the opponent names
is_game = nfl['opp'] != 'Bye Week'
nfl = nfl.loc[is_game].assign(opp=lambda games: games['opp'].replace(current_names))

# print len
print(len(nfl))

# make sure number of teams equals number of Opp
print('# of Opp:', nfl['opp'].nunique(dropna=False))
print('# of Teams:', nfl['team'].nunique(dropna=False))

# Re-number the rows 0..n-1 now that the bye-week rows are gone
nfl = nfl.reset_index(drop=True)

5632
# of Opp: 32
# of Teams: 32


In [11]:
# Distinct values of every string-coded column, printed before they are
# converted to integers in the following cells
labelled_columns = [
    ('Spread Outcomes:', 'spread_outcome'),
    ('OU Results:', 'ou_result'),
    ('Results:', 'result'),
    ('OT:', 'ot'),
    ('Home or Away:', 'h_or_a'),
    ('Days of Week:', 'day'),
]
for label, column in labelled_columns:
    print(label, nfl[column].unique())

# Kick-off times are sorted first so the listing is easier to scan
print('Time of Game:', nfl['time'].sort_values().unique())

Spread Outcomes: ['not covered' 'covered' 'push']
OU Results: ['under' 'over' 'push']
Results: ['L' 'W' 'T']
OT: [nan 'OT']
Home or Away: [nan '@']
Days of Week: ['Sun' 'Sat' 'Thu' 'Mon' 'Tue' 'Wed' 'Fri']
Time of Game: ['10:10PM ET' '10:20PM ET' '10:25PM ET' '10:26PM ET' '11:35PM ET'
 '12:30PM ET' '12:35PM ET' '12:36PM ET' '12:38PM ET' '12:39PM ET'
 '12:47PM ET' '12:56PM ET' '1:00PM ET' '1:01PM ET' '1:02PM ET' '1:03PM ET'
 '1:04PM ET' '1:05PM ET' '1:06PM ET' '1:07PM ET' '1:08PM ET' '3:40PM ET'
 '4:04PM ET' '4:05PM ET' '4:06PM ET' '4:07PM ET' '4:08PM ET' '4:10PM ET'
 '4:15PM ET' '4:16PM ET' '4:17PM ET' '4:20PM ET' '4:21PM ET' '4:22PM ET'
 '4:23PM ET' '4:24PM ET' '4:25PM ET' '4:26PM ET' '4:27PM ET' '4:30PM ET'
 '4:31PM ET' '4:36PM ET' '4:41PM ET' '5:00PM ET' '7:00PM ET' '7:05PM ET'
 '7:10PM ET' '7:11PM ET' '7:13PM ET' '7:15PM ET' '7:20PM ET' '7:25PM ET'
 '7:26PM ET' '7:30PM ET' '8:04PM ET' '8:05PM ET' '8:06PM ET' '8:15PM ET'
 '8:20PM ET' '8:25PM ET' '8:26PM ET' '8:27PM ET' '8:28PM ET' '

In [12]:
# Integer codes for the string-valued columns: {column name: {string: code}}
clean = {
    'tm_off_bye': {'N': 0, 'Y': 1},
    'result': {'L': 0, 'W': 1, 'T': 2},
    'spread_outcome': {'not covered': 0, 'covered': 1, 'push': 2},
    'ou_result': {'under': 0, 'over': 1, 'push': 2},
    'day': {'Sun': 0, 'Mon': 1, 'Tue': 2, 'Wed': 3, 'Thu': 4, 'Fri': 5, 'Sat': 6},
}

# Nested-dict replace: inside each named column, swap the listed strings for
# their integer code
nfl = nfl.replace(clean)

In [13]:
# Kick-off time arrives as a string such as '1:02PM ET' rather than a
# timestamp, so every known string is assigned to one of four numeric slots.

# slot 0
day = [
    '12:30PM ET', '12:35PM ET', '12:36PM ET', '12:38PM ET', '12:39PM ET', '12:47PM ET', '12:56PM ET',
    '1:00PM ET', '1:01PM ET', '1:02PM ET', '1:03PM ET', '1:04PM ET', '1:05PM ET', '1:06PM ET',
    '1:07PM ET', '1:08PM ET',
]

# slot 1
midday = [
    '3:40PM ET', '4:04PM ET', '4:05PM ET', '4:06PM ET', '4:07PM ET', '4:08PM ET', '4:10PM ET',
    '4:15PM ET', '4:16PM ET', '4:17PM ET', '4:20PM ET', '4:21PM ET', '4:22PM ET', '4:23PM ET',
    '4:24PM ET', '4:25PM ET', '4:26PM ET', '4:27PM ET', '4:30PM ET', '4:31PM ET', '4:36PM ET',
    '4:41PM ET', '5:00PM ET',
]

# slot 2
night = [
    '7:00PM ET', '7:05PM ET', '7:10PM ET', '7:11PM ET', '7:13PM ET', '7:15PM ET', '7:20PM ET',
    '7:25PM ET', '7:26PM ET', '7:30PM ET', '8:04PM ET', '8:05PM ET', '8:06PM ET', '8:15PM ET',
    '8:20PM ET', '8:25PM ET', '8:26PM ET', '8:27PM ET', '8:28PM ET', '8:29PM ET', '8:30PM ET',
    '8:31PM ET', '8:32PM ET', '8:33PM ET', '8:35PM ET', '8:36PM ET', '8:40PM ET', '8:41PM ET',
    '8:42PM ET', '8:43PM ET', '9:00PM ET', '9:13PM ET', '9:15PM ET', '9:20PM ET',
]

# slot 3: the morning and late-evening kick-offs
other = [
    '8:35AM ET', '8:36AM ET', '9:30AM ET', '9:31AM ET', '9:35AM ET', '9:36AM ET', '9:37AM ET',
    '10:10PM ET', '10:20PM ET', '10:25PM ET', '10:26PM ET', '11:35PM ET',
]

# Replace each group of strings with its slot number, one group after another.
# A time string that appears in none of the lists is left untouched.
for slot_code, kickoff_strings in enumerate((day, midday, night, other)):
    nfl['time'] = nfl['time'].replace(kickoff_strings, slot_code)

In [14]:
# Store both franchise-name columns with the pandas category dtype
nfl = nfl.astype({'team': 'category', 'opp': 'category'})

In [15]:
# Overtime flag: 1 where the field reads 'OT', 0 for anything else (blanks included)
nfl['ot'] = (nfl['ot'] == 'OT').astype('int64')

# Venue flag: 1 where the field reads '@' (away game), 0 for anything else (home game)
nfl['h_or_a'] = (nfl['h_or_a'] == '@').astype('int64')

In [16]:
# The workbook import gave every date the wrong year (the first nfl.head()
# shows 2010-season games dated 2020-...), so only the month and day are kept
# and the year is rebuilt from the `season` column.
year = nfl.season.astype(str)
year_n = (nfl.season + 1).astype(str)

# 'YYYY-MM-DD' -> 'MM-DD'
month_day = nfl['date'].astype(str).str[5:10]

# Games dated in the first half of the calendar year are assumed to be the
# tail of a season that began the previous autumn, so they take season + 1.
# Testing the month replaces the earlier substring check for '01-0', which only
# recognised January 1st-9th and would have mis-dated a game on January 10th
# or later.
rolls_into_next_year = month_day.str[:2].astype(int) <= 6
calendar_year = year.where(~rolls_into_next_year, year_n)

# Vectorised rebuild; avoids the row-by-row chained assignment nfl.date[i] = ...
nfl['date'] = pd.to_datetime(month_day + '-' + calendar_year, format='%m-%d-%Y')

nfl.head(17)

Unnamed: 0,season,week,game,day,date,time,tm_off_bye,result,ot,wins,losses,h_or_a,team,opp,team_score,opp_score,team_off_1stdwn,team_off_totyd,team_off_passy,team_off_rushy,team_off_to,team_def_1std,team_def_totyd,team_def_passy,team_def_rushy,team_def_to,spread,spread_outcome,over/under,ou_result
0,2010,1,1.0,0,2010-09-12,0,0,0,0,,,0,Buffalo Bills,Miami Dolphins,10.0,15.0,9.0,166.0,116.0,50.0,,19.0,296.0,164.0,132.0,,3.0,0,39.0,0
1,2010,2,2.0,0,2010-09-19,0,0,0,0,,,1,Buffalo Bills,Green Bay Packers,7.0,34.0,14.0,186.0,62.0,124.0,2.0,18.0,346.0,255.0,91.0,,13.0,0,43.0,0
2,2010,3,3.0,0,2010-09-26,0,0,0,0,,,1,Buffalo Bills,New England Patriots,30.0,38.0,19.0,374.0,240.0,134.0,2.0,25.0,445.0,245.0,200.0,1.0,14.5,1,43.0,1
3,2010,4,4.0,0,2010-10-03,0,0,0,0,,,0,Buffalo Bills,New York Jets,14.0,38.0,12.0,223.0,109.0,114.0,2.0,25.0,444.0,171.0,273.0,,6.0,0,37.0,1
4,2010,5,5.0,0,2010-10-10,0,0,0,0,,,0,Buffalo Bills,Jacksonville Jaguars,26.0,36.0,17.0,306.0,196.0,110.0,,21.0,381.0,165.0,216.0,3.0,-2.5,0,41.5,1
5,2010,7,6.0,0,2010-10-24,0,1,0,1,,,1,Buffalo Bills,Baltimore Ravens,34.0,37.0,27.0,514.0,382.0,132.0,4.0,21.0,364.0,229.0,135.0,2.0,12.5,1,40.0,1
6,2010,8,7.0,0,2010-10-31,0,0,0,1,,,1,Buffalo Bills,Kansas City Chiefs,10.0,13.0,23.0,328.0,191.0,137.0,1.0,24.0,414.0,140.0,274.0,,7.0,1,46.0,0
7,2010,9,8.0,0,2010-11-07,0,0,0,0,,,0,Buffalo Bills,Chicago Bears,19.0,22.0,22.0,340.0,294.0,46.0,3.0,19.0,283.0,178.0,105.0,1.0,3.0,2,41.5,0
8,2010,10,9.0,0,2010-11-14,0,0,1,0,,,0,Buffalo Bills,Detroit Lions,14.0,12.0,12.0,288.0,139.0,149.0,1.0,19.0,390.0,314.0,76.0,1.0,-1.5,1,45.0,0
9,2010,11,10.0,0,2010-11-21,0,0,1,0,,,1,Buffalo Bills,Cincinnati Bengals,49.0,31.0,23.0,449.0,308.0,141.0,2.0,26.0,361.0,228.0,133.0,3.0,4.0,1,41.0,1


In [17]:
nfl.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5632 entries, 0 to 5631
Data columns (total 30 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   season           5632 non-null   int64         
 1   week             5632 non-null   int64         
 2   game             5632 non-null   float64       
 3   day              5632 non-null   int64         
 4   date             5632 non-null   datetime64[ns]
 5   time             5632 non-null   int64         
 6   tm_off_bye       5632 non-null   int64         
 7   result           5632 non-null   int64         
 8   ot               5632 non-null   int64         
 9   wins             0 non-null      float64       
 10  losses           0 non-null      float64       
 11  h_or_a           5632 non-null   int64         
 12  team             5632 non-null   category      
 13  opp              5632 non-null   category      
 14  team_score       5632 non-null   float64

In [18]:
# Game numbers were loaded as floats (1.0, 2.0, ...); store them as integers
nfl['game'] = nfl['game'].astype(int)

In [19]:
# Left-join the run/pass statistics onto the main frame. Rows are matched on
# the five key columns the two frames share; every row of `nfl` is kept.
merge_keys = ['team', 'season', 'game', 'week', 'day']
nfl = nfl.merge(nfl_run_pass, how='left', on=merge_keys)
nfl.head()

Unnamed: 0,season,week,game,day,date_x,time,tm_off_bye,result,ot,wins,losses,h_or_a,team,opp,team_score,opp_score,team_off_1stdwn,team_off_totyd,team_off_passy,team_off_rushy,team_off_to,team_def_1std,team_def_totyd,team_def_passy,team_def_rushy,team_def_to,spread,spread_outcome,over/under,ou_result,date_y,team_cmp,team_p_att,team_cmp%,team_p_yards,team_p_td,int,team_sacked,team_qbr,team_r_att,team_r_yards,team_r_yrd_att,team_r_td
0,2010,1,1,0,2010-09-12,0,0,0,0,,,0,Buffalo Bills,Miami Dolphins,10.0,15.0,9.0,166.0,116.0,50.0,,19.0,296.0,164.0,132.0,,3.0,0,39.0,0,2010-09-12,18,34,0.529,116,1,0,3,73.0,17,50,2.941,0
1,2010,2,2,0,2010-09-19,0,0,0,0,,,1,Buffalo Bills,Green Bay Packers,7.0,34.0,14.0,186.0,62.0,124.0,2.0,18.0,346.0,255.0,91.0,,13.0,0,43.0,0,2010-09-19,11,18,0.611,62,0,2,4,37.0,32,124,3.875,1
2,2010,3,3,0,2010-09-26,0,0,0,0,,,1,Buffalo Bills,New England Patriots,30.0,38.0,19.0,374.0,240.0,134.0,2.0,25.0,445.0,245.0,200.0,1.0,14.5,1,43.0,1,2010-09-26,20,28,0.714,240,2,2,1,92.4,24,134,5.583,0
3,2010,4,4,0,2010-10-03,0,0,0,0,,,0,Buffalo Bills,New York Jets,14.0,38.0,12.0,223.0,109.0,114.0,2.0,25.0,444.0,171.0,273.0,,6.0,0,37.0,1,2010-10-03,12,27,0.444,109,2,0,3,83.6,16,114,7.125,0
4,2010,5,5,0,2010-10-10,0,0,0,0,,,0,Buffalo Bills,Jacksonville Jaguars,26.0,36.0,17.0,306.0,196.0,110.0,,21.0,381.0,165.0,216.0,3.0,-2.5,0,41.5,1,2010-10-10,20,30,0.667,196,3,0,3,121.5,21,110,5.238,0


In [20]:
# Tidy up after the merge: drop the run/pass copy of the date and the two
# yardage columns treated as duplicates, then give 'date_x' back its plain name
nfl = (
    nfl
    .drop(columns=['date_y', 'team_p_yards', 'team_r_yards'])
    .rename(columns={'date_x': 'date'})
)

In [21]:
# Running season record: for every (season, team), the number of wins, losses
# and ties up to and including each game. `result` is coded 0 = loss,
# 1 = win, 2 = tie.
season_results = nfl.groupby(['season', 'team'], sort=False)['result']


def running_count(result_code):
    """Cumulative count, within each (season, team), of games whose result equals result_code."""
    return season_results.transform(lambda games: (games == result_code).expanding().sum())


nfl['wins'] = running_count(1)
nfl['losses'] = running_count(0)

# create column for ties, placed at position 11 (directly after 'losses')
nfl.insert(loc=11, column='ties', value=running_count(2))

In [22]:
# calculate streak - (winning and losing, ATS, O/U)
def streaks(a, b, c):
    """Add two running-streak columns to the global ``nfl`` frame.

    Parameters
    ----------
    a : str
        Name of the column that receives the streak of outcome ``0``
        (losing / not-covered / under streak).
    b : str
        Name of the column that receives the streak of outcome ``1``
        (winning / covered / over streak).
    c : str
        Name of the existing outcome column (result, spread outcome or
        O/U result) holding the codes 0, 1 and 2.

    Returns
    -------
    None
        ``nfl`` is modified in place; columns ``a`` and ``b`` are created or
        overwritten with float values.

    Notes
    -----
    A streak is the number of consecutive games, ending with the current one,
    that had the given outcome. Any other outcome (including code 2, a
    tie/push) sets both streaks to 0.

    Streaks are computed per (season, team), so they restart at a team's first
    game of a season whatever its week number. The earlier row loop reset only
    when ``week == 1``, so a team starting in a later week inherited the streak
    of whichever row preceded it. It also set stray ``nfl.a`` / ``nfl.b``
    attributes on every iteration.
    """
    def _run_length(hit):
        # Every False opens a new block; the cumulative sum of the hits inside
        # a block is the length of the current run (0 on the False row itself).
        return hit.astype(int).groupby((~hit).cumsum()).cumsum()

    outcome = nfl.groupby(['season', 'team'], sort=False, observed=True)[c]

    # Cast to float to keep the dtype these columns had when they were filled
    # cell by cell.
    nfl[a] = outcome.transform(lambda games: _run_length(games == 0)).astype(float)
    nfl[b] = outcome.transform(lambda games: _run_length(games == 1)).astype(float)

In [23]:
# Fill the streak columns. Each entry is (column for the 0-outcome streak,
# column for the 1-outcome streak, outcome column): straight-up results,
# results against the spread, and over/under results.
streak_specs = [
    ('tm_losing_streak', 'tm_winning_streak', 'result'),
    ('tm_not_cover_streak', 'tm_cover_streak', 'spread_outcome'),
    ('tm_under_streak', 'tm_over_streak', 'ou_result'),
]
for spec in streak_specs:
    streaks(*spec)

In [24]:
# Per-game offensive and defensive stat columns
off_def_cats = [
    'team_off_1stdwn', 'team_off_totyd', 'team_off_passy', 'team_off_rushy', 'team_off_to',
    'team_def_1std', 'team_def_totyd', 'team_def_passy', 'team_def_rushy', 'team_def_to',
]

# A blank in these columns means 0 occurred in the game, so NaN becomes 0
# (only these columns are touched)
nfl = nfl.fillna({column: 0 for column in off_def_cats})

In [25]:
# Weekly team offence & defence averages: the season-to-date (expanding) mean
# of each per-game stat, computed per (season, team) and including the current
# game. Keys are the new column names, values the per-game source columns; the
# columns are appended in this order.
season_avg_sources = {
    'tm_off_score_avg': 'team_score',
    'tm_def_score_avg': 'opp_score',
    'tm_off_1stdwn_avg': 'team_off_1stdwn',
    'tm_off_totyd_avg': 'team_off_totyd',
    'tm_off_passy_avg': 'team_off_passy',
    'tm_cmp_avg': 'team_cmp',
    'tm_pass_att_avg': 'team_p_att',
    'tm_pass_td_avg': 'team_p_td',
    'tm_pass_int_avg': 'int',
    'tm_sacked_avg': 'team_sacked',
    'tm_off_rushy_avg': 'team_off_rushy',
    'tm_rush_att_avg': 'team_r_att',
    'tm_rush_td_avg': 'team_r_td',
    'tm_off_to_avg': 'team_off_to',
    'tm_def_1std_avg': 'team_def_1std',
    'tm_def_totyd_avg': 'team_def_totyd',
    'tm_def_passy_avg': 'team_def_passy',
    'tm_def_rushy_avg': 'team_def_rushy',
    'tm_def_to_avg': 'team_def_to',
}

per_team_season = nfl.groupby(['season', 'team'], sort=False)

nfl = nfl.assign(**{
    avg_column: per_team_season[source].transform(lambda games: games.expanding().mean())
    for avg_column, source in season_avg_sources.items()
})

In [26]:
# quick inspection
nfl.head()

Unnamed: 0,season,week,game,day,date,time,tm_off_bye,result,ot,wins,losses,ties,h_or_a,team,opp,team_score,opp_score,team_off_1stdwn,team_off_totyd,team_off_passy,team_off_rushy,team_off_to,team_def_1std,team_def_totyd,team_def_passy,team_def_rushy,team_def_to,spread,spread_outcome,over/under,ou_result,team_cmp,team_p_att,team_cmp%,team_p_td,int,team_sacked,team_qbr,team_r_att,team_r_yrd_att,team_r_td,tm_losing_streak,tm_winning_streak,tm_not_cover_streak,tm_cover_streak,tm_under_streak,tm_over_streak,tm_off_score_avg,tm_def_score_avg,tm_off_1stdwn_avg,tm_off_totyd_avg,tm_off_passy_avg,tm_cmp_avg,tm_pass_att_avg,tm_pass_td_avg,tm_pass_int_avg,tm_sacked_avg,tm_off_rushy_avg,tm_rush_att_avg,tm_rush_td_avg,tm_off_to_avg,tm_def_1std_avg,tm_def_totyd_avg,tm_def_passy_avg,tm_def_rushy_avg,tm_def_to_avg
0,2010,1,1,0,2010-09-12,0,0,0,0,0,1,0,0,Buffalo Bills,Miami Dolphins,10.0,15.0,9.0,166.0,116.0,50.0,0.0,19.0,296.0,164.0,132.0,0.0,3.0,0,39.0,0,18,34,0.529,1,0,3,73.0,17,2.941,0,1.0,0.0,1.0,0.0,1.0,0.0,10.0,15.0,9.0,166.0,116.0,18.0,34.0,1.0,0.0,3.0,50.0,17.0,0.0,0.0,19.0,296.0,164.0,132.0,0.0
1,2010,2,2,0,2010-09-19,0,0,0,0,0,2,0,1,Buffalo Bills,Green Bay Packers,7.0,34.0,14.0,186.0,62.0,124.0,2.0,18.0,346.0,255.0,91.0,0.0,13.0,0,43.0,0,11,18,0.611,0,2,4,37.0,32,3.875,1,2.0,0.0,2.0,0.0,2.0,0.0,8.5,24.5,11.5,176.0,89.0,14.5,26.0,0.5,1.0,3.5,87.0,24.5,0.5,1.0,18.5,321.0,209.5,111.5,0.0
2,2010,3,3,0,2010-09-26,0,0,0,0,0,3,0,1,Buffalo Bills,New England Patriots,30.0,38.0,19.0,374.0,240.0,134.0,2.0,25.0,445.0,245.0,200.0,1.0,14.5,1,43.0,1,20,28,0.714,2,2,1,92.4,24,5.583,0,3.0,0.0,0.0,1.0,0.0,1.0,15.666667,29.0,14.0,242.0,139.333333,16.333333,26.666667,1.0,1.333333,2.666667,102.666667,24.333333,0.333333,1.333333,20.666667,362.333333,221.333333,141.0,0.333333
3,2010,4,4,0,2010-10-03,0,0,0,0,0,4,0,0,Buffalo Bills,New York Jets,14.0,38.0,12.0,223.0,109.0,114.0,2.0,25.0,444.0,171.0,273.0,0.0,6.0,0,37.0,1,12,27,0.444,2,0,3,83.6,16,7.125,0,4.0,0.0,1.0,0.0,0.0,2.0,15.25,31.25,13.5,237.25,131.75,15.25,26.75,1.25,1.0,2.75,105.5,22.25,0.25,1.5,21.75,382.75,208.75,174.0,0.25
4,2010,5,5,0,2010-10-10,0,0,0,0,0,5,0,0,Buffalo Bills,Jacksonville Jaguars,26.0,36.0,17.0,306.0,196.0,110.0,0.0,21.0,381.0,165.0,216.0,3.0,-2.5,0,41.5,1,20,30,0.667,3,0,3,121.5,21,5.238,0,5.0,0.0,2.0,0.0,0.0,3.0,17.4,32.2,14.2,251.0,144.6,16.2,27.4,1.6,0.8,2.8,106.4,22.0,0.2,1.2,21.6,382.4,200.0,182.4,0.8


In [27]:
# function to get opponents weekly post game averages
def opp_avg(d):
    """Add the opponent's value of column ``d`` to every row of the global ``nfl``.

    For each row, the opponent's own row for the same game is the row whose
    ``(season, date, team)`` equals this row's ``(season, date, opp)``. The
    value of ``d`` on that row is written to a new column whose name is ``d``
    with its first two characters replaced by ``'opp'``
    (``'tm_off_score_avg'`` -> ``'opp_off_score_avg'``).

    Parameters
    ----------
    d : str
        Name of an existing ``nfl`` column, expected to start with ``'tm'``.

    Returns
    -------
    None
        ``nfl`` is modified in place.

    Notes
    -----
    This is one dictionary lookup per row instead of comparing every row with
    every other row. If several rows share a ``(season, date, team)`` key the
    last one is used, as before. A row whose opponent has no matching row gets
    NaN; previously it silently received the value found for the preceding
    row, or raised ``NameError`` when it was the first row.
    """
    y = 'opp' + d[2:]  # for creating column header

    # (season, date, team) -> that team's value of `d`; later rows overwrite
    # earlier ones with the same key
    own_keys = zip(nfl['season'], nfl['date'], nfl['team'])
    value_by_team_game = dict(zip(own_keys, nfl[d]))

    # look each row's opponent up under the same (season, date)
    opponent_keys = zip(nfl['season'], nfl['date'], nfl['opp'])
    nfl[y] = [value_by_team_game.get(key, np.nan) for key in opponent_keys]

In [28]:
nfl.columns

Index(['season', 'week', 'game', 'day', 'date', 'time', 'tm_off_bye', 'result',
       'ot', 'wins', 'losses', 'ties', 'h_or_a', 'team', 'opp', 'team_score',
       'opp_score', 'team_off_1stdwn', 'team_off_totyd', 'team_off_passy',
       'team_off_rushy', 'team_off_to', 'team_def_1std', 'team_def_totyd',
       'team_def_passy', 'team_def_rushy', 'team_def_to', 'spread',
       'spread_outcome', 'over/under', 'ou_result', 'team_cmp', 'team_p_att',
       'team_cmp%', 'team_p_td', 'int', 'team_sacked', 'team_qbr',
       'team_r_att', 'team_r_yrd_att', 'team_r_td', 'tm_losing_streak',
       'tm_winning_streak', 'tm_not_cover_streak', 'tm_cover_streak',
       'tm_under_streak', 'tm_over_streak', 'tm_off_score_avg',
       'tm_def_score_avg', 'tm_off_1stdwn_avg', 'tm_off_totyd_avg',
       'tm_off_passy_avg', 'tm_cmp_avg', 'tm_pass_att_avg', 'tm_pass_td_avg',
       'tm_pass_int_avg', 'tm_sacked_avg', 'tm_off_rushy_avg',
       'tm_rush_att_avg', 'tm_rush_td_avg', 'tm_off_to_avg', 'tm

In [29]:
# Team-side columns for which an opponent-side counterpart is wanted
# (each name is passed to opp_avg below)
col_names = [
    'tm_off_bye',
    'tm_losing_streak', 'tm_winning_streak',
    'tm_not_cover_streak', 'tm_cover_streak',
    'tm_under_streak', 'tm_over_streak',
    'tm_off_score_avg', 'tm_def_score_avg',
    'tm_off_1stdwn_avg', 'tm_off_totyd_avg', 'tm_off_passy_avg',
    'tm_cmp_avg', 'tm_pass_att_avg', 'tm_pass_td_avg', 'tm_pass_int_avg', 'tm_sacked_avg',
    'tm_off_rushy_avg', 'tm_rush_att_avg', 'tm_rush_td_avg',
    'tm_off_to_avg',
    'tm_def_1std_avg', 'tm_def_totyd_avg', 'tm_def_passy_avg', 'tm_def_rushy_avg',
    'tm_def_to_avg',
]

# run function once per column, in list order
for stat_column in col_names:
    opp_avg(stat_column)

In [30]:
# inspection
nfl.head()

Unnamed: 0,season,week,game,day,date,time,tm_off_bye,result,ot,wins,losses,ties,h_or_a,team,opp,team_score,opp_score,team_off_1stdwn,team_off_totyd,team_off_passy,team_off_rushy,team_off_to,team_def_1std,team_def_totyd,team_def_passy,team_def_rushy,team_def_to,spread,spread_outcome,over/under,ou_result,team_cmp,team_p_att,team_cmp%,team_p_td,int,team_sacked,team_qbr,team_r_att,team_r_yrd_att,team_r_td,tm_losing_streak,tm_winning_streak,tm_not_cover_streak,tm_cover_streak,tm_under_streak,tm_over_streak,tm_off_score_avg,tm_def_score_avg,tm_off_1stdwn_avg,tm_off_totyd_avg,tm_off_passy_avg,tm_cmp_avg,tm_pass_att_avg,tm_pass_td_avg,tm_pass_int_avg,tm_sacked_avg,tm_off_rushy_avg,tm_rush_att_avg,tm_rush_td_avg,tm_off_to_avg,tm_def_1std_avg,tm_def_totyd_avg,tm_def_passy_avg,tm_def_rushy_avg,tm_def_to_avg,opp_off_bye,opp_losing_streak,opp_winning_streak,opp_not_cover_streak,opp_cover_streak,opp_under_streak,opp_over_streak,opp_off_score_avg,opp_def_score_avg,opp_off_1stdwn_avg,opp_off_totyd_avg,opp_off_passy_avg,opp_cmp_avg,opp_pass_att_avg,opp_pass_td_avg,opp_pass_int_avg,opp_sacked_avg,opp_off_rushy_avg,opp_rush_att_avg,opp_rush_td_avg,opp_off_to_avg,opp_def_1std_avg,opp_def_totyd_avg,opp_def_passy_avg,opp_def_rushy_avg,opp_def_to_avg
0,2010,1,1,0,2010-09-12,0,0,0,0,0,1,0,0,Buffalo Bills,Miami Dolphins,10.0,15.0,9.0,166.0,116.0,50.0,0.0,19.0,296.0,164.0,132.0,0.0,3.0,0,39.0,0,18,34,0.529,1,0,3,73.0,17,2.941,0,1.0,0.0,1.0,0.0,1.0,0.0,10.0,15.0,9.0,166.0,116.0,18.0,34.0,1.0,0.0,3.0,50.0,17.0,0.0,0.0,19.0,296.0,164.0,132.0,0.0,0,0.0,1.0,0.0,1.0,1.0,0.0,15.0,10.0,19.0,296.0,164.0,21.0,34.0,0.0,0.0,3.0,132.0,36.0,1.0,0.0,9.0,166.0,116.0,50.0,0.0
1,2010,2,2,0,2010-09-19,0,0,0,0,0,2,0,1,Buffalo Bills,Green Bay Packers,7.0,34.0,14.0,186.0,62.0,124.0,2.0,18.0,346.0,255.0,91.0,0.0,13.0,0,43.0,0,11,18,0.611,0,2,4,37.0,32,3.875,1,2.0,0.0,2.0,0.0,2.0,0.0,8.5,24.5,11.5,176.0,89.0,14.5,26.0,0.5,1.0,3.5,87.0,24.5,0.5,1.0,18.5,321.0,209.5,111.5,0.0,0,0.0,2.0,0.0,2.0,2.0,0.0,30.5,13.5,20.0,322.5,211.0,19.0,30.0,2.0,1.0,1.5,111.5,30.0,1.5,1.0,15.0,253.0,116.5,136.5,1.5
2,2010,3,3,0,2010-09-26,0,0,0,0,0,3,0,1,Buffalo Bills,New England Patriots,30.0,38.0,19.0,374.0,240.0,134.0,2.0,25.0,445.0,245.0,200.0,1.0,14.5,1,43.0,1,20,28,0.714,2,2,1,92.4,24,5.583,0,3.0,0.0,0.0,1.0,0.0,1.0,15.666667,29.0,14.0,242.0,139.333333,16.333333,26.666667,1.0,1.333333,2.666667,102.666667,24.333333,0.333333,1.333333,20.666667,362.333333,221.333333,141.0,0.333333,0,0.0,1.0,2.0,0.0,0.0,3.0,30.0,27.333333,21.666667,370.666667,247.333333,22.0,32.666667,2.666667,0.666667,0.666667,123.333333,27.0,0.666667,1.333333,22.666667,379.333333,260.333333,119.0,1.333333
3,2010,4,4,0,2010-10-03,0,0,0,0,0,4,0,0,Buffalo Bills,New York Jets,14.0,38.0,12.0,223.0,109.0,114.0,2.0,25.0,444.0,171.0,273.0,0.0,6.0,0,37.0,1,12,27,0.444,2,0,3,83.6,16,7.125,0,4.0,0.0,1.0,0.0,0.0,2.0,15.25,31.25,13.5,237.25,131.75,15.25,26.75,1.25,1.0,2.75,105.5,22.25,0.25,1.5,21.75,382.75,208.75,174.0,0.25,0,0.0,3.0,0.0,3.0,0.0,3.0,26.5,15.25,18.5,339.5,171.75,15.5,26.25,2.25,0.0,1.25,167.75,32.75,0.75,0.25,19.0,308.0,233.25,74.75,2.25
4,2010,5,5,0,2010-10-10,0,0,0,0,0,5,0,0,Buffalo Bills,Jacksonville Jaguars,26.0,36.0,17.0,306.0,196.0,110.0,0.0,21.0,381.0,165.0,216.0,3.0,-2.5,0,41.5,1,20,30,0.667,3,0,3,121.5,21,5.238,0,5.0,0.0,2.0,0.0,0.0,3.0,17.4,32.2,14.2,251.0,144.6,16.2,27.4,1.6,0.8,2.8,106.4,22.0,0.2,1.2,21.6,382.4,200.0,182.4,0.8,0,0.0,2.0,0.0,2.0,0.0,2.0,21.4,27.4,19.0,311.6,164.8,17.6,27.0,1.8,1.2,2.4,146.8,33.0,0.4,2.0,21.2,385.0,282.2,102.8,1.4


In [31]:
# function to get teams weekly pre game averages
def pre_game_team_avg(d):
    """Add the value of ``d`` that a team carried into each game.

    Creates the column ``'entering_' + d`` on the global ``nfl`` frame. For a
    team's first game of a season the value is ``0.0``; for every later game
    it is the value of ``d`` on that team's previous game of the same season.

    Parameters
    ----------
    d : str
        Name of an existing numeric ``nfl`` column holding post-game values.

    Returns
    -------
    None
        ``nfl`` is modified in place.

    Notes
    -----
    The previous game is found with a shift inside each (season, team) group,
    so a value can never come from another team or season. The earlier version
    read ``teams[m-1]``, i.e. whichever row happened to sit directly above.
    The result is a numeric column; filling an ``''`` placeholder cell by cell
    with chained assignment (``nfl[y][m] = ...``) left it as object dtype.
    Rows are assumed to be in game order within each (season, team).
    """
    y = 'entering_' + d  # for creating column header

    by_team_season = nfl.groupby(['season', 'team'], sort=False, observed=True)

    # value of `d` after the team's previous game (NaN on a group's first row)
    previous_game = by_team_season[d].shift()

    # a team enters its first game of the season with nothing accumulated
    is_first_game = by_team_season.cumcount() == 0
    nfl[y] = previous_game.where(~is_first_game, 0.0)

In [32]:
# Create and fill one 'entering_*' column for every name in col_names
for stat_column in col_names:
    pre_game_team_avg(stat_column)

In [33]:
# function to get opponents weekly entering game averages
def entering_opp_avg(d):
    """Add the opponent's value of an ``entering_tm...`` column to the global ``nfl``.

    For each row, the opponent's own row for the same game is the row whose
    ``(season, date, team)`` equals this row's ``(season, date, opp)``. The
    value of ``d`` on that row is written to a new column named
    ``'entering_opp' + d[11:]``
    (``'entering_tm_off_score_avg'`` -> ``'entering_opp_off_score_avg'``).

    Parameters
    ----------
    d : str
        Name of an existing ``nfl`` column, expected to start with
        ``'entering_tm'`` (11 characters).

    Returns
    -------
    None
        ``nfl`` is modified in place.

    Notes
    -----
    This is one dictionary lookup per row instead of comparing every row with
    every other row. If several rows share a ``(season, date, team)`` key the
    last one is used, as before. A row whose opponent has no matching row gets
    NaN; previously it silently received the value found for the preceding
    row, or raised ``NameError`` when it was the first row.
    """
    y = 'entering_opp' + d[11:]  # for creating column header

    # (season, date, team) -> that team's value of `d`; later rows overwrite
    # earlier ones with the same key
    own_keys = zip(nfl['season'], nfl['date'], nfl['team'])
    value_by_team_game = dict(zip(own_keys, nfl[d]))

    # look each row's opponent up under the same (season, date)
    opponent_keys = zip(nfl['season'], nfl['date'], nfl['opp'])
    nfl[y] = [value_by_team_game.get(key, np.nan) for key in opponent_keys]

In [34]:
# preview the first five rows, now carrying the entering_tm_* columns
nfl.head(n = 5)

Unnamed: 0,season,week,game,day,date,time,tm_off_bye,result,ot,wins,losses,ties,h_or_a,team,opp,team_score,opp_score,team_off_1stdwn,team_off_totyd,team_off_passy,team_off_rushy,team_off_to,team_def_1std,team_def_totyd,team_def_passy,team_def_rushy,team_def_to,spread,spread_outcome,over/under,ou_result,team_cmp,team_p_att,team_cmp%,team_p_td,int,team_sacked,team_qbr,team_r_att,team_r_yrd_att,team_r_td,tm_losing_streak,tm_winning_streak,tm_not_cover_streak,tm_cover_streak,tm_under_streak,tm_over_streak,tm_off_score_avg,tm_def_score_avg,tm_off_1stdwn_avg,tm_off_totyd_avg,tm_off_passy_avg,tm_cmp_avg,tm_pass_att_avg,tm_pass_td_avg,tm_pass_int_avg,tm_sacked_avg,tm_off_rushy_avg,tm_rush_att_avg,tm_rush_td_avg,tm_off_to_avg,tm_def_1std_avg,tm_def_totyd_avg,tm_def_passy_avg,tm_def_rushy_avg,tm_def_to_avg,opp_off_bye,opp_losing_streak,opp_winning_streak,opp_not_cover_streak,opp_cover_streak,opp_under_streak,opp_over_streak,opp_off_score_avg,opp_def_score_avg,opp_off_1stdwn_avg,opp_off_totyd_avg,opp_off_passy_avg,opp_cmp_avg,opp_pass_att_avg,opp_pass_td_avg,opp_pass_int_avg,opp_sacked_avg,opp_off_rushy_avg,opp_rush_att_avg,opp_rush_td_avg,opp_off_to_avg,opp_def_1std_avg,opp_def_totyd_avg,opp_def_passy_avg,opp_def_rushy_avg,opp_def_to_avg,entering_tm_off_bye,entering_tm_losing_streak,entering_tm_winning_streak,entering_tm_not_cover_streak,entering_tm_cover_streak,entering_tm_under_streak,entering_tm_over_streak,entering_tm_off_score_avg,entering_tm_def_score_avg,entering_tm_off_1stdwn_avg,entering_tm_off_totyd_avg,entering_tm_off_passy_avg,entering_tm_cmp_avg,entering_tm_pass_att_avg,entering_tm_pass_td_avg,entering_tm_pass_int_avg,entering_tm_sacked_avg,entering_tm_off_rushy_avg,entering_tm_rush_att_avg,entering_tm_rush_td_avg,entering_tm_off_to_avg,entering_tm_def_1std_avg,entering_tm_def_totyd_avg,entering_tm_def_passy_avg,entering_tm_def_rushy_avg,entering_tm_def_to_avg
0,2010,1,1,0,2010-09-12,0,0,0,0,0,1,0,0,Buffalo Bills,Miami Dolphins,10.0,15.0,9.0,166.0,116.0,50.0,0.0,19.0,296.0,164.0,132.0,0.0,3.0,0,39.0,0,18,34,0.529,1,0,3,73.0,17,2.941,0,1.0,0.0,1.0,0.0,1.0,0.0,10.0,15.0,9.0,166.0,116.0,18.0,34.0,1.0,0.0,3.0,50.0,17.0,0.0,0.0,19.0,296.0,164.0,132.0,0.0,0,0.0,1.0,0.0,1.0,1.0,0.0,15.0,10.0,19.0,296.0,164.0,21.0,34.0,0.0,0.0,3.0,132.0,36.0,1.0,0.0,9.0,166.0,116.0,50.0,0.0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2010,2,2,0,2010-09-19,0,0,0,0,0,2,0,1,Buffalo Bills,Green Bay Packers,7.0,34.0,14.0,186.0,62.0,124.0,2.0,18.0,346.0,255.0,91.0,0.0,13.0,0,43.0,0,11,18,0.611,0,2,4,37.0,32,3.875,1,2.0,0.0,2.0,0.0,2.0,0.0,8.5,24.5,11.5,176.0,89.0,14.5,26.0,0.5,1.0,3.5,87.0,24.5,0.5,1.0,18.5,321.0,209.5,111.5,0.0,0,0.0,2.0,0.0,2.0,2.0,0.0,30.5,13.5,20.0,322.5,211.0,19.0,30.0,2.0,1.0,1.5,111.5,30.0,1.5,1.0,15.0,253.0,116.5,136.5,1.5,0,1,0,1,0,1,0,10.0,15.0,9.0,166.0,116.0,18.0,34.0,1.0,0.0,3.0,50.0,17.0,0.0,0.0,19.0,296.0,164.0,132.0,0.0
2,2010,3,3,0,2010-09-26,0,0,0,0,0,3,0,1,Buffalo Bills,New England Patriots,30.0,38.0,19.0,374.0,240.0,134.0,2.0,25.0,445.0,245.0,200.0,1.0,14.5,1,43.0,1,20,28,0.714,2,2,1,92.4,24,5.583,0,3.0,0.0,0.0,1.0,0.0,1.0,15.666667,29.0,14.0,242.0,139.333333,16.333333,26.666667,1.0,1.333333,2.666667,102.666667,24.333333,0.333333,1.333333,20.666667,362.333333,221.333333,141.0,0.333333,0,0.0,1.0,2.0,0.0,0.0,3.0,30.0,27.333333,21.666667,370.666667,247.333333,22.0,32.666667,2.666667,0.666667,0.666667,123.333333,27.0,0.666667,1.333333,22.666667,379.333333,260.333333,119.0,1.333333,0,2,0,2,0,2,0,8.5,24.5,11.5,176.0,89.0,14.5,26.0,0.5,1.0,3.5,87.0,24.5,0.5,1.0,18.5,321.0,209.5,111.5,0.0
3,2010,4,4,0,2010-10-03,0,0,0,0,0,4,0,0,Buffalo Bills,New York Jets,14.0,38.0,12.0,223.0,109.0,114.0,2.0,25.0,444.0,171.0,273.0,0.0,6.0,0,37.0,1,12,27,0.444,2,0,3,83.6,16,7.125,0,4.0,0.0,1.0,0.0,0.0,2.0,15.25,31.25,13.5,237.25,131.75,15.25,26.75,1.25,1.0,2.75,105.5,22.25,0.25,1.5,21.75,382.75,208.75,174.0,0.25,0,0.0,3.0,0.0,3.0,0.0,3.0,26.5,15.25,18.5,339.5,171.75,15.5,26.25,2.25,0.0,1.25,167.75,32.75,0.75,0.25,19.0,308.0,233.25,74.75,2.25,0,3,0,0,1,0,1,15.6667,29.0,14.0,242.0,139.333,16.3333,26.6667,1.0,1.33333,2.66667,102.667,24.3333,0.333333,1.33333,20.6667,362.333,221.333,141.0,0.333333
4,2010,5,5,0,2010-10-10,0,0,0,0,0,5,0,0,Buffalo Bills,Jacksonville Jaguars,26.0,36.0,17.0,306.0,196.0,110.0,0.0,21.0,381.0,165.0,216.0,3.0,-2.5,0,41.5,1,20,30,0.667,3,0,3,121.5,21,5.238,0,5.0,0.0,2.0,0.0,0.0,3.0,17.4,32.2,14.2,251.0,144.6,16.2,27.4,1.6,0.8,2.8,106.4,22.0,0.2,1.2,21.6,382.4,200.0,182.4,0.8,0,0.0,2.0,0.0,2.0,0.0,2.0,21.4,27.4,19.0,311.6,164.8,17.6,27.0,1.8,1.2,2.4,146.8,33.0,0.4,2.0,21.2,385.0,282.2,102.8,1.4,0,4,0,1,0,0,2,15.25,31.25,13.5,237.25,131.75,15.25,26.75,1.25,1.0,2.75,105.5,22.25,0.25,1.5,21.75,382.75,208.75,174.0,0.25


In [35]:
# team-side entering columns to mirror for the opponent
entering_cols = [
    # per-game averages
    'entering_tm_off_score_avg', 'entering_tm_def_score_avg',
    'entering_tm_off_1stdwn_avg', 'entering_tm_off_totyd_avg',
    'entering_tm_off_passy_avg', 'entering_tm_cmp_avg',
    'entering_tm_pass_att_avg', 'entering_tm_pass_td_avg',
    'entering_tm_pass_int_avg', 'entering_tm_sacked_avg',
    'entering_tm_off_rushy_avg', 'entering_tm_rush_att_avg',
    'entering_tm_rush_td_avg', 'entering_tm_off_to_avg',
    'entering_tm_def_1std_avg', 'entering_tm_def_totyd_avg',
    'entering_tm_def_passy_avg', 'entering_tm_def_rushy_avg',
    'entering_tm_def_to_avg',
    # streaks
    'entering_tm_losing_streak', 'entering_tm_winning_streak',
    'entering_tm_not_cover_streak', 'entering_tm_cover_streak',
    'entering_tm_under_streak', 'entering_tm_over_streak',
]

# add the matching entering_opp_* column for each of them
for stat_col in entering_cols:
    entering_opp_avg(stat_col)

In [36]:
# create column for winning percentage
# calculate weekly entering winning percentage: (wins + ties / 2) / games played,
# all read from the row above (the record after the previous game)
prior = nfl[['wins', 'ties', 'game']].shift(1)
entering_pct = ((prior['wins'] + prior['ties'] / 2) / prior['game']).round(3)

# game 1 rows have no earlier game to look back on, so they start at 0.0;
# assigning the whole column at once keeps it numeric instead of object
nfl['tm_winning%'] = entering_pct.mask(nfl['game'] == 1, 0.0)

In [37]:
# look up each opponent's entering winning percentage
entering_opp_avg('tm_winning%')

# 'tm_winning%' is exactly 11 characters long, so entering_opp_avg named its
# output plain 'entering_opp'; give it its real name
nfl.rename({'entering_opp': 'opp_winning%'}, axis = 'columns', inplace = True)

In [38]:
# create column for time between games
# calculate time between games: whole days between this row's date and the
# date on the row above; .dt.days reads the day count directly, so gaps of any
# length come out right
days_since_prev = (nfl['date'] - nfl['date'].shift(1)).dt.days

# game 1 rows have no earlier game to measure from, so they are set to 0;
# every row is stored as a real integer (the cast raises if the first row is
# not a game 1 row, since it has nothing above it)
nfl['tm_btwn_gms'] = days_since_prev.mask(nfl['game'] == 1, 0).astype(int)

In [39]:
# look up each opponent's days since its previous game
entering_opp_avg('tm_btwn_gms')

# 'tm_btwn_gms' is exactly 11 characters long, so entering_opp_avg named its
# output plain 'entering_opp'; give it its real name
nfl.rename({'entering_opp': 'opp_btwn_gms'}, axis = 'columns', inplace = True)

# store both day counts as integers
nfl['tm_btwn_gms'] = nfl['tm_btwn_gms'].astype(int)
nfl['opp_btwn_gms'] = nfl['opp_btwn_gms'].astype(int)

In [40]:
# columns pulled from nfl, in output order
keep_cols = [
    # game info and box score
    'season', 'week', 'game', 'day', 'date', 'time', 'tm_off_bye', 'result', 'ot', 'wins', 'losses',
    'ties', 'h_or_a', 'team', 'opp', 'team_score', 'opp_score', 'team_off_1stdwn', 'team_off_totyd',
    'team_off_passy', 'team_off_rushy', 'team_off_to', 'team_def_1std', 'team_def_totyd',
    'team_def_passy', 'team_def_rushy', 'team_def_to', 'spread', 'spread_outcome', 'over/under',
    'ou_result',
    # streak columns that get the xxtm_ prefix below
    'tm_losing_streak', 'tm_winning_streak', 'tm_not_cover_streak', 'tm_cover_streak',
    'tm_under_streak', 'tm_over_streak',
    'opp_off_bye',
    # team entering averages
    'entering_tm_off_score_avg', 'entering_tm_def_score_avg', 'entering_tm_off_1stdwn_avg',
    'entering_tm_off_totyd_avg', 'entering_tm_off_passy_avg', 'entering_tm_cmp_avg',
    'entering_tm_pass_att_avg', 'entering_tm_pass_td_avg', 'entering_tm_pass_int_avg',
    'entering_tm_sacked_avg', 'entering_tm_off_rushy_avg', 'entering_tm_rush_att_avg',
    'entering_tm_rush_td_avg', 'entering_tm_off_to_avg', 'entering_tm_def_1std_avg',
    'entering_tm_def_totyd_avg', 'entering_tm_def_passy_avg', 'entering_tm_def_rushy_avg',
    'entering_tm_def_to_avg',
    # team entering streaks
    'entering_tm_losing_streak', 'entering_tm_winning_streak', 'entering_tm_not_cover_streak',
    'entering_tm_cover_streak', 'entering_tm_under_streak', 'entering_tm_over_streak',
    # opponent entering averages
    'entering_opp_off_score_avg', 'entering_opp_def_score_avg', 'entering_opp_off_1stdwn_avg',
    'entering_opp_off_totyd_avg', 'entering_opp_off_passy_avg', 'entering_opp_cmp_avg',
    'entering_opp_pass_att_avg', 'entering_opp_pass_td_avg', 'entering_opp_pass_int_avg',
    'entering_opp_sacked_avg', 'entering_opp_off_rushy_avg', 'entering_opp_rush_att_avg',
    'entering_opp_rush_td_avg', 'entering_opp_off_to_avg', 'entering_opp_def_1std_avg',
    'entering_opp_def_totyd_avg', 'entering_opp_def_passy_avg', 'entering_opp_def_rushy_avg',
    'entering_opp_def_to_avg',
    # opponent entering streaks
    'entering_opp_losing_streak', 'entering_opp_winning_streak', 'entering_opp_not_cover_streak',
    'entering_opp_cover_streak', 'entering_opp_under_streak', 'entering_opp_over_streak',
    # winning percentage and days between games
    'tm_winning%', 'opp_winning%', 'tm_btwn_gms', 'opp_btwn_gms',
]

# replacement names, position for position with keep_cols: the six streak
# columns get an xxtm_ prefix and the entering_ prefix is removed everywhere else
clean_names = [
    'season', 'week', 'game', 'day', 'date', 'time', 'tm_off_bye', 'result', 'ot', 'wins', 'losses',
    'ties', 'h_or_a', 'team', 'opp', 'team_score', 'opp_score', 'team_off_1stdwn', 'team_off_totyd',
    'team_off_passy', 'team_off_rushy', 'team_off_to', 'team_def_1std', 'team_def_totyd',
    'team_def_passy', 'team_def_rushy', 'team_def_to', 'spread', 'spread_outcome', 'over/under',
    'ou_result',
    'xxtm_losing_streak', 'xxtm_winning_streak', 'xxtm_not_cover_streak', 'xxtm_cover_streak',
    'xxtm_under_streak', 'xxtm_over_streak',
    'opp_off_bye',
    'tm_off_score_avg', 'tm_def_score_avg', 'tm_off_1stdwn_avg',
    'tm_off_totyd_avg', 'tm_off_passy_avg', 'tm_cmp_avg',
    'tm_pass_att_avg', 'tm_pass_td_avg', 'tm_pass_int_avg',
    'tm_sacked_avg', 'tm_off_rushy_avg', 'tm_rush_att_avg',
    'tm_rush_td_avg', 'tm_off_to_avg', 'tm_def_1std_avg',
    'tm_def_totyd_avg', 'tm_def_passy_avg', 'tm_def_rushy_avg',
    'tm_def_to_avg',
    'tm_losing_streak', 'tm_winning_streak', 'tm_not_cover_streak',
    'tm_cover_streak', 'tm_under_streak', 'tm_over_streak',
    'opp_off_score_avg', 'opp_def_score_avg', 'opp_off_1stdwn_avg',
    'opp_off_totyd_avg', 'opp_off_passy_avg', 'opp_cmp_avg',
    'opp_pass_att_avg', 'opp_pass_td_avg', 'opp_pass_int_avg',
    'opp_sacked_avg', 'opp_off_rushy_avg', 'opp_rush_att_avg',
    'opp_rush_td_avg', 'opp_off_to_avg', 'opp_def_1std_avg',
    'opp_def_totyd_avg', 'opp_def_passy_avg', 'opp_def_rushy_avg',
    'opp_def_to_avg',
    'opp_losing_streak', 'opp_winning_streak', 'opp_not_cover_streak',
    'opp_cover_streak', 'opp_under_streak', 'opp_over_streak',
    'tm_winning%', 'opp_winning%', 'tm_btwn_gms', 'opp_btwn_gms',
]

# create new df
nfl_cleaned = nfl[keep_cols]
nfl_cleaned.columns = clean_names

# inspect
nfl_cleaned.head()

Unnamed: 0,season,week,game,day,date,time,tm_off_bye,result,ot,wins,losses,ties,h_or_a,team,opp,team_score,opp_score,team_off_1stdwn,team_off_totyd,team_off_passy,team_off_rushy,team_off_to,team_def_1std,team_def_totyd,team_def_passy,team_def_rushy,team_def_to,spread,spread_outcome,over/under,ou_result,xxtm_losing_streak,xxtm_winning_streak,xxtm_not_cover_streak,xxtm_cover_streak,xxtm_under_streak,xxtm_over_streak,opp_off_bye,tm_off_score_avg,tm_def_score_avg,tm_off_1stdwn_avg,tm_off_totyd_avg,tm_off_passy_avg,tm_cmp_avg,tm_pass_att_avg,tm_pass_td_avg,tm_pass_int_avg,tm_sacked_avg,tm_off_rushy_avg,tm_rush_att_avg,tm_rush_td_avg,tm_off_to_avg,tm_def_1std_avg,tm_def_totyd_avg,tm_def_passy_avg,tm_def_rushy_avg,tm_def_to_avg,tm_losing_streak,tm_winning_streak,tm_not_cover_streak,tm_cover_streak,tm_under_streak,tm_over_streak,opp_off_score_avg,opp_def_score_avg,opp_off_1stdwn_avg,opp_off_totyd_avg,opp_off_passy_avg,opp_cmp_avg,opp_pass_att_avg,opp_pass_td_avg,opp_pass_int_avg,opp_sacked_avg,opp_off_rushy_avg,opp_rush_att_avg,opp_rush_td_avg,opp_off_to_avg,opp_def_1std_avg,opp_def_totyd_avg,opp_def_passy_avg,opp_def_rushy_avg,opp_def_to_avg,opp_losing_streak,opp_winning_streak,opp_not_cover_streak,opp_cover_streak,opp_under_streak,opp_over_streak,tm_winning%,opp_winning%,tm_btwn_gms,opp_btwn_gms
0,2010,1,1,0,2010-09-12,0,0,0,0,0,1,0,0,Buffalo Bills,Miami Dolphins,10.0,15.0,9.0,166.0,116.0,50.0,0.0,19.0,296.0,164.0,132.0,0.0,3.0,0,39.0,0,1.0,0.0,1.0,0.0,1.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0,0
1,2010,2,2,0,2010-09-19,0,0,0,0,0,2,0,1,Buffalo Bills,Green Bay Packers,7.0,34.0,14.0,186.0,62.0,124.0,2.0,18.0,346.0,255.0,91.0,0.0,13.0,0,43.0,0,2.0,0.0,2.0,0.0,2.0,0.0,0,10.0,15.0,9.0,166.0,116.0,18.0,34.0,1.0,0.0,3.0,50.0,17.0,0.0,0.0,19.0,296.0,164.0,132.0,0.0,1,0,1,0,1,0,27.0,20.0,22.0,299.0,167.0,19.0,31.0,2.0,2.0,3.0,132.0,33.0,1.0,2.0,16.0,320.0,171.0,149.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,0,1.0,7,7
2,2010,3,3,0,2010-09-26,0,0,0,0,0,3,0,1,Buffalo Bills,New England Patriots,30.0,38.0,19.0,374.0,240.0,134.0,2.0,25.0,445.0,245.0,200.0,1.0,14.5,1,43.0,1,3.0,0.0,0.0,1.0,0.0,1.0,0,8.5,24.5,11.5,176.0,89.0,14.5,26.0,0.5,1.0,3.5,87.0,24.5,0.5,1.0,18.5,321.0,209.5,111.5,0.0,2,0,2,0,2,0,26.0,26.0,20.0,333.5,248.5,22.5,35.5,2.5,1.0,0.5,85.0,21.5,0.0,1.5,24.5,382.0,270.5,111.5,1.0,1.0,0.0,1.0,0.0,0.0,2.0,0,0.5,7,7
3,2010,4,4,0,2010-10-03,0,0,0,0,0,4,0,0,Buffalo Bills,New York Jets,14.0,38.0,12.0,223.0,109.0,114.0,2.0,25.0,444.0,171.0,273.0,0.0,6.0,0,37.0,1,4.0,0.0,1.0,0.0,0.0,2.0,0,15.6667,29.0,14.0,242.0,139.333,16.3333,26.6667,1.0,1.33333,2.66667,102.667,24.3333,0.333333,1.33333,20.6667,362.333,221.333,141.0,0.333333,3,0,0,1,0,1,22.666667,15.666667,16.333333,304.666667,172.0,15.333333,26.333333,2.0,0.0,1.666667,132.666667,27.333333,0.333333,0.333333,21.333333,336.333333,274.666667,61.666667,2.333333,0.0,2.0,0.0,2.0,0.0,2.0,0,0.667,7,7
4,2010,5,5,0,2010-10-10,0,0,0,0,0,5,0,0,Buffalo Bills,Jacksonville Jaguars,26.0,36.0,17.0,306.0,196.0,110.0,0.0,21.0,381.0,165.0,216.0,3.0,-2.5,0,41.5,1,5.0,0.0,2.0,0.0,0.0,3.0,0,15.25,31.25,13.5,237.25,131.75,15.25,26.75,1.25,1.0,2.75,105.5,22.25,0.25,1.5,21.75,382.75,208.75,174.0,0.25,4,0,1,0,0,2,17.75,27.75,18.5,294.25,164.75,18.0,28.75,1.5,1.25,2.5,129.5,31.25,0.5,1.75,22.25,404.75,303.75,101.0,1.75,0.0,1.0,0.0,1.0,0.0,1.0,0,0.5,7,7


In [41]:
# drop the six unnecessary xxtm_* streak columns
nfl_cleaned.drop(
    columns = [
        'xxtm_losing_streak', 'xxtm_winning_streak',
        'xxtm_not_cover_streak', 'xxtm_cover_streak',
        'xxtm_under_streak', 'xxtm_over_streak',
    ],
    inplace = True,
)

In [42]:
# convert necessary added columns from objects to floats 
# cols names the team-side average and streak columns plus tm_winning%
cols = ['tm_off_score_avg', 'tm_def_score_avg', 'tm_off_1stdwn_avg', 'tm_off_totyd_avg', 'tm_off_passy_avg',
        'tm_cmp_avg', 'tm_pass_att_avg', 'tm_pass_td_avg', 'tm_pass_int_avg', 'tm_sacked_avg', 'tm_off_rushy_avg',
        'tm_rush_att_avg', 'tm_rush_td_avg', 'tm_off_to_avg', 'tm_def_1std_avg', 'tm_def_totyd_avg', 'tm_def_passy_avg',
        'tm_def_rushy_avg', 'tm_def_to_avg', 'tm_losing_streak', 'tm_winning_streak', 'tm_not_cover_streak',           
        'tm_cover_streak', 'tm_under_streak', 'tm_over_streak', 'tm_winning%'] 

# NOTE(review): errors = 'ignore' suppresses cast failures rather than raising,
# so values that cannot be cast may be left unconverted without any message --
# confirm the dtypes in the .info() listing below
nfl_cleaned[cols] = nfl_cleaned[cols].astype('float64', errors = 'ignore')

# show each column's non-null count and dtype
nfl_cleaned.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 5632 entries, 0 to 5631
Data columns (total 86 columns):
 #   Column                Non-Null Count  Dtype         
---  ------                --------------  -----         
 0   season                5632 non-null   int64         
 1   week                  5632 non-null   int64         
 2   game                  5632 non-null   int32         
 3   day                   5632 non-null   int64         
 4   date                  5632 non-null   datetime64[ns]
 5   time                  5632 non-null   int64         
 6   tm_off_bye            5632 non-null   int64         
 7   result                5632 non-null   int64         
 8   ot                    5632 non-null   int64         
 9   wins                  5632 non-null   int64         
 10  losses                5632 non-null   int64         
 11  ties                  5632 non-null   int64         
 12  h_or_a                5632 non-null   int64         
 13  team              

In [43]:
# save cleaned data
# forward slashes, matching the read paths at the top of the notebook, resolve
# on every OS; a backslash path only names a folder on Windows
nfl_cleaned.to_csv('../Data/NFL_Data_Cleaned.csv', index = False)
print('nfl_cleaned has been saved as CSV file')

nfl_cleaned has been saved as CSV file
