# General Assembly DSI - Denver 2018
## Capstone Project - DFS Model
This is my capstone project for the fifth cohort of General Assembly's [Data Science Immersive](https://generalassemb.ly/education/data-science-immersive) in 2018. I am developing a model to help optimize NFL lineups on the daily fantasy sports platforms [DraftKings](https://www.draftkings.com/) and [FanDuel](https://www.fanduel.com/).

### Problem Statement

Can we build a model that predicts an NFL player’s fantasy football performance, use those predictions to estimate the player’s value, and combine the model with a daily fantasy strategy to be profitable?

In [2]:
import pandas as pd
import requests
import time
from bs4 import BeautifulSoup

### Quarterbacks

In [49]:
def _parse_qb_row(row):
    """Build one quarterback game record (a dict of strings) from a results-table <tr>.

    Raises AttributeError when the row has no <a> tag or lacks one of the expected
    <td data-stat=...> cells, and IndexError when the link text has fewer than two
    whitespace-separated words.
    """
    def stat(name):
        return row.find('td', attrs = {'data-stat': name}).text

    # Only the first two words of the link text are used: "First Last" -> "Last, First".
    name_parts = row.find('a').text.split()
    game_date = stat('game_date')
    return {
        'Name': name_parts[1] + ', ' + name_parts[0],
        'Age': stat('age'),
        'Date': game_date,
        'Team': stat('team'),
        'Oppt': stat('opp'),
        'Week': stat('week_num'),
        'Year': game_date[:4],
        'Completions': stat('pass_cmp'),
        'Attempts': stat('pass_att'),
        'Yards': stat('pass_yds'),
        'TDs': stat('pass_td'),
        'Interceptions': stat('pass_int'),
        'Rating': stat('pass_rating'),
        'Y/A': stat('pass_yds_per_att'),
    }


quarterbacks = []

# The result set is paged via the `offset` query parameter in steps of 100;
# the number of pages (30) is hard-coded.
for i in range(30):
    url = 'https://www.pro-football-reference.com/play-index/pgl_finder.cgi?request=1&match=game&year_min=2011&year_max=2017&season_start=1&season_end=-1&pos=QB&game_type=R&career_game_num_min=1&career_game_num_max=400&game_num_min=0&game_num_max=99&week_num_min=1&week_num_max=17&is_active=Y&c1stat=pass_att&c1comp=gt&c1val=1&c5val=1.0&order_by=pass_rating&offset={}'.format(i * 100)
    # A timeout keeps a stalled connection from hanging the cell indefinitely.
    res = requests.get(url, timeout = 30)

    if res.status_code == 200:
        # Parse only successful responses.
        soup = BeautifulSoup(res.content, 'lxml')
        for row in soup.find_all('tr')[2:]:
            try:
                quarterbacks.append(_parse_qb_row(row))
            except (AttributeError, IndexError):
                # Rows without a player link or without the expected cells are
                # skipped; other errors (e.g. KeyboardInterrupt) now propagate.
                continue
    else:
        print("Oops something went wrong... Error Code: {}".format(res.status_code))
    # Pause between requests.
    time.sleep(.5)

In [55]:
# One row per scraped quarterback game. Every value was read from an element's
# `.text`, so all columns hold strings at this point.
qb_stats = pd.DataFrame(quarterbacks)

In [59]:
# Week and Year were scraped as text; convert them first so the sort is numeric
# rather than lexicographic (as strings, '10' sorts before '2').
qb_stats[['Week', 'Year']] = qb_stats[['Week', 'Year']].apply(pd.to_numeric)
qb_stats = qb_stats.sort_values(['Week', 'Year'])

In [61]:
# Characters 5-6 of the date string are kept as the month (the year is taken from
# characters 0-3 when scraping). The vectorized string accessor replaces the
# per-element lambda and leaves missing dates as NaN instead of raising.
qb_stats['Month'] = qb_stats['Date'].str[5:7]

In [64]:
# Year and Month have been derived from Date, so the raw column is removed.
qb_stats = qb_stats.drop(columns = ["Date"])

In [67]:
# Lookup from the team codes found in the scraped tables to the abbreviations
# written to the output files.

# Codes that are kept unchanged.
_unchanged_codes = ['JAX', 'CIN', 'BAL', 'ATL', 'DET', 'NYG', 'PIT', 'CAR', 'CLE', 'HOU', 'WAS',
                    'BUF', 'SEA', 'OAK', 'IND', 'NYJ', 'MIA', 'PHI', 'MIN', 'TEN', 'DEN', 'DAL',
                    'CHI', 'LAR', 'ARI', 'LAC']

# Codes that are rewritten; SDG and STL are folded into LAC and LAR.
_rewritten_codes = {'NWE': 'NE', 'NOR': 'NO', 'GNB': 'GB', 'KAN': 'KC',
                    'SDG': 'LAC', 'TAM': 'TB', 'SFO': 'SF', 'STL': 'LAR'}

teams = {code: code for code in _unchanged_codes}
teams.update(_rewritten_codes)

In [70]:
# Normalize team codes. A plain `.map(teams)` turns every value that is not a key
# of `teams` into NaN, including codes that were already normalized (e.g. 'NE'),
# so re-running the cell would wipe them out. Falling back to the original value
# keeps the cell re-runnable and keeps unknown codes visible.
for team_column in ['Oppt', 'Team']:
    qb_stats[team_column] = qb_stats[team_column].map(teams).fillna(qb_stats[team_column])

In [73]:
# Renumber the rows 0..n-1 in their current (sorted) order, discarding the old index.
qb_stats = qb_stats.reset_index(drop = True)

In [74]:
# Write the quarterback table to disk; the row index is not written.
qb_stats.to_csv('../data/qb_stats.csv', index = False)

### Running Backs

In [119]:
def _parse_rb_row(row):
    """Build one running-back game record (a dict of strings) from a results-table <tr>.

    The caller is expected to pass only rows that contain a `rush_att` cell.
    """
    def stat(name):
        return row.find('td', attrs = {'data-stat': name}).text

    # Only the first two words of the link text are used: "First Last" -> "Last, First".
    name_parts = row.find('a').text.split()
    game_date = stat('game_date')
    return {
        'Name': name_parts[1] + ', ' + name_parts[0],
        'Age': stat('age'),
        'Date': game_date,
        'Team': stat('team'),
        'Oppt': stat('opp'),
        'Week': stat('week_num'),
        'Year': game_date[:4],
        'Month': game_date[5:7],
        'Carries': stat('rush_att'),
        'Rush_Yds': stat('rush_yds'),
        'Rush_TDs': stat('rush_td'),
        'Targets': stat('targets'),
        'Receptions': stat('rec'),
        'Rec_Yds': stat('rec_yds'),
        'Rec_TDs': stat('rec_td'),
    }


runningbacks = []

# The result set is paged via the `offset` query parameter in steps of 100;
# the number of pages (43) is hard-coded.
for i in range(43):
    url = 'https://www.pro-football-reference.com/play-index/pgl_finder.cgi?request=1&match=game&year_min=2011&year_max=2017&season_start=1&season_end=-1&pos=RB&game_type=R&career_game_num_min=1&career_game_num_max=400&game_num_min=0&game_num_max=99&week_num_min=1&week_num_max=17&is_active=Y&c1stat=rush_att&c1comp=gt&c1val=1&c2stat=targets&c2comp=gt&c2val=0&c5val=1.0&order_by=game_date&offset={}'.format(i * 100)
    # A timeout keeps a stalled connection from hanging the cell indefinitely.
    res = requests.get(url, timeout = 30)

    if res.status_code == 200:
        # Parse only successful responses.
        soup = BeautifulSoup(res.content, 'lxml')
        for row in soup.find_all('tr')[2:]:
            # Rows without a rush_att cell are not player game lines; skip them.
            if row.find('td', attrs = {'data-stat': 'rush_att'}) is None:
                continue
            runningbacks.append(_parse_rb_row(row))
    else:
        print("Oops something went wrong... Error Code: {}".format(res.status_code))
    # Pause between requests.
    time.sleep(.5)

In [203]:
# One row per scraped running-back game. Every value was read from an element's
# `.text`, so all columns hold strings at this point.
rb_stats = pd.DataFrame(runningbacks)

In [204]:
rb_stats.shape

(4289, 15)

In [235]:
# rb_stats.to_csv('../data/rb_stats.csv', index = False)

In [236]:
# Replace the in-memory frame with the copy stored on disk.
# NOTE(review): the cell that would write this file is commented out, so a fresh
# run depends on '../data/rb_stats.csv' already existing — confirm which
# processing stage that file holds.
rb_stats = pd.read_csv('../data/rb_stats.csv')

In [238]:
# Year and Month already carry the date information kept for modelling.
rb_stats = rb_stats.drop(columns = ['Date'])

In [239]:
# Order by week, then season, and renumber the rows to match the new order.
rb_stats = (
    rb_stats
    .sort_values(by = ['Week', 'Year'])
    .reset_index(drop = True)
)

In [240]:
# NOTE(review): the saved output below starts at Week 2 and shows fractional
# values (e.g. Carries 9.666667) and month names, so it appears to come from a
# CSV written after the rolling-average cells further down — confirm before
# relying on it.
rb_stats.head()

Unnamed: 0,Age,Carries,Month,Name,Oppt,Rec_TDs,Rec_Yds,Receptions,Rush_TDs,Rush_Yds,Targets,Team,Week,Year
0,24.287,9.666667,September,"Blount, LeGarrette",MIN,0.0,2.0,0.333333,0.333333,35.333333,0.666667,TB,2,2011
1,24.265,10.0,September,"Charles, Jamaal",DET,1.0,9.0,5.0,0.0,56.0,6.0,KC,2,2011
2,28.127,17.0,September,"Gore, Frank",DAL,0.0,4.333333,0.333333,1.0,73.333333,1.666667,SF,2,2011
3,21.271,12.333333,September,"Ingram, Mark",CHI,0.0,5.666667,1.0,0.666667,48.333333,1.666667,NO,2,2011
4,23.054,4.5,September,"Jones, Taiwan",BUF,0.0,0.0,0.0,0.0,21.0,0.0,OAK,2,2011


In [210]:
rb_stats['Month'].value_counts()

December     1175
October      1095
November     1021
September     882
January       116
Name: Month, dtype: int64

In [208]:
# Normalize team codes. A plain `.map(teams)` turns every value that is not a key
# of `teams` into NaN, including codes that were already normalized (e.g. 'TB'),
# and those rows would then be removed by a later dropna. Falling back to the
# original value keeps the cell re-runnable and keeps unknown codes visible.
for team_column in ['Team', 'Oppt']:
    rb_stats[team_column] = rb_stats[team_column].map(teams).fillna(rb_stats[team_column])

In [209]:
# Translate month numbers into month names. The scrape yields zero-padded strings
# ('09'), a frame reloaded from CSV yields integers (9), and a re-run sees names
# that are already translated. Matching on the numeric value handles the first
# two cases; values that do not translate are kept as they are instead of being
# replaced by NaN.
month_names = {9: 'September', 10: 'October', 11: 'November', 12: 'December', 1: 'January'}
rb_month_number = pd.to_numeric(rb_stats['Month'], errors = 'coerce')
rb_stats['Month'] = rb_month_number.map(month_names).fillna(rb_stats['Month'])

In [211]:
# Per-game running-back stat columns that get replaced by lagged rolling averages.
features = [
    'Carries',
    'Rec_TDs',
    'Rec_Yds',
    'Receptions',
    'Rush_TDs',
    'Rush_Yds',
    'Targets',
]

In [212]:
# Within each (player, season) group, average every feature over a window of up
# to three consecutive rows ending at the current row, in the frame's current
# row order. min_periods = 0 lets the first rows of a group use shorter windows.
rb_player_seasons = rb_stats.groupby(['Name', 'Year'])[features]
rolling_rb_stats = rb_player_seasons.rolling(window = 3, min_periods = 0).mean()

In [214]:
# Move each group's averages down by one row so a row only carries values computed
# from the rows before it; the first row of every group becomes NaN.
rb_rolling_groups = rolling_rb_stats.groupby(level = [0, 1])
shifted_rb_stats = rb_rolling_groups.shift(periods = 1)

In [217]:
shifted_rb_stats.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Carries,Rec_TDs,Rec_Yds,Receptions,Rush_TDs,Rush_Yds,Targets
Name,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
"Abdullah, Ameer",2015,84,,,,,,,
"Abdullah, Ameer",2015,343,7.0,0.0,44.0,4.0,1.0,50.0,4.0
"Abdullah, Ameer",2015,586,6.0,0.0,25.5,2.5,0.5,32.5,2.5
"Abdullah, Ameer",2015,862,8.0,0.0,17.666667,2.0,0.333333,36.333333,2.333333
"Abdullah, Ameer",2015,1132,11.0,0.0,7.0,1.0,0.0,40.666667,1.666667


In [218]:
# Overwrite the per-game stats with the lagged rolling averages.
# shifted_rb_stats is indexed by (Name, Year, original row label); dropping the
# two group levels leaves the row label, so one index-aligned assignment replaces
# the row-by-row loop. Rows absent from shifted_rb_stats become NaN.
lagged_rb_stats = (
    shifted_rb_stats[features]
    .reset_index(level = [0, 1], drop = True)
    .reindex(rb_stats.index)
)
rb_stats[features] = lagged_rb_stats

In [224]:
rb_stats.head()

Unnamed: 0,Age,Carries,Date,Month,Name,Oppt,Rec_TDs,Rec_Yds,Receptions,Rush_TDs,Rush_Yds,Targets,Team,Week,Year
0,24.28,,2011-09-11,September,"Blount, LeGarrette",DET,,,,,,,TB,1,2011
1,24.258,,2011-09-11,September,"Charles, Jamaal",BUF,,,,,,,KC,1,2011
2,28.12,,2011-09-11,September,"Gore, Frank",SEA,,,,,,,SF,1,2011
3,20.349,,2011-09-11,September,"Lewis, Dion",LAR,,,,,,,PHI,1,2011
4,25.142,,2011-09-11,September,"Lynch, Marshawn",SF,,,,,,,SEA,1,2011


In [225]:
# Discard every row with a missing value, which includes each group's first row
# (it has no lagged average).
rb_stats = rb_stats.dropna()

In [226]:
rb_stats['Month'].value_counts()

December     1131
October      1087
November      904
September     605
January        99
Name: Month, dtype: int64

### Wide Receivers

In [243]:
def _parse_wr_row(row):
    """Build one wide-receiver game record (a dict of strings) from a results-table <tr>.

    The caller is expected to pass only rows that contain a `targets` cell.
    """
    def stat(name):
        return row.find('td', attrs = {'data-stat': name}).text

    # Only the first two words of the link text are used: "First Last" -> "Last, First".
    name_parts = row.find('a').text.split()
    game_date = stat('game_date')
    return {
        'Name': name_parts[1] + ', ' + name_parts[0],
        'Age': stat('age'),
        'Team': stat('team'),
        'Oppt': stat('opp'),
        'Week': stat('week_num'),
        'Year': game_date[:4],
        'Month': game_date[5:7],
        'Targets': stat('targets'),
        'Receptions': stat('rec'),
        'Rec_Yds': stat('rec_yds'),
        'Rec_TDs': stat('rec_td'),
    }


receivers = []

# The result set is paged via the `offset` query parameter in steps of 100;
# the number of pages (77) is hard-coded.
for i in range(77):
    url = 'https://www.pro-football-reference.com/play-index/pgl_finder.cgi?request=1&match=game&year_min=2011&year_max=2017&season_start=1&season_end=-1&pos=WR&game_type=R&career_game_num_min=1&career_game_num_max=400&game_num_min=0&game_num_max=99&week_num_min=1&week_num_max=17&is_active=Y&c2stat=targets&c2comp=gt&c2val=1&c5val=1.0&order_by=game_date&offset={}'.format(i * 100)
    # A timeout keeps a stalled connection from hanging the cell indefinitely.
    res = requests.get(url, timeout = 30)

    if res.status_code == 200:
        # Parse only successful responses.
        soup = BeautifulSoup(res.content, 'lxml')
        for row in soup.find_all('tr')[2:]:
            # Rows without a targets cell are not player game lines; skip them.
            if row.find('td', attrs = {'data-stat': 'targets'}) is None:
                continue
            receivers.append(_parse_wr_row(row))
    else:
        print("Oops something went wrong... Error Code: {}".format(res.status_code))
    # Pause between requests.
    time.sleep(.5)

In [248]:
# One row per scraped wide-receiver game. Every value was read from an element's
# `.text`, so all columns hold strings at this point.
wr_stats = pd.DataFrame(receivers)

In [251]:
# Week and Year may still be scraped text here; convert them first so the sort is
# numeric rather than lexicographic (as strings, '10' sorts before '2').
wr_stats[['Week', 'Year']] = wr_stats[['Week', 'Year']].apply(pd.to_numeric)
wr_stats = wr_stats.sort_values(['Week', 'Year']).reset_index(drop = True)

In [254]:
wr_stats.head()

Unnamed: 0,Age,Month,Name,Oppt,Rec_TDs,Rec_Yds,Receptions,Targets,Team,Week,Year
0,24.181,9,"Decker, Eric",OAK,0,53,3,6,DEN,1,2011
1,25.113,9,"Edelman, Julian",MIA,0,9,1,2,NE,1,2011
2,24.198,9,"Heyward-Bey, Darrius",DEN,0,44,4,7,OAK,1,2011
3,27.173,9,"Marshall, Brandon",NE,0,139,7,13,MIA,1,2011
4,26.003,9,"Slater, Matt",MIA,0,46,1,2,NE,1,2011


In [253]:
# Normalize team codes. A plain `.map(teams)` turns every value that is not a key
# of `teams` into NaN, including codes that were already normalized (e.g. 'NE').
# Falling back to the original value keeps the cell re-runnable and keeps unknown
# codes visible.
for team_column in ['Team', 'Oppt']:
    wr_stats[team_column] = wr_stats[team_column].map(teams).fillna(wr_stats[team_column])

In [261]:
# Saving is disabled; the frame is reloaded from the existing CSV instead, which
# replaces whatever wr_stats held in memory.
# NOTE(review): a fresh run depends on '../data/wr_stats.csv' already existing.
# wr_stats.to_csv('../data/wr_stats.csv', index = False)
wr_stats = pd.read_csv('../data/wr_stats.csv')

In [263]:
# Per-game receiving stat columns that get replaced by lagged rolling averages.
# This rebinds the `features` name used earlier in the notebook.
features = [
    'Rec_TDs',
    'Rec_Yds',
    'Receptions',
    'Targets',
]

In [264]:
# Within each (player, season) group, average every feature over a window of up
# to three consecutive rows ending at the current row, in the frame's current
# row order. min_periods = 0 lets the first rows of a group use shorter windows.
wr_player_seasons = wr_stats.groupby(['Name', 'Year'])[features]
rolling_wr_stats = wr_player_seasons.rolling(window = 3, min_periods = 0).mean()

In [265]:
# Move each group's averages down by one row so a row only carries values computed
# from the rows before it; the first row of every group becomes NaN.
wr_rolling_groups = rolling_wr_stats.groupby(level = [0, 1])
shifted_wr_stats = wr_rolling_groups.shift(periods = 1)

In [268]:
shifted_wr_stats.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Rec_TDs,Rec_Yds,Receptions,Targets
Name,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adams, Davante",2014,546,,,,
"Adams, Davante",2014,974,0.0,10.0,1.0,2.0
"Adams, Davante",2014,1401,0.5,11.5,1.5,2.0
"Adams, Davante",2014,1871,0.333333,11.0,1.333333,2.666667
"Adams, Davante",2014,2814,0.333333,48.0,3.0,5.666667


In [269]:
# Overwrite the per-game stats with the lagged rolling averages.
# shifted_wr_stats is indexed by (Name, Year, original row label); dropping the
# two group levels leaves the row label, so one index-aligned assignment replaces
# the row-by-row loop. Rows absent from shifted_wr_stats become NaN.
lagged_wr_stats = (
    shifted_wr_stats[features]
    .reset_index(level = [0, 1], drop = True)
    .reindex(wr_stats.index)
)
wr_stats[features] = lagged_wr_stats

In [276]:
# Discard every row with a missing value, which includes each group's first row
# (it has no lagged average).
wr_stats = wr_stats.dropna()

In [280]:
# Order by week, then season, and renumber the rows to match the new order.
wr_stats = (
    wr_stats
    .sort_values(by = ['Week', 'Year'])
    .reset_index(drop = True)
)

In [282]:
# wr_stats.to_csv('../data/wr_stats.csv', index = False)

### Tight Ends

In [283]:
def _parse_te_row(row):
    """Build one tight-end game record (a dict of strings) from a results-table <tr>.

    The caller is expected to pass only rows that contain a `targets` cell.
    """
    def stat(name):
        return row.find('td', attrs = {'data-stat': name}).text

    # Only the first two words of the link text are used: "First Last" -> "Last, First".
    name_parts = row.find('a').text.split()
    game_date = stat('game_date')
    return {
        'Name': name_parts[1] + ', ' + name_parts[0],
        'Age': stat('age'),
        'Team': stat('team'),
        'Oppt': stat('opp'),
        'Week': stat('week_num'),
        'Year': game_date[:4],
        'Month': game_date[5:7],
        'Targets': stat('targets'),
        'Receptions': stat('rec'),
        'Rec_Yds': stat('rec_yds'),
        'Rec_TDs': stat('rec_td'),
    }


tight_ends = []

# The result set is paged via the `offset` query parameter in steps of 100;
# the number of pages (36) is hard-coded.
for i in range(36):
    url = 'https://www.pro-football-reference.com/play-index/pgl_finder.cgi?request=1&match=game&year_min=2011&year_max=2017&season_start=1&season_end=-1&pos=TE&game_type=R&career_game_num_min=1&career_game_num_max=400&game_num_min=0&game_num_max=99&week_num_min=1&week_num_max=17&is_active=Y&c2stat=targets&c2comp=gt&c2val=1&c5val=1.0&order_by=game_date&offset={}'.format(i * 100)
    # A timeout keeps a stalled connection from hanging the cell indefinitely.
    res = requests.get(url, timeout = 30)

    if res.status_code == 200:
        # Parse only successful responses.
        soup = BeautifulSoup(res.content, 'lxml')
        for row in soup.find_all('tr')[2:]:
            # Rows without a targets cell are not player game lines; skip them.
            if row.find('td', attrs = {'data-stat': 'targets'}) is None:
                continue
            tight_ends.append(_parse_te_row(row))
    else:
        print("Oops something went wrong... Error Code: {}".format(res.status_code))
    # Pause between requests.
    time.sleep(.5)

In [317]:
# One row per scraped tight-end game. Every value was read from an element's
# `.text`, so all columns hold strings at this point.
te_stats = pd.DataFrame(tight_ends)

In [318]:
# Week and Year are still scraped text here; convert them first so the sort is
# numeric rather than lexicographic (as strings, '10' sorts before '2').
te_stats[['Week', 'Year']] = te_stats[['Week', 'Year']].apply(pd.to_numeric)
te_stats = te_stats.sort_values(['Week', 'Year']).reset_index(drop = True)

In [319]:
te_stats.head()

Unnamed: 0,Age,Month,Name,Oppt,Rec_TDs,Rec_Yds,Receptions,Targets,Team,Week,Year
0,22.121,9,"Gronkowski, Rob",MIA,1,86,6,7,NWE,1,2011
1,24.157,9,"Cook, Jared",JAX,0,7,1,2,TEN,1,2011
2,27.223,9,"Davis, Vernon",SEA,0,47,5,6,SFO,1,2011
3,24.048,9,"Dickson, Ed",PIT,1,59,5,5,BAL,1,2011
4,31.085,9,"Gates, Antonio",MIN,0,74,8,13,SDG,1,2011


In [320]:
# Normalize team codes. A plain `.map(teams)` turns every value that is not a key
# of `teams` into NaN, including codes that were already normalized (e.g. 'NE').
# Falling back to the original value keeps the cell re-runnable and keeps unknown
# codes visible.
for team_column in ['Team', 'Oppt']:
    te_stats[team_column] = te_stats[team_column].map(teams).fillna(te_stats[team_column])

In [321]:
# Saving is disabled; the frame is reloaded from the existing CSV instead, which
# replaces whatever te_stats held in memory.
# NOTE(review): a fresh run depends on '../data/te_stats.csv' already existing.
# te_stats.to_csv('../data/te_stats.csv', index = False)
te_stats = pd.read_csv('../data/te_stats.csv')

In [322]:
# Per-game receiving stat columns that get replaced by lagged rolling averages.
features = [
    'Rec_TDs',
    'Rec_Yds',
    'Receptions',
    'Targets',
]

In [323]:
# Within each (player, season) group, average every feature over a window of up
# to three consecutive rows ending at the current row, in the frame's current
# row order. min_periods = 0 lets the first rows of a group use shorter windows.
te_player_seasons = te_stats.groupby(['Name', 'Year'])[features]
rolling_te_stats = te_player_seasons.rolling(window = 3, min_periods = 0).mean()

In [324]:
# Move each group's averages down by one row so a row only carries values computed
# from the rows before it; the first row of every group becomes NaN.
te_rolling_groups = rolling_te_stats.groupby(level = [0, 1])
shifted_te_stats = te_rolling_groups.shift(periods = 1)

In [325]:
shifted_te_stats.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Rec_TDs,Rec_Yds,Receptions,Targets
Name,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adams, Davante",2014,1108,,,,
"Adams, Jerell",2016,308,,,,
"Adams, Jerell",2016,508,1.0,18.0,3.0,3.0
"Adams, Jerell",2016,948,0.5,9.0,1.5,2.0
"Adams, Jerell",2016,1181,0.333333,9.333333,1.333333,2.333333


In [326]:
# Overwrite the per-game stats with the lagged rolling averages.
# shifted_te_stats is indexed by (Name, Year, original row label); dropping the
# two group levels leaves the row label, so one index-aligned assignment replaces
# the row-by-row loop. Rows absent from shifted_te_stats become NaN.
lagged_te_stats = (
    shifted_te_stats[features]
    .reset_index(level = [0, 1], drop = True)
    .reindex(te_stats.index)
)
te_stats[features] = lagged_te_stats

In [327]:
# Discard every row with a missing value, which includes each group's first row
# (it has no lagged average).
te_stats = te_stats.dropna()

In [328]:
# Order by week, then season, and renumber the rows to match the new order.
te_stats = (
    te_stats
    .sort_values(by = ['Week', 'Year'])
    .reset_index(drop = True)
)

In [329]:
# Translate month numbers into month names. Integer keys only match a frame
# reloaded from CSV; freshly scraped months are zero-padded strings ('09') and a
# re-run sees names that are already translated. Matching on the numeric value
# handles both number forms; values that do not translate are kept as they are
# instead of being replaced by NaN.
month_names = {9: 'September', 10: 'October', 11: 'November', 12: 'December', 1: 'January'}
te_month_number = pd.to_numeric(te_stats['Month'], errors = 'coerce')
te_stats['Month'] = te_month_number.map(month_names).fillna(te_stats['Month'])

In [330]:
te_stats.head()

Unnamed: 0,Age,Month,Name,Oppt,Rec_TDs,Rec_Yds,Receptions,Targets,Team,Week,Year
0,23.077,September,"Hoomanawanui, Michael",NYG,0.0,21.0,2.0,3.0,LAR,2,2011
1,23.232,September,"Kendricks, Lance",NYG,0.0,19.333333,2.0,3.0,LAR,2,2011
2,24.164,September,"Cook, Jared",BAL,0.333333,90.666667,5.666667,6.666667,TEN,2,2011
3,27.23,September,"Davis, Vernon",DAL,0.333333,52.666667,3.666667,7.0,SF,2,2011
4,24.055,September,"Dickson, Ed",TEN,0.666667,23.0,2.333333,3.666667,BAL,2,2011


In [331]:
# te_stats.to_csv('../data/te_stats.csv', index = False)