# _Fantasy Football Analytics_

I want to see if I can analyze last season's NFL individual player statistics to create the best team possible. 

First though I have to get the data!

In [2]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [4]:
# import libraries
import pandas as pd
pd.options.display.max_columns = None
import numpy as np
import random
import os

# Matplotlib
%matplotlib inline
%config InlineBackend.figure_format='retina'
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use('ggplot')

In [5]:
os.getcwd()

'/Users/jai/Documents/projects/fantasy-football'

### _Gather Passing Data_

In [7]:
# import libraries
from urllib.request import urlopen
from bs4 import BeautifulSoup

To get things started, I'm just going to focus on pulling the 2018 passing statistics from [pro-football-reference.com](https://www.pro-football-reference.com/). Much like [basketball-reference.com](https://www.basketball-reference.com/), which I used for my first capstone project, I think this website alone will provide the data I need to better analyze NFL players.

In [10]:
# url of website
year = 2018
statistic = 'passing'
url = 'https://www.pro-football-reference.com/years/{}/{}.htm'.format(year, statistic); url

'https://www.pro-football-reference.com/years/2018/passing.htm'

In [11]:
# open url
htm = urlopen(url)
soup = BeautifulSoup(htm)

In [26]:
for i in [htm, soup]:
    print(type(i))

<class 'http.client.HTTPResponse'>
<class 'bs4.BeautifulSoup'>


In [13]:
headers = [th.getText() for th in soup.findAll('tr', limit=2)[0].findAll('th')]; headers

['Rk',
 'Player',
 'Tm',
 'Age',
 'Pos',
 'G',
 'GS',
 'QBrec',
 'Cmp',
 'Att',
 'Cmp%',
 'Yds',
 'TD',
 'TD%',
 'Int',
 'Int%',
 'Lng',
 'Y/A',
 'AY/A',
 'Y/C',
 'Y/G',
 'Rate',
 'QBR',
 'Sk',
 'Yds',
 'NY/A',
 'ANY/A',
 'Sk%',
 '4QC',
 'GWD']

In [14]:
# exclude the first column as we don't need the ranking order
headers = headers[1:]

In [17]:
# gather rows
rows = soup.findAll('tr')[1:]

In [20]:
player_stats = [[td.getText() for td in rows[i].findAll('td')] for i in range(len(rows))]; 
player_stats[:2]

[['Ben Roethlisberger',
  'PIT',
  '36',
  'QB',
  '16',
  '16',
  '9-6-1',
  '452',
  '675',
  '67.0',
  '5129',
  '34',
  '5.0',
  '16',
  '2.4',
  '97',
  '7.6',
  '7.5',
  '11.3',
  '320.6',
  '96.5',
  '73.0',
  '24',
  '166',
  '7.10',
  '7.04',
  '3.4',
  '2',
  '3'],
 ['Andrew Luck*',
  'IND',
  '29',
  'QB',
  '16',
  '16',
  '10-6-0',
  '430',
  '639',
  '67.3',
  '4593',
  '39',
  '6.1',
  '15',
  '2.3',
  '68',
  '7.2',
  '7.4',
  '10.7',
  '287.1',
  '98.7',
  '71.5',
  '18',
  '134',
  '6.79',
  '6.95',
  '2.7',
  '3',
  '3']]

In [22]:
# create a new DataFrame with player_stats and headers
df = pd.DataFrame(player_stats, columns = headers); df.head()

Unnamed: 0,Player,Tm,Age,Pos,G,GS,QBrec,Cmp,Att,Cmp%,Yds,TD,TD%,Int,Int%,Lng,Y/A,AY/A,Y/C,Y/G,Rate,QBR,Sk,Yds.1,NY/A,ANY/A,Sk%,4QC,GWD
0,Ben Roethlisberger,PIT,36,QB,16,16,9-6-1,452,675,67.0,5129,34,5.0,16,2.4,97,7.6,7.5,11.3,320.6,96.5,73.0,24,166,7.1,7.04,3.4,2,3
1,Andrew Luck*,IND,29,QB,16,16,10-6-0,430,639,67.3,4593,39,6.1,15,2.3,68,7.2,7.4,10.7,287.1,98.7,71.5,18,134,6.79,6.95,2.7,3,3
2,Matt Ryan,ATL,33,QB,16,16,7-9-0,422,608,69.4,4924,35,5.8,7,1.2,75,8.1,8.7,11.7,307.8,108.1,70.6,42,296,7.12,7.71,6.5,1,1
3,Kirk Cousins,MIN,30,QB,16,16,8-7-1,425,606,70.1,4298,30,5.0,10,1.7,75,7.1,7.3,10.1,268.6,99.7,60.6,40,262,6.25,6.48,6.2,1,0
4,Aaron Rodgers*,GNB,35,QB,16,16,6-9-1,372,597,62.3,4442,25,4.2,2,0.3,75,7.4,8.1,11.9,277.6,97.6,56.8,49,353,6.33,6.96,7.6,3,3


In [23]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 109 entries, 0 to 108
Data columns (total 29 columns):
Player    106 non-null object
Tm        106 non-null object
Age       106 non-null object
Pos       106 non-null object
G         106 non-null object
GS        106 non-null object
QBrec     106 non-null object
Cmp       106 non-null object
Att       106 non-null object
Cmp%      106 non-null object
Yds       106 non-null object
TD        106 non-null object
TD%       106 non-null object
Int       106 non-null object
Int%      106 non-null object
Lng       106 non-null object
Y/A       106 non-null object
AY/A      106 non-null object
Y/C       106 non-null object
Y/G       106 non-null object
Rate      106 non-null object
QBR       106 non-null object
Sk        106 non-null object
Yds       106 non-null object
NY/A      106 non-null object
ANY/A     106 non-null object
Sk%       106 non-null object
4QC       106 non-null object
GWD       106 non-null object
dtypes: object(29)
memory

### _Test Wrangling Function from custom `football` library_

In [28]:
from football import wrangling

In [37]:
df = wrangling.football_stats(2018, 'passing'); df.head()

Unnamed: 0,Player,Tm,Age,Pos,G,GS,QBrec,Cmp,Att,Cmp%,Yds,TD,TD%,Int,Int%,Lng,Y/A,AY/A,Y/C,Y/G,Rate,QBR,Sk,Yds.1,NY/A,ANY/A,Sk%,4QC,GWD,year
0,Ben Roethlisberger,PIT,36,QB,16,16,9-6-1,452,675,67.0,5129,34,5.0,16,2.4,97,7.6,7.5,11.3,320.6,96.5,73.0,24,166,7.1,7.04,3.4,2,3,2018
1,Andrew Luck*,IND,29,QB,16,16,10-6-0,430,639,67.3,4593,39,6.1,15,2.3,68,7.2,7.4,10.7,287.1,98.7,71.5,18,134,6.79,6.95,2.7,3,3,2018
2,Matt Ryan,ATL,33,QB,16,16,7-9-0,422,608,69.4,4924,35,5.8,7,1.2,75,8.1,8.7,11.7,307.8,108.1,70.6,42,296,7.12,7.71,6.5,1,1,2018
3,Kirk Cousins,MIN,30,QB,16,16,8-7-1,425,606,70.1,4298,30,5.0,10,1.7,75,7.1,7.3,10.1,268.6,99.7,60.6,40,262,6.25,6.48,6.2,1,0,2018
4,Aaron Rodgers*,GNB,35,QB,16,16,6-9-1,372,597,62.3,4442,25,4.2,2,0.3,75,7.4,8.1,11.9,277.6,97.6,56.8,49,353,6.33,6.96,7.6,3,3,2018


In [38]:
years = list(range(2016, 2018, 1)); years

[2016, 2017]