# Scraping NFL data


In [37]:
# Importing libraries
import random
import time

import numpy as np
import pandas as pd

In [38]:
# Seasons to scrape, stored as strings (currently the single 2021 season)
seasons = list(map(str, range(2021, 2022)))
print(f"Scraping data for {len(seasons)} seasons")

# Team abbreviations in the form pro-football-reference uses
teams = [
    "crd", "atl", "rav", "buf", "car", "chi", "cin", "cle",
    "dal", "den", "det", "gnb", "htx", "clt", "jax", "kan",
    "rai", "sdg", "ram", "mia", "min", "nwe", "nor", "nyg",
    "nyj", "phi", "pit", "sfo", "sea", "tam", "oti", "was",
]
print(f"Scraping data for {len(teams)} teams")

Scraping data for 1 seasons
Scraping data for 32 teams
['2021']


In [39]:
# Scrape one game-log table per (season, team) pair from
# pro-football-reference, tag each table with its season and team, then
# combine everything into a single dataframe and write it to CSV.

# Empty list to store one dataframe per team-season
all_dfs = []

# Loop through each season for each team
for season in seasons:
    for team in teams:
        # 1. Create url
        url = f"https://www.pro-football-reference.com/teams/{team}/{season}/gamelog/"
        print(url)

        # 2. Scrape data: pick the table whose HTML id is "gamelog<season>".
        #    header=1 takes the column names from row index 1 of the table
        #    header; read_html returns a list, so keep the first match.
        df = pd.read_html(url, header=1, attrs={"id": "gamelog" + season})[0]

        # 3. Insert Season as the first column and Team as the third column
        df.insert(loc=0, column="Season", value=season)
        df.insert(loc=2, column="Team", value=team)

        # 4. Append to list
        all_dfs.append(df)

        print(f"Scraped {team} {season}")

        # Limit requests to at most 20 per minute (random 3-5 second pause)
        time.sleep(random.randint(3, 5))

# Concatenate all dataframes in list. ignore_index=True gives the combined
# frame a fresh 0..n-1 index; without it every team's rows keep their own
# 0-based labels, so label lookups such as gamelog_df.loc[0] would return one
# row per team. The CSV is unaffected because it is written with index=False.
gamelog_df = pd.concat(all_dfs, ignore_index=True)
gamelog_df.to_csv(
    f"../data/nfl_gamelog_{seasons[0]}_{seasons[-1]}.csv", index=False
)

https://www.pro-football-reference.com/teams/crd/2021/gamelog/
Scraped crd 2021
https://www.pro-football-reference.com/teams/atl/2021/gamelog/
Scraped atl 2021
https://www.pro-football-reference.com/teams/rav/2021/gamelog/
Scraped rav 2021
https://www.pro-football-reference.com/teams/buf/2021/gamelog/
Scraped buf 2021
https://www.pro-football-reference.com/teams/car/2021/gamelog/
Scraped car 2021
https://www.pro-football-reference.com/teams/chi/2021/gamelog/
Scraped chi 2021
https://www.pro-football-reference.com/teams/cin/2021/gamelog/
Scraped cin 2021
https://www.pro-football-reference.com/teams/cle/2021/gamelog/
Scraped cle 2021
https://www.pro-football-reference.com/teams/dal/2021/gamelog/
Scraped dal 2021
https://www.pro-football-reference.com/teams/den/2021/gamelog/
Scraped den 2021
https://www.pro-football-reference.com/teams/det/2021/gamelog/
Scraped det 2021
https://www.pro-football-reference.com/teams/gnb/2021/gamelog/
Scraped gnb 2021
https://www.pro-football-reference.com/t

In [43]:
# Show the first rows of the scraped data, followed by its (rows, columns) shape
print(gamelog_df.head(), gamelog_df.shape, sep="\n")

  Season  Week Team  Day          Date Unnamed: 3 Unnamed: 4   OT Unnamed: 6  \
0   2021     1  crd  Sun  September 12   boxscore          W  NaN          @   
1   2021     2  crd  Sun  September 19   boxscore          W  NaN        NaN   
2   2021     3  crd  Sun  September 26   boxscore          W  NaN          @   
3   2021     4  crd  Sun     October 3   boxscore          W  NaN          @   
4   2021     5  crd  Sun    October 10   boxscore          W  NaN        NaN   

                    Opp  ...  FGA  XPM  XPA  Pnt  Yds.3  3DConv  3DAtt  \
0      Tennessee Titans  ...    2    5    5    3    144       7     13   
1     Minnesota Vikings  ...    2    4    4    4    221       3      9   
2  Jacksonville Jaguars  ...    2    4    4    5    253       1      9   
3      Los Angeles Rams  ...    3    4    4    2     88       8     13   
4   San Francisco 49ers  ...    2    2    2    4    205       3     10   

   4DConv  4DAtt    ToP  
0       0      0  33:21  
1       1      1  31:5