In [None]:
import time
import datetime
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
from io import StringIO

#franchise codes as they appear in pro-football-reference team URLs
teams = (
    'crd atl rav buf car chi cin cle dal den det gnb htx clt jax kan '
    'rai sdg ram mia min nwe nor nyg nyj phi pit sfo sea tam oti was'
).split()
#seasons to scrape: the current calendar year plus the five before it (six in total)
current_year = datetime.datetime.now().year
years = [current_year - offset for offset in range(5, -1, -1)]

#one dataframe per (team, year) page that was scraped successfully
all_games = []

#id of the regular-season game-log table; the same string is used as the URL fragment
TABLE_ID = 'table_pfr_team-year_game-logs_team-year-regular-season-game-log'
#seconds to pause after each navigation before reading the page source
#NOTE(review): confirm this pacing stays within the site's published bot/rate-limit policy
PAGE_DELAY_SECONDS = 2

options = webdriver.ChromeOptions()
options.add_argument('--headless')
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

#try/finally so the headless browser is always shut down, even when a page
#load or parse raises partway through the scrape
try:
    #loop through each team and year to scrape the data
    for team in teams:
        for year in years:
            url = f"https://www.pro-football-reference.com/teams/{team}/{year}/gamelog/#{TABLE_ID}"
            print(f"Scraping {url}")
            driver.get(url)
            time.sleep(PAGE_DELAY_SECONDS)

            #parse the html and find the table using the id
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            table = soup.find('table', id=TABLE_ID)

            #if no table is found, print an error message and continue to the next team and year
            if table is None:
                print(f"No table found for {team} {year}")
                continue

            #read the table into a dataframe; read_html raises ValueError when it
            #cannot extract a table, so report that page and keep going
            try:
                df = pd.read_html(StringIO(str(table)))[0]
            except ValueError as err:
                print(f"Could not parse table for {team} {year}: {err}")
                continue

            #tag every row with the page it came from
            df['Team'] = team
            df['Year'] = year
            all_games.append(df)
finally:
    driver.quit()

# Merge the per-page dataframes and write them out as a single CSV
if not all_games:
    print("No data scraped.")
else:
    full_df = pd.concat(all_games, ignore_index=True)
    # Discard rows whose 'Week' cell is the literal text 'Week'
    # (presumably header rows repeated inside the table body)
    if 'Week' in full_df.columns:
        is_header_row = full_df['Week'] == 'Week'
        full_df = full_df[~is_header_row]
    filename = f"nfl_schedule_stats_{current_year-5}_{current_year}.csv"
    full_df.to_csv(filename, index=False)
    print(f"saved to {filename}")

Scraping https://www.pro-football-reference.com/teams/crd/2020/gamelog/#table_pfr_team-year_game-logs_team-year-regular-season-game-log
Scraping https://www.pro-football-reference.com/teams/crd/2021/gamelog/#table_pfr_team-year_game-logs_team-year-regular-season-game-log
Scraping https://www.pro-football-reference.com/teams/crd/2022/gamelog/#table_pfr_team-year_game-logs_team-year-regular-season-game-log
Scraping https://www.pro-football-reference.com/teams/crd/2023/gamelog/#table_pfr_team-year_game-logs_team-year-regular-season-game-log
Scraping https://www.pro-football-reference.com/teams/crd/2024/gamelog/#table_pfr_team-year_game-logs_team-year-regular-season-game-log
Scraping https://www.pro-football-reference.com/teams/crd/2025/gamelog/#table_pfr_team-year_game-logs_team-year-regular-season-game-log
Scraping https://www.pro-football-reference.com/teams/atl/2020/gamelog/#table_pfr_team-year_game-logs_team-year-regular-season-game-log
Scraping https://www.pro-football-reference.com/