# Imports

In [1]:
import requests
import pandas as pd
from bs4 import BeautifulSoup, Comment

# Functions

In [2]:
def get_page_html(team_abbrev, season, timeout=30):
    """Download one team-season page from pro-football-reference and parse it.

    Parameters
    ----------
    team_abbrev : str
        Team code interpolated into the URL path (e.g. ``"dal"``).
    season : str or int
        Season year interpolated into the URL (e.g. ``"2022"``).
    timeout : float or tuple, optional
        Passed to ``requests.get``. Without it a stalled connection would
        block forever, which is costly inside a long scraping loop.

    Returns
    -------
    bs4.BeautifulSoup
        The response body parsed with the ``html.parser`` backend.

    Raises
    ------
    Exception
        If the server answers with any status code other than 200.
    requests.exceptions.RequestException
        Propagated from ``requests.get`` (including timeouts).
    """
    url = f"https://www.pro-football-reference.com/teams/{team_abbrev}/{season}.htm"
    r = requests.get(url, timeout=timeout)
    if r.status_code != 200:
        # Show only the start of the body: enough to diagnose the failure
        # without flooding the notebook output with a full HTML page.
        print(r.text[:500])
        raise Exception(f"Failed to fetch data: Status code {r.status_code} for {url}")

    return BeautifulSoup(r.text, 'html.parser')

In [3]:
def soup_to_table(soup, target_ids):
    """Collect the ``<table>`` elements whose ``id`` is listed in ``target_ids``.

    Parameters
    ----------
    soup : bs4.BeautifulSoup
        Parsed page to search (anything exposing ``select_one`` works).
    target_ids : iterable of str
        Table ids to look up, e.g. ``['team_stats', 'games']``.

    Returns
    -------
    list
        The matching table elements, in the order of ``target_ids``. Ids that
        are not present in ``soup`` are skipped, so the result may be shorter
        than ``target_ids``.
    """
    table_list = list()
    for table_id in target_ids:
        table = soup.select_one(f"table#{table_id}")
        # select_one returns None when nothing matches; guard against it so a
        # missing table does not raise AttributeError on ``None.get``.
        if table is not None and table.get("id") == table_id:
            table_list.append(table)

    return table_list

In [4]:
# (first column position, suffix) for each header group, highest start first.
# Position 0 (the row-label column) gets no suffix.
_TEAM_STAT_COLUMN_GROUPS = (
    (26, "_average_drive"),
    (23, "_drives"),
    (20, "_penalties"),
    (15, "_rushing_totals"),
    (8, "_passing_totals"),
    (1, "_totals"),
)


def _team_stat_suffix(position):
    """Return the group suffix for the header cell at ``position``."""
    for first_position, suffix in _TEAM_STAT_COLUMN_GROUPS:
        if position >= first_position:
            return suffix
    return ""


def create_team_stat_df(table, team_abbrev=None):
    """Convert a scraped ``team_stats`` table element into a DataFrame.

    The first ``<tr>`` is ignored and the second one supplies the column
    names. Because several names repeat (e.g. ``Att``, ``Yds``), each name is
    suffixed with its column group according to its position. Every later
    row becomes one record whose first cell is prefixed with the team code.

    Parameters
    ----------
    table : bs4.element.Tag
        Table element (anything exposing ``find_all`` works).
    team_abbrev : str, optional
        Team code used to prefix the first cell of every data row. When
        omitted, the module-level ``team_abbrev`` variable is used, which is
        what this function relied on before the parameter existed.

    Returns
    -------
    pandas.DataFrame
        All cells as text, except ``Att_passing_totals`` and
        ``Att_rushing_totals`` (cast to int) and the added ``total_snaps``
        column (their sum). For rows that hold league ranks rather than
        counts, ``total_snaps`` is a sum of two ranks, not a play count.

    Raises
    ------
    NameError
        If ``team_abbrev`` is neither passed nor defined at module level.
    ValueError
        If the table has no header row (fewer than two ``<tr>`` elements).
    """
    if team_abbrev is None:
        # Backward compatibility with callers that set the notebook-level
        # variable instead of passing the team code explicitly.
        try:
            team_abbrev = globals()["team_abbrev"]
        except KeyError:
            raise NameError(
                "name 'team_abbrev' is not defined; pass team_abbrev=... explicitly"
            ) from None

    rows = table.find_all("tr")
    if len(rows) < 2:
        raise ValueError("team stats table has no header row")

    header_cells = [cell.get_text(strip=True) for cell in rows[1].find_all(['th', 'td'])]
    columns = [
        f"{name}{_team_stat_suffix(position)}"
        for position, name in enumerate(header_cells)
    ]

    records = list()
    for row in rows[2:]:
        row_text = [cell.get_text(strip=True) for cell in row.find_all(['th', 'td'])]
        row_text[0] = f"{team_abbrev}_{row_text[0]}"
        records.append(row_text)

    df_team_stats = pd.DataFrame(columns=columns, data=records)
    df_team_stats["Att_passing_totals"] = df_team_stats["Att_passing_totals"].astype(int)
    df_team_stats["Att_rushing_totals"] = df_team_stats["Att_rushing_totals"].astype(int)
    df_team_stats["total_snaps"] = df_team_stats["Att_passing_totals"] + df_team_stats["Att_rushing_totals"]

    return df_team_stats

# Testing Functions

In [5]:
# Sample team code and season used to try the helper functions on one page.
team_abbrev, season = "dal", "2022"

In [6]:
# Download and parse the sample team's season page.
soup = get_page_html(team_abbrev=team_abbrev, season=season)

In [7]:
# Only the "team_stats" table is needed; soup_to_table returns a list of matches.
target_ids = ["team_stats"]
table = soup_to_table(soup=soup, target_ids=target_ids)

In [8]:
# The single requested table is the first (and only) entry of the list.
team_stats_table = table[0]
df_team_stats = create_team_stat_df(table=team_stats_table)

In [9]:
# Preview the parsed frame; the output below shows four rows (team, opponent, and two league-rank rows).
df_team_stats.head()

Unnamed: 0,Player,PF_totals,Yds_totals,Ply_totals,Y/P_totals,TO_totals,FL_totals,1stD_totals,Cmp_passing_totals,Att_passing_totals,...,1stPy_penalties,#Dr_drives,Sc%_drives,TO%_drives,Start_average_drive,Time_average_drive,Plays_average_drive,Yds_average_drive,Pts_average_drive,total_snaps
0,dal_Team Stats,467,6034,1114.0,5.4,23,5,347,355.0,556,...,24.0,195.0,41.5,10.8,Own 29.0,2:35,5.88,30.9,2.31,1087
1,dal_Opp. Stats,342,5613,1101.0,5.1,33,17,327,345.0,550,...,34.0,197.0,33.5,16.2,Own 29.2,2:40,5.8,28.4,1.65,1047
2,dal_Lg Rank Offense,4,11,,,17,2,11,,19,...,,,6.0,20.0,13,27,18.0,18.0,7.0,25
3,dal_Lg Rank Defense,5,12,,,1,1,14,,11,...,,,25.0,1.0,27,5,8.0,6.0,4.0,38


In [10]:
# Check the derived column: total_snaps = passing attempts + rushing attempts.
# Caveat: rows 2 and 3 are the "Lg Rank" rows, so their total_snaps adds two
# league ranks together and is not a play count.
df_team_stats[["Att_passing_totals", "Att_rushing_totals", "total_snaps"]]

Unnamed: 0,Att_passing_totals,Att_rushing_totals,total_snaps
0,556,531,1087
1,550,497,1047
2,19,6,25
3,11,27,38


# Team Dict Translation

In [11]:
# Maps a team nickname to the code that is substituted for {team_abbrev} in the
# pro-football-reference team URL. Several codes differ from the abbreviations
# commonly seen elsewhere (e.g. 'crd', 'clt', 'htx', 'oti', 'rav') — presumably
# the site's own franchise codes; verify against the site before changing any.
# Insertion order matters to code that iterates over the dict.
team_dict = {
    '49ers': 'sfo',
    'Bears': 'chi',
    'Bengals': 'cin',
    'Bills': 'buf',
    'Broncos': 'den',
    'Browns': 'cle',
    'Buccaneers': 'tam',
    'Cardinals': 'crd',
    'Chargers': 'sdg',
    'Chiefs': 'kan',
    'Colts': 'clt',
    # 'Commanders' and 'Redskins' share the code 'was'; code that iterates over
    # every key visits that code twice unless it de-duplicates.
    'Commanders': 'was',
    'Cowboys': 'dal',
    'Dolphins': 'mia',
    'Eagles': 'phi',
    'Falcons': 'atl',
    'Giants': 'nyg',
    'Jaguars': 'jax',
    'Jets': 'nyj',
    'Lions': 'det',
    'Packers': 'gnb',
    'Panthers': 'car',
    'Patriots': 'nwe',
    'Raiders': 'rai',
    'Rams': 'ram',
    'Ravens': 'rav',
    'Redskins': 'was',  # same code as 'Commanders'
    'Saints': 'nor',
    'Seahawks': 'sea',
    'Steelers': 'pit',
    'Texans': 'htx',
    'Titans': 'oti',
    'Vikings': 'min',
}

# Build a team-stats DataFrame for every team and season

In [12]:
import time

In [13]:
# Scrape the "team_stats" table for every team name and every season from
# 2012 through 2023, storing one DataFrame per "<team name>-<season>" key.
#
# Two team names can share one site code (both 'Commanders' and 'Redskins'
# map to 'was'). Each (code, season) page is therefore downloaded only once
# and reused for the alias, which avoids duplicate requests and the sleeps
# that would go with them.
team_dfs = dict()
scraped_pages = dict()  # (team code, season) -> DataFrame already downloaded
target_ids = ["team_stats"]

# NOTE: the loop assigns the notebook-level names team_abbrev and season on
# purpose; create_team_stat_df may read team_abbrev from module scope.
for key, team_abbrev in team_dict.items():
    requested_any_page = False
    for season in range(2012, 2023 + 1):
        page_key = (team_abbrev, season)
        is_new_page = page_key not in scraped_pages
        if is_new_page:
            soup = get_page_html(team_abbrev, season)
            table = soup_to_table(soup, target_ids)

            team_stats_table = table[0]
            scraped_pages[page_key] = create_team_stat_df(team_stats_table)
            requested_any_page = True

        # Copy so that frames stored under alias keys never share mutations.
        df_team_stats = scraped_pages[page_key].copy()

        team_dfs[f"{key}-{season}"] = df_team_stats
        print(f"Team: {key}")
        print(f"Season: {season}")
        print()
        if is_new_page:
            # Pause between requests to stay polite to the server.
            time.sleep(10)
    if requested_any_page:
        # Longer pause between teams; skipped when everything came from cache.
        time.sleep(60)

Team: 49ers
Season: 2012

Team: 49ers
Season: 2013

Team: 49ers
Season: 2014

Team: 49ers
Season: 2015

Team: 49ers
Season: 2016

Team: 49ers
Season: 2017

Team: 49ers
Season: 2018

Team: 49ers
Season: 2019

Team: 49ers
Season: 2020

Team: 49ers
Season: 2021

Team: 49ers
Season: 2022

Team: 49ers
Season: 2023

Team: Bears
Season: 2012

Team: Bears
Season: 2013

Team: Bears
Season: 2014

Team: Bears
Season: 2015

Team: Bears
Season: 2016

Team: Bears
Season: 2017

Team: Bears
Season: 2018

Team: Bears
Season: 2019

Team: Bears
Season: 2020

Team: Bears
Season: 2021

Team: Bears
Season: 2022

Team: Bears
Season: 2023

Team: Bengals
Season: 2012

Team: Bengals
Season: 2013

Team: Bengals
Season: 2014

Team: Bengals
Season: 2015

Team: Bengals
Season: 2016

Team: Bengals
Season: 2017

Team: Bengals
Season: 2018

Team: Bengals
Season: 2019

Team: Bengals
Season: 2020

Team: Bengals
Season: 2021

Team: Bengals
Season: 2022

Team: Bengals
Season: 2023

Team: Bills
Season: 2012

Team: Bills
Se

In [17]:
import pickle

# Destination path for the serialized dictionary of per-team DataFrames.
filename = 'team-dfs-dict.pkl'

# Binary write mode is required by pickle; the context manager closes the
# handle once the dump has finished.
with open(filename, mode='wb') as pickle_file:
    pickle.dump(team_dfs, pickle_file)

In [21]:
# Inspect the column labels of one scraped frame (keys have the form "<team name>-<season>").
team_dfs["49ers-2012"].columns

Index(['Player', 'PF_totals', 'Yds_totals', 'Ply_totals', 'Y/P_totals',
       'TO_totals', 'FL_totals', '1stD_totals', 'Cmp_passing_totals',
       'Att_passing_totals', 'Yds_passing_totals', 'TD_passing_totals',
       'Int_passing_totals', 'NY/A_passing_totals', '1stD_passing_totals',
       'Att_rushing_totals', 'Yds_rushing_totals', 'TD_rushing_totals',
       'Y/A_rushing_totals', '1stD_rushing_totals', 'Pen_penalties',
       'Yds_penalties', '1stPy_penalties', '#Dr_drives', 'Sc%_drives',
       'TO%_drives', 'Start_average_drive', 'Time_average_drive',
       'Plays_average_drive', 'Yds_average_drive', 'Pts_average_drive',
       'total_snaps'],
      dtype='object')