In [65]:
# Built in libraries
from io import StringIO
from time import gmtime, strftime
from typing import List, Set, Dict, Tuple, Optional

# 3rd Party Libraries
from bs4 import BeautifulSoup
from colored import Fore, Back, Style
from dotenv import load_dotenv
import pandas as pd
import requests

In [66]:
# Root of the site; every page URL in this notebook is built from it
BASE_URL = 'https://www.pro-football-reference.com'

# Complete URL for the 2023 year page
standings_stats_url = f"{BASE_URL}/years/2023/"

# Pass an explicit timeout: requests waits indefinitely by default, so an
# unresponsive server would otherwise hang this cell forever.
response = requests.get(standings_stats_url, timeout=30)

# Return the HTTP status code for `url`, flagging anything other than 200
def request_status(url: str, timeout: float = 30) -> int:
    """Request ``url`` and return the HTTP status code of the response.

    The function issues its own request for the ``url`` it is given rather
    than reading a module-level response object, so the result always
    describes the page the caller asked about.

    Parameters
    ----------
    url : str
        Address to request.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).

    Returns
    -------
    int
        The response's HTTP status code. When it is not 200, a red
        ``False`` marker is also printed.

    Raises
    ------
    requests.RequestException
        If the request itself fails (connection error, timeout, ...).
    """
    status = requests.get(url, timeout=timeout).status_code
    if status != 200:
        # Visual flag for a failed request; the code itself is still returned
        print(f'{Fore.white}{Back.red}False{Style.reset}')
    return status

In [67]:
# Function to create a BeautifulSoup object if the request is successful
def make_soup(url: str, timeout: float = 30) -> Optional[BeautifulSoup]:
    """Download ``url`` and parse the body with BeautifulSoup.

    Parameters
    ----------
    url : str
        Page to download.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).

    Returns
    -------
    Optional[BeautifulSoup]
        The parsed document when the server answers with status 200,
        otherwise ``None``. Every failure is reported with ``print`` so a
        ``None`` result is never silent.
    """
    try:
        page_response = requests.get(url, timeout=timeout)
    except requests.RequestException as e:
        print(f"Error fetching {url}: {e}")
        return None

    if page_response.status_code != 200:
        # Say why we are returning None instead of falling through silently
        print(f"Error fetching {url}: HTTP {page_response.status_code}")
        return None

    return BeautifulSoup(page_response.text, 'html.parser')

# Parse the 2023 year page; make_soup returns None when the fetch fails,
# so `soup` must be checked before use.
soup = make_soup(standings_stats_url)

In [68]:
# Print the raw 'team_stats' table element found on each team page
def gather_team_stats(teams: List[str]) -> None:
    """Fetch every URL in ``teams`` and print its ``team_stats`` table.

    For each URL the page is loaded with ``make_soup``. If the page could
    not be loaded, or it has no ``<table id="team_stats">``, a message
    naming the URL is printed instead. Nothing is returned.
    """
    for team_url in teams:
        page = make_soup(team_url)
        if not page:
            print(f"Failed to get data for {team_url}")
            continue

        stats_table = page.find('table', id='team_stats')
        if not stats_table:
            print(f"Table not found in {team_url}")
            continue

        print(stats_table)

# Build one list of team URLs per conference key ('AFC' / 'NFC').
# NOTE(review): get_team_urls is not defined in any cell visible here —
# confirm it is defined earlier in the notebook, otherwise this raises
# NameError on a fresh kernel (Restart & Run All).
afc_teams_urls = get_team_urls('AFC')
nfc_teams_urls = get_team_urls('NFC')

In [69]:
# Display the AFC team URLs collected above
afc_teams_urls

['https://www.pro-football-reference.com/teams/buf/2023.htm',
 'https://www.pro-football-reference.com/teams/mia/2023.htm',
 'https://www.pro-football-reference.com/teams/nyj/2023.htm',
 'https://www.pro-football-reference.com/teams/nwe/2023.htm',
 'https://www.pro-football-reference.com/teams/rav/2023.htm',
 'https://www.pro-football-reference.com/teams/cle/2023.htm',
 'https://www.pro-football-reference.com/teams/pit/2023.htm',
 'https://www.pro-football-reference.com/teams/cin/2023.htm',
 'https://www.pro-football-reference.com/teams/htx/2023.htm',
 'https://www.pro-football-reference.com/teams/jax/2023.htm',
 'https://www.pro-football-reference.com/teams/clt/2023.htm',
 'https://www.pro-football-reference.com/teams/oti/2023.htm',
 'https://www.pro-football-reference.com/teams/kan/2023.htm',
 'https://www.pro-football-reference.com/teams/rai/2023.htm',
 'https://www.pro-football-reference.com/teams/den/2023.htm',
 'https://www.pro-football-reference.com/teams/sdg/2023.htm']

## Issue: `gather_team_stats` is not returning a proper DataFrame

In [80]:
def gather_team_stats(teams: List[str]) -> Dict[str, pd.DataFrame]:
    """Scrape the ``team_stats`` table of each team page into a DataFrame.

    Parameters
    ----------
    teams : List[str]
        Team page URLs to fetch.

    Returns
    -------
    Dict[str, pd.DataFrame]
        The parsed ``team_stats`` table for every URL that could be loaded
        and contained the table, keyed by URL in input order. URLs that
        fail are reported with ``print`` and left out, so an empty input
        (or all failures) yields an empty dict. A URL listed twice keeps
        only its last result.
    """
    team_stats: Dict[str, pd.DataFrame] = {}
    for t in teams:
        soup = make_soup(t)
        if not soup:
            print(f"Failed to get data for {t}")
            continue

        table = soup.find('table', id='team_stats')
        if not table:
            print(f"Table not found in {t}")
            continue

        # Wrap the markup in StringIO: passing literal HTML to read_html is
        # deprecated. read_html returns a list of DataFrames, and a single
        # <table> element yields exactly one, hence [0].
        stats_df = pd.read_html(StringIO(str(table)), index_col=0)[0]
        print(stats_df)
        # Keep the frame so the caller actually receives the data
        team_stats[t] = stats_df
    return team_stats

In [81]:
# Run against only the first AFC team URL (slice [:1])
afc_team_stats = gather_team_stats(afc_teams_urls[:1])

                Unnamed: 1_level_0 Unnamed: 2_level_0 Tot Yds & TO           \
Player                          PF                Yds          Ply  Y/P  TO   
Team Stats                     451               6366       1115.0  5.7  28   
Opp. Stats                     311               5222       1015.0  5.1  30   
Lg Rank Offense                  6                  4          NaN  NaN  23   
Lg Rank Defense                  4                  9          NaN  NaN   3   

                Unnamed: 6_level_0 Unnamed: 7_level_0 Passing             ...  \
Player                          FL               1stD     Cmp  Att   Yds  ...   
Team Stats                      10                381   385.0  579  4154  ...   
Opp. Stats                      12                313   363.0  552  3342  ...   
Lg Rank Offense                 16                  3     NaN   16     8  ...   
Lg Rank Defense                  5                 11     NaN    8     7  ...   

                Penalties       Unname

  stats_df = pd.read_html(str(table), index_col=0)[0]  # [0] because pd.read_html returns a list of DataFrames
