In [7]:
# Pro-Football-Reference page for the 2024 season (URL path is /years/2024/).
stats_url = "https://www.pro-football-reference.com/years/2024/"

In [None]:
import requests
import pandas as pd
import time
from io import StringIO
from bs4 import BeautifulSoup

In [9]:
def getTeamUrls(stats_url):
    """Collect the URLs linked from the standings tables of a season page.

    Downloads ``stats_url``, pauses for 2.423 times the duration of that
    request, then gathers the ``href`` of every anchor found inside the
    elements matched by the CSS selectors ``table.stats_table`` and ``#NFC``.

    Args:
        stats_url: URL of the page to download and scan.

    Returns:
        list[str]: ``https://pro-football-reference.com`` + href for each
        link, in first-seen order with duplicates removed. Empty when no
        selector matches anything.
    """
    start = time.time()
    data = requests.get(stats_url)
    delay = time.time() - start
    # Pause in proportion to how long the server took to answer
    # (presumably a politeness throttle between requests -- confirm).
    time.sleep(2.423*delay)

    # Name the parser explicitly: without it BeautifulSoup picks whichever
    # parser happens to be installed and emits GuessedAtParserWarning.
    soup = BeautifulSoup(data.text, "html.parser")

    # href=True skips anchors that have no href attribute; previously those
    # produced the bogus URL "...comNone".
    hrefs = [
        anchor["href"]
        for selector in ('table.stats_table', '#NFC')
        for container in soup.select(selector)
        for anchor in container.find_all('a', href=True)
    ]

    # 'table.stats_table' can match the same element as '#NFC', which would
    # list those links twice. dict.fromkeys drops repeats while keeping the
    # first-seen order.
    team_links = list(dict.fromkeys(hrefs))
    return [f"https://pro-football-reference.com{link}" for link in team_links]

In [10]:
def _flattenScheduleColumn(column):
    """Collapse one two-level (group, name) column label into a single string."""
    group, name = column[0], column[1]
    if group == 'Offense':
        return 'Off' + name
    if group == 'Defense':
        return 'Def' + name
    if group == 'Expected Points':
        return 'EP' + name[:3]
    if group == 'Score':
        return name + 'Sc'
    return name


def makeTeamDb(team_url):
    """Build a DataFrame from a team page's "Schedule & Game Results" table.

    The page is downloaded once. The table is read with ``pd.read_html``, its
    two-level column labels are flattened, missing values are filled, and the
    'Game Link' column is replaced with absolute box-score URLs taken from
    the anchors inside the ``#games`` element.

    Args:
        team_url: URL of the team page to download.

    Returns:
        pandas.DataFrame: One row per table row. 'Game Link' holds the
        box-score URL, or '' on rows whose 'Opp' is 'Bye Week'.

    Raises:
        ValueError: If the number of box-score links (plus bye rows) does not
            equal the number of table rows, so links cannot be aligned.
    """
    data = requests.get(team_url)
    page_html = data.text

    matches = pd.read_html(StringIO(page_html), match = "Schedule & Game Results Table")[0]

    # Name the columns whose second header level is blank on the page.
    matches = matches.rename(columns={'Unnamed: 3_level_1' : 'Time', 'Unnamed: 4_level_1' : 'Game Link', 'Unnamed: 5_level_1' : 'Result',
                                      'Unnamed: 8_level_1' : 'Home/Away'},)

    # Flatten the (group, name) header pairs into single labels.
    matches.columns = [_flattenScheduleColumn(column) for column in matches.columns]

    # Fill empty cells: a blank OT cell becomes 'N' and any other value 'Y';
    # a blank Home/Away cell becomes 'H' and any other value 'A'.
    matches['OT'] = matches['OT'].apply(lambda x : 'N' if pd.isna(x) or x=='N' else 'Y')
    matches['Result'] = matches['Result'].apply(lambda x : '' if pd.isna(x) else x)
    matches['Rec'] = matches['Rec'].apply(lambda x : '' if pd.isna(x) else x)
    matches['Home/Away'] = matches['Home/Away'].apply(lambda x : 'H' if pd.isna(x) or x=='H' else 'A')

    # Fill missing values in every column from position 13 onward.
    # NOTE(review): the fill value is the *string* '0.0', not a float, and 13
    # is presumably where the per-game statistics start -- both are kept
    # unchanged because callers may depend on them; confirm.
    for column in matches.columns[13:]:
        matches[column] = matches[column].apply(lambda x : '0.0' if pd.isna(x) else x)

    # Reuse the HTML already downloaded instead of requesting the page a
    # second time, and name the parser explicitly so the result does not
    # depend on which optional parsers are installed.
    soup = BeautifulSoup(page_html, "html.parser")

    # href=True skips anchors without an href, which previously raised
    # TypeError in the '/boxscores' membership test.
    game_links = [
        f"https://pro-football-reference.com{anchor['href']}"
        for table in soup.select('#games')
        for anchor in table.find_all('a', href=True)
        if '/boxscores' in anchor['href']
    ]

    # Bye rows have no box-score link, so insert a blank at each of their
    # positions. Scanning the whole column handles zero or several bye rows;
    # the old open-ended while loop raised KeyError when there was none.
    bye_rows = [pos for pos, opponent in enumerate(matches['Opp']) if opponent == 'Bye Week']
    for pos in bye_rows:
        game_links.insert(pos, '')

    if len(game_links) != len(matches):
        raise ValueError(
            f"Found {len(game_links)} game links (including bye rows) for "
            f"{len(matches)} schedule rows at {team_url}; cannot align 'Game Link'"
        )

    matches['Game Link'] = game_links
    return matches

In [11]:
# Years in descending order; evaluates to [2022, 2021].
years = [*range(2022, 2020, -1)]