In [39]:
# Pro Football Reference page for the 2024 season; the cells below scrape the
# conference standings (elements with id "AFC" / "NFC") from it.
stats_url = "https://www.pro-football-reference.com/years/2024/#all_team_scoring"

In [40]:
from io import StringIO

import pandas as pd
import requests
from bs4 import BeautifulSoup


In [41]:
# Download the season page. Abort on an HTTP error (for example when the site
# rejects the request) so an error page is never parsed as if it held the
# standings, and bound the wait so the cell cannot hang indefinitely.
data = requests.get(stats_url, timeout=30)
data.raise_for_status()

# Name the parser explicitly: without it BeautifulSoup picks whichever parser
# happens to be installed (and warns), so results could differ between machines.
soup = BeautifulSoup(data.text, "html.parser")

In [42]:
# Collect the href of every anchor found inside the element(s) with id "AFC".
afc_standings = soup.select('#AFC')
afc_team_links = [
    anchor.get("href")
    for standings_table in afc_standings
    for anchor in standings_table.find_all('a')
]

In [43]:
# Collect the href of every anchor found inside the element(s) with id "NFC".
nfc_standings = soup.select('#NFC')
nfc_team_links = [
    anchor.get("href")
    for standings_table in nfc_standings
    for anchor in standings_table.find_all('a')
]

In [44]:
# One combined list: AFC links first, then NFC links.
team_links = [*afc_team_links, *nfc_team_links]

In [45]:
# Turn each scraped link into a full URL by prefixing the site host.
team_urls = [
    f"https://pro-football-reference.com{relative_path}"
    for relative_path in team_links
]

In [46]:
# Fetch the page of the first team in the list. Abort on an HTTP error so the
# parsing cells never receive an error page, and bound the wait with a timeout.
team_url = team_urls[0]
data = requests.get(team_url, timeout=30)
data.raise_for_status()

In [47]:
# Build a pandas DataFrame from the "Schedule & Game Results" table of the team page.

from io import StringIO

schedule_tables = pd.read_html(StringIO(data.text), match="Schedule & Game Results Table")
matches = schedule_tables[0]


# Give the header cells that pandas labelled "Unnamed: ..." a meaningful name.
unnamed_labels = {
    'Unnamed: 3_level_1': 'Time',
    'Unnamed: 4_level_1': 'Game Link',
    'Unnamed: 5_level_1': 'Result',
    'Unnamed: 8_level_1': 'Home/Away',
}
matches = matches.rename(columns=unnamed_labels)


# Flatten the two-level header into single names. The top-level group decides
# how the second-level label is decorated; groups not listed keep the label as is.
name_builders = {
    'Offense': lambda label: 'Off' + label,
    'Defense': lambda label: 'Def' + label,
    'Expected Points': lambda label: 'EP' + label[:3],
    'Score': lambda label: label + 'Sc',
}

new_columns = [
    name_builders.get(group_and_label[0], lambda label: label)(group_and_label[1])
    for group_and_label in matches.columns
]

matches.columns = new_columns


In [53]:
# Normalise the flag/text columns. Every mapping leaves an already-normalised
# value unchanged, so re-running the cell does not alter the result.
# OT: missing or 'N' -> 'N', anything else -> 'Y'.
matches['OT'] = matches['OT'].apply(lambda x: 'N' if pd.isna(x) or x == 'N' else 'Y')
# Result / Rec: missing -> empty string, everything else untouched.
matches['Result'] = matches['Result'].apply(lambda x: '' if pd.isna(x) else x)
matches['Rec'] = matches['Rec'].apply(lambda x: '' if pd.isna(x) else x)
# Home/Away: missing or 'H' -> 'H', anything else -> 'A'.
matches['Home/Away'] = matches['Home/Away'].apply(lambda x: 'H' if pd.isna(x) or x == 'H' else 'A')

# Replace missing statistics with the number 0.0. Filling with the string '0.0'
# would mix text into numeric columns and break later arithmetic.
# The columns are picked by the prefixes given to them when the header was
# flattened, not by a hard-coded position, so the first offense column
# (Off1stD) is covered as well and a change in column order cannot shift the range.
stat_columns = [
    name for name in matches.columns
    if str(name).startswith(('Off', 'Def', 'EP'))
]
matches[stat_columns] = matches[stat_columns].fillna(0.0)

# Replace the 'Game Link' column with the full URL of each game's box score.
# The team page is fetched again here; abort on an HTTP error and bound the wait.
data = requests.get(team_url, timeout=30)
data.raise_for_status()
soup = BeautifulSoup(data.text, "html.parser")
game_table = soup.select('#games')

# Keep only anchors that point at a box score. href=True skips anchors without
# a target, which would otherwise make the substring test fail on None.
game_links = [
    f"https://pro-football-reference.com{anchor['href']}"
    for table in game_table
    for anchor in table.find_all('a', href=True)
    if '/boxscores' in anchor['href']
]

# Links exist only for rows that describe a game: the bye week has none, and
# rows without an opponent are assumed to have none either (TODO confirm against
# a schedule that contains such rows). Selecting the game rows with a mask works
# with zero, one or several link-less rows, instead of requiring exactly one
# 'Bye Week' row to be present.
is_game = matches['Opp'].notna() & (matches['Opp'] != 'Bye Week')
game_row_count = int(is_game.sum())
if game_row_count != len(game_links):
    raise ValueError(
        f"Found {len(game_links)} box-score links for {game_row_count} game rows; "
        "cannot align links with the schedule."
    )

# Rows without a game get an empty link; game rows receive the links in page order.
matches['Game Link'] = ''
matches.loc[is_game, 'Game Link'] = game_links
matches

Unnamed: 0,Week,Day,Date,Time,Game Link,Result,OT,Rec,Home/Away,Opp,...,OffRushY,OffTO,Def1stD,DefTotYd,DefPassY,DefRushY,DefTO,EPOff,EPDef,EPSp.
0,1,Sun,September 8,1:00PM ET,https://pro-football-reference.com/boxscores/2...,W,N,1-0,H,Arizona Cardinals,...,130.0,1.0,18.0,270.0,146.0,124.0,1.0,13.51,-3.22,-2.25
1,2,Thu,September 12,8:15PM ET,https://pro-football-reference.com/boxscores/2...,W,N,2-0,A,Miami Dolphins,...,108.0,0.0,20.0,351.0,212.0,139.0,3.0,8.59,14.11,-2.53
2,3,Mon,September 23,7:30PM ET,https://pro-football-reference.com/boxscores/2...,W,N,3-0,H,Jacksonville Jaguars,...,122.0,0.0,19.0,239.0,147.0,92.0,2.0,28.39,12.44,-6.15
3,4,Sun,September 29,8:20PM ET,https://pro-football-reference.com/boxscores/2...,L,N,3-1,A,Baltimore Ravens,...,81.0,1.0,22.0,427.0,156.0,271.0,1.0,-6.92,-19.69,2.37
4,5,Sun,October 6,1:00PM ET,https://pro-football-reference.com/boxscores/2...,L,N,3-2,A,Houston Texans,...,150.0,0.0,18.0,425.0,331.0,94.0,2.0,-1.59,0.68,-2.09
5,6,Mon,October 14,8:15PM ET,https://pro-football-reference.com/boxscores/2...,W,N,4-2,A,New York Jets,...,149.0,0.0,20.0,393.0,272.0,121.0,1.0,14.9,-13.81,0.34
6,7,Sun,October 20,1:00PM ET,https://pro-football-reference.com/boxscores/2...,W,N,5-2,H,Tennessee Titans,...,74.0,0.0,18.0,289.0,200.0,89.0,2.0,15.86,12.71,-4.4
7,8,Sun,October 27,4:05PM ET,https://pro-football-reference.com/boxscores/2...,W,N,6-2,A,Seattle Seahawks,...,164.0,1.0,17.0,233.0,201.0,32.0,2.0,16.41,8.68,-2.1
8,9,Sun,November 3,1:00PM ET,https://pro-football-reference.com/boxscores/2...,W,N,7-2,H,Miami Dolphins,...,94.0,1.0,26.0,373.0,224.0,149.0,1.0,13.82,-15.8,6.12
9,10,Sun,November 10,1:00PM ET,https://pro-football-reference.com/boxscores/2...,W,N,8-2,A,Indianapolis Colts,...,135.0,2.0,19.0,361.0,240.0,121.0,4.0,4.88,6.55,-1.17
