In [105]:
from bs4 import BeautifulSoup as bs
from requests import get
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns 
import numpy as np

# Parse HTML

url = "https://www.nfl.com/stats/player-stats/category/passing/2023/post/all/passingyards/desc"
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"}

# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops here on an HTTP error instead of parsing an error page.
page = get(url, headers=headers, timeout=30)
page.raise_for_status()

soup = bs(page.content, 'html.parser')

# Preview only the beginning of the document; the full page is far too large
# to be useful as cell output.
print(soup.prettify()[:2000])

<!DOCTYPE html>
<html dir="ltr" lang="en-US">
 <head>
  <meta charset="utf-8"/>
  <meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible"/>
  <meta content="width=device-width, initial-scale=1.0" name="viewport"/>
  <link href="https://www.nfl.com/stats/player-stats/category/passing/2023/post/all/passingyards/desc" rel="canonical"/>
  <link href="//nflenterprises.tt.omtrdc.net" rel="dns-prefetch"/>
  <link href="//securepubads.g.doubleclick.net" rel="preconnect"/>
  <link href="//cdn.onesignal.com" rel="preconnect"/>
  <link href="//assets.adobedtm.com" rel="preconnect"/>
  <title>
   2023 NFL passing stats - Players | NFL.com
  </title>
  <meta content="See the passing statistics by player for the 2023 NFL season. See NFL player statistics across every major statistical category." name="description"/>
  <meta content="" name="keywords"/>
  <meta content="NFL.com | Official Site of the National Football League" property="og:title"/>
  <meta content="The official source for NFL ne

In [62]:
# This is the table being used to create CSV

table = soup.find('table')
if table is None:
    # find() returns None when nothing matches; fail here with a clear message
    # instead of with an AttributeError in a later cell.
    raise ValueError("No <table> element found in the downloaded page")
print(table)

<table class="d3-o-table d3-o-table--detailed d3-o-player-stats--detailed d3-o-table--sortable">
<caption class="d3-o-table__caption">
<span class="d3-o-table__caption-value">Aggregated players statistics</span>
</caption>
<thead>
<tr>
<th aria-sort="none" class="header header--no-hover">Player</th>
<th aria-sort="descending" class="header headerSort" scope="col">
<a href="/stats/player-stats/category/passing/2023/REG/all/passingyards/asc">Pass Yds</a>
</th>
<th aria-sort="none" class="header" scope="col">
<a href="/stats/player-stats/category/passing/2023/REG/all/passingaverageyards/DESC">Yds/Att</a>
</th>
<th aria-sort="none" class="header" scope="col">
<a href="/stats/player-stats/category/passing/2023/REG/all/passingattempts/DESC">Att</a>
</th>
<th aria-sort="none" class="header" scope="col">
<a href="/stats/player-stats/category/passing/2023/REG/all/passingcompletions/DESC">Cmp</a>
</th>
<th aria-sort="none" class="header" scope="col">
<a href="/stats/player-stats/category/passing

In [100]:
# Creating a list of columns

# Read every header cell (<th>) rather than only the sort links inside them:
# the "Player" header has no link, so it no longer needs to be inserted by
# hand, and the number of column names always matches the number of header cells.
column_headers = table.find('thead').find_all('th')
Columns = [header_cell.get_text(strip=True) for header_cell in column_headers]
print(Columns)

['Player', 'Pass Yds', 'Yds/Att', 'Att', 'Cmp', 'Cmp %', 'TD', 'INT', 'Rate', '1st', '1st%', '20+', '40+', 'Lng', 'Sck', 'SckY']


In [85]:
# Creating rows of data from Table

rows = table.find('tbody').find_all('tr')

# Get Player stats: one list per table row, holding the stripped text of each cell
player_stats = [
    [td.get_text(strip=True) for td in tr.find_all('td')]
    for tr in rows
]

print(player_stats)


[['Tua Tagovailoa', '4624', '8.3', '560', '388', '69.3', '29', '14', '101.1', '222', '39.6', '58', '11', '78', '29', '171'], ['Jared Goff', '4575', '7.6', '605', '407', '67.3', '30', '12', '97.9', '227', '37.5', '69', '9', '70', '30', '197'], ['Dak Prescott', '4516', '7.6', '590', '410', '69.5', '36', '9', '105.9', '222', '37.6', '62', '7', '92', '39', '255'], ['Josh Allen', '4306', '7.4', '579', '385', '66.5', '29', '18', '92.2', '199', '34.4', '49', '9', '81', '24', '152'], ['Brock Purdy', '4280', '9.6', '444', '308', '69.4', '31', '11', '113', '192', '43.2', '72', '14', '76', '28', '153'], ['Patrick Mahomes', '4183', '7', '597', '401', '67.2', '27', '14', '92.6', '206', '34.5', '50', '8', '67', '27', '186'], ['Jordan Love', '4159', '7.2', '579', '372', '64.2', '32', '11', '96.1', '209', '36.1', '55', '7', '77', '30', '242'], ['C.J. Stroud', '4108', '8.2', '499', '319', '63.9', '23', '5', '100.8', '188', '37.7', '66', '9', '75', '38', '331'], ['Baker Mayfield', '4044', '7.1', '566', 

In [102]:
# Create a CSV and Pandas dataframe from lists

df = pd.DataFrame(player_stats, columns=Columns)
# index=False: the default 0..n-1 index carries no information and would
# otherwise be written as an unnamed first column ("Unnamed: 0" when read back).
df.to_csv('NFLStats.csv', index=False)
df.head()

Unnamed: 0,Player,Pass Yds,Yds/Att,Att,Cmp,Cmp %,TD,INT,Rate,1st,1st%,20+,40+,Lng,Sck,SckY
0,Tua Tagovailoa,4624,8.3,560,388,69.3,29,14,101.1,222,39.6,58,11,78,29,171
1,Jared Goff,4575,7.6,605,407,67.3,30,12,97.9,227,37.5,69,9,70,30,197
2,Dak Prescott,4516,7.6,590,410,69.5,36,9,105.9,222,37.6,62,7,92,39,255
3,Josh Allen,4306,7.4,579,385,66.5,29,18,92.2,199,34.4,49,9,81,24,152
4,Brock Purdy,4280,9.6,444,308,69.4,31,11,113.0,192,43.2,72,14,76,28,153


In [103]:
# Note, this is just passing stats
# Let's wrap this all into a function so we can create CSVs from other stats

def getStats(url, timeout=30):
    """Scrape one NFL.com player-stats page and save its table as a CSV file.

    The page at ``url`` is downloaded and its first ``<table>`` is turned into
    a DataFrame: one row per ``<tr>`` of the table body, one column per header
    cell. The frame is written to ``NFL<Category>Stats.csv`` in the current
    working directory, where ``<Category>`` is the text of the page's
    ``h2.nfl-c-content-header__roofline`` heading with spaces removed.

    Parameters
    ----------
    url : str
        Address of the stats page to scrape.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).

    Returns
    -------
    pandas.DataFrame
        The scraped table; every value is kept as the string found in the page.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    ValueError
        If the page has no category heading or no table.
    """
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"}
    # Bound the wait and stop on HTTP errors so an error page is never parsed.
    page = get(url, headers=headers, timeout=timeout)
    page.raise_for_status()
    soup = bs(page.content, 'html.parser')

    heading = soup.find('h2', class_='nfl-c-content-header__roofline')
    if heading is None:
        raise ValueError("No category heading found at {}".format(url))
    category = heading.get_text(strip=True)

    table = soup.find('table')
    if table is None:
        raise ValueError("No stats table found at {}".format(url))

    # Use every header cell, not just the sort links: the "Player" header has
    # no link, and this keeps the column count equal to the header-cell count.
    Columns = [
        header_cell.get_text(strip=True)
        for header_cell in table.find('thead').find_all('th')
    ]

    # One list of stripped cell texts per body row.
    player_stats = [
        [cell.get_text(strip=True) for cell in row.find_all('td')]
        for row in table.find('tbody').find_all('tr')
    ]

    df = pd.DataFrame(player_stats, columns=Columns)
    # index=False keeps the meaningless 0..n-1 index out of the file
    # (it would otherwise be read back as an "Unnamed: 0" column).
    df.to_csv('NFL{}Stats.csv'.format(category).replace(" ", ""), index=False)
    return df

In [134]:
# Creating CSVs for all player stat categories

categories = soup.find_all('li', class_='d3-o-tabs__list-item')

for category in categories:
    anchor = category.find('a', href=True)
    if anchor is None:
        # A tab without a link has nothing to scrape.
        continue
    # The hrefs are site-relative, so prefix the host. Single quotes inside the
    # double-quoted f-string keep this valid on Python versions before 3.12.
    link = f"https://www.nfl.com{anchor['href']}"
    getStats(link)