In [None]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [2]:
# pip install lxml

## Import Libraries

In [1]:
import requests
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os

In [7]:
# Define Base Url
base_url = "https://www.nfl.com"

# Define Week
current_week = 1

# Define level we want stats for
level = "player"

## Player Stats

### Get Pages to All Tabs

In [62]:
# Request raw HTML
html = requests.get("https://www.nfl.com/stats/player-stats")

# Create a BeautifulSoup object called `soup` to traverse the combined HTML
soup = BeautifulSoup(html.content, "html.parser")

# Find all <a> elements within <li> elements
a_elements = soup.find_all('li', class_='d3-o-tabs__list-item')

# Extract the text from the <a> elements
tabs = [element.find('a').get_text() for element in a_elements]

# Extract the links (href attributes) from the <a> elements
links = [base_url + element.find('a')['href'] for element in a_elements]

# Create a dictionary to store the category names and their corresponding links
stat_pages = dict(zip(tabs, links))

# Print the dictionary as a table-like format using Pandas
stat_pages_df = pd.DataFrame(list(stat_pages.items()), columns=['Category', 'Link'])

print(stat_pages_df)

           Category                                               Link
0           Passing  https://www.nfl.com/stats/player-stats/categor...
1           Rushing  https://www.nfl.com/stats/player-stats/categor...
2         Receiving  https://www.nfl.com/stats/player-stats/categor...
3           Fumbles  https://www.nfl.com/stats/player-stats/categor...
4           Tackles  https://www.nfl.com/stats/player-stats/categor...
5     Interceptions  https://www.nfl.com/stats/player-stats/categor...
6       Field Goals  https://www.nfl.com/stats/player-stats/categor...
7          Kickoffs  https://www.nfl.com/stats/player-stats/categor...
8   Kickoff Returns  https://www.nfl.com/stats/player-stats/categor...
9           Punting  https://www.nfl.com/stats/player-stats/categor...
10     Punt Returns  https://www.nfl.com/stats/player-stats/categor...


In [59]:
# Create a dictionary to store the category names and their corresponding page links
category_pages = {}

# Loop through each category's link and scrape data
for index, row in stat_pages_df.iterrows():
    current_link = row['Link']
    current_stat = row['Category']
    page_count = 0  # Initialize page count
    category_pages[current_stat] = {page_count: current_link}  # Initialize the category's dictionary
    while current_link:
        # Request raw HTML for the current page
        response = requests.get(current_link)
        
        # Check if the request was successful
        if response.status_code == 200:
            # Create a BeautifulSoup object to parse the HTML
            soup = BeautifulSoup(response.content, "html.parser")

            # Scrape the data from the current page here
            # (You can add your scraping logic here)
            # Example: print(soup.title.text) to print the page title
            
            # Find the "Next Page" link
            next_page_link = soup.find('a', class_='nfl-o-table-pagination__next')
            
            if next_page_link:
                # Extract the 'href' attribute
                href = next_page_link['href']

                # Update current_link with the next page's URL
                current_link = base_url + href
                page_count += 1  # Increment page count
                category_pages[current_stat][page_count] = current_link  # Add the link to the category's dictionary
            else:
                # No more pages to scrape, exit the loop
                break
        else:
            print("Error: Unable to fetch the page.")
            break

#### Export DataFrame

In [60]:
# Combine the base directory path with the current week
directory_path = os.path.join('data', level, 'week' + str(current_week))

# Check if the directory exists
if not os.path.exists(directory_path):
    # Create the directory if it doesn't exist
    os.makedirs(directory_path)
    print(f'Directory "{directory_path}" has been created.')
else:
    print(f'Directory "{directory_path}" already exists.')

# Loop through each category in category_pages
for category, links in category_pages.items():
    # Initialize a list to store DataFrames
    dataframes = []

    # Loop through the links for the current category
    for page, link in enumerate(links):
        # Request raw HTML for the current page
        response = requests.get(links[link])

        # Check if the request was successful
        if response.status_code == 200:
            # Create a BeautifulSoup object to parse the HTML
            soup = BeautifulSoup(response.content, "html.parser")

            # Find all elements with the class 'd3-o-player-stats--detailed'
            stats = soup.find_all(attrs={"class": 'd3-o-' + level + '-stats--detailed'})

            # Initialize lists to collect data
            stat_val = []
            stat_col = []

            # Loop through each <tr> element to extract and collect the text from <td> elements
            for row in stats:
                # This gets the stat names
                header_cells = row.find_all('th')

                if len(header_cells) > 0:
                    for cell in header_cells:
                        stat_col.append(cell.get_text(strip=True))

                # This gets the stats
                data_cells = row.find_all('td')

                if len(data_cells) > 0:
                    for cell in data_cells:
                        stat_val.append(cell.get_text(strip=True))

            # Determine the number of columns in each row
            num_columns = len(stat_col) if stat_col else 1  # Use 1 if stat_col is empty

            # Split the list into rows
            rows = [stat_val[i:i + num_columns] for i in range(0, len(stat_val), num_columns)]

            # Create a DataFrame for the current category and page
            df = pd.DataFrame(rows, columns=stat_col)

            # Convert all columns except "Player" to numeric
            numeric_columns = df.columns.difference(['Player'])
            df[numeric_columns] = df[numeric_columns].apply(pd.to_numeric, errors='coerce')

            # Add the DataFrame to the list
            dataframes.append(df)
        else:
            print("Error: Unable to fetch the page.")
            break

    # Combine all DataFrames into one final DataFrame
    final_df = pd.concat(dataframes, ignore_index=True)

    # Specify the file path where you want to save the CSV file
    csv_file_path = os.path.join(directory_path, category + '.csv')

    # Export the DataFrame to a CSV file
    final_df.to_csv(csv_file_path, index=False)  # Set index=False to exclude the index column

    print(f'DataFrame for category "{category}" has been exported to {csv_file_path}')


Directory "data\player\week1" has been created.
DataFrame for category "Passing" has been exported to data\player\week1\Passing.csv
DataFrame for category "Rushing" has been exported to data\player\week1\Rushing.csv
DataFrame for category "Receiving" has been exported to data\player\week1\Receiving.csv
DataFrame for category "Fumbles" has been exported to data\player\week1\Fumbles.csv
DataFrame for category "Tackles" has been exported to data\player\week1\Tackles.csv
DataFrame for category "Interceptions" has been exported to data\player\week1\Interceptions.csv
DataFrame for category "Field Goals" has been exported to data\player\week1\Field Goals.csv
DataFrame for category "Kickoffs" has been exported to data\player\week1\Kickoffs.csv
DataFrame for category "Kickoff Returns" has been exported to data\player\week1\Kickoff Returns.csv
DataFrame for category "Punting" has been exported to data\player\week1\Punting.csv
DataFrame for category "Punt Returns" has been exported to data\player

## Team Stats

In [86]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

base_url = "https://www.nfl.com"
current_week = 1  # Replace with the current week number

# Define level we want stats for
level = "player"  # Replace with "team" or "player" as needed

# Define unit
lst = ["offense", "defense", "special-teams"]

In [69]:
# Create a dictionary to store the links for each unit and its categories
unit_links = {}

for unit in lst:
    # Request raw HTML
    html = requests.get(base_url + "/stats/team-stats/" + unit + "/passing/2023/reg/all")

    # Create a BeautifulSoup object called `soup` to traverse the combined HTML
    soup = BeautifulSoup(html.content, "html.parser")

    # Find all <a> elements within <li> elements
    a_elements = soup.find_all('li', class_='d3-o-tabs__list-item')

    # Extract the text from the <a> elements
    tabs = [element.find('a').get_text() for element in a_elements]

    # Extract the links (href attributes) from the <a> elements
    links = [base_url + element.find('a')['href'] for element in a_elements]

    # Create a dictionary to store the category names and their corresponding links
    category_links = dict(zip(tabs, links))

    # Add the category links to the unit_links dictionary
    unit_links[unit.capitalize()] = category_links

# Print the nested dictionary
print(unit_links)


{'Offense': {'Passing': 'https://www.nfl.com/stats/team-stats/offense/passing/2023/reg/all', 'Rushing': 'https://www.nfl.com/stats/team-stats/offense/rushing/2023/reg/all', 'Receiving': 'https://www.nfl.com/stats/team-stats/offense/receiving/2023/reg/all', 'Scoring': 'https://www.nfl.com/stats/team-stats/offense/scoring/2023/reg/all', 'Downs': 'https://www.nfl.com/stats/team-stats/offense/downs/2023/reg/all'}, 'Defense': {'Passing': 'https://www.nfl.com/stats/team-stats/defense/passing/2023/reg/all', 'Rushing': 'https://www.nfl.com/stats/team-stats/defense/rushing/2023/reg/all', 'Receiving': 'https://www.nfl.com/stats/team-stats/defense/receiving/2023/reg/all', 'Scoring': 'https://www.nfl.com/stats/team-stats/defense/scoring/2023/reg/all', 'Tackles': 'https://www.nfl.com/stats/team-stats/defense/tackles/2023/reg/all', 'Downs': 'https://www.nfl.com/stats/team-stats/defense/downs/2023/reg/all', 'Fumbles': 'https://www.nfl.com/stats/team-stats/defense/fumbles/2023/reg/all', 'Interceptions

In [84]:


# Create a dictionary to store the links for each unit and its categories
unit_links = {}

# Function to process HTML content and return category links
def process_html(url):
    # Request raw HTML
    html = requests.get(url)

    # Create a BeautifulSoup object called `soup` to traverse the combined HTML
    soup = BeautifulSoup(html.content, "html.parser")

    # Find all <a> elements within <li> elements
    a_elements = soup.find_all('li', class_='d3-o-tabs__list-item')

    # Extract the text from the <a> elements
    tabs = [element.find('a').get_text() for element in a_elements]

    # Extract the links (href attributes) from the <a> elements
    links = [base_url + element.find('a')['href'] for element in a_elements]

    # Create a dictionary to store the category names and their corresponding links
    category_links = dict(zip(tabs, links))

    return category_links

# Define the URL to scrape based on the specified level
url = None  # Initialize URL to None

if level == "player":
    url = f"{base_url}/stats/player-stats"
elif level == "team":
    lst = ["offense", "defense", "special-teams"]
else:
    print("Invalid level specified.")

# Process the URL and build the unit_links dictionary
if url:
    unit_links = process_html(url)
    
    # Print the dictionary as a table-like format using Pandas
    stat_pages_df = pd.DataFrame(list(unit_links.items()), columns=['Category', 'Link'])
    print(stat_pages_df)

if level == "team":
    for unit in lst:
        # Define the URL for the current unit
        url = f"{base_url}/stats/team-stats/{unit}/passing/2023/reg/all"
        category_links = process_html(url)

        # Add the category links to the unit_links dictionary
        unit_links[unit.capitalize()] = category_links

    # Print the nested dictionary
    print(unit_links)


           Category                                               Link
0           Passing  https://www.nfl.com/stats/player-stats/categor...
1           Rushing  https://www.nfl.com/stats/player-stats/categor...
2         Receiving  https://www.nfl.com/stats/player-stats/categor...
3           Fumbles  https://www.nfl.com/stats/player-stats/categor...
4           Tackles  https://www.nfl.com/stats/player-stats/categor...
5     Interceptions  https://www.nfl.com/stats/player-stats/categor...
6       Field Goals  https://www.nfl.com/stats/player-stats/categor...
7          Kickoffs  https://www.nfl.com/stats/player-stats/categor...
8   Kickoff Returns  https://www.nfl.com/stats/player-stats/categor...
9           Punting  https://www.nfl.com/stats/player-stats/categor...
10     Punt Returns  https://www.nfl.com/stats/player-stats/categor...


In [61]:
# Combine the base directory path with the current week
directory_path = os.path.join('data', level, 'week' + str(current_week))

# Check if the directory exists
if not os.path.exists(directory_path):
    # Create the directory if it doesn't exist
    os.makedirs(directory_path)
    print(f'Directory "{directory_path}" has been created.')
else:
    print(f'Directory "{directory_path}" already exists.')

# Loop through each unit in unit_links
for unit, categories in unit_links.items():
    # Create a subdirectory for the current unit
    unit_directory_path = os.path.join(directory_path, unit)
    
    if not os.path.exists(unit_directory_path):
        os.makedirs(unit_directory_path)

    # Loop through the categories for the current unit
    for category, link in categories.items():
        # Request raw HTML for the current page
        response = requests.get(link)

        # Check if the request was successful
        if response.status_code == 200:
            # Create a BeautifulSoup object to parse the HTML
            soup = BeautifulSoup(response.content, "html.parser")

            # Find all elements with the class 'd3-o-player-stats--detailed'
            stats = soup.find_all(attrs={"class": 'd3-o-' + level + '-stats--detailed'})

            # Initialize lists to collect data
            stat_val = []
            stat_col = []

            # Loop through each <tr> element to extract and collect the text from <td> elements
            for row in stats:
                # This gets the stat names
                header_cells = row.find_all('th')

                if len(header_cells) > 0:
                    for cell in header_cells:
                        stat_col.append(cell.get_text(strip=True))

                # This gets the stats
                data_cells = row.find_all('td')

                if len(data_cells) > 0:
                    for cell in data_cells:
                        stat_val.append(cell.get_text(strip=True))

            # Determine the number of columns in each row
            num_columns = len(stat_col) if stat_col else 1  # Use 1 if stat_col is empty
            
            # Split the list into rows
            rows = [stat_val[i:i + num_columns] for i in range(0, len(stat_val), num_columns)]

            # Create a DataFrame for the current category
            df = pd.DataFrame(rows, columns=stat_col)

            # Convert all columns except "Team" to numeric
            numeric_columns = df.columns.difference(['Team'])
            df[numeric_columns] = df[numeric_columns].apply(pd.to_numeric, errors='coerce')

            # Check if the DataFrame has a "Team" column before attempting to remove the duplicated part
            if 'Team' in df.columns:
                # Remove duplicated part from the "Team" column
                df['Team'] = df['Team'].apply(lambda x: x[:len(x)//2])

            # Specify the file path within the unit's directory
            csv_file_path = os.path.join(unit_directory_path, category + '.csv')

            # Export the DataFrame to a CSV file
            df.to_csv(csv_file_path, index=False)  # Set index=False to exclude the index column

            print(f'DataFrame for category "{category}" in unit "{unit}" has been exported to {csv_file_path}')
        else:
            print(f"Error: Unable to fetch the page for category '{category}' in unit '{unit}'.")



Directory "data\team\week1" has been created.
DataFrame for category "Passing" in unit "Offense" has been exported to data\team\week1\Offense\Passing.csv
DataFrame for category "Rushing" in unit "Offense" has been exported to data\team\week1\Offense\Rushing.csv
DataFrame for category "Receiving" in unit "Offense" has been exported to data\team\week1\Offense\Receiving.csv
DataFrame for category "Scoring" in unit "Offense" has been exported to data\team\week1\Offense\Scoring.csv
DataFrame for category "Downs" in unit "Offense" has been exported to data\team\week1\Offense\Downs.csv
DataFrame for category "Passing" in unit "Defense" has been exported to data\team\week1\Defense\Passing.csv
DataFrame for category "Rushing" in unit "Defense" has been exported to data\team\week1\Defense\Rushing.csv
DataFrame for category "Receiving" in unit "Defense" has been exported to data\team\week1\Defense\Receiving.csv
DataFrame for category "Scoring" in unit "Defense" has been exported to data\team\week

In [None]:
import ipywidgets as widgets

widgets.Dropdown(
    options=[('One', 1), ('Two', 2), ('Three', 3)],
    value=2,
    description='Number:',
)

Checkbox(value=False, description='Check me', indent=False)