In [26]:
# Importing packages needed for scrape
from urllib.request import urlopen
from bs4 import BeautifulSoup
import pandas as pd
import time
from io import StringIO

In [28]:
# Code to scrape a single seasons data
def single(season):
    """
    Scrape NFL player stats for a single season.    
    Args:
        season (int): The year to scrape data for    
    Returns:
        pandas.DataFrame: Player stats for the specified season
    """
    try:
        # Construct URL and get HTML
        url = f'https://www.pro-football-reference.com/years/{season}/scoring.htm'
        html = urlopen(url)
        soup = BeautifulSoup(html, 'html.parser')
        
        # Find all tables and convert to string for pandas
        table_html = soup.findAll('table')
        if not table_html:
            raise ValueError(f"No tables found for season {season}")
            
        # Use StringIO to handle the HTML string (fixes deprecation warning)
        html_string = StringIO(str(table_html))
        df = pd.read_html(html_string)[0]
        
        # Clean up the dataframe
        df = df.drop(columns=['Rk'])  # Updated drop syntax
        df['Player'] = df['Player'].str.replace('*', '', regex=False)  # Updated replace syntax
        df.insert(0, 'Season', season)
        
        # Convert numeric columns
        numeric_cols = df.select_dtypes(include=['float64', 'int64']).columns
        for col in numeric_cols:
            df[col] = pd.to_numeric(df[col], errors='coerce')
            
        return df
        
    except Exception as e:
        print(f"Error scraping season {season}: {str(e)}")
        return None

# Function to scrape multiple seasons of data at a time
def multiple(start_year, end_year):
    """
    Scrape NFL player stats for multiple seasons. 
    Args:
        start_year (int): First season to scrape
        end_year (int): Last season to scrape (inclusive)  
    Returns:
        pandas.DataFrame: Combined player stats for all seasons
    """
    dfs = []
    current_year = start_year
    
    while current_year <= end_year:
        print(f"Scraping season {current_year}...")
        df = single(current_year)
        
        if df is not None:
            dfs.append(df)
        else:
            print(f"Skipping season {current_year} due to error")
            
        time.sleep(4)  # Respect rate limiting
        current_year += 1
    
    if not dfs:
        raise ValueError("No data was successfully scraped")
        
    # Use concat instead of append (append is deprecated)
    final_df = pd.concat(dfs, ignore_index=True)
    
    # Remove any rows where 'Player' column contains column headers
    final_df = final_df[final_df['Player'] != 'Player']
    
    return final_df

In [52]:
# Calling for NFL seasons (minimum 1922, maximum 2022)
year_start = 2015
year_end = 2022

df = multiple(year_start, year_end)

Scraping season 2015...
Scraping season 2016...
Scraping season 2017...
Scraping season 2018...
Scraping season 2019...
Scraping season 2020...
Scraping season 2021...
Scraping season 2022...


In [54]:
# Quick check to ensure the start and end years were included in the table
df

Unnamed: 0,Season,Player,Tm,Age,Pos,G,GS,RshTD,RecTD,PR TD,...,2PM,2PA,D2P,XPM,XPA,FGM,FGA,Sfty,Pts,Pts/G
0,2015,Stephen Gostkowski+,NWE,31,K,16,0,,,,...,,,,52,52,33,36,,151,9.4
1,2015,Graham Gano,CAR,28,K,16,0,,,,...,,,,56,59,30,36,,146,9.1
2,2015,Chandler Catanzaro,ARI,24,K,16,0,,,,...,,,,53,58,28,31,,137,8.6
3,2015,Blair Walsh,MIN,25,K,16,0,,,,...,,,,33,37,34,39,,135,8.4
4,2015,Josh Brown,NYG,36,K,16,0,,,,...,,,,44,45,30,32,,134,8.4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3929,2022,Mike Purcell,DEN,31,NT,17,4,,,,...,,,0,,,,,1,2,0.1
3930,2022,Mike Thomas,CIN,28,WR,10,2,,,,...,1,,0,,,,,,2,0.2
3931,2022,Trevon Wesco,CHI,27,TE,14,1,,,,...,1,,0,,,,,,2,0.1
3932,2022,Justin Reid,KAN,25,FS,17,17,,,,...,,,0,1,2,,,,1,0.1


In [56]:
# Removal of any rows containing the column names
df = df[df['Player']!='Player']

In [58]:
# Exporting df as csv
df.to_csv(f'NFL Player Stats({year_start} - {year_end}).csv', index = True)
print("Data successfully scraped and saved!")

Data successfully scraped and saved!
