In [1]:
## The number of 20+ yards rushes for each RB
# ESPN API returns cumulative results (not week-by-week)

In [2]:
import pandas as pd
import requests
from IPython.display import display, HTML

# Set Pandas options to display all columns in a single row without wrapping
pd.set_option('display.max_columns', None)  # Show all columns
pd.set_option('display.width', 1000)        # Set the display width to a large number

In [3]:
# ESPN import json files 
def fetch_rb_data_for_years(years, season_types, limit=50):
    base_url = "https://site.web.api.espn.com/apis/common/v3/sports/football/nfl/statistics/byathlete"
    all_data = []
    
    # Manually define the column names based on the expected order in 'displayNames'
    manual_column_display_names = [
        'Rushing Attempts','Rushing Yards','Yards per Rush','Longest Rush','20plus Yard Rushes',
        'Rushing Touchdowns','Rushing Yards per Game','Rushing Fumbles','Rushing Fumbles Lost',
        'Rushing First Downs'
    ]
    for year in years:
        for season_type in season_types:
            page = 1
            while True:
                # Construct the URL for each API request
                url = (
                    f"{base_url}?region=us&lang=en&contentorigin=espn&isqualified=false"
                    f"&page={page}&limit={limit}&category=offense:rushing"
                    f"&sort=rushing.rushingYards:desc&season={year}&seasontype={season_type}"
                )
                
                try:
                    # Fetch the JSON data from the URL
                    response = requests.get(url)
                    response.raise_for_status()  # Raise HTTPError for bad responses
                    data = response.json()
                except requests.exceptions.RequestException as e:
                    print(f"Request failed: {e}")
                    break  # Exit the loop if request fails

                if not data.get("athletes"):
                    break  # Exit the loop if there are no more athletes

                # Iterate over each athlete
                for athlete_info in data['athletes']:
                    athlete = athlete_info.get('athlete', {})
                    player_data = {
                        'season': year,
                        'seasonType': season_type,
                        'firstName': athlete.get('firstName', 'N/A'),
                        'lastName': athlete.get('lastName', 'N/A'),
                        'position': athlete.get('position', {}).get('displayName', 'Unknown'),
                        'teamName': athlete.get('teamName', 'Unknown')
                    }

                    # Iterate over each category for the athlete
                    for category in athlete_info.get('categories', []):
                        if category.get('name') == "rushing":  # Only interested in rushing stats
                            category_values = category.get('totals', [])
                            
                            # Ensure that 'totals' exist and match the manual column display names
                            if category_values and len(category_values) == len(manual_column_display_names):
                                for display_name, value in zip(manual_column_display_names, category_values):
                                    player_data[display_name] = value  # Use the totals array for integers

                    # Append the player's data to the list
                    all_data.append(player_data)

                # Move to the next page
                page += 1

    # Convert the list of dictionaries to a DataFrame
    all_rb_data = pd.DataFrame(all_data)

    # Define the desired column order based on display names
    column_order = [
        'season', 'seasonType', 'firstName', 'lastName', 'position', 'teamName',
        'Rushing Attempts','Rushing Yards','Yards per Rush','Longest Rush','20plus Yard Rushes',
        'Rushing Touchdowns','Rushing Yards per Game','Rushing Fumbles','Rushing Fumbles Lost',
        'Rushing First Downs'
    ]
        
    # Reorder and select columns that exist in the DataFrame
    existing_columns = [col for col in column_order if col in all_rb_data.columns]
    all_rb_data = all_rb_data[existing_columns]
    
    return all_rb_data

In [4]:
# Specify the years and season types you want to fetch data for
years = [2024, 2023, 2022, 2021, 2020]

# season_types = [2, 3]  # 2 for regular season and 3 for playoffs

season_types = [2]  # 2 for regular season only
# season_types = [3]  # 3 for playoffs only


In [5]:
# return all rb stats from the ESPN API
def fetch_cumulative_rb_data():

    # Fetch data for the specified years and season types
    rb_data = fetch_rb_data_for_years(years, season_types)
        
    # Display the filtered DataFrame
    display(HTML(rb_data.to_html(notebook=True, index=False)))

    # Export to CSV
    output_path = 'rb_cumulative_data.csv'
    rb_data.to_csv(output_path, index=False)
    print(f"Data has been exported to {output_path}")

fetch_cumulative_rb_data()

season,seasonType,firstName,lastName,position,teamName,Rushing Attempts,Rushing Yards,Yards per Rush,Longest Rush,20plus Yard Rushes,Rushing Touchdowns,Rushing Yards per Game,Rushing Fumbles,Rushing Fumbles Lost,Rushing First Downs
2024,2,J.K.,Dobbins,Running Back,Chargers,27,266,9.9,61,3,2,133.0,0,0,11
2024,2,Jordan,Mason,Running Back,49ers,48,247,5.1,24,2,2,123.5,1,0,12
2024,2,Josh,Jacobs,Running Back,Packers,48,235,4.9,34,3,0,117.5,2,1,12
2024,2,Rhamondre,Stevenson,Running Back,Patriots,52,224,4.3,18,0,2,74.7,3,1,11
2024,2,Saquon,Barkley,Running Back,Eagles,46,204,4.4,34,1,2,102.0,0,0,9
2024,2,Alvin,Kamara,Running Back,Saints,35,198,5.7,17,0,4,99.0,0,0,13
2024,2,Joe,Mixon,Running Back,Texans,39,184,4.7,16,0,1,92.0,0,0,10
2024,2,Brian,Robinson Jr.,Running Back,Commanders,29,173,6.0,40,2,1,86.5,0,0,9
2024,2,James,Conner,Running Back,Cardinals,37,172,4.6,22,2,2,86.0,1,0,13
2024,2,Breece,Hall,Running Back,Jets,46,170,3.7,30,1,2,56.7,1,0,9


Data has been exported to rb_cumulative_data.csv


In [6]:
# return all rb stats with perct. of 20plus rushes
def rb_all_20plus_runs():
    # Fetch data for the specified years and season types
    rb_data = fetch_rb_data_for_years(years, season_types)
    
    # Ensure that the columns are numeric
    rb_data['20plus Yard Rushes'] = pd.to_numeric(rb_data['20plus Yard Rushes'], errors='coerce')
    rb_data['Rushing Attempts'] = pd.to_numeric(rb_data['Rushing Attempts'], errors='coerce')
    
    # Drop the specified columns
    columns_to_remove = ['Rushing Touchdowns', 'Rushing Fumbles', 'Rushing Fumbles Lost', 'Rushing First Downs']
    rb_data = rb_data.drop(columns=columns_to_remove, errors='ignore')
    
    # Calculate the percentage of rushes resulting in 20+ yard gains, rounded to two decimal places
    rb_data['20plus_pct'] = (rb_data['20plus Yard Rushes'] / rb_data['Rushing Attempts']).round(2)
    
    # Create a new column that checks if 20% or more of rush attempts resulted in 20+ yards
    rb_data['is_20percent_explosive'] = (rb_data['20plus Yard Rushes'] / rb_data['Rushing Attempts']) >= 0.20
    
    # Display the full DataFrame with all RBs and the percentage column
    display(HTML(rb_data.to_html(notebook=True, index=False)))
    
    # Export the full DataFrame to CSV
    output_path = 'rb_all_20plus_runs.csv'
    rb_data.to_csv(output_path, index=False)
    print(f"Explosive RB data has been exported to {output_path}")
    
    # Return the full DataFrame with the new column and removed columns
    return rb_data

rb_all_20plus_runs()


season,seasonType,firstName,lastName,position,teamName,Rushing Attempts,Rushing Yards,Yards per Rush,Longest Rush,20plus Yard Rushes,Rushing Yards per Game,20plus_pct,is_20percent_explosive
2024,2,J.K.,Dobbins,Running Back,Chargers,27,266,9.9,61,3,133.0,0.11,False
2024,2,Jordan,Mason,Running Back,49ers,48,247,5.1,24,2,123.5,0.04,False
2024,2,Josh,Jacobs,Running Back,Packers,48,235,4.9,34,3,117.5,0.06,False
2024,2,Rhamondre,Stevenson,Running Back,Patriots,52,224,4.3,18,0,74.7,0.0,False
2024,2,Saquon,Barkley,Running Back,Eagles,46,204,4.4,34,1,102.0,0.02,False
2024,2,Alvin,Kamara,Running Back,Saints,35,198,5.7,17,0,99.0,0.0,False
2024,2,Joe,Mixon,Running Back,Texans,39,184,4.7,16,0,92.0,0.0,False
2024,2,Brian,Robinson Jr.,Running Back,Commanders,29,173,6.0,40,2,86.5,0.07,False
2024,2,James,Conner,Running Back,Cardinals,37,172,4.6,22,2,86.0,0.05,False
2024,2,Breece,Hall,Running Back,Jets,46,170,3.7,30,1,56.7,0.02,False


Explosive RB data has been exported to rb_all_20plus_runs.csv


Unnamed: 0,season,seasonType,firstName,lastName,position,teamName,Rushing Attempts,Rushing Yards,Yards per Rush,Longest Rush,20plus Yard Rushes,Rushing Yards per Game,20plus_pct,is_20percent_explosive
0,2024,2,J.K.,Dobbins,Running Back,Chargers,27,266,9.9,61,3,133.0,0.11,False
1,2024,2,Jordan,Mason,Running Back,49ers,48,247,5.1,24,2,123.5,0.04,False
2,2024,2,Josh,Jacobs,Running Back,Packers,48,235,4.9,34,3,117.5,0.06,False
3,2024,2,Rhamondre,Stevenson,Running Back,Patriots,52,224,4.3,18,0,74.7,0.00,False
4,2024,2,Saquon,Barkley,Running Back,Eagles,46,204,4.4,34,1,102.0,0.02,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1554,2020,2,Philip,Rivers,Quarterback,Colts,18,-8,-0.4,3,0,-0.5,0.00,False
1555,2020,2,Tim,Boyle,Quarterback,Packers,13,-9,-0.7,0,0,-1.1,0.00,False
1556,2020,2,Cedrick,Wilson Jr.,Wide Receiver,Cowboys,3,-12,-4.0,-1,0,-0.8,0.00,False
1557,2020,2,Adrian,Killins,Running Back,Eagles,1,-12,-12.0,-12,0,-12.0,0.00,False


In [7]:
def rb_10percent_20plus_runs():
    # Fetch data for the specified years and season types
    rb_data = fetch_rb_data_for_years(years, season_types)
    
    # Ensure that the columns are numeric
    rb_data['20plus Yard Rushes'] = pd.to_numeric(rb_data['20plus Yard Rushes'], errors='coerce')
    rb_data['Rushing Attempts'] = pd.to_numeric(rb_data['Rushing Attempts'], errors='coerce')
    
    # Drop the specified columns
    columns_to_remove = ['Rushing Touchdowns', 'Rushing Fumbles', 'Rushing Fumbles Lost', 'Rushing First Downs']
    rb_data = rb_data.drop(columns=columns_to_remove, errors='ignore')
    
    # Calculate the percentage of rushes resulting in 20+ yard gains, rounded to two decimal places
    rb_data['20plus_pct'] = (rb_data['20plus Yard Rushes'] / rb_data['Rushing Attempts']).round(2)
    
    # Filter for RBs where 10% or more of their rush attempts resulted in 20+ yards
    rbs_with_10plus_pct = rb_data[rb_data['20plus_pct'] >= 0.10]
    
    # Display the filtered DataFrame
    display(HTML(rbs_with_10plus_pct.to_html(notebook=True, index=False)))
    
    # Export the filtered DataFrame to CSV
    output_path = 'rb_10percent_20plus_runs.csv'
    rbs_with_10plus_pct.to_csv(output_path, index=False)
    print(f"Filtered RB data (10% or more 20+ yard gains) has been exported to {output_path}")
    
    # Return the filtered DataFrame
    return rbs_with_10plus_pct

rb_10percent_20plus_runs()

season,seasonType,firstName,lastName,position,teamName,Rushing Attempts,Rushing Yards,Yards per Rush,Longest Rush,20plus Yard Rushes,Rushing Yards per Game,20plus_pct
2024,2,J.K.,Dobbins,Running Back,Chargers,27,266,9.9,61,3,133.0,0.11
2024,2,Kyler,Murray,Quarterback,Cardinals,10,116,11.6,29,2,58.0,0.2
2024,2,Anthony,Richardson,Quarterback,Colts,10,93,9.3,21,1,46.5,0.1
2024,2,Will,Levis,Quarterback,Titans,8,74,9.3,21,1,37.0,0.12
2024,2,Jayden,Reed,Wide Receiver,Packers,3,70,23.3,33,2,35.0,0.67
2024,2,Bo,Nix,Quarterback,Broncos,9,60,6.7,23,1,30.0,0.11
2024,2,Caleb,Williams,Quarterback,Bears,10,59,5.9,24,1,29.5,0.1
2024,2,Trevor,Lawrence,Quarterback,Jaguars,3,53,17.7,33,1,26.5,0.33
2024,2,Emari,Demercado,Running Back,Cardinals,2,46,23.0,41,1,23.0,0.5
2024,2,Geno,Smith,Quarterback,Seahawks,9,38,4.2,34,1,19.0,0.11


Filtered RB data (10% or more 20+ yard gains) has been exported to rb_10percent_20plus_runs.csv


Unnamed: 0,season,seasonType,firstName,lastName,position,teamName,Rushing Attempts,Rushing Yards,Yards per Rush,Longest Rush,20plus Yard Rushes,Rushing Yards per Game,20plus_pct
0,2024,2,J.K.,Dobbins,Running Back,Chargers,27,266,9.9,61,3,133.0,0.11
26,2024,2,Kyler,Murray,Quarterback,Cardinals,10,116,11.6,29,2,58.0,0.20
32,2024,2,Anthony,Richardson,Quarterback,Colts,10,93,9.3,21,1,46.5,0.10
40,2024,2,Will,Levis,Quarterback,Titans,8,74,9.3,21,1,37.0,0.12
43,2024,2,Jayden,Reed,Wide Receiver,Packers,3,70,23.3,33,2,35.0,0.67
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1384,2020,2,Clayton,Fejedelem,Safety,Dolphins,2,23,11.5,22,1,1.8,0.50
1388,2020,2,Julian,Edelman,Wide Receiver,Patriots,2,22,11.0,23,1,3.7,0.50
1391,2020,2,DJ,Moore,Wide Receiver,Panthers,2,22,11.0,21,1,1.5,0.50
1392,2020,2,Antonio,Gandy-Golden,Tight End,Unknown,1,22,22.0,22,1,3.7,1.00
