In [5]:
## WR 20+ yard receptions - players for whom at least 20 percent of total receptions gained 20+ yards ##
# all of the code in a single block ##

## TASK - normalize the variable names so they make sense ##

import pandas as pd
import requests
from IPython.display import display, HTML

# Configure pandas display: no cap on the number of columns shown, and a wide
# console width so a row is not wrapped across several lines
pd.options.display.max_columns = None
pd.options.display.width = 1000

# Fetch receiving statistics as JSON from the ESPN statistics API
# Identity columns recorded for every player row, in output order.
_PLAYER_ID_COLUMNS = [
    'season', 'seasonType', 'firstName', 'lastName', 'position', 'teamName',
]

# Column names for the receiving category's 'totals' array. They are defined
# manually and must follow the positional order the API uses for that array
# (the order of its 'displayNames').
_RECEIVING_STAT_COLUMNS = [
    'Receptions', 'Receiving Targets', 'Receiving Yards', 'Yards Per Reception',
    'Receiving Touchdowns', 'Long Reception', '20+ Yard Receiving Plays', 'Receiving Yards Per Game',
    'Receiving Fumbles', 'Receiving Fumbles Lost', 'Receiving Yards After Catch', 'Receiving First Downs',
]


def _build_player_row(athlete_info, year, season_type):
    """Flatten one entry of the response's 'athletes' list into a row dict."""
    # `or {}` also covers keys that are present but explicitly null.
    athlete = athlete_info.get('athlete') or {}
    position = athlete.get('position') or {}
    row = {
        'season': year,
        'seasonType': season_type,
        'firstName': athlete.get('firstName', 'N/A'),
        'lastName': athlete.get('lastName', 'N/A'),
        'position': position.get('displayName', 'Unknown'),
        'teamName': athlete.get('teamName', 'Unknown'),
    }

    for category in athlete_info.get('categories') or []:
        if category.get('name') != "receiving":  # Only interested in receiving stats
            continue
        totals = category.get('totals') or []
        # Only map the values when the array has exactly the expected length;
        # otherwise positions could be mislabeled, so the stats are left out.
        if len(totals) == len(_RECEIVING_STAT_COLUMNS):
            # Values are stored as delivered by the API; callers convert the
            # columns they need with pd.to_numeric.
            row.update(zip(_RECEIVING_STAT_COLUMNS, totals))

    return row


def fetch_wr_data_for_years(years, season_types, limit=50, timeout=30):
    """Fetch receiving statistics from the ESPN by-athlete statistics endpoint.

    Every combination of season and season type is requested page by page
    until a page comes back without athletes. No position filter is applied:
    every athlete returned for the receiving category is included.

    Args:
        years: Iterable of season years to request.
        season_types: Iterable of season type codes to request.
        limit: Page size sent in the `limit` query parameter.
        timeout: Seconds passed to `requests.get` as its timeout, so a
            stalled connection cannot block forever.

    Returns:
        A pandas DataFrame with one row per athlete per season/season type.
        The identity columns and the receiving stat columns are always
        present (in that order), even when no rows were fetched; stats that
        could not be mapped for a player are NaN.

    Note:
        A failed request is reported with `print` and ends paging for that
        season/season type only, so the result may be partial.
    """
    base_url = "https://site.web.api.espn.com/apis/common/v3/sports/football/nfl/statistics/byathlete"
    all_data = []

    for year in years:
        for season_type in season_types:
            page = 1
            while True:
                # Construct the URL for each API request
                url = (
                    f"{base_url}?region=us&lang=en&contentorigin=espn&isqualified=false"
                    f"&page={page}&limit={limit}&category=offense:receiving"
                    f"&sort=receiving.receivingYards:desc&season={year}&seasontype={season_type}"
                )

                try:
                    response = requests.get(url, timeout=timeout)
                    response.raise_for_status()  # Raise HTTPError for bad responses
                    data = response.json()
                except (requests.exceptions.RequestException, ValueError) as e:
                    # ValueError covers a non-JSON body on requests versions
                    # whose JSON decode error is not a RequestException.
                    print(f"Request failed: {e}")
                    break  # Stop paging this season/season type

                athletes = data.get("athletes")
                if not athletes:
                    break  # No more athletes: last page has been passed

                all_data.extend(
                    _build_player_row(athlete_info, year, season_type)
                    for athlete_info in athletes
                )

                # Move to the next page
                page += 1

    # Passing `columns` fixes the column order and guarantees every expected
    # column exists, even for an empty result.
    return pd.DataFrame(all_data, columns=_PLAYER_ID_COLUMNS + _RECEIVING_STAT_COLUMNS)

# Specify the years and season types you want to fetch data for
years = [2024, 2023, 2022, 2021, 2020]
# Season type codes: 2 = regular season, 3 = playoffs (use [2, 3] for both)
season_types = [2]

# Fetch data for the specified years and season types
wr_data = fetch_wr_data_for_years(years, season_types)

# Convert the '20+ Yard Receiving Plays' and 'Receptions' columns to numeric types
# (values that cannot be parsed become NaN)
wr_data['20+ Yard Receiving Plays'] = pd.to_numeric(wr_data['20+ Yard Receiving Plays'], errors='coerce')
wr_data['Receptions'] = pd.to_numeric(wr_data['Receptions'], errors='coerce')

# Ratio of "20+ Yard Receiving Plays" to "Receptions". Despite the column name
# this is a fraction (0.25 means 25%). Zero receptions yields NaN, which the
# comparison below treats as not qualifying.
big_play_ratio = wr_data['20+ Yard Receiving Plays'] / wr_data['Receptions']

# Store the ratio rounded to two decimal places for display/export only
wr_data['20+ Yard Play Percentage'] = big_play_ratio.round(2)

# Keep only players whose ratio is at least 20%. The filter uses the unrounded
# ratio: filtering on the rounded value would let e.g. 19.6% through as 0.2.
# NOTE(review): no position filter is applied, so non-WR players (tight ends,
# running backs, ...) are included as well - confirm whether that is intended.
filtered_wr_data = wr_data[big_play_ratio >= 0.20]

# Most recent season first; a stable sort keeps the fetched order within each season
filtered_wr_data = filtered_wr_data.sort_values('season', ascending=False, kind='mergesort')

# Export to CSV
output_path = 'wr_rec_20plusYds_20pct_of_total_rec.csv'
filtered_wr_data.to_csv(output_path, index=False)

# Display the filtered DataFrame
display(HTML(filtered_wr_data.to_html(notebook=True, index=False)))

print(f"Data has been exported to {output_path}")


season,seasonType,firstName,lastName,position,teamName,Receptions,Receiving Targets,Receiving Yards,Yards Per Reception,Receiving Touchdowns,Long Reception,20+ Yard Receiving Plays,Receiving Yards Per Game,Receiving Fumbles,Receiving Fumbles Lost,Receiving Yards After Catch,Receiving First Downs,20+ Yard Play Percentage
2024,2,Tyreek,Hill,Wide Receiver,Dolphins,10,18,154,15.4,1,80,2,77.0,0,0,109,6,0.2
2024,2,Colby,Parkinson,Tight End,Rams,4,5,47,11.8,0,20,1,47.0,0,0,30,3,0.25
2024,2,Josh,Reynolds,Wide Receiver,Broncos,5,8,45,9.0,0,25,1,45.0,0,0,8,3,0.2
2024,2,David,Njoku,Tight End,Browns,4,5,44,11.0,0,29,1,44.0,0,0,28,1,0.25
2024,2,Foster,Moreau,Tight End,Saints,4,4,43,10.8,1,22,1,43.0,0,0,8,2,0.25
2024,2,Alexander,Mattison,Running Back,Raiders,4,6,43,10.8,1,31,1,43.0,0,0,49,2,0.25
2024,2,Kyle,Juszczyk,Fullback,49ers,2,3,40,20.0,0,34,1,40.0,0,0,10,1,0.5
2024,2,Brandin,Cooks,Wide Receiver,Cowboys,4,7,40,10.0,1,21,1,40.0,0,0,5,2,0.25
2024,2,George,Kittle,Tight End,49ers,4,5,40,10.0,0,26,1,40.0,0,0,9,1,0.25
2024,2,Tank,Dell,Wide Receiver,Texans,3,7,40,13.3,0,23,1,40.0,0,0,8,2,0.33


Data has been exported to wr_rec_20plusYds_20pct_of_total_rec.csv
