In [1]:
# Model will produce a dataframe and csv file
# 1) at least season average 7tgts per game AND 
# 2) at least 7tgs over last three games AND 
# 3) season average at least 10yds per reception 

In [2]:
# import the libraries
import requests
from bs4 import BeautifulSoup
import pandas as pd
import json
import glob
from IPython.display import display, HTML
from datetime import datetime
import nfl_data_py as nfl
import os
import re

In [3]:
# Set Pandas options to display all columns in a single row without wrapping
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 1000)

In [4]:
# Function to get the current NFL week and year
def get_current_week():
    current_date = datetime.now()
    season_start_date = datetime(2024, 9, 4)  # Reset this date at the start of the NFL season
    current_week = ((current_date - season_start_date).days // 7) + 1
    return current_week

# Set the current NFL year and week
current_year = datetime.now().year
current_week = get_current_week()
seasontype = 2 if current_week <= 18 else 3  # Regular season or playoffs

In [5]:
# Base columns for all positions
base_columns = [
    'season', 'season_type', 'week', 'player_id', 'player_name',
    'position', 'position_group', 'recent_team', 'opponent_team',
    'fantasy_points', 'fantasy_points_ppr'
]

# WR-specific columns (receiving-related)
wr_columns = [
    'receptions', 'targets', 'receiving_yards', 'receiving_tds',
    'receiving_fumbles', 'receiving_fumbles_lost',
    'receiving_air_yards', 'receiving_yards_after_catch',
    'receiving_first_downs', 'receiving_epa',
    'receiving_2pt_conversions', 'racr', 'target_share',
    'air_yards_share', 'wopr'
]


# Define the WR-specific target-related columns
wr_target_columns = ['targets', 'target_share', 'receptions', 'receiving_yards', 'receiving_tds']

# Combine base columns with WR-specific columns
wr_all_columns = base_columns + wr_columns
wr_tgt_columns = base_columns + wr_target_columns

# Import data for all the specified years
years = list(range(2017, current_year + 1))
nfl_data_all_years = nfl.import_weekly_data(
    years=years,
    columns=wr_all_columns
)

# Filter to show only WR positions
wr_data_all_years = nfl_data_all_years[nfl_data_all_years['position'] == 'WR']

# Select only the relevant columns for WRs
wr_tgt_data_all_years = wr_data_all_years[wr_tgt_columns]

Downcasting floats.


In [6]:
# TEST modifications worked
# Function returns WRs with at least 7 targets per game over the entire season with cumulative stats
def wr_tgts_7plus_per_game_current_season():
    # Step 1: Filter the data to include only the current season (current_year)
    wr_current_season = wr_tgt_data_all_years[wr_tgt_data_all_years['season'] == current_year]
    
    # Step 2: Get the most recent week (current NFL week)
    current_nfl_week = get_current_week()

    # Step 3: Group by player to sum cumulative stats (targets, receptions, receiving yards, receiving_tds) and calculate games played
    wr_grouped = wr_current_season.groupby(['season', 'player_id', 'player_name', 'recent_team'], as_index=False).agg({
        'targets': 'sum',               # Total targets over the season
        'receptions': 'sum',            # Total receptions over the season
        'receiving_yards': 'sum',       # Total receiving yards over the season
        'receiving_tds': 'sum',         # Total receiving touchdowns over the season
        'target_share': 'mean',         # Average target share over the season
        'week': 'count'                 # Number of games played (count of weeks)
    }).rename(columns={'week': 'games_played', 'receiving_tds': 'total_receiving_tds'})  # Rename receiving_tds here

    # Step 4: Calculate targets per game, receptions per game, receiving yards per game
    wr_grouped['targets_per_game'] = wr_grouped['targets'] / wr_grouped['games_played']
    wr_grouped['receptions_per_game'] = wr_grouped['receptions'] / wr_grouped['games_played']
    wr_grouped['receiving_yards_per_game'] = wr_grouped['receiving_yards'] / wr_grouped['games_played']

    # Step 5: Filter to keep only WRs with an average of 7 or more targets per game
    wr_filtered = wr_grouped[wr_grouped['targets_per_game'] >= 7].copy()  # Use .copy() to avoid the warning

    # Step 6: Rename 'player_id' to 'nflpy_player_id'
    wr_filtered = wr_filtered.rename(columns={'player_id': 'nflpy_player_id'})

    # Step 7: Add the 'week' column with the current NFL week for all rows using .loc[] to avoid SettingWithCopyWarning
    wr_filtered.loc[:, 'week'] = current_nfl_week

    # Step 8: Round the values of the specified columns
    wr_filtered['targets_per_game'] = wr_filtered['targets_per_game'].round(1)
    wr_filtered['receptions_per_game'] = wr_filtered['receptions_per_game'].round(1)
    wr_filtered['receiving_yards_per_game'] = wr_filtered['receiving_yards_per_game'].round(1)
    wr_filtered['target_share'] = wr_filtered['target_share'].round(3)
    wr_filtered['receiving_yards'] = wr_filtered['receiving_yards'].astype(int)  # Ensure no decimals for receiving_yards

    # Step 9: Reorder the columns and include 'total_receiving_tds'
    wr_filtered = wr_filtered[['season', 'week', 'nflpy_player_id', 'player_name', 'games_played', 'recent_team',
                               'targets', 'targets_per_game', 'target_share', 'receptions', 'receptions_per_game', 
                               'receiving_yards', 'receiving_yards_per_game', 'total_receiving_tds']]  # Already renamed

    # Step 10: Sort by targets per game in descending order
    wr_sorted = wr_filtered.sort_values(by='targets_per_game', ascending=False)

    # Step 11: Show only the first 5 rows
    return wr_sorted.head()


In [7]:
# Filter to show only WR positions
wr_data_all_years = nfl_data_all_years[nfl_data_all_years['position'] == 'WR']

# Select only the relevant columns for WRs
wr_tgt_data_all_years = wr_data_all_years[wr_tgt_columns]

# Call the function to get the filtered WRs for the current season 
wr_filtered_season = wr_tgts_7plus_per_game_current_season()

# Display the filtered data for verification
display(HTML(wr_filtered_season.head().to_html(index=False)))

season,week,nflpy_player_id,player_name,games_played,recent_team,targets,targets_per_game,target_share,receptions,receptions_per_game,receiving_yards,receiving_yards_per_game,total_receiving_tds
2024,8,00-0039337,M.Nabers,5,NYG,60,12.0,0.381,39,7.8,427,85.4,3
2024,8,00-0033908,C.Kupp,3,LA,35,11.7,0.298,23,7.7,198,66.0,2
2024,8,00-0037740,G.Wilson,7,NYJ,75,10.7,0.286,46,6.6,460,65.7,3
2024,8,00-0039067,R.Rice,3,KC,29,9.7,0.328,24,8.0,288,96.0,2
2024,8,00-0038117,W.Robinson,7,NYG,67,9.6,0.286,43,6.1,303,43.3,2


In [8]:
# Update the baseline dataframe by adding boolean columns for Criteria #2 and #3
def update_baseline_with_criteria_flags():
    # Baseline dataframe for the current season
    wr_current_season = wr_tgts_7plus_per_game_current_season()

    # Step 1: Filter the data for the current season and only include rows where players have targets (active games)
    wr_active_games = wr_tgt_data_all_years[(wr_tgt_data_all_years['season'] == current_year) & (wr_tgt_data_all_years['targets'] > 0)]

    # Step 2: Sort the data by player, then by week in descending order to get the most recent games first
    wr_active_games = wr_active_games.sort_values(by=['player_name', 'week'], ascending=[True, False])

    # Step 3: For each player, select the last 3 games where they had stats
    wr_last_3_games = wr_active_games.groupby('player_name').head(3)

    # Step 4: Group by player and sum the targets over the last 3 games
    wr_last_3_games_grouped = wr_last_3_games.groupby(['player_name', 'player_id'], as_index=False).agg({
        'targets': 'sum'  # Total targets over the last 3 games
    }).rename(columns={'targets': 'total_targets_last_3_games'})

    # Step 5: Merge the total targets for the last 3 games back into the current season dataframe
    wr_current_season = wr_current_season.merge(wr_last_3_games_grouped, left_on=['player_name', 'nflpy_player_id'], 
                                                right_on=['player_name', 'player_id'], how='left')

    # Drop 'player_id' from the right-hand dataframe after merge to avoid confusion
    wr_current_season.drop(columns=['player_id'], inplace=True)

    # Step 6: Calculate 'avg_7tgs_last3_games' as total targets over last 3 games divided by 3
    wr_current_season['avg_7tgs_last3_games'] = (wr_current_season['total_targets_last_3_games'] / 3).round(2)

    # Step 7: Create the 'avg_7tgs_last3_games_bool' column (TRUE if average targets per game over the last 3 games >= 7)
    wr_current_season['avg_7tgs_last3_games_bool'] = wr_current_season['avg_7tgs_last3_games'] >= 7
    wr_current_season['avg_7tgs_last3_games_bool'] = wr_current_season['avg_7tgs_last3_games_bool'].fillna(False)

    # Step 8: Add 'season_avg_10yds_per_reception_bool' (TRUE if season average yards per reception >= 10)
    wr_current_season['season_avg_10yds_per_reception'] = (wr_current_season['receiving_yards'] / wr_current_season['receptions']).round(2)
    wr_current_season['season_avg_10yds_per_reception_bool'] = wr_current_season['season_avg_10yds_per_reception'] >= 10

    # Only drop 'meets_criteria_3' if it exists in the dataframe
    if 'meets_criteria_3' in wr_current_season.columns:
        wr_current_season.drop(columns=['meets_criteria_3'], inplace=True)

    # Display the first 5 rows to verify the boolean columns
    display(wr_current_season.head())

    # Return the updated baseline dataframe with the criteria flags
    return wr_current_season

# Call the function to verify the output
wr_with_criteria_flags = update_baseline_with_criteria_flags()


Unnamed: 0,season,week,nflpy_player_id,player_name,games_played,recent_team,targets,targets_per_game,target_share,receptions,receptions_per_game,receiving_yards,receiving_yards_per_game,total_receiving_tds,total_targets_last_3_games,avg_7tgs_last3_games,avg_7tgs_last3_games_bool,season_avg_10yds_per_reception,season_avg_10yds_per_reception_bool
0,2024,8,00-0039337,M.Nabers,5,NYG,60,12.0,0.381,39,7.8,427,85.4,3,35,11.67,True,10.95,True
1,2024,8,00-0033908,C.Kupp,3,LA,35,11.7,0.298,23,7.7,198,66.0,2,35,11.67,True,8.61,False
2,2024,8,00-0037740,G.Wilson,7,NYJ,75,10.7,0.286,46,6.6,460,65.7,3,41,13.67,True,10.0,True
3,2024,8,00-0039067,R.Rice,3,KC,29,9.7,0.328,24,8.0,288,96.0,2,29,9.67,True,12.0,True
4,2024,8,00-0038117,W.Robinson,7,NYG,67,9.6,0.286,43,6.1,303,43.3,2,29,9.67,True,7.05,False


In [9]:
# Split the 'player_name' column in the baseline dataframe into 'FirstName' and 'LastName'
def split_player_name_column(df):
    # Split the 'player_name' column by '.' into two columns: 'FirstName' and 'LastName'
    df[['FirstName', 'LastName']] = df['player_name'].str.split('.', expand=True)
    
    # Normalize the 'FirstName' and 'LastName' columns: strip spaces and convert to lowercase
    df['FirstName'] = df['FirstName'].str.strip().str.lower()
    df['LastName'] = df['LastName'].str.strip().str.lower()
    
    # Drop the original 'player_name' column if no longer needed
    df.drop(columns=['player_name'], inplace=True)
    
    return df

# Apply the function to split the player name
wr_with_criteria_flags = split_player_name_column(wr_with_criteria_flags)

# Display the updated dataframe to verify the split
display(wr_with_criteria_flags.head())

Unnamed: 0,season,week,nflpy_player_id,games_played,recent_team,targets,targets_per_game,target_share,receptions,receptions_per_game,receiving_yards,receiving_yards_per_game,total_receiving_tds,total_targets_last_3_games,avg_7tgs_last3_games,avg_7tgs_last3_games_bool,season_avg_10yds_per_reception,season_avg_10yds_per_reception_bool,FirstName,LastName
0,2024,8,00-0039337,5,NYG,60,12.0,0.381,39,7.8,427,85.4,3,35,11.67,True,10.95,True,m,nabers
1,2024,8,00-0033908,3,LA,35,11.7,0.298,23,7.7,198,66.0,2,35,11.67,True,8.61,False,c,kupp
2,2024,8,00-0037740,7,NYJ,75,10.7,0.286,46,6.6,460,65.7,3,41,13.67,True,10.0,True,g,wilson
3,2024,8,00-0039067,3,KC,29,9.7,0.328,24,8.0,288,96.0,2,29,9.67,True,12.0,True,r,rice
4,2024,8,00-0038117,7,NYG,67,9.6,0.286,43,6.1,303,43.3,2,29,9.67,True,7.05,False,w,robinson


In [10]:
# Function to scrape salary changes from FantasyPros website
def scrape_salary_changes():
    # URL of the FantasyPros salary changes page
    url = "https://www.fantasypros.com/daily-fantasy/nfl/fanduel-salary-changes.php"
    
    # Fetch the page content
    response = requests.get(url)
    
    # Check if the page was fetched successfully
    if response.status_code != 200:
        print(f"Failed to fetch the page. Status code: {response.status_code}")
        return None
    
    # Parse the page content using BeautifulSoup
    soup = BeautifulSoup(response.content, 'html.parser')
    
    # Locate the table containing the salary changes (assuming it's the first table)
    table = soup.find('table')  # Adjust if necessary based on the page structure
    
    # Extract the table headers
    headers = [header.text for header in table.find_all('th')]
    
    # Extract the table rows
    rows = []
    for row in table.find_all('tr')[1:]:  # Skip the header row
        cols = row.find_all('td')
        cols = [ele.text.strip() for ele in cols]  # Clean up whitespace
        rows.append(cols)
    
    # Create a DataFrame with the scraped data
    salary_changes_df = pd.DataFrame(rows, columns=headers)
        
    # Display the first few rows of the DataFrame
    display(salary_changes_df.head())
    
    return salary_changes_df


In [11]:
# Function to split names based on spaces and handle suffixes and hyphenated names
def clean_name(name):
    # Split by spaces
    name_parts = name.split()
    
    # Check if the second last part is a suffix
    suffixes = ['ii', 'iii', 'jr.', 'sr.']
    
    # If a suffix is found, append it to the last name
    if len(name_parts) > 2 and name_parts[-2].lower() in suffixes:
        first_name = name_parts[0]
        last_name = ' '.join(name_parts[1:])  # Join the last name and suffix together
    else:
        first_name = name_parts[0]
        last_name = ' '.join(name_parts[1:])  # Join everything after the first name
    
    return {'FirstName': first_name, 'LastName': last_name}

# Process salary changes by position without the Suffix column
def process_salary_changes_by_position():
    # Fetch the salary changes data
    salary_changes_df = scrape_salary_changes()

    # Step 1: Extract the 'Player' column and split it into 'Name' and 'Team-Position'
    salary_changes_df[['Name', 'Team-Position']] = salary_changes_df['Player'].str.extract(r'([^\(]+)\((.*)\)')
    
    # Step 2: Further split the 'Name' into 'FirstName' and 'LastName' without handling suffix separately
    name_split = salary_changes_df['Name'].apply(clean_name)
    name_df = pd.DataFrame(name_split.tolist())  # Convert list of dictionaries to DataFrame
    
    # Merge the cleaned name columns (FirstName, LastName) back into the salary DataFrame
    salary_changes_df = pd.concat([salary_changes_df, name_df], axis=1)
    
    # Step 3: Split 'Team-Position' into 'Team' and 'Position'
    salary_changes_df[['Team', 'Position']] = salary_changes_df['Team-Position'].str.extract(r'(\w+)\s*-\s*(\w+)')
    
    # Drop the original 'Player' and 'Team-Position' columns
    salary_changes_df.drop(columns=['Player', 'Team-Position', 'Name'], inplace=True)

    # Normalize the 'FirstName' and 'LastName' columns to lowercase and trim whitespace
    salary_changes_df['FirstName'] = salary_changes_df['FirstName'].str.strip().str.lower()
    salary_changes_df['LastName'] = salary_changes_df['LastName'].str.strip().str.lower()

    # Display the processed salary DataFrame for verification
    display(salary_changes_df.head())
    
    # Step 4: Split the data by position
    df_qb = salary_changes_df[salary_changes_df['Position'] == 'QB']
    df_wr = salary_changes_df[salary_changes_df['Position'] == 'WR']
    df_rb = salary_changes_df[salary_changes_df['Position'] == 'RB']
    
    return df_qb, df_wr, df_rb

# Call the modified process_salary_changes_by_position function
df_qb_salary, df_wr_salary, df_rb_salary = process_salary_changes_by_position()

# Verify the results for wide receivers (df_wr_salary)
display(df_wr_salary.head())

Unnamed: 0,ECR,Player,Kickoff,Opp,This Week,Last Week,Difference
0,-,Riley Sharp (BAL - TE),Sun 1:00PM,@CLE,"$4,000","$4,000",0
1,301,John Metchie III (HOU - WR),Sun 1:00PM,IND,"$4,400","$4,400",0
2,-,Snoop Conner (DAL - RB),Sun 8:20PM,@SF,"$4,000",-,-
3,-,Trey Knox (MIN - TE),Thu 8:15PM,@LAR,"$4,000","$4,000",0
4,-,Kevin Harris (NE - RB),Sun 1:00PM,NYJ,"$4,000","$4,000",0


Unnamed: 0,ECR,Kickoff,Opp,This Week,Last Week,Difference,FirstName,LastName,Team,Position
0,-,Sun 1:00PM,@CLE,"$4,000","$4,000",0,riley,sharp,BAL,TE
1,301,Sun 1:00PM,IND,"$4,400","$4,400",0,john,metchie iii,HOU,WR
2,-,Sun 8:20PM,@SF,"$4,000",-,-,snoop,conner,DAL,RB
3,-,Thu 8:15PM,@LAR,"$4,000","$4,000",0,trey,knox,MIN,TE
4,-,Sun 1:00PM,NYJ,"$4,000","$4,000",0,kevin,harris,NE,RB


Unnamed: 0,ECR,Kickoff,Opp,This Week,Last Week,Difference,FirstName,LastName,Team,Position
1,301,Sun 1:00PM,IND,"$4,400","$4,400",0,john,metchie iii,HOU,WR
6,135,Sun 8:20PM,DAL,"$5,200","$4,800",400,ricky,pearsall,SF,WR
8,-,Sun 1:00PM,@CIN,"$4,000","$4,000",0,joseph,ngata,PHI,WR
9,-,Sun 1:00PM,@HOU,"$4,000","$4,000",0,anthony,gould,IND,WR
11,153,Sun 1:00PM,ATL,"$4,000","$4,200",-200,trey,palmer,TB,WR


In [12]:
# test
# Function to handle name and team matching during the merge, with dynamic column reordering
def preprocess_for_merge(df_salary, df_wr_baseline):
    # Step 1: Rename salary columns for clarity
    df_salary.rename(columns={'This Week': 'current_week_salary', 'Last Week': 'last_week_salary', 'Difference': 'salary_diff'}, inplace=True)

    # Step 2: Standardize the first letter of the first name
    df_salary['FirstName_Initial'] = df_salary['FirstName'].str[0].str.lower()  # Get the first letter
    df_wr_baseline['FirstName_Initial'] = df_wr_baseline['FirstName'].str[0].str.lower()

    # Step 3: Handle cases where LastName appears more than once in df_wr_salary
    df_salary['LastName'] = df_salary.groupby('LastName')['LastName'].transform(lambda x: x.iloc[0] if len(x) > 1 else x)

    # Step 4: Merge based on LastName, FirstName initial, and Team
    merged_df = df_wr_baseline.merge(
        df_salary[['FirstName_Initial', 'FirstName', 'LastName', 'Team', 'current_week_salary', 'last_week_salary', 'salary_diff']],
        left_on=['FirstName_Initial', 'LastName', 'recent_team'],  # Merge on initials, last name, and team
        right_on=['FirstName_Initial', 'LastName', 'Team'],        # Merge with salary dataframe
        how='left'  # Keep all players from the baseline dataframe
    )

    # Step 5: Drop unnecessary columns (like FirstName_Initial, Team, and duplicate FirstName)
    merged_df.drop(columns=['FirstName_Initial', 'Team', 'FirstName_x'], inplace=True)

    # Step 6: Rename 'FirstName_y' to 'FirstName'
    merged_df.rename(columns={'FirstName_y': 'FirstName'}, inplace=True)

    # Step 7: Dynamically reorder columns
    # Key columns to appear first
    key_columns = ['season', 'week', 'nflpy_player_id', 'FirstName', 'LastName', 'current_week_salary', 
                   'last_week_salary', 'salary_diff', 'games_played', 'recent_team']
    
    # Automatically get all other columns not in key_columns
    remaining_columns = [col for col in merged_df.columns if col not in key_columns]

    # Combine the two sets of columns
    ordered_columns = key_columns + remaining_columns

    # Reorder the dataframe
    merged_df = merged_df[ordered_columns]

    return merged_df

# call the function to perform the merge and adjust the columns dynamically
df_wr_merge_baseline_salary = preprocess_for_merge(df_wr_salary, wr_with_criteria_flags)

# Display the result of the merge to verify
display(df_wr_merge_baseline_salary.head())


Unnamed: 0,season,week,nflpy_player_id,FirstName,LastName,current_week_salary,last_week_salary,salary_diff,games_played,recent_team,targets,targets_per_game,target_share,receptions,receptions_per_game,receiving_yards,receiving_yards_per_game,total_receiving_tds,total_targets_last_3_games,avg_7tgs_last3_games,avg_7tgs_last3_games_bool,season_avg_10yds_per_reception,season_avg_10yds_per_reception_bool
0,2024,8,00-0039337,malik,nabers,"$8,300","$8,300",0.0,5,NYG,60,12.0,0.381,39,7.8,427,85.4,3,35,11.67,True,10.95,True
1,2024,8,00-0033908,,kupp,,,,3,LA,35,11.7,0.298,23,7.7,198,66.0,2,35,11.67,True,8.61,False
2,2024,8,00-0037740,garrett,wilson,"$7,400","$7,400",0.0,7,NYJ,75,10.7,0.286,46,6.6,460,65.7,3,41,13.67,True,10.0,True
3,2024,8,00-0039067,rashee,rice,"$4,000","$4,000",0.0,3,KC,29,9.7,0.328,24,8.0,288,96.0,2,29,9.67,True,12.0,True
4,2024,8,00-0038117,wan'dale,robinson,"$6,600","$6,600",0.0,7,NYG,67,9.6,0.286,43,6.1,303,43.3,2,29,9.67,True,7.05,False
