In [1]:
# Pulls data from the FantasyPros and ESPN APIs.

## **Important:** The DataFrames are built from CSV files in the current working directory, so
# do not delete or comment out the functions or lines of code that create those CSV files.

In [2]:
# import the libraries
import requests
from bs4 import BeautifulSoup
import pandas as pd
import json
import glob
from IPython.display import display
from datetime import datetime
import nfl_data_py as nfl
import os

In [3]:
# Template URLs for the ESPN "statistics by athlete" endpoint, one per position.
# The three URLs differ only in the offensive category (passing / rushing / receiving),
# so they are generated from one shared template. The {page}, {year} and {seasontype}
# placeholders are kept unformatted here and are filled in when a request is made.
_ESPN_STATS_URL_TEMPLATE = (
    "https://site.web.api.espn.com/apis/common/v3/sports/football/nfl/statistics/byathlete"
    "?region=us&lang=en&contentorigin=espn&isqualified=false"
    "&page={{page}}&limit=50"
    "&category=offense%3A{category}&sort={category}.{category}Yards%3Adesc"
    "&season={{year}}&seasontype={{seasontype}}"
)

espn_urls = {
    position: _ESPN_STATS_URL_TEMPLATE.format(category=category)
    for position, category in (("QB", "passing"), ("RB", "rushing"), ("WR", "receiving"))
}

In [4]:
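# NOTE(review): an earlier comment here said this cell outputs the combined QB/betting-lines
# DataFrame, but nothing in this cell builds such a frame — confirm and remove if stale.
# Get the current year and week for the NFL season.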
def get_current_week(current_date=None, season_start_date=None):
    """Return the 1-based NFL week number for a given date.

    Args:
        current_date: The datetime to evaluate. Defaults to ``datetime.now()``.
        season_start_date: The datetime on which week 1 begins. Defaults to the
            hard-coded 2024 season start below.

    Returns:
        int: Number of whole 7-day periods elapsed since the season start, plus one.
        Dates before the season start yield 1 rather than zero or a negative week.
    """
    if current_date is None:
        current_date = datetime.now()
    if season_start_date is None:
        season_start_date = datetime(2024, 9, 4)  ## *** Reset the date at the start of the NFL season ***

    weeks_elapsed = (current_date - season_start_date).days // 7
    # Floor division goes negative before kickoff; clamp so callers never see week <= 0.
    return max(weeks_elapsed + 1, 1)

# Season context shared by the ESPN fetch functions below.
current_week = get_current_week()
current_year = datetime.now().year
# seasontype: 2 for the regular season (weeks 1-18), 3 for the playoffs
seasontype = 3 if current_week > 18 else 2

In [5]:
# Fetch function that returns both the player records and pagination info for verification
def fetch_position_data_with_verification(position, url_template, year=None, week=None,
                                          season_type=None, timeout=30):
    """Page through an ESPN statistics endpoint and collect basic player info.

    Args:
        position: Label stored in each record's 'position' field (e.g. "QB").
        url_template: URL containing ``{page}``, ``{year}`` and ``{seasontype}`` placeholders.
        year: Season year; defaults to the module-level ``current_year``.
        week: Week stored in each record; defaults to the module-level ``current_week``.
        season_type: ESPN season type; defaults to the module-level ``seasontype``.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        tuple: ``(all_players, total_pages)`` where ``all_players`` is a list of dicts
        (year, week, player_id, player, position, team) and ``total_pages`` is the page
        count reported by the first response (1 if not reported).

    Paging stops at the first empty page, the first non-200 response, or the first
    request error; whatever was collected up to that point is returned.
    """
    year = current_year if year is None else year
    week = current_week if week is None else week
    season_type = seasontype if season_type is None else season_type

    page = 1
    all_players = []
    total_pages = 1  # Default to 1 page unless pagination indicates more

    while True:
        url = url_template.format(page=page, year=year, seasontype=season_type)
        try:
            # Without a timeout a stalled connection would hang the notebook indefinitely.
            response = requests.get(url, timeout=timeout)
        except requests.RequestException as exc:
            print(f"{position}: request for page {page} failed: {exc}")
            break

        if response.status_code != 200:
            # Report the failure instead of silently returning partial data.
            print(f"{position}: page {page} returned HTTP {response.status_code}; stopping.")
            break

        data = response.json()

        # Pagination info is only read from the first page, for verification output.
        if page == 1:
            total_pages = data.get('pagination', {}).get('pages', 1)

        athletes = data.get('athletes', [])
        if not athletes:
            break  # No more athletes available

        for athlete_data in athletes:
            athlete = athlete_data.get('athlete')
            if not athlete:
                continue  # Skip malformed entries rather than raising KeyError

            # Base columns only (stats are intentionally excluded)
            all_players.append({
                'year': year,
                'week': week,
                'player_id': athlete.get('id', 'N/A'),
                'player': athlete.get('displayName', 'N/A'),
                'position': position,
                'team': athlete.get('teamShortName', 'N/A')
            })

        page += 1

    return all_players, total_pages

In [6]:
# Turn a list of fetched player records into a DataFrame
def create_dataframe(position_data):
    """Return a pandas DataFrame built from ``position_data``."""
    frame = pd.DataFrame(data=position_data)
    return frame
    
# Fetch every position, report how much was fetched, and preview the results
def process_and_verify_position_data():
    """Fetch QB, RB and WR player lists from ESPN and return them as DataFrames.

    Prints one verification line per position (row count and reported page count),
    displays the head of each frame, and returns ``(df_qb, df_rb, df_wr)``.
    """
    position_order = ("QB", "RB", "WR")
    frames = {}
    page_counts = {}

    # Fetch and convert each position in turn
    for pos in position_order:
        records, pages = fetch_position_data_with_verification(pos, espn_urls[pos])
        frames[pos] = create_dataframe(records)
        page_counts[pos] = pages

    # Verification output
    for pos in position_order:
        print(f"{pos}: Fetched {len(frames[pos])} rows across {page_counts[pos]} pages.")

    # Preview the first few rows of every frame
    for pos in position_order:
        display(frames[pos].head())

    return frames["QB"], frames["RB"], frames["WR"]

# Call the function to fetch, verify, and display the data.
# The three returned frames (QB, RB, WR) are reused by the later merge cells.
df_qb, df_rb, df_wr = process_and_verify_position_data()

QB: Fetched 65 rows across 2 pages.
RB: Fetched 233 rows across 5 pages.
WR: Fetched 390 rows across 8 pages.


Unnamed: 0,year,week,player_id,player,position,team
0,2024,7,15864,Geno Smith,QB,SEA
1,2024,7,4361741,Brock Purdy,QB,SF
2,2024,7,2577417,Dak Prescott,QB,DAL
3,2024,7,14880,Kirk Cousins,QB,ATL
4,2024,7,3915511,Joe Burrow,QB,CIN


Unnamed: 0,year,week,player_id,player,position,team
0,2024,7,3043078,Derrick Henry,RB,BAL
1,2024,7,4360569,Jordan Mason,RB,SF
2,2024,7,4241416,Chuba Hubbard,RB,CAR
3,2024,7,3929630,Saquon Barkley,RB,PHI
4,2024,7,4047365,Josh Jacobs,RB,GB


Unnamed: 0,year,week,player_id,player,position,team
0,2024,7,4258173,Nico Collins,WR,HOU
1,2024,7,4362628,Ja'Marr Chase,WR,CIN
2,2024,7,3116165,Chris Godwin,WR,TB
3,2024,7,4047650,DK Metcalf,WR,SEA
4,2024,7,4241389,CeeDee Lamb,WR,DAL


In [7]:
# Function to generate FantasyPros URLs based on the positions
def generate_fantasy_pros_urls(season, positions=None, week=None, scoring=None):
    """Build one FantasyPros projections URL per position.

    Args:
        season: Season year; used in the URL path and, when truthy, as a query parameter.
        positions: Iterable of position codes, or a single string (optionally
            comma-separated, e.g. ``"QB,RB"``). Defaults to QB, RB and WR.
        week: Optional week number; omitted from the query when falsy.
        scoring: Optional scoring format (e.g. ``"STD"``); single quotes are stripped.

    Returns:
        list[str]: The generated URLs, in the same order as ``positions``.
    """
    from urllib.parse import urlencode  # stdlib; avoids relying on requests.compat

    base_url = f"https://api.fantasypros.com/public/v2/json/nfl/{season}/projections"

    if positions is None:
        positions_list = ['QB', 'RB', 'WR']
    elif isinstance(positions, str):
        # Iterating a bare string would yield single characters ('Q', 'B'), so split it instead.
        positions_list = [part.strip() for part in positions.split(',') if part.strip()]
    else:
        positions_list = list(positions)

    scoring_str = scoring.replace("'", "") if scoring else None
    generated_urls = []

    for position in positions_list:
        params = {'position': position}
        if season:
            params['season'] = season
        if week:
            params['week'] = week
        if scoring:
            params['scoring'] = scoring_str
        generated_urls.append(f"{base_url}?{urlencode(params)}")

    return generated_urls

# Function to fetch data from FantasyPros API
def fetch_data(url, headers=None, timeout=30):
    """GET ``url`` and return the decoded JSON body, or ``None`` on failure.

    Args:
        url: Address to request.
        headers: Optional dict of HTTP headers (e.g. the API key header).
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The parsed JSON payload, or ``None`` if the request failed, returned an
        error status, or the body was not valid JSON. Failures are printed.
    """
    try:
        # The request itself is inside the try block so that connection errors and
        # timeouts are reported the same way as HTTP error statuses.
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()
        return response.json()  # Return the JSON data
    except requests.RequestException as e:
        print(f"Failed to retrieve {url}. Error: {e}")
        return None
    except ValueError as e:
        # Older requests versions raise a plain ValueError for a non-JSON body.
        print(f"Failed to decode JSON from {url}. Error: {e}")
        return None

# Function to fetch and handle FantasyPros data for given positions and stats
def fetch_fantasy_pros_data(season, positions=None, week=None, scoring=None):
    api_key = os.getenv('api_key')
    if not api_key:
        print("API key is not set.")
        return None
    
    headers = {'x-api-key': api_key}
    urls = generate_fantasy_pros_urls(season, positions, week, scoring)
    all_data = []
    
    for url in urls:
        print(f"Fetching FantasyPros data from: {url}")
        response = fetch_data(url, headers)
        if response and 'players' in response:
            players_data = response['players']
            for player in players_data:
                # Extract general columns
                player_info = {
                    'name': player['name'],
                    'points': player['stats'].get('points', 0),
                    'points_ppr': player['stats'].get('points_ppr', 0),
                    'points_half': player['stats'].get('points_half', 0)
                }
                # Extract position-specific columns based on position
                position = player.get('position_id')
                if position == 'QB':
                    player_info.update({
                        'passing_attempts': player['stats'].get('pass_att', 0),
                        'passing_completions': player['stats'].get('pass_cmp', 0),
                        'passing_yards': player['stats'].get('pass_yds', 0),
                        'passing_tds': player['stats'].get('pass_tds', 0)
                    })
                elif position == 'RB':
                    player_info.update({
                        'rushing_attempts': player['stats'].get('rush_att', 0),
                        'rushing_yards': player['stats'].get('rush_yds', 0),
                        'rushing_tds': player['stats'].get('rush_tds', 0),
                        'receptions': player['stats'].get('rec_rec', 0),
                        'reception_yards': player['stats'].get('rec_yds', 0),
                        'reception_tds': player['stats'].get('rec_tds', 0)
                    })
                elif position == 'WR':
                    player_info.update({
                        'receptions': player['stats'].get('rec_rec', 0),
                        'reception_yards': player['stats'].get('rec_yds', 0),
                        'reception_tds': player['stats'].get('rec_tds', 0)
                    })
                all_data.append(player_info)
    
    return pd.DataFrame(all_data)

In [8]:
# Function to merge ESPN data with FantasyPros data, keeping only the relevant columns for each position
def merge_espn_fantasypros(espn_df, fantasypros_df, position):
    """Left-join FantasyPros projections onto an ESPN player frame by player name.

    Args:
        espn_df: ESPN frame containing a 'player' column.
        fantasypros_df: FantasyPros frame containing a 'name' column, or ``None`` /
            empty when no projections could be fetched.
        position: 'QB', 'RB' or 'WR' selects that position's projection columns;
            any other value keeps every FantasyPros column.

    Returns:
        pandas.DataFrame: every row of ``espn_df`` with the projection columns appended
        (NaN where no projection matched). The FantasyPros 'name' column is dropped.
    """
    base_columns = ['name', 'points', 'points_ppr', 'points_half']
    position_columns = {
        'QB': ['passing_attempts', 'passing_completions', 'passing_yards', 'passing_tds'],
        'RB': ['rushing_attempts', 'rushing_yards', 'rushing_tds',
               'receptions', 'reception_yards', 'reception_tds'],
        'WR': ['receptions', 'reception_yards', 'reception_tds'],
    }

    if fantasypros_df is None or fantasypros_df.empty:
        # Keep the pipeline running with empty projections instead of failing on None.
        print(f"No FantasyPros data available for {position}; projection columns will be empty.")
        fantasypros_df = pd.DataFrame(columns=base_columns)

    if position in position_columns:
        # reindex (rather than [[...]]) creates any missing column as NaN, so a failed
        # fetch for one position does not raise KeyError here.
        fantasypros_df = fantasypros_df.reindex(columns=base_columns + position_columns[position])

    # Merge on 'player' from ESPN and 'name' from FantasyPros.
    # Rows are not filtered by position, so a same-named player from another
    # position group can still match.
    merged_df = pd.merge(espn_df, fantasypros_df, left_on='player', right_on='name', how='left')

    # Drop the redundant 'name' column from FantasyPros
    return merged_df.drop(columns=['name'])

In [9]:
# Fetch FantasyPros projections and merge them onto the ESPN frames for all positions
def process_and_merge_fantasypros_data(df_qb, df_rb, df_wr, scoring='STD'):
    """Merge FantasyPros projections for the current week onto the ESPN QB/RB/WR frames.

    Args:
        df_qb, df_rb, df_wr: ESPN player frames for each position.
        scoring: Scoring format forwarded to the FantasyPros request.

    Returns:
        tuple: ``(df_qb_merged, df_rb_merged, df_wr_merged)``. The head of each merged
        frame is displayed before returning.
    """
    # Season and week are resolved at call time
    week_now = get_current_week()
    season = datetime.now().year

    # One FantasyPros fetch covers all three positions
    fantasypros_data = fetch_fantasy_pros_data(
        season=season,
        positions=['QB', 'RB', 'WR'],
        week=week_now,
        scoring=scoring,
    )

    # Merge each ESPN frame with the projections, keeping only that position's columns
    espn_frames = (('QB', df_qb), ('RB', df_rb), ('WR', df_wr))
    merged_frames = [
        merge_espn_fantasypros(espn_frame, fantasypros_data, pos)
        for pos, espn_frame in espn_frames
    ]

    # Preview the merged frames
    for merged in merged_frames:
        display(merged.head())

    return tuple(merged_frames)

# Call the function to fetch, merge, and display the data.
# (The call returns in-memory DataFrames; it does not write anything to disk.)
df_qb_merged, df_rb_merged, df_wr_merged = process_and_merge_fantasypros_data(df_qb, df_rb, df_wr)


Fetching FantasyPros data from: https://api.fantasypros.com/public/v2/json/nfl/2024/projections?position=QB&season=2024&week=7&scoring=STD
Fetching FantasyPros data from: https://api.fantasypros.com/public/v2/json/nfl/2024/projections?position=RB&season=2024&week=7&scoring=STD
Fetching FantasyPros data from: https://api.fantasypros.com/public/v2/json/nfl/2024/projections?position=WR&season=2024&week=7&scoring=STD


Unnamed: 0,year,week,player_id,player,position,team,points,points_ppr,points_half,passing_attempts,passing_completions,passing_yards,passing_tds
0,2024,7,15864,Geno Smith,QB,SEA,17.62,17.62,17.62,37.83,26.32,264.91,1.44
1,2024,7,4361741,Brock Purdy,QB,SF,17.78,17.78,17.78,32.58,21.21,256.14,1.63
2,2024,7,2577417,Dak Prescott,QB,DAL,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2024,7,14880,Kirk Cousins,QB,ATL,16.76,16.76,16.76,34.13,22.74,260.32,1.71
4,2024,7,3915511,Joe Burrow,QB,CIN,17.91,17.91,17.91,34.03,22.86,256.04,1.68


Unnamed: 0,year,week,player_id,player,position,team,points,points_ppr,points_half,rushing_attempts,rushing_yards,rushing_tds,receptions,reception_yards,reception_tds
0,2024,7,3043078,Derrick Henry,RB,BAL,15.74,16.96,16.35,19.31,91.51,0.9,1.23,10.21,0.05
1,2024,7,4360569,Jordan Mason,RB,SF,11.18,12.78,11.98,16.29,69.7,0.5,1.6,11.42,0.04
2,2024,7,4241416,Chuba Hubbard,RB,CAR,12.47,15.75,14.11,16.63,72.65,0.41,3.28,23.88,0.1
3,2024,7,3929630,Saquon Barkley,RB,PHI,14.59,17.39,15.99,19.14,85.71,0.56,2.8,20.31,0.14
4,2024,7,4047365,Josh Jacobs,RB,GB,11.0,13.44,12.22,15.52,65.18,0.4,2.44,16.91,0.09


Unnamed: 0,year,week,player_id,player,position,team,points,points_ppr,points_half,receptions,reception_yards,reception_tds
0,2024,7,4258173,Nico Collins,WR,HOU,,,,,,
1,2024,7,4362628,Ja'Marr Chase,WR,CIN,11.49,17.53,14.51,6.04,82.43,0.56
2,2024,7,3116165,Chris Godwin,WR,TB,10.6,17.17,13.89,6.57,77.93,0.47
3,2024,7,4047650,DK Metcalf,WR,SEA,9.92,15.2,12.56,5.28,72.42,0.47
4,2024,7,4241389,CeeDee Lamb,WR,DAL,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
def scrape_salary_changes(timeout=30):
    """Scrape the FantasyPros FanDuel salary-changes table into a DataFrame.

    Args:
        timeout: Seconds to wait for the page before giving up.

    Returns:
        pandas.DataFrame with one row per table row and the table headers as columns,
        or ``None`` if the page could not be fetched or contains no table. The head of
        the frame is displayed before returning.
    """
    # URL of the FantasyPros salary changes page
    url = "https://www.fantasypros.com/daily-fantasy/nfl/fanduel-salary-changes.php"

    # Fetch the page content
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Failed to fetch the page. Error: {exc}")
        return None

    # Check if the page was fetched successfully
    if response.status_code != 200:
        print(f"Failed to fetch the page. Status code: {response.status_code}")
        return None

    # Parse the page content using BeautifulSoup
    soup = BeautifulSoup(response.content, 'html.parser')

    # Locate the table containing the salary changes (assuming it's the first table)
    table = soup.find('table')  # Adjust if necessary based on the page structure
    if table is None:
        # soup.find returns None when the layout changes or the page is blocked
        print("Failed to locate a table on the salary changes page.")
        return None

    # Extract the table headers; stripped so later lookups such as 'Player' match exactly
    headers = [header.text.strip() for header in table.find_all('th')]

    # Extract the table rows
    rows = []
    for row in table.find_all('tr')[1:]:  # Skip the header row
        cells = [cell.text.strip() for cell in row.find_all('td')]
        if cells:  # Rows without <td> cells would otherwise become all-empty records
            rows.append(cells)

    # Create a DataFrame with the scraped data
    salary_changes_df = pd.DataFrame(rows, columns=headers)

    # Display the first few rows of the DataFrame
    display(salary_changes_df.head())

    return salary_changes_df

# Run the function to scrape and display salary changes.
# The function already displays the first rows itself; because this call is the last
# expression in the cell, the returned DataFrame is rendered as the cell output as well.
scrape_salary_changes()

In [None]:
def fetch_salary_changes():
    """Announce the scrape, then return whatever ``scrape_salary_changes`` produces."""
    print("Fetching salary changes data by scraping...")
    return scrape_salary_changes()

In [None]:
def process_salary_changes_by_position(salary_changes_df=None):
    """Split the scraped salary-changes table into per-position frames.

    Args:
        salary_changes_df: Optional already-scraped salary table with a 'Player' column
            formatted like ``"First Last (TEAM - POS)"``. When omitted, the table is
            fetched with ``fetch_salary_changes()``. The input frame is not modified.

    Returns:
        tuple: ``(df_qb, df_wr, df_rb)`` — note the QB, WR, RB order — each with the
        columns FirstName, LastName, Team, Position, This Week, Last Week, Difference.

    Raises:
        RuntimeError: If no salary data could be fetched.
    """
    # Step 1: Obtain the salary changes data
    if salary_changes_df is None:
        salary_changes_df = fetch_salary_changes()
    if salary_changes_df is None:
        raise RuntimeError("Salary changes data could not be fetched; nothing to process.")

    salary = salary_changes_df.copy()  # never mutate the caller's frame
    first_column = salary.columns[0]   # the rankings column

    # Step 2: Split 'Player' into FirstName, LastName and Team-Position.
    # The first name is everything up to the first whitespace and the last name is the
    # remainder before the parenthesis, so names such as "Ja'Marr Chase",
    # "Amon-Ra St. Brown" and "Patrick Mahomes II" are parsed (a \w+-only pattern
    # leaves them as NaN).
    salary[['FirstName', 'LastName', 'Team-Position']] = salary['Player'].str.extract(
        r'^\s*(\S+)\s+(.+?)\s*\(([^)]*)\)'
    )

    # Step 3: Drop the original 'Player' column and the first column (Rankings)
    salary = salary.drop(columns=list(dict.fromkeys(['Player', first_column])))

    # Step 4: Split 'Team-Position' into separate 'Team' and 'Position' columns
    salary[['Team', 'Position']] = salary['Team-Position'].str.extract(r'(\w+)\s*-\s*(\w+)')

    # Step 5: Drop 'Team-Position' and any unwanted columns like 'Kickoff' and 'Opp'
    salary = salary.drop(columns=['Team-Position', 'Kickoff', 'Opp'], errors='ignore')

    # Step 6: Reorder the columns to match the desired order
    salary = salary[['FirstName', 'LastName', 'Team', 'Position', 'This Week', 'Last Week', 'Difference']]

    # Step 7: Split the DataFrame by position (copies, so later edits don't warn about views)
    df_qb = salary[salary['Position'] == 'QB'].copy()
    df_wr = salary[salary['Position'] == 'WR'].copy()
    df_rb = salary[salary['Position'] == 'RB'].copy()

    # Display the first few rows of each DataFrame for verification
    display(df_qb.head())
    display(df_wr.head())
    display(df_rb.head())

    return df_qb, df_wr, df_rb

# Call the function to process the salary changes by position.
# The function returns the frames in (QB, WR, RB) order, so they are unpacked in that order.
df_qb_salary, df_wr_salary, df_rb_salary = process_salary_changes_by_position()


In [None]:
# merge salary and projections
def merge_salary_and_projections(df_qb_salary, df_rb_salary, df_wr_salary, df_qb_proj, df_rb_proj, df_wr_proj):
    """
    This function takes the salary DataFrames and the projection DataFrames 
    and merges them based on player names for QB, RB, and WR positions.
    """

    # Split 'player' column into 'FirstName' and 'LastName' in projection DataFrames
    df_qb_proj[['FirstName', 'LastName']] = df_qb_proj['player'].str.split(' ', n=1, expand=True)
    df_rb_proj[['FirstName', 'LastName']] = df_rb_proj['player'].str.split(' ', n=1, expand=True)
    df_wr_proj[['FirstName', 'LastName']] = df_wr_proj['player'].str.split(' ', n=1, expand=True)

    # Merge salary and projections for QB
    df_qb_merge_projection_salary = pd.merge(df_qb_proj, df_qb_salary, how='left', left_on=['FirstName', 'LastName'], right_on=['FirstName', 'LastName'])
    print(f"QB merged data: {df_qb_merge_projection_salary.shape[0]} rows")

    # Merge salary and projections for RB
    df_rb_merge_projection_salary = pd.merge(df_rb_proj, df_rb_salary, how='left', left_on=['FirstName', 'LastName'], right_on=['FirstName', 'LastName'])
    print(f"RB merged data: {df_rb_merge_projection_salary.shape[0]} rows")

    # Merge salary and projections for WR
    df_wr_merge_projection_salary = pd.merge(df_wr_proj, df_wr_salary, how='left', left_on=['FirstName', 'LastName'], right_on=['FirstName', 'LastName'])
    print(f"WR merged data: {df_wr_merge_projection_salary.shape[0]} rows")

    # Display the first few rows of each merged DataFrame for verification
    display(df_qb_merge_projection_salary.head())
    display(df_rb_merge_projection_salary.head())
    display(df_wr_merge_projection_salary.head())

    return df_qb_merge_projection_salary, df_rb_merge_projection_salary, df_wr_merge_projection_salary


# Fetch, merge, and return projections from ESPN and FantasyPros (already implemented)
# NOTE(review): this repeats the earlier process_and_merge_fantasypros_data call (whose
# default scoring is also 'STD'), so the FantasyPros API is queried a second time.
# Consider reusing df_qb_merged / df_rb_merged / df_wr_merged instead — confirm first.
df_qb_proj, df_rb_proj, df_wr_proj = process_and_merge_fantasypros_data(df_qb, df_rb, df_wr, scoring='STD')

# Now, merge these with salary data
# Argument order: the three salary frames first (QB, RB, WR), then the three projection frames.
df_qb_merge_projection_salary, df_rb_merge_projection_salary, df_wr_merge_projection_salary = merge_salary_and_projections(
    df_qb_salary, df_rb_salary, df_wr_salary,  # Salary DataFrames
    df_qb_proj, df_rb_proj, df_wr_proj  # Projection DataFrames from ESPN and FantasyPros
)


In [None]:
# modify_cols_merge_salary_and_projections
# def modify_cols_merge_salary_and_projections(projections_df, salary_df):
#     """
#     This function merges salary and projection data based on FirstName and LastName,
#     and reorders columns accordingly while removing unnecessary columns.
#     """
    
#     # Ensure salary_df has FirstName and LastName already
#     if 'FirstName' not in salary_df.columns or 'LastName' not in salary_df.columns:
#         print("Error: 'FirstName' and/or 'LastName' columns are missing in salary DataFrame.")
#         print(f"Available columns in salary_df: {salary_df.columns}")
#         return None
    
#     # Ensure projections have player names split into firstName and lastName
#     projections_df[['firstName', 'lastName']] = projections_df['player'].str.split(' ', n=1, expand=True)
    
#     # Merge the two dataframes
#     merged_df = pd.merge(projections_df, salary_df, left_on=['firstName', 'lastName'], right_on=['FirstName', 'LastName'], how='left')

#     # Drop unwanted columns ('FirstName_x', 'LastName_x', 'FirstName_y', 'LastName_y', 'Team', 'Position')
#     columns_to_drop = ['FirstName_x', 'LastName_x', 'FirstName_y', 'LastName_y', 'Team', 'Position']
#     existing_columns_to_drop = [col for col in columns_to_drop if col in merged_df.columns]  # Only drop if they exist
#     merged_df.drop(columns=existing_columns_to_drop, inplace=True)

#     # Debugging: Check columns after dropping
#     # print(f"Columns after dropping: {merged_df.columns}")

#     # Reorder columns to move 'firstName' before 'lastName' right after 'player'
#     cols = list(merged_df.columns)
#     player_index = cols.index('player')
#     first_last_name = ['firstName', 'lastName']
    
#     # Insert 'firstName' before 'lastName'
#     for name in first_last_name[::-1]:  # Reverse the order to insert correctly
#         cols.insert(player_index + 1, cols.pop(cols.index(name)))
    
#     merged_df = merged_df[cols]
    
#     return merged_df

# # Call the function with in-memory DataFrames
# df_qb_merge2_projection_salary, df_rb_merge2_projection_salary, df_wr_merge2_projection_salary = merge2_salary_and_projections(
#     df_qb_proj, df_rb_proj, df_wr_proj,  # Projections DataFrames
#     df_qb_salary, df_rb_salary, df_wr_salary  # Salary DataFrames
# )

In [None]:
# def merge2_salary_and_projections(df_qb_proj, df_rb_proj, df_wr_proj, df_qb_salary, df_rb_salary, df_wr_salary):
#     """
#     Merges salary and projections data for QB, RB, and WR positions.
#     """
#     # Merge the projections and salary changes for QB
#     qb_projections_salary_merged_df = modify_cols_merge_salary_and_projections(df_qb_proj, df_qb_salary)
    
#     # Merge the projections and salary changes for RB
#     rb_projections_salary_merged_df = modify_cols_merge_salary_and_projections(df_rb_proj, df_rb_salary)
    
#     # Merge the projections and salary changes for WR
#     wr_projections_salary_merged_df = modify_cols_merge_salary_and_projections(df_wr_proj, df_wr_salary)
    
#     # Return the merged DataFrames
#     return qb_projections_salary_merged_df, rb_projections_salary_merged_df, wr_projections_salary_merged_df

# # Ensure this call is correct:
# df_qb_merge2_projection_salary, df_rb_merge2_projection_salary, df_wr_merge2_projection_salary = merge2_salary_and_projections(
#     df_qb_proj, df_rb_proj, df_wr_proj,  # Projections DataFrames
#     df_qb_salary, df_rb_salary, df_wr_salary  # Salary DataFrames
# )


In [None]:
# test 1
# Modify Columns & Merge Salary/Projections
def modify_cols_merge_salary_and_projections(projections_df, salary_df):
    """
    Merge salary data onto projection data by player name and tidy the columns.

    The projections' 'player' column is split at the first space into 'firstName' and
    'lastName' (on a copy — ``projections_df`` is not modified), then left-joined with
    ``salary_df`` on its 'FirstName'/'LastName' columns.

    Args:
        projections_df: Projection frame with a 'player' column.
        salary_df: Salary frame with 'FirstName' and 'LastName' columns.

    Returns:
        pandas.DataFrame with 'firstName' and 'lastName' placed directly after 'player'
        and the salary-side name/team/position columns removed, or ``None`` (after
        printing an error) when ``salary_df`` lacks the name columns.
    """
    # Ensure salary_df has FirstName and LastName already
    if 'FirstName' not in salary_df.columns or 'LastName' not in salary_df.columns:
        print("Error: 'FirstName' and/or 'LastName' columns are missing in salary DataFrame.")
        print(f"Available columns in salary_df: {salary_df.columns}")
        return None

    # Split player names on a copy; reindex guarantees both parts exist even when
    # no name contains a space.
    name_parts = projections_df['player'].str.split(' ', n=1, expand=True).reindex(columns=[0, 1])
    projections = projections_df.assign(firstName=name_parts[0], lastName=name_parts[1])

    # Merge the two dataframes
    merged_df = pd.merge(
        projections, salary_df,
        left_on=['firstName', 'lastName'], right_on=['FirstName', 'LastName'],
        how='left',
    )

    # Drop the redundant name/team/position columns. The unsuffixed names occur when only
    # the salary frame carries FirstName/LastName; the _x/_y variants occur when the
    # projections frame already had them too.
    columns_to_drop = ['FirstName', 'LastName', 'FirstName_x', 'LastName_x',
                       'FirstName_y', 'LastName_y', 'Team', 'Position']
    merged_df = merged_df.drop(columns=[col for col in columns_to_drop if col in merged_df.columns])

    # Reorder columns so 'firstName' and 'lastName' sit right after 'player'
    name_columns = ['firstName', 'lastName']
    cols = [col for col in merged_df.columns if col not in name_columns]
    insert_at = cols.index('player') + 1
    cols[insert_at:insert_at] = name_columns

    return merged_df[cols]

# Merge Salary and Projections
def merge2_salary_and_projections(df_qb_proj, df_rb_proj, df_wr_proj, df_qb_salary, df_rb_salary, df_wr_salary):
    """
    Run the projection/salary merge for QB, RB, and WR in turn.

    Each pair is handed to ``modify_cols_merge_salary_and_projections``; the merged row
    count is printed per position and the three results are returned as (QB, RB, WR).
    """
    position_inputs = (
        ("QB", df_qb_proj, df_qb_salary),
        ("RB", df_rb_proj, df_rb_salary),
        ("WR", df_wr_proj, df_wr_salary),
    )

    merged_by_position = []
    for label, projections, salaries in position_inputs:
        merged = modify_cols_merge_salary_and_projections(projections, salaries)
        print(f"{label} merged data: {merged.shape[0]} rows")
        merged_by_position.append(merged)

    return tuple(merged_by_position)


# Call the function with in-memory DataFrames.
# Arguments are grouped as projections first, then salaries — the reverse of the
# grouping used by merge_salary_and_projections.
df_qb_merge2_projection_salary, df_rb_merge2_projection_salary, df_wr_merge2_projection_salary = merge2_salary_and_projections(
    df_qb_proj, df_rb_proj, df_wr_proj,  # Projections DataFrames
    df_qb_salary, df_rb_salary, df_wr_salary  # Salary DataFrames
)

In [None]:
def scrape_rostered_percentage_over_weeks(positions, scoring='PPR', range_type='week', timeout=30):
    """Scrape the FantasyPros 'ROST' percentage per player for every week so far.

    For each position, the stats page of every week from the current week down to
    week 1 is fetched and the 'Player' and 'ROST' columns are collected into one
    frame with a ``wk<N>`` column per week.

    Args:
        positions: Iterable of position slugs used in the URL (e.g. 'qb', 'wr').
        scoring: Scoring format query parameter.
        range_type: Range query parameter.
        timeout: Seconds to wait for each page before giving up.

    Returns:
        dict: position -> DataFrame with 'Player' followed by the week columns in
        descending week order, sorted by the current week's value when that column
        exists. A position for which nothing could be scraped maps to a frame holding
        only an empty 'Player' column. Weeks that fail to load are reported and skipped.
    """
    current_week = get_current_week()

    # Dictionary to store DataFrames for each position
    rostered_dataframes = {}

    for position in positions:
        # Accumulates one wk<N> column per successfully scraped week
        rostered_df = None

        # Loop over weeks from current_week down to 1
        for week in range(current_week, 0, -1):
            url = f"https://www.fantasypros.com/nfl/stats/{position}.php?week={week}&scoring={scoring}&range={range_type}"

            try:
                response = requests.get(url, timeout=timeout)
            except requests.RequestException as exc:
                print(f"Failed to fetch the page for {position} week {week}. Error: {exc}")
                continue

            # Check if the page was fetched successfully
            if response.status_code != 200:
                print(f"Failed to fetch the page for {position} week {week}. Status code: {response.status_code}")
                continue

            soup = BeautifulSoup(response.content, 'html.parser')

            # Locate the table containing the stats data
            table = soup.find('table')
            if table is None:
                # soup.find returns None when the page has no table (layout change, block page)
                print(f"No stats table found for {position} week {week}.")
                continue

            headers = [header.text.strip() for header in table.find_all('th')]

            # Find the indices of the "Player" and "ROST" columns
            if "Player" not in headers or "ROST" not in headers:
                print(f"'Player' or 'ROST' column not found for {position} week {week}.")
                continue

            player_index = headers.index("Player")
            rost_index = headers.index("ROST")

            # Extract the table rows
            rows = []
            for row in table.find_all('tr')[1:]:  # Skip the header row
                cells = [cell.get_text(strip=True) for cell in row.find_all('td')]
                # Only process rows wide enough to contain both columns
                if len(cells) > max(player_index, rost_index):
                    rows.append([cells[player_index], cells[rost_index]])

            week_column = f"wk{week}"
            week_df = pd.DataFrame(rows, columns=["Player", week_column])

            # Convert "12.3%" to 12.3; blank or non-numeric cells become NaN instead of raising
            week_df[week_column] = pd.to_numeric(
                week_df[week_column].str.replace('%', '', regex=False), errors='coerce'
            )

            if rostered_df is None or rostered_df.empty:
                # First week with data initializes the cumulative DataFrame
                rostered_df = week_df
            else:
                rostered_df = pd.merge(rostered_df, week_df, on='Player', how='outer')

        if rostered_df is None:
            # Every week failed; keep the pipeline alive with an empty frame
            print(f"No rostered data could be scraped for {position}.")
            rostered_df = pd.DataFrame(columns=['Player'])

        # Order the week columns from most recent to oldest
        week_columns = sorted(
            (col for col in rostered_df.columns if col != 'Player'),
            key=lambda name: int(name[2:]),
            reverse=True,
        )
        rostered_df = rostered_df[['Player'] + week_columns]

        # Sort by the current week's rostered percentage when it is available
        if f"wk{current_week}" in rostered_df.columns:
            rostered_df = rostered_df.sort_values(by=f"wk{current_week}", ascending=False)

        # Store the DataFrame for the position
        rostered_dataframes[position] = rostered_df

        # Display the first few rows of the DataFrame for verification
        display(rostered_df.head())

    print("Scraping completed for all positions.")

    # Return the DataFrames for each position
    return rostered_dataframes

# Scrape rostered percentage over weeks for all positions.
# The result is a dict keyed by the position slugs below; the later merge cell reads
# only the 'qb', 'rb' and 'wr' entries.
positions = ['qb', 'wr', 'rb', 'te', 'flex', 'dst']
rostered_dataframes = scrape_rostered_percentage_over_weeks(positions, scoring='PPR', range_type='week')

In [None]:
# Define the merge_with_rostered_data function
def merge_with_rostered_data(projections_df, rostered_df):
    """
    Merge the projections + salary DataFrame with the rostered percentage data for each player.

    Args:
        projections_df: Frame with 'firstName' and 'lastName' columns (and, optionally,
            the 'This Week' / 'Last Week' / 'Difference' salary columns).
        rostered_df: Frame with a 'Player' column formatted like ``"First Last(TEAM)"``
            and any number of ``wk<N>`` columns. It is not modified.

    Returns:
        pandas.DataFrame: ``projections_df`` left-joined with every ``wk<N>`` column found
        in ``rostered_df`` (renamed to ``"wk<N> %rostered"``), with the salary columns
        renamed for clarity.
    """
    import re  # local import: only needed to recognise the week columns

    # Split 'Player' into first and last name. The first name is everything up to the
    # first whitespace, so apostrophes and hyphens ("Ja'Marr", "Amon-Ra") are kept; the
    # last name is the remainder up to the last opening parenthesis.
    name_parts = rostered_df['Player'].str.extract(r'^\s*(\S+)\s+(.*\S)\s*\(')

    # Use whichever week columns were actually scraped instead of a fixed wk6..wk1 list
    week_columns = [col for col in rostered_df.columns if re.fullmatch(r'wk\d+', str(col))]

    rostered_names = (
        rostered_df[week_columns]
        .assign(firstName=name_parts[0], lastName=name_parts[1])
        # Rows whose name could not be parsed cannot be matched to a player
        .dropna(subset=['firstName', 'lastName'])
    )

    # Merge the projections + salary DataFrame with the rostered DataFrame on firstName and lastName
    merged_df = pd.merge(
        projections_df,
        rostered_names[['firstName', 'lastName'] + week_columns],
        on=['firstName', 'lastName'],
        how='left',
    )

    # Rename the columns for clarity
    column_renames = {
        'This Week': 'This Week Salary',
        'Last Week': 'Last Week Salary',
        'Difference': 'Salary Differential',
    }
    column_renames.update({col: f'{col} %rostered' for col in week_columns})

    return merged_df.rename(columns=column_renames)


In [None]:
# rostered_dataframes is the per-position dictionary produced by the rostered-percentage scrape;
# attach its data to each position's projections + salary frame.
df_qb_merged_projections_salary_rostered = merge_with_rostered_data(
    df_qb_merge2_projection_salary, rostered_dataframes['qb']
)
df_rb_merged_projections_salary_rostered = merge_with_rostered_data(
    df_rb_merge2_projection_salary, rostered_dataframes['rb']
)
df_wr_merged_projections_salary_rostered = merge_with_rostered_data(
    df_wr_merge2_projection_salary, rostered_dataframes['wr']
)

# Preview the first few rows of every merged frame
for heading, merged_preview in (
    ("QB Merged Projections, Salary, and Rostered DataFrame:", df_qb_merged_projections_salary_rostered),
    ("\nRB Merged Projections, Salary, and Rostered DataFrame:", df_rb_merged_projections_salary_rostered),
    ("\nWR Merged Projections, Salary, and Rostered DataFrame:", df_wr_merged_projections_salary_rostered),
):
    print(heading)
    display(merged_preview.head())


In [None]:
# Red zone stats URL template per position; `{week}` is filled in later with str.format
base_urls = {
    pos: f'https://www.fantasypros.com/nfl/red-zone-stats/{pos}.php?week={{week}}&range=week'
    for pos in ('qb', 'rb', 'wr', 'te')
}


In [None]:
# Function to ensure unique column names
def ensure_unique_column_names(columns):
    """
    Return a list of column names in which every entry is unique.

    The first occurrence of a name is kept as-is; each later occurrence gets a
    numeric suffix (``yds``, ``yds_1``, ``yds_2``, ...). If a suffixed
    candidate is already in use (for example the input already holds a literal
    ``yds_1``), the counter keeps advancing until a free name is found, so the
    result never contains duplicates.

    Parameters
    ----------
    columns : iterable
        Column labels, in order. Labels must be hashable.

    Returns
    -------
    list
        New labels, same length and order as ``columns``.
    """
    last_suffix = {}   # base name -> highest suffix handed out so far
    taken = set()      # every name already emitted
    unique_names = []

    for col in columns:
        name = col
        if name in taken:
            suffix = last_suffix.get(col, 0)
            # Advance past any candidate that is already in use.
            while name in taken:
                suffix += 1
                name = f"{col}_{suffix}"
            last_suffix[col] = suffix
        taken.add(name)
        unique_names.append(name)

    return unique_names

# Scrape red zone stats for all weeks up to the current week
def scrape_red_zone_stats():
    """
    Scrape weekly red zone stats for every position listed in ``base_urls``.

    For each position, one page per week (week 1 through ``get_current_week()``)
    is requested. The first ``<table>`` on the page is parsed, the player cell
    is split into 'firstName', 'lastName' and 'team', the position-specific
    stat columns are renamed and prefixed with ``red_zone_``, and the weekly
    frames are stacked.

    Weeks whose request fails (network error, non-200 status) or whose page has
    no table are reported with ``print`` and skipped.

    Returns
    -------
    tuple(dict, dict)
        ``(position_data, current_week_data)``, both keyed by position.
        ``position_data[pos]`` holds all scraped weeks; ``current_week_data[pos]``
        holds only the rows whose 'week' equals the current week. When nothing
        could be scraped for a position, both are empty frames with the
        identifier columns.
    """
    current_week = get_current_week()  # Get the current week
    position_data = {}  # Dictionary to store data for each position
    current_week_data = {}  # Dictionary to store current week data for each position

    request_timeout_seconds = 30  # never let one stalled request hang the notebook
    id_columns = ['firstName', 'lastName', 'team', 'week']

    # Scraped header -> output stat name, per position. The order of the values
    # is also the order of the stat columns in the output. Suffixed keys such
    # as 'yds_1' refer to the second occurrence of a repeated header, as named
    # by ensure_unique_column_names.
    receiving_renames = {
        'rec': 'rec_rec',
        'tgt': 'rec_tgt',
        'rec pct': 'rec_pct',
        'yds': 'rec_yds',
        'y/r': 'rec_y/r',
        'td': 'rec_td',
        'tgt pct': 'rec_tgt_pct',
    }
    stat_renames_by_position = {
        'qb': {
            'comp': 'pass_comp',
            'att': 'pass_att',
            'pct': 'pass_pct',
            'yds': 'pass_yds',
            'y/a': 'pass_y/a',
            'td': 'pass_td',
        },
        'rb': {
            'att': 'rush_att',
            'yds': 'rush_yds',
            'y/a': 'rush_y/a',
            'td': 'rush_td',
            'pct': 'rush_pct',
            'rec': 'rec_rec',
            'tgt': 'rec_tgt',
            'rec pct': 'rec_pct',
            'yds_1': 'rec_yds',
            'y/r': 'rec_y/r',
            'td_1': 'rec_td',
            'tgt pct': 'tgt_pct',
        },
        'wr': receiving_renames,
        'te': receiving_renames,
    }

    # Loop over positions
    for position, url_template in base_urls.items():
        stat_renames = stat_renames_by_position.get(position)
        if stat_renames is None:
            print(f"No red zone column mapping for position {position}; skipping.")
            continue

        weekly_frames = []  # one DataFrame per successfully scraped week

        # Loop over weeks from week 1 to current_week
        for week in range(1, current_week + 1):
            url = url_template.format(week=week)
            try:
                response = requests.get(url, timeout=request_timeout_seconds)
            except requests.RequestException as exc:
                print(f"Failed to fetch data for {position} week {week}. Error: {exc}")
                continue

            if response.status_code != 200:
                print(f"Failed to fetch data for {position} week {week}. Status: {response.status_code}")
                continue

            # Parse the page content
            soup = BeautifulSoup(response.content, 'html.parser')
            table = soup.find('table')

            if table is None:
                print(f"No table found for {position} week {week}.")
                continue

            # Extract the headers and data rows (rows without any <td> cell,
            # e.g. extra header rows, carry no data and are skipped)
            headers = [header.text.strip() for header in table.find_all('th')]
            rows = [
                cells
                for cells in (
                    [col.text.strip() for col in row.find_all('td')]
                    for row in table.find_all('tr')[1:]
                )
                if cells
            ]

            # Create a DataFrame for the current week
            week_df = pd.DataFrame(rows, columns=headers)
            week_df['Week'] = week  # Add a 'Week' column

            # Lowercase the column names, then de-duplicate repeated headers
            week_df.columns = ensure_unique_column_names(week_df.columns.str.lower())

            # Split "First Last (TEAM)": the first token is the first name and
            # everything up to the parenthesis is the last name, so suffixes
            # ("II", "Jr.") and punctuated names ("A.J.", "St. Brown") survive.
            week_df[['firstName', 'lastName', 'team']] = week_df['player'].str.extract(
                r'^\s*(\S+)\s+(.+?)\s*\((.*)\)'
            )

            # Drop the original player column and apply the position's stat names
            week_df = week_df.drop(columns=['player']).rename(columns=stat_renames)

            # Keep the identifier columns plus whichever expected stats exist
            stat_columns = [col for col in stat_renames.values() if col in week_df.columns]
            week_df = week_df[id_columns + stat_columns]

            # Add 'red_zone_' prefix to all columns except the identifier columns
            week_df.columns = id_columns + [f"red_zone_{col}" for col in stat_columns]

            weekly_frames.append(week_df)

        # Stack all weeks once; fall back to an empty frame that still has the
        # identifier columns so the 'week' filter below cannot raise KeyError.
        if weekly_frames:
            all_weeks_data = pd.concat(weekly_frames, ignore_index=True)
        else:
            all_weeks_data = pd.DataFrame(columns=id_columns)

        # Store the data for all weeks
        position_data[position] = all_weeks_data

        # Also store the data for the current week only
        current_week_data[position] = all_weeks_data[all_weeks_data['week'] == current_week]

    return position_data, current_week_data

# Run the red zone scrape once and keep both the all-weeks and current-week results
position_data, current_week_data = scrape_red_zone_stats()

# Preview the head of the all-weeks frame for every position
for position, all_weeks_frame in position_data.items():
    print(f"\nAll weeks data for {position.upper()}:")
    display(all_weeks_frame.head())


In [None]:
# Bind each position's all-weeks / current-week red zone frames to module-level names
df_qb_redzone_all_weeks, df_qb_redzone_current_week = position_data['qb'], current_week_data['qb']
df_rb_redzone_all_weeks, df_rb_redzone_current_week = position_data['rb'], current_week_data['rb']
df_wr_redzone_all_weeks, df_wr_redzone_current_week = position_data['wr'], current_week_data['wr']
df_te_redzone_all_weeks, df_te_redzone_current_week = position_data['te'], current_week_data['te']

# Preview the QB frames: all weeks first, then the current week
print("QB Red Zone Data for All Weeks:")
display(df_qb_redzone_all_weeks.head())

print("\nQB Red Zone Data for Current Week:")
display(df_qb_redzone_current_week.head())

In [None]:
# Function to optionally save the red zone data to CSV files in the current working directory
def save_redzone_data_to_csv(save_to_csv=False):
    """
    Write the eight module-level red zone DataFrames (QB/RB/WR/TE, all weeks
    and current week) to CSV files in the current working directory.

    Nothing is written unless ``save_to_csv`` is truthy. Files are written
    without the index, one per DataFrame, named after the variable.
    """
    if not save_to_csv:
        return

    # Resolve the target directory once
    current_directory = os.getcwd()

    def _write(frame, filename):
        # Single place that defines how every red zone CSV is written
        frame.to_csv(os.path.join(current_directory, filename), index=False)

    # QB
    _write(df_qb_redzone_all_weeks, 'df_qb_redzone_all_weeks.csv')
    _write(df_qb_redzone_current_week, 'df_qb_redzone_current_week.csv')

    # RB
    _write(df_rb_redzone_all_weeks, 'df_rb_redzone_all_weeks.csv')
    _write(df_rb_redzone_current_week, 'df_rb_redzone_current_week.csv')

    # WR
    _write(df_wr_redzone_all_weeks, 'df_wr_redzone_all_weeks.csv')
    _write(df_wr_redzone_current_week, 'df_wr_redzone_current_week.csv')

    # TE
    _write(df_te_redzone_all_weeks, 'df_te_redzone_all_weeks.csv')
    _write(df_te_redzone_current_week, 'df_te_redzone_current_week.csv')

    print(f"CSV files have been saved in the current directory: {current_directory}")

# Write the red zone CSV files into the current working directory now
# (passing save_to_csv=False would make this call a no-op).
save_redzone_data_to_csv(save_to_csv=True)