### Author: Rodolfo Elenes

Date Created: 8/12/2025

Change log:
8/12/2025 - Initialized

# Notebook to do list
    1.) 1993 was the ONLY 2 bye week system year, ensure this is processed correctly
    2.) 1999 Chargers, 2000 Bengals, and 2001 Cardinals the only week 1 bye week teams
            -and tampa + miami in 2017
    3.) The 1999 & 2000 Browns had a bye week in week 17
    
# Enhancements
    1.) Make sure notebook does not rerun for existing rows

##### Imports

In [1]:
import pandas as pd
import time
from datetime import datetime
from pathlib import Path
import warnings
pd.set_option('display.max_columns', None)
warnings.filterwarnings("ignore")

##### Notebook functions

In [2]:
def get_team_bye_weeks(team_name, pfr_abv, seasons):
#   Function name: get_team_bye_weeks
#   Description: Generates a dataframe with all of the team's bye weeks since 1990
#   Parameters: team_name, pfr_abv, seasons
#        team_name(str): The team's official name
#        pfr_abv(str): The abbreviation used by Pro Football Reference in their URL
#        seasons(list): All of the seasons since 1990
#   Return values: df
#        df(pandas dataframe): The dataframe with the team's data on all of its bye weeks since 1990

    bye_week_list = []
    df_seasons_list = [] # Helps process 1993s 2 bye week year + possible pd.read_html errors
        
    # Go through every season 
    for year in seasons:
        url = f"https://www.pro-football-reference.com/teams/{pfr_abv}/{year}.htm"
        try:
            df_schedule = pd.read_html(url, header=[0, 1])[1]
        except Exception as e:
            print(f"Unavailable link. Please check: {url}")
            print("\nError:", e, "\n")
            continue

        # Flatten out multiindex dataframe from pd.read_html
        cols = pd.DataFrame(df_schedule.columns.tolist())
        cols.iloc[:, 0] = cols.iloc[:, 0].replace("Unnamed:.*", pd.NA, regex=True).fillna(method='ffill')
        df_schedule.columns = pd.MultiIndex.from_frame(cols)
        df_schedule.columns = [
            f"{a}_{b}".strip('_') if b else a 
            for a, b in df_schedule.columns
        ]
        
        # Setup dictionary contents that will be fed into final dataframe
        df_schedule = df_schedule[['nan_Week', 'nan_Opp']]        
        check_df_sched = df_schedule[(df_schedule['nan_Opp'] == 'Bye Week')]
        if check_df_sched.shape[0] == 0:
            check_df_sched = get_missing_bye(df_schedule, year)
            print(f"Successfully added the missing bye week to the {team_name} {year} season.")
        for i in range(check_df_sched.shape[0]): # Helps 1993s 2 bye week season
            bye_week = check_df_sched.iloc[i]['nan_Week']
            bye_week_list.append(bye_week)
            df_seasons_list.append(year)
        print(f"Acquired {year} season.")
        time.sleep(6) 
        
    # Create final dataframe from dictionary
    bye_week_dict = {"Season": df_seasons_list, "Bye Week": bye_week_list}
    df = pd.DataFrame(columns = ['Season', 'Bye Week'], data = bye_week_dict)
    df['Team'] = team_name
    df = df[['Team', 'Season', 'Bye Week']]
    
    return df

In [3]:
def get_missing_bye(df, year):
#   Function name: get_missing_bye
#   Description: Calculates missing bye week for special cases (2001 Arizona Cardinals Week 1)
#   Parameters: df, year
#        df(pandas dataframe): The dataframe that contains a missing week of data
#        year(str): The year of the NFL season
#   Return values: df
#        df(pandas dataframe): The corrected dataframe with the new row of data

    if year >= 2021:
        week_games = list(range(1, 19))
    else:
        week_games = list(range(1, 18))
    games_played = df['nan_Week'].tolist()  # get weeks value from season
    missing_week = list(set(week_games) - set(games_played))  # get games missed
    df.loc[-1] = {'nan_Week': missing_week[0], 'nan_Opp': 'Bye Week'}  # Add bye week row
    df = df.reset_index(drop=True)
    df = df[(df['nan_Opp'] == 'Bye Week')]
    
    return df

##### Other Functions

In [4]:
def save_df(df, save_location, csv_name):
#   Function name: save_df
#   Description: This function is used to save any dataframe as a csv
#   Parameters: df, save_location, csv_name
#        df(pandas dataframe): The target dataframe
#        save_location(str): Specified location for the csv file to be saved
#        csv_name(str): Name of the csv file
    
    # creates folder if not existence
    output_dir = Path(save_location)
    output_dir.mkdir(parents=True, exist_ok=True)
    
    save_loctn = f"{save_location}/{csv_name}"
    print(f"Saving {csv_name} at {save_loctn}")
    df.to_csv(save_loctn, index = False)
    print(f"Successfully saved {csv_name}!")

In [5]:
def create_exempt_list():
#   Function name: get_missing_bye
#   Description: Checks for existing bye_week.csv files, so they arent reprocessed again
#   Return values: exempt_list
#        exempt_list(list): The list of teams that have already been processed by this notebook

    save_location = "../tables/bye_weeks_xref/teams"

    # creates folder if not existence
    output_dir = Path(save_location)
    output_dir.mkdir(parents=True, exist_ok=True)

    directory_path = Path(save_location)  # Replace with your directory path
    file_paths = [entry for entry in directory_path.iterdir() if entry.is_file()]
    file_names = [file.name for file in file_paths]

    exempt_list = []
    for i in file_names:
        exempt_list.append(i.split("_")[0])
        
    return exempt_list

In [6]:
def concatenate_all_files():
#   Function name: concatenate_all_files
#   Description: This function is used to save the final dataframe as a csv file by collecting all individual team csv files

    folder_location = "../tables/bye_weeks_xref"
    directory_path = Path(folder_location + "/teams")  # Replace with your directory path
    file_paths = [entry for entry in directory_path.iterdir() if entry.is_file()]
    file_names = [file.name for file in file_paths]

    df = pd.DataFrame(columns = ['Team', 'Season', 'Bye Week']) # Final Dataframe
    
    for file in file_names:
        df_temp = pd.read_csv(folder_location + "/teams/" + file)
        df = pd.concat([df, df_temp])
        
    display(df)
    save_df(df, folder_location, 'bye_weeks_xref.csv')

##### Main

In [7]:
def main():
#   Function name: main
#   Description: The entry function of the notebook

    teams_df = pd.read_csv("../tables/team_info_xref.csv")
    teams_df = teams_df[['Team', 'PFR_ABV']]
    exempt_list = create_exempt_list()
    teams_df = teams_df[~teams_df.Team.isin(exempt_list)].reset_index(drop=True)
    
    for row in range(teams_df.shape[0]):
        seasons = list(range(1990, datetime.now().year)) # 1990 was the year the NFL introduced bye weeks
        team_entry = teams_df.loc[row]
        team_name = team_entry.loc['Team']
        pfr_abv = team_entry.loc['PFR_ABV']
        print(f"Getting the bye weeks for the {team_name}")
        df_temp = get_team_bye_weeks(team_name, pfr_abv, seasons)
        print(f"Final dataframe for {team_name}:")
        display(df_temp)
        save_df(df_temp, '../tables/bye_weeks_xref/teams', f'{team_name}_bye_weeks.csv')
    if teams_df.shape[0] > 0:
        concatenate_all_files()
    else:
        print("Files are up to date!")
    
main()

Getting the bye weeks for the Houston Texans
Unavailable link. Please check: https://www.pro-football-reference.com/teams/htx/1990.htm

Error: HTTP Error 404: Not Found 

Unavailable link. Please check: https://www.pro-football-reference.com/teams/htx/1991.htm

Error: HTTP Error 404: Not Found 

Unavailable link. Please check: https://www.pro-football-reference.com/teams/htx/1992.htm

Error: HTTP Error 404: Not Found 

Unavailable link. Please check: https://www.pro-football-reference.com/teams/htx/1993.htm

Error: HTTP Error 404: Not Found 

Unavailable link. Please check: https://www.pro-football-reference.com/teams/htx/1994.htm

Error: HTTP Error 404: Not Found 

Unavailable link. Please check: https://www.pro-football-reference.com/teams/htx/1995.htm

Error: HTTP Error 404: Not Found 

Unavailable link. Please check: https://www.pro-football-reference.com/teams/htx/1996.htm

Error: HTTP Error 404: Not Found 

Unavailable link. Please check: https://www.pro-football-reference.com/te

Unnamed: 0,Team,Season,Bye Week
0,Houston Texans,2002,5
1,Houston Texans,2003,5
2,Houston Texans,2004,7
3,Houston Texans,2005,3
4,Houston Texans,2006,5
5,Houston Texans,2007,10
6,Houston Texans,2008,2
7,Houston Texans,2009,10
8,Houston Texans,2010,7
9,Houston Texans,2011,11


Saving Houston Texans_bye_weeks.csv at ../tables/bye_weeks_xref/teams/Houston Texans_bye_weeks.csv
Successfully saved Houston Texans_bye_weeks.csv!


Unnamed: 0,Team,Season,Bye Week
0,Arizona Cardinals,1990,5
1,Arizona Cardinals,1991,14
2,Arizona Cardinals,1992,4
3,Arizona Cardinals,1993,5
4,Arizona Cardinals,1993,12
...,...,...,...
31,Washington Commanders,2020,8
32,Washington Commanders,2021,9
33,Washington Commanders,2022,14
34,Washington Commanders,2023,14


Saving bye_weeks_xref.csv at ../tables/bye_weeks_xref/bye_weeks_xref.csv
Successfully saved bye_weeks_xref.csv!
