In [None]:
def save_df(df, save_location, csv_name):
    """Save any dataframe as a csv file, creating the target folder if needed.

    Parameters:
        df (pandas dataframe): The target dataframe.
        save_location (str): Folder in which the csv file is saved. The
            folder and any missing parent folders are created first.
        csv_name (str): Name of the csv file.

    The dataframe index is not written to the file. Progress messages are
    printed before and after the write.
    """
    # Make sure the destination folder exists before writing into it
    Path(save_location).mkdir(parents=True, exist_ok=True)

    destination = f"{save_location}/{csv_name}"
    print(f"Saving {csv_name} at {destination}")
    df.to_csv(destination, index=False)
    print(f"Successfully saved {csv_name}!")

In [None]:
# def construct_df_roster(con_memory):
# #   Function name: construct_df_roster
# #   Description: Create the base of the roster table
# #   Parameters: con_memory
# #        con_memory(duckdb object): used to carry duckdb queries
# #   Return values: df
# #        df(pandas dataframe): The dataframe with all runningbacks in the ../src/rosters folder
    
#     # Allocate all rosters.csv files
#     save_location = "../src/rosters"
#     directory_path = Path(save_location)
#     file_paths = [entry for entry in directory_path.iterdir() if entry.is_file()]
#     file_names = [file.name for file in file_paths]
#     df = pd.DataFrame()
#     for i in file_names:
#         df_temp = pd.read_csv(save_location + "/" + i)
#         df = pd.concat([df, df_temp])

#     # Create a dataframe from all the rosters.csv files
#     df = con_memory.execute(""" SELECT Season, full_name AS name, first_name, last_name, team, position, 
#                                 depth_chart_position, pfr_id FROM df 
#                                 WHERE position IN ('RB', 'FB', 'HB') 
#                                 """).fetchdf()
#     df = df[['season', 'name', 'team', 'position']]
    
#     # Make team names consistent with team_info_xref table
#     team_nm_fixes = [('ARZ', 'ARI'), ('BLT', 'BAL'), ('CLV', 'CLE'), ('GB', 'GNB'), ('HST', 'HOU'),  
#                      ('KC', 'KAN'), ('LA', 'LAR'), ('LV', 'LVR'), ('NE', 'NWE'), ('NO', 'NOR'),
#                      ('SD', 'SDG'), ('SF', 'SFO'), ('SL', 'STL'), ('TB', 'TAM')]
#     for wrong_nm, right_nm in team_nm_fixes:
#         df['team'] = np.where(df.team == wrong_nm, right_nm, df.team)
    
#     df['position'] = 'RB'
    
#     return df

In [None]:
def construct_df_roster(con_memory):
    """Create the base of the roster table (running backs only).

    Every file found directly inside ``../src/rosters`` is read as a csv and
    stacked into one dataframe. Team abbreviations are rewritten to the codes
    listed in ``team_nm_fixes``, the rows are joined to the teams table from
    ``construct_df_teams()`` and only the positions RB, FB and HB are kept,
    all relabelled as 'RB'.

    Parameters:
        con_memory (duckdb object): used to carry duckdb queries.

    Return values:
        df (pandas dataframe): The joined rows with the columns Team, ABV,
            Season, Position, Player, Birth_date, Height, Weight, pfr_id and
            Years_exp, sorted by Season, Team and Player.

    Raises:
        FileNotFoundError: If ``../src/rosters`` does not exist or holds no
            files.
    """
    # Collect all roster files; sorted so the read order does not depend on
    # the order in which the file system lists them
    roster_dir = Path("../src/rosters")
    roster_files = sorted(entry for entry in roster_dir.iterdir() if entry.is_file())
    if not roster_files:
        raise FileNotFoundError(f"No roster files found in {roster_dir}")

    # Read everything first and concatenate once: growing a dataframe inside
    # the loop copies all previously read rows again on every iteration.
    # NOTE: the name `df` is referenced by the SQL below, do not rename it.
    df = pd.concat([pd.read_csv(path) for path in roster_files], ignore_index=True)

    # Make team names consistent with team_info_xref table
    team_nm_fixes = {
        'ARZ': 'ARI', 'BLT': 'BAL', 'CLV': 'CLE', 'GB': 'GNB', 'HST': 'HOU',
        'KC': 'KAN', 'LA': 'LAR', 'LV': 'LVR', 'NE': 'NWE', 'NO': 'NOR',
        'SD': 'SDG', 'SF': 'SFO', 'SL': 'STL', 'TB': 'TAM',
    }
    # No replacement value is itself a key, so one mapping pass gives the
    # same result as applying the fixes one after another
    df['team'] = df['team'].replace(team_nm_fixes)

    # Attach the team information and keep the running back positions only.
    # The query refers to the local dataframes `df` and `df_teams` by their
    # variable names (DuckDB replacement scan), so both must stay in scope
    # under exactly these names.
    df_teams = construct_df_teams()
    df = con_memory.execute("""SELECT df.* EXCLUDE team, df_teams.Team, df_teams.ABV FROM df JOIN df_teams 
                               ON df.team = df_teams.ABV
                               WHERE position IN ('RB', 'FB', 'HB')""").fetchdf()

    # Capitalising turns e.g. 'ABV' into 'Abv' and 'pfr_id' into 'Pfr_id';
    # the final spellings of those columns are restored right after
    df.columns = df.columns.str.capitalize()
    df['Position'] = 'RB'
    df['ABV'] = df['Abv']
    df['pfr_id'] = df['Pfr_id']
    df['Player'] = df['Full_name']

    output_columns = ['Team', 'ABV', 'Season', 'Position', 'Player', 'Birth_date',
                      'Height', 'Weight', 'pfr_id', 'Years_exp']
    df = df[output_columns].sort_values(by=['Season', 'Team', 'Player']).reset_index(drop=True)

    return df

In [None]:
def construct_df_teams():
    """Create the base of the teams table.

    Reads ``../tables/team_info_xref.csv`` and flattens it: every team row
    that has a second (and optionally third) alias gets one extra row per
    alias, so each alias name / abbreviation appears on a row of its own.

    Return values:
        df_teams (pandas dataframe): The dataframe with all NFL teams and
            their aliases, with the columns Team, ABV, PFR_ABV, short_name
            and TmLegacy.
    """

    # Construct df_teams by flattening team_info_xref.csv
    df_teams = pd.read_csv("../tables/team_info_xref.csv")
    # Rows read from the file start with an empty legacy value; only the
    # alias rows appended below receive TmLegacy2 / TmLegacy3
    df_teams['TmLegacy'] = ''
    # Teams that have at least a second alias
    df_ABV = df_teams.dropna(subset=['ABV2']).reset_index(drop=True)
    row_loc = -1 # next unused (negative) index label for an appended alias row
    for row in range(df_ABV.shape[0]):
        team_entry = df_ABV.loc[row]
        pfr_abv = team_entry.loc['PFR_ABV']
        team_name = team_entry.loc['Team']
        team_name2 = team_entry.loc['Team2']
        team_name3 = team_entry.loc['Team3']
        ABV2 = team_entry.loc['ABV2']
        ABV3 = team_entry.loc['ABV3']
        TmLegacy2 = team_entry.loc['TmLegacy2']
        TmLegacy3 = team_entry.loc['TmLegacy3']
        # Assigning to an index label that does not exist yet appends a row.
        # Only these four columns are given; the remaining columns are
        # presumably left as NaN by pandas (TODO confirm) and are dropped by
        # the column selection after the loop anyway.
        df_teams.loc[row_loc] = {'Team': team_name2, 'ABV': ABV2, 'PFR_ABV': pfr_abv, 'TmLegacy': TmLegacy2}
        row_loc = row_loc - 1
        # A missing ABV3 is read as NaN, whose string form is 'nan'
        if str(ABV3) != 'nan':
            df_teams.loc[row_loc] = {'Team': team_name3, 'ABV': ABV3, 'PFR_ABV': pfr_abv, 'TmLegacy': TmLegacy3}
            row_loc = row_loc - 1
    # Keep the flattened columns only, group the aliases of a club together
    # by sorting on PFR_ABV and renumber the rows from 0
    df_teams = df_teams[['Team', 'ABV', 'PFR_ABV', 'TmLegacy']].sort_values('PFR_ABV').reset_index(drop=True)        
    # short_name is the last space-separated word of the team name, except
    # for 'Washington Football Team', which becomes 'Washington'
    df_teams['short_name'] = df_teams.Team.str.split(" ").str[-1]
    df_teams['short_name'] = np.where(df_teams.Team == 'Washington Football Team', 'Washington', df_teams.short_name)
    
    # Derive Team Legacy (how long the club aliases have existed)
    # TmLegacy is set aside so that duplicates are detected on the other
    # columns only; it is then re-attached by index label, so each surviving
    # row gets back the value it had before the drop
    tmlegacy = df_teams['TmLegacy']
    df_teams = df_teams.drop('TmLegacy', axis=1).drop_duplicates()
    df_teams['TmLegacy'] = tmlegacy
    # Alias rows without a legacy value carry NaN; normalise them to ''
    df_teams['TmLegacy'] = df_teams['TmLegacy'].fillna('')
    
    return df_teams

In [None]:
def concatenate_all_files(file_name, sub_folder):
    """Save the final dataframe as a csv file by collecting all sub csv files.

    Reads every file directly inside ``../tables/<file_name>/<sub_folder>``,
    stacks them into one dataframe, displays it and saves it with
    ``save_df`` as ``../tables/<file_name>/<file_name>.csv``.

    Parameters:
        file_name (str): name of the main folder and also main file name.
        sub_folder (str): name of the sub_folder inside the main folder used
            to hold sub .csv files.

    Nothing is returned. When the sub folder holds no files an empty
    dataframe is displayed and saved.
    """
    folder_location = f"../tables/{file_name}"
    directory_path = Path(folder_location + f"/{sub_folder}")
    # Sorted so the row order of the result does not depend on the order in
    # which the file system lists the files
    sub_files = sorted(entry for entry in directory_path.iterdir() if entry.is_file())

    # Read everything first and concatenate once: growing a dataframe inside
    # the loop copies all previously read rows again on every iteration.
    # pd.concat rejects an empty list, hence the explicit empty fallback.
    frames = [pd.read_csv(path) for path in sub_files]
    df = pd.concat(frames) if frames else pd.DataFrame()

    display(df)
    save_df(df, folder_location, f'{file_name}.csv')

In [None]:
# Switchboard of the helper functions defined above. A value set to False
# only removes that name from the message printed below.
import_dict = {"save_df()": True, "construct_df_roster()": True, "construct_df_teams()": True, 
               "concatenate_all_files()": True}
import_list = [function for function, boolean in import_dict.items() if boolean]

# Joining the names prints the same text as stripping the brackets and quotes
# from str(import_list), without nesting double quotes inside a double-quoted
# f-string (a SyntaxError before Python 3.12).
print(f"Importing following functions: {', '.join(import_list)}")