# In [5]:

import os
import glob
import pandas as pd

def workers_synhh(directory_path: str, excel_path: str) -> None:
    """
    Summarize the number of workers by MGRA across synthetic household files.

    Every file in ``directory_path`` matching ``synthetic_households*.csv``
    is read, its ``workers`` column is summed per ``mgra``, and the per-file
    totals are placed side by side (one column per file, named after the file
    without its extension). Row-wise ``Workers_mean`` and ``Workers_mdn``
    columns, rounded to integers, are appended and the table is written to
    ``excel_path``.

    Nothing is written, and a message is printed instead, when ``excel_path``
    already exists or when no matching CSV file is found.

    Args:
    directory_path (str): Path to the directory containing the CSV files
    excel_path (str): Path to save the Excel file

    Returns:
    None
    """
    # Check the destination first so no files are read when the result
    # would be discarded anyway.
    if os.path.exists(excel_path):
        print(f"The file {excel_path} already exists and cannot be overwritten.")
        return

    # Sorted so the column order does not depend on the order in which the
    # operating system happens to list the directory.
    pattern = os.path.join(directory_path, 'synthetic_households*.csv')
    files_csv = sorted(glob.glob(pattern))
    if not files_csv:
        print(f"No files matching {pattern} were found; nothing was written.")
        return

    # Aggregate each file as soon as it is read; only the small per-MGRA
    # totals are kept, not the full household tables.
    workers_by_file = []
    for file in files_csv:
        file_name = os.path.splitext(os.path.basename(file))[0]
        df = pd.read_csv(file)
        workers_by_file.append(df.groupby('mgra')['workers'].sum().rename(file_name))

    # Outer alignment on MGRA: an MGRA missing from a file is NaN in that
    # file's column.
    workers_all = pd.concat(workers_by_file, axis=1).sort_index().rename_axis('mgra')

    # Compute both statistics before adding either column, so the median is
    # taken over the per-file totals only and never includes the mean.
    workers_mean = workers_all.mean(axis=1).round().astype(int)
    workers_mdn = workers_all.median(axis=1).round().astype(int)
    workers_all['Workers_mean'] = workers_mean
    workers_all['Workers_mdn'] = workers_mdn
    workers_all.reset_index(inplace=True)

    # Creating workers by MGRA Excel output.
    # index=True is kept so the sheet layout (a positional index column ahead
    # of 'mgra') stays the same as before.
    workers_all.to_excel(excel_path, index=True)


# Destination of the Excel summary (left as an empty placeholder here).
excel_path = r''
# Folder searched for the synthetic household CSV files (left as an empty
# placeholder here).
directory_path = r''

# Build the workers-by-MGRA summary for the paths configured above.
workers_synhh(directory_path=directory_path, excel_path=excel_path)