# In [None]:
import pandas as pd

# Read the starting master file (September data) and trim stray whitespace
# from the 'Source' labels so later equality checks against them match.
master_df = pd.read_csv('master_sep.csv').assign(
    Source=lambda frame: frame['Source'].str.strip()
)

# Month columns tracked in the master file, in calendar order.
months = [
    "Jan 24", "Feb 24", "Mar 24", "Apr 24", "May 24", "Jun 24",
    "Jul 24", "Aug 24", "Sep 24", "Oct 24", "Nov 24", "Dec 24",
]

# Any month column the master file lacks is created and filled with 0.
for month in months:
    if month in master_df.columns:
        continue
    master_df[month] = 0

# Function to update MoM for each month
def update_mom(master_df, month, gha_file, open_pos_file, month_columns=None):
    """Fold one month's GHA and Open Position files into the master frame.

    Master rows are matched on the pair ('Source', 'Pos ID'). A master row
    whose 'Pos ID' appears in the month's file for its source gets ``month``
    set to 1. Positions that appear in a monthly file but not in the master
    for that source are appended with 'Source' filled in, ``month`` set to 1
    and every other month column set to 0.

    Args:
        master_df: Master frame with at least 'Source' and 'Pos ID' columns.
            Flags on already-known rows are written to this frame in place.
        month: Name of the month column to update, e.g. "Jan 24".
        gha_file: CSV path (or anything ``pd.read_csv`` accepts) holding the
            month's GHA records; must contain a 'Pos ID' column.
        open_pos_file: Same, for the month's Open Position records.
        month_columns: Month column names to initialise on appended rows.
            Defaults to the module-level ``months`` list.

    Returns:
        The master frame including any appended rows. When neither file
        contributes a new position, ``master_df`` itself is returned.
    """
    if month_columns is None:
        month_columns = months

    monthly_by_source = (
        ('GHA', pd.read_csv(gha_file)),
        ('Open Position', pd.read_csv(open_pos_file)),
    )

    # Phase 1: flag rows already in the master. Done for both sources before
    # anything is appended so every flag lands on the caller's frame.
    for source, monthly_df in monthly_by_source:
        _flag_present(master_df, month, source, monthly_df['Pos ID'])

    # Phase 2: append positions the master has not seen for that source.
    for source, monthly_df in monthly_by_source:
        master_df = _append_new(master_df, month, monthly_df, source, month_columns)

    return master_df


def _flag_present(master_df, month, source, monthly_ids):
    """Set ``month`` to 1 on master rows of ``source`` listed in ``monthly_ids``."""
    present = (master_df['Source'] == source) & master_df['Pos ID'].isin(monthly_ids)
    master_df.loc[present, month] = 1


def _append_new(master_df, month, monthly_df, source, month_columns):
    """Return the master with ``source`` rows it lacks appended from ``monthly_df``."""
    known_ids = master_df.loc[master_df['Source'] == source, 'Pos ID']
    new_rows = monthly_df[~monthly_df['Pos ID'].isin(known_ids)].copy()
    if new_rows.empty:
        # Nothing to add: skipping the concat keeps the master's columns and
        # dtypes untouched instead of merging in an empty frame.
        return master_df

    new_rows['Source'] = source
    for column in month_columns:
        new_rows[column] = 0
    # Assigned last so the row is flagged even if ``month`` is not listed in
    # ``month_columns``.
    new_rows[month] = 1
    return pd.concat([master_df, new_rows], ignore_index=True)

# Month label -> (GHA file, Open Position file) to fold into the master.
monthly_files = {
    "Jan 24": (
        "gha_jan.csv",
        "open_pos_jan.csv",
    ),
    "Feb 24": (
        "gha_feb.csv",
        "open_pos_feb.csv",
    ),
    # Add entries for all months up to the current month
}

# Apply the months in the order they are listed above.
for month in monthly_files:
    gha_file, open_pos_file = monthly_files[month]
    master_df = update_mom(
        master_df=master_df,
        month=month,
        gha_file=gha_file,
        open_pos_file=open_pos_file,
    )

# Write the result out without the row index.
master_df.to_csv("updated_master_file.csv", index=False)
