In [1]:
import pandas as pd

# Detect month-over-month changes per Position ID across the monthly GHA files.
#
# Inputs expected to be defined earlier in the notebook:
#   months, gha_files  - parallel sequences: month label and the Excel file for that month
#   master_df          - DataFrame whose (non-month) columns define the output layout
#   month_columns      - master_df columns holding MoM data, excluded from the output
#
# Outputs:
#   changes_df         - every changed row, with 'Change Month' and 'Description' added
#   final_changes_df   - changes_df restricted to the master columns plus those two

# Columns to check for changes (besides the Employee ID assigned to the position)
additional_columns = ['Global Career Band', 'BF Level 4 Name', 'Work Location Country/Territory Name']

# Last known state per Position ID, carried forward from month to month
position_employee_map = {}  # Position ID -> last known Employee ID
additional_cols_map = {}    # Position ID -> {column name: last known value}


def _values_differ(previous, current):
    """Return True when two cell values differ, treating two missing values as equal."""
    previous_missing = pd.isna(previous)
    current_missing = pd.isna(current)
    if previous_missing or current_missing:
        # NaN != NaN is always True, so compare "missing-ness" instead of the values;
        # otherwise an empty cell would be reported as changed every single month.
        return previous_missing != current_missing
    return previous != current


# One single-row DataFrame per detected change; concatenated once after the loops
# instead of growing changes_df with pd.concat on every hit.
change_records = []

# Loop through each month and the corresponding GHA file
for month, gha_file in zip(months, gha_files):
    # Load the GHA data for the current month
    gha_month_df = pd.read_excel(gha_file, engine='openpyxl')
    gha_month_df = gha_month_df.rename(columns={'Position Number': 'Position ID', 'Employee ID': 'Employee ID_gha'})

    # Iterate over each row in the current month's GHA data
    for row_label, row in gha_month_df.iterrows():
        pos_id = row['Position ID']
        emp_id = row['Employee ID_gha']

        # Human-readable list of what changed for this row (empty -> no change)
        description_list = []

        # Check for changes in the Position ID -> Employee ID mapping
        if pos_id in position_employee_map:
            if _values_differ(position_employee_map[pos_id], emp_id):
                description_list.append('Employee ID Changed')
                position_employee_map[pos_id] = emp_id
        else:
            # First time this position is seen: record the baseline, report nothing
            position_employee_map[pos_id] = emp_id

        # Check for changes in the additional columns
        if pos_id in additional_cols_map:
            known_values = additional_cols_map[pos_id]
            for col in additional_columns:
                current_value = row[col]
                if _values_differ(known_values[col], current_value):
                    description_list.append(f"{col} Changed")
                    known_values[col] = current_value
        else:
            # Record the baseline for ALL tracked columns at once. Storing only the
            # first column here would make the lookup of the next column fail with
            # a KeyError, because the position would already be present in the map.
            additional_cols_map[pos_id] = {col: row[col] for col in additional_columns}

        # If changes were detected, keep exactly this row (selected by its index
        # label, so other rows sharing the same Position ID are not duplicated).
        if description_list:
            change_record = gha_month_df.loc[[row_label]].copy()
            change_record['Change Month'] = month
            change_record['Description'] = ', '.join(description_list)
            change_records.append(change_record)

# Build the DataFrame of captured changes in a single concat
if change_records:
    changes_df = pd.concat(change_records, ignore_index=True)
else:
    changes_df = pd.DataFrame()

# Select only the columns present in the master DataFrame, excluding MoM data and adding "Change Month" and "Description"
master_columns = [col for col in master_df.columns if col not in month_columns]
output_columns = master_columns + ['Change Month', 'Description']
if changes_df.empty:
    # No change was detected: produce an empty frame with the expected layout
    # rather than failing on a column selection against a column-less frame.
    final_changes_df = pd.DataFrame(columns=output_columns)
else:
    final_changes_df = changes_df[output_columns]

# Display the resulting DataFrame with captured changes
print("Changes DataFrame:")
print(final_changes_df)


  Position ID Employee ID  Jan 24  Feb 24  Mar 24
0      000123        E001       1       1       0
1      000456        E002       1       0       0
2      000789        E003       0       0       1
