In [1]:
import pandas as pd

# Monthly inputs. The three lists are consumed in lockstep further down, so
# entry i of each one must describe the same month.
gha_files = [gha_jan_df, gha_feb_df, gha_mar_df]  # Replace with actual dataframes
open_files = [open_jan_df, open_feb_df, open_mar_df]  # Replace with actual dataframes
months = ["Jan 24", "Feb 24", "Mar 24"]  # Replace with actual month names

# Columns, besides 'Employee ID', whose values are watched for changes.
additional_columns = [
    'Global Career Band',
    'BF Level 4 Name',
    'Work Location Country/Territory Name',
]

# Last known state per position:
#   Position ID -> {'Employee ID': ..., <each additional column>: ...}
additional_cols_map = dict()

# Accumulator for every detected change: one row per (position, month) in
# which at least one tracked value differed from the last known state.
changes_df = pd.DataFrame(
    columns=['Position ID', 'Employee ID', 'Month', 'Description', *additional_columns]
)

def _values_differ(previous, current):
    """Return True when two cell values differ.

    Two missing values (NaN / None / pd.NA / NaT) are treated as equal. A plain
    ``!=`` would report NaN as different from itself, flagging every blank cell
    as "Changed" in each later month. A missing value against a present value
    counts as a difference.
    """
    previous_missing = pd.isna(previous)
    current_missing = pd.isna(current)
    if previous_missing or current_missing:
        return not (previous_missing and current_missing)
    return previous != current


# Walk the months in order and compare every GHA row against the last known
# state of its Position ID. Rows describing a change are collected in a plain
# list and turned into a DataFrame once at the end; concatenating inside the
# loop would re-copy the growing frame for every detected change.
change_rows = []

# The open-position frames stay in the zip so the number of months processed
# is unchanged, but they are not scanned: per the original note they lack an
# 'Employee ID' column.
for month, gha_df, _open_df in zip(months, gha_files, open_files):
    for _, row in gha_df.iterrows():
        pos_id = row['Position ID']
        emp_id = row['Employee ID']

        # Snapshot of the tracked values as they appear in this row.
        current_state = {
            'Employee ID': emp_id,
            **{col: row[col] for col in additional_columns},
        }

        # First sighting of a position only records its state; because the
        # stored copy equals the snapshot, no change is reported for it.
        known_state = additional_cols_map.setdefault(pos_id, dict(current_state))

        # 'Employee ID' is checked first so it leads the description.
        changed_fields = [
            field
            for field in ('Employee ID', *additional_columns)
            if _values_differ(known_state[field], current_state[field])
        ]
        if not changed_fields:
            continue

        # Remember the new values so later months are compared against them.
        for field in changed_fields:
            known_state[field] = current_state[field]

        change_rows.append({
            'Position ID': pos_id,
            'Employee ID': emp_id,
            'Month': month,
            'Description': "; ".join(f"{field} Changed" for field in changed_fields),
            **{col: row[col] for col in additional_columns},
        })

# Fold the collected rows into changes_df. Any rows it already holds are kept;
# concatenation onto an empty frame is avoided because pandas deprecates it.
if change_rows:
    new_changes = pd.DataFrame(change_rows, columns=changes_df.columns)
    if changes_df.empty:
        changes_df = new_changes
    else:
        changes_df = pd.concat([changes_df, new_changes], ignore_index=True)

# Display or further process changes_df
print(changes_df)


  Position ID Employee ID  Jan 24  Feb 24  Mar 24
0      000123        E001       1       1       0
1      000456        E002       1       0       0
2      000789        E003       0       0       1
