In [None]:
import pandas as pd

def track_changes(master_df, gha_mom, mom_open_pos, current_month, columns_to_check):
    """
    Tracks changes and new records between the base master_df and GHA/Open Positions monthly files.
    
    Args:
    - master_df (DataFrame): The master DataFrame.
    - gha_mom (DataFrame): The GHA monthly data.
    - mom_open_pos (DataFrame): The Open Positions monthly data.
    - current_month (str): The current month being processed.
    - columns_to_check (list): List of attributes to check for changes.

    Returns:
    - updated_master_df (DataFrame): The updated master DataFrame.
    - updated_records_df (DataFrame): DataFrame with all updates (new and changed records).
    """
    # Standardize ID columns to string
    for df in [master_df, gha_mom, mom_open_pos]:
        df['Position ID'] = df['Position ID'].astype(str)
    master_df['Employee ID'] = master_df['Employee ID'].astype(str)
    gha_mom['Employee ID'] = gha_mom['Employee ID'].astype(str)
    
    # DataFrames to store updated records
    updated_records = []

    # Track changes for GHA records
    for idx, row in gha_mom.iterrows():
        # Check if record exists in master_df
        match = master_df[(master_df['Position ID'] == row['Position ID']) &
                          (master_df['Employee ID'] == row['Employee ID'])]

        if not match.empty:
            # If record exists, check for attribute changes
            changes = []
            for col in columns_to_check:
                if row[col] != match.iloc[0][col]:
                    changes.append(col)
            if changes:
                updated_row = row.copy()
                updated_row['Source'] = 'GHA'
                updated_row['Month of Change'] = current_month
                updated_row['Reason of Change'] = '; '.join([f"{col} Changed" for col in changes])
                updated_records.append(updated_row)
        else:
            # If record does not exist in master_df, it's a new record
            new_row = row.copy()
            new_row['Source'] = 'GHA'
            new_row['Month of Change'] = current_month
            new_row['Reason of Change'] = 'New Record'
            updated_records.append(new_row)

    # Track changes for Open Positions records
    for idx, row in mom_open_pos.iterrows():
        # Check if record exists in master_df (no Employee ID for Open Positions)
        match = master_df[master_df['Position ID'] == row['Position ID']]

        if not match.empty:
            # If record exists, check for attribute changes (excluding Work Location City and Employee ID)
            changes = []
            for col in columns_to_check:
                if col not in ['Work Location City', 'Employee ID']:
                    if row[col] != match.iloc[0][col]:
                        changes.append(col)
            if changes:
                updated_row = row.copy()
                updated_row['Source'] = 'Open Positions'
                updated_row['Month of Change'] = current_month
                updated_row['Reason of Change'] = '; '.join([f"{col} Changed" for col in changes])
                updated_records.append(updated_row)
        else:
            # If record does not exist in master_df, it's a new record
            new_row = row.copy()
            new_row['Source'] = 'Open Positions'
            new_row['Month of Change'] = current_month
            new_row['Reason of Change'] = 'New Record'
            updated_records.append(new_row)

    # Convert updated records to a DataFrame
    updated_records_df = pd.DataFrame(updated_records)

    # Append updated records to the master DataFrame
    updated_master_df = pd.concat([master_df, updated_records_df], ignore_index=True)

    return updated_master_df, updated_records_df


# Example usage
columns_to_check = ['BF Level 3', 'BF Level 4', 'Work Location Country/Territory Name', 'Global Career Band']
gha_mom = pd.read_csv('gha_mom_Jan24.csv')
mom_open_pos = pd.read_csv('mom_open_pos_Jan24.csv')
current_month = 'Jan-24'

master_df, updated_records_df = track_changes(master_df, gha_mom, mom_open_pos, current_month, columns_to_check)

# Write updated records to a single Excel sheet
updated_records_df.to_excel('Updated_Records.xlsx', index=False)


In [None]:
import pandas as pd

# List of columns to track for changes
columns_to_check = ['Global Career Band', 'BF Level 4 Name', 'Work Location Country/Territory Name', 'Work Location City']

def track_changes_across_months(master_df, mom_gha_file, mom_open_positions_file, month):
    # Ensure consistency in data types
    master_df['Position ID'] = master_df['Position ID'].astype(str)
    mom_gha_file['Position ID'] = mom_gha_file['Position ID'].astype(str)
    mom_gha_file['Employee ID'] = mom_gha_file['Employee ID'].astype(str)
    mom_open_positions_file['Position ID'] = mom_open_positions_file['Position ID'].astype(str)

    # Initialize DataFrames to store changed and updated records
    changed_rows = []
    updated_rows = []

    # Combine the merged dataframe with previous month's data
    merged_df = master_df.copy()

    # Track changes in GHA file (source == 'gha')
    for index, row in merged_df.iterrows():
        pos_id = row['Position ID']
        emp_id = row['Employee ID']
        
        # Filter the mom_gha_file for the matching Position ID and Employee ID
        gha_match = mom_gha_file[(mom_gha_file['Position ID'] == pos_id) & (mom_gha_file['Employee ID'] == emp_id)]

        if not gha_match.empty:
            changes = []
            for col in columns_to_check:
                if row[col] != gha_match.iloc[0][col]:  # Compare values for the specified columns
                    changes.append(f"{col} Changed")

            if changes:
                updated_row = row.copy()
                updated_row['Description'] = '; '.join(changes)
                updated_row['Month Changed'] = month
                updated_rows.append(updated_row)

            # If combination of Position ID and Employee ID has changed (new employee or position), track as changed
            if len(gha_match) > 1:  # More than one match, indicating a position/employee mismatch
                changed_row = row.copy()
                changed_row['Description'] = 'Position/Employee ID mismatch'
                changed_row['Month Changed'] = month
                changed_rows.append(changed_row)

    # Track changes in Open Positions file (source == 'open positions')
    for index, row in merged_df.iterrows():
        pos_id = row['Position ID']

        # Filter the mom_open_positions_file for the matching Position ID
        open_pos_match = mom_open_positions_file[mom_open_positions_file['Position ID'] == pos_id]

        if not open_pos_match.empty:
            changes = []
            for col in columns_to_check:
                # Exclude Work Location City and Employee ID from the comparison for Open Position
                if col != 'Work Location City' and col != 'Employee ID' and row[col] != open_pos_match.iloc[0][col]:
                    changes.append(f"{col} Changed")

            if changes:
                updated_row = row.copy()
                updated_row['Description'] = '; '.join(changes)
                updated_row['Month Changed'] = month
                updated_rows.append(updated_row)

    # Convert lists to DataFrames
    changed_rows_df = pd.DataFrame(changed_rows)
    updated_rows_df = pd.DataFrame(updated_rows)

    return changed_rows_df, updated_rows_df

# Example usage:
current_month = 'Mar-24'
changed_rows_df, updated_rows_df = track_changes_across_months(master_df, mom_gha_file, mom_open_positions_file, current_month)

# Display the result
print("Changed Rows DataFrame:")
print(changed_rows_df)

print("\nUpdated Rows DataFrame:")
print(updated_rows_df)


In [None]:
# Initialize new_records with the same columns as master_df plus 'Month_Added'
new_records = pd.DataFrame(columns=master_df.columns.tolist() + ['Month_Added'])

# Define function to process each month
def process_single_month(master_df, month, gha_file, open_pos_file, new_records):
    max_length = master_df['Position ID'].str.len().max()

    # Standardize Position IDs
    gha_file['Position ID'] = gha_file['Position ID'].str.zfill(max_length)
    open_pos_file['Position ID'] = open_pos_file['Position ID'].str.zfill(max_length)
    
    # Update master_df with 1/0 based on presence in GHA and Open Positions
    master_df[month] = np.where(
        master_df['Source'] == 'GHA', 
        master_df['Position ID'].apply(lambda x: 1 if x in gha_file['Position ID'].values else 0),
        np.where(
            master_df['Source'] == 'Open Positions', 
            master_df['Position ID'].apply(lambda x: 1 if x in open_pos_file['Position ID'].values else 0),
            np.nan
        )
    )
    
    # Identify new records in GHA not in master_df or previously in new_records
    gha_new_records = gha_file[~gha_file['Position ID'].isin(pd.concat([master_df['Position ID'], new_records['Position ID']]))]
    gha_new_records = gha_new_records.assign(Source='GHA', Month_Added=month)

    # Identify new records in Open Positions not in master_df or previously in new_records
    open_pos_new_records = open_pos_file[~open_pos_file['Position ID'].isin(pd.concat([master_df['Position ID'], new_records['Position ID']]))]
    open_pos_new_records = open_pos_new_records.assign(Source='Open Positions', Month_Added=month)

    # Combine new records found this month into one DataFrame
    new_month_records = pd.concat([gha_new_records, open_pos_new_records], ignore_index=True)

    # Add presence columns for each month in new_records, filling prior months with 0
    for mth in master_df.columns[2:]:  # Assuming month columns start from index 2
        if mth < month:
            new_month_records[mth] = 0  # Set prior months to 0
        elif mth == month:
            new_month_records[mth] = new_month_records.apply(
                lambda row: 1 if row['Position ID'] in gha_file['Position ID'].values or row['Position ID'] in open_pos_file['Position ID'].values else 0,
                axis=1
            )
        else:
            new_month_records[mth] = np.nan

    # Trim new_month_records to only include columns in master_df + 'Month_Added'
    new_month_records = new_month_records[master_df.columns.tolist() + ['Month_Added']]

    # Append to new_records DataFrame
    new_records = pd.concat([new_records, new_month_records], ignore_index=True)

    return master_df, new_records

# Process each month
for month, gha_file in gha_monthly_files.items():
    open_pos_file = open_pos_monthly_files[month]
    master_df, new_records = process_single_month(master_df, month, gha_file, open_pos_file, new_records)

# Fill any remaining NaN values in new_records with 0 for months not reached yet
for col in master_df.columns[2:]:  # Month columns assumed to start from index 2
    new_records[col].fillna(0, inplace=True)

# Display the updated DataFrames
print("Updated Master DataFrame:\n", master_df)
print("\nNew Records DataFrame:\n", new_records)


In [2]:
# Declare new_records with relevant columns
new_records = pd.DataFrame(columns=['Position ID', 'Source', 'Month Added'])

def process_monthly_data(master_df, gha_monthly_files, open_pos_monthly_files):
    global new_records
    max_length = master_df['Position ID'].str.len().max()
    
    for month in gha_monthly_files.keys():
        gha_df = gha_monthly_files[month].copy()
        open_pos_df = open_pos_monthly_files[month].copy()

        # Standardize Position IDs
        gha_df['Position ID'] = gha_df['Position ID'].str.zfill(max_length)
        open_pos_df['Position ID'] = open_pos_df['Position ID'].str.zfill(max_length)
        
        # Update master_df with 1/0 based on presence in GHA and Open Positions
        master_df[month] = np.where(
            master_df['Source'] == 'GHA', 
            master_df['Position ID'].apply(lambda x: 1 if x in gha_df['Position ID'].values else 0),
            np.where(
                master_df['Source'] == 'Open Positions', 
                master_df['Position ID'].apply(lambda x: 1 if x in open_pos_df['Position ID'].values else 0),
                np.nan
            )
        )
        
        # Identify new records in GHA not in master_df or previously in new_records
        gha_new_records = gha_df[~gha_df['Position ID'].isin(pd.concat([master_df['Position ID'], new_records['Position ID']]))]
        gha_new_records = gha_new_records.assign(Source='GHA', Month_Added=month)

        # Identify new records in Open Positions not in master_df or previously in new_records
        open_pos_new_records = open_pos_df[~open_pos_df['Position ID'].isin(pd.concat([master_df['Position ID'], new_records['Position ID']]))]
        open_pos_new_records = open_pos_new_records.assign(Source='Open Positions', Month_Added=month)

        # Append unique new records from both GHA and Open Positions
        new_records = pd.concat([new_records, gha_new_records, open_pos_new_records], ignore_index=True)
    
    return master_df, new_records

# Call the function and get updated master_df and new_records
master_df, new_records = process_monthly_data(master_df, gha_monthly_files, open_pos_monthly_files)

# Display the resulting DataFrames
print("Updated Master DataFrame:\n", master_df)
print("\nNew Records DataFrame:\n", new_records)


Updated Master DataFrame:
   Position ID          Source  Jan 24  Feb 24  Mar 24
0      000123             GHA     1.0     0.0     1.0
1      000456  Open Positions     1.0     0.0     0.0
2      000789             GHA     0.0     1.0     0.0
3      001001  Open Positions     0.0     1.0     1.0

New Records DataFrame:
   Position ID          Source Month Added Month_Added
0      002002             GHA         NaN      Feb 24
1      004004  Open Positions         NaN      Feb 24
2      003003             GHA         NaN      Mar 24
3      005005  Open Positions         NaN      Mar 24


In [None]:
import pandas as pd

# Additional columns to track for changes and their descriptions
additional_cols = ['Global Career Band', 'BF Level 4 Name', 'Work Location Country/Territory Name']
additional_cols_descriptions = {col: f"{col} Changed" for col in additional_cols}

# Initialize changes DataFrame
changes_df = pd.DataFrame(columns=master_df.columns.tolist() + ['Month', 'Description'])

for month, gha_file, open_file in zip(months, gha_files, open_files):
    # Load GHA and Open Position data for the current month
    gha_df = pd.read_excel(gha_file, sheet_name='Headcount - Employee Detail')
    open_pos_df = pd.read_excel(open_file)
    
    # Rename 'Position Number' to 'Position ID' to match master_df
    gha_df.rename(columns={'Position Number': 'Position ID'}, inplace=True)
    open_pos_df.rename(columns={'Position Number': 'Position ID'}, inplace=True)

    # Standardize Position ID and Employee ID to strings, with zero-padding for Position ID
    gha_df['Position ID'] = gha_df['Position ID'].astype(str).str.zfill(max_digits)
    gha_df['Employee ID'] = gha_df['Employee ID'].astype(str)
    open_pos_df['Position ID'] = open_pos_df['Position ID'].astype(str).str.zfill(max_digits)

    # Print column names to verify alignment
    print("Debug: Columns in master_df:", master_df.columns)
    print("Debug: Columns in gha_df:", gha_df.columns)
    
    # Merge on Position ID and Employee ID, with unique suffixes
    merged_df = pd.merge(
        master_df, gha_df,
        on=['Position ID', 'Employee ID'],
        how='outer',
        suffixes=('_master', '_gha')
    )
    
    # Check merged columns to verify expected columns are present
    print("Debug: Columns in merged_df after merging:", merged_df.columns)
    
    # If expected columns are missing, print a warning and skip the iteration
    if 'Employee ID_master' not in merged_df.columns or 'Employee ID_gha' not in merged_df.columns:
        print("Warning: Expected columns 'Employee ID_master' and 'Employee ID_gha' not found.")
        continue
    
    # Track changes in Position-Employee combinations
    for _, row in merged_df.iterrows():
        pos_id = row['Position ID']
        master_emp_id, gha_emp_id = row.get('Employee ID_master'), row.get('Employee ID_gha')
        
        # Detect Employee ID changes for the same Position ID
        if pd.notna(master_emp_id) and pd.notna(gha_emp_id) and master_emp_id != gha_emp_id:
            changes_df = changes_df.append({
                **row[['Position ID', 'Employee ID_master']],
                'Month': month,
                'Description': 'Position-Employee Combination Changed'
            }, ignore_index=True)

        # Detect changes in additional columns
        for col in additional_cols:
            master_value, gha_value = row.get(f"{col}_master"), row.get(f"{col}_gha")
            if pd.notna(master_value) and pd.notna(gha_value) and master_value != gha_value:
                changes_df = changes_df.append({
                    **row[['Position ID', 'Employee ID_master']],
                    'Month': month,
                    'Description': additional_cols_descriptions[col]
                }, ignore_index=True)

# Drop duplicates and keep the first occurrence
changes_df.drop_duplicates(subset=['Position ID', 'Employee ID_master', 'Description'], inplace=True)

# Output the changes DataFrame
print("Final changes_df:\n", changes_df)


In [None]:
import pandas as pd

# Additional columns and descriptions for changes
additional_cols = ['Global Career Band', 'BF Level 4 Name', 'Work Location Country/Territory Name']
additional_cols_descriptions = {col: f"{col} Changed" for col in additional_cols}

# Initialize the changes DataFrame with the required columns
changes_df = pd.DataFrame(columns=master_df.columns.tolist() + ['Month', 'Description'])

for month, gha_file, open_file in zip(months, gha_files, open_files):
    # Load data
    gha_df = pd.read_excel(gha_file, sheet_name='Headcount - Employee Detail')
    open_pos_df = pd.read_excel(open_file)
    
    # Standardize 'Position ID' formatting
    gha_df['Position ID'] = gha_df['Position ID'].astype(str).str.zfill(max_digits)
    open_pos_df['Position ID'] = open_pos_df['Position ID'].astype(str).str.zfill(max_digits)
    
    # Filter out rows with unspecified or empty Position IDs or Employee IDs in GHA data
    gha_df = gha_df[(gha_df['Position ID'] != 'unspecified') & (gha_df['Position ID'] != '')]
    gha_df = gha_df[(gha_df['Employee ID'] != 'unspecified') & (gha_df['Employee ID'] != '')]

    # Merge GHA data with master data
    print("Debug: GHA Columns before merging:", gha_df.columns)
    print("Debug: Master Columns before merging:", master_df.columns)
    
    merged_df = pd.merge(
        master_df, gha_df, 
        on=['Position ID', 'Employee ID'], 
        how='outer', 
        suffixes=('_master', '_gha')
    )
    
    # Debug column names after merging
    print("Debug: Columns in merged_df after merging:", merged_df.columns)
    
    # If columns aren't named as expected, skip further processing in this loop iteration
    if 'Employee ID_master' not in merged_df.columns or 'Employee ID_gha' not in merged_df.columns:
        print("Warning: Expected columns 'Employee ID_master' and 'Employee ID_gha' not found.")
        continue
    
    # Detect changes
    for _, row in merged_df.iterrows():
        pos_id, emp_id = row['Position ID'], row['Employee ID']
        master_emp_id, gha_emp_id = row['Employee ID_master'], row['Employee ID_gha']
        
        # Record changes in Position ID and Employee ID combinations
        if pd.notna(master_emp_id) and pd.notna(gha_emp_id) and master_emp_id != gha_emp_id:
            changes_df = changes_df.append({
                **row[['Position ID', 'Employee ID_master']],
                'Month': month,
                'Description': 'Position-Employee Combination Changed'
            }, ignore_index=True)

        # Check for additional column changes
        for col in additional_cols:
            master_value, gha_value = row.get(f"{col}_master"), row.get(f"{col}_gha")
            if pd.notna(master_value) and pd.notna(gha_value) and master_value != gha_value:
                changes_df = changes_df.append({
                    **row[['Position ID', 'Employee ID_master']],
                    'Month': month,
                    'Description': additional_cols_descriptions[col]
                }, ignore_index=True)

# Remove duplicates
changes_df.drop_duplicates(subset=['Position ID', 'Employee ID_master', 'Description'], inplace=True)

# Output the changes DataFrame
print(changes_df)


In [1]:
#option 1
import pandas as pd

# Define maximum digits for 'Position ID' formatting, based on master file creation
max_digits = max(len(str(pos_id)) for pos_id in master_df['Position ID'].astype(str))

# Define additional columns to check for changes, with corresponding descriptions
additional_cols = ['Global Career Band', 'BF Level 4 Name', 'Work Location Country/Territory Name']
additional_cols_descriptions = {col: f"{col} Changed" for col in additional_cols}

# Initialize empty DataFrame to store changes
changes_df = pd.DataFrame(columns=master_df.columns.tolist() + ['Month', 'Description'])

for month, gha_file, open_file in zip(months, gha_files, open_files):
    # Load and prepare GHA and Open Position data for the current month
    gha_df = pd.read_excel(gha_file, sheet_name='Headcount - Employee Detail')
    open_pos_df = pd.read_excel(open_file)
    
    # Ensure consistent 'Position ID' format (use zfill if needed)
    if 'Position Number' in gha_df.columns:
        gha_df.rename(columns={'Position Number': 'Position ID'}, inplace=True)
    gha_df['Position ID'] = gha_df['Position ID'].astype(str).str.zfill(max_digits)
    open_pos_df['Position ID'] = open_pos_df['Position ID'].astype(str).str.zfill(max_digits)
    
    # Filter out records where 'Position ID' or 'Employee ID' is unspecified or blank
    gha_df = gha_df[(gha_df['Position ID'] != 'unspecified') & (gha_df['Position ID'] != '')]
    gha_df = gha_df[(gha_df['Employee ID'] != 'unspecified') & (gha_df['Employee ID'] != '')]
    
    # Merge GHA data with master_df based on Position ID and Employee ID
    merged_df = pd.merge(master_df, gha_df, on=['Position ID', 'Employee ID'], how='outer', suffixes=('_master', '_gha'))

    # Identify changes in 'Position ID' and 'Employee ID' combinations, as well as additional columns
    for _, row in merged_df.iterrows():
        pos_id, emp_id = row['Position ID'], row['Employee ID']
        master_emp_id, gha_emp_id = row['Employee ID_master'], row['Employee ID_gha']
        
        # Check for changes in Position ID and Employee ID combinations
        if pd.notna(master_emp_id) and pd.notna(gha_emp_id) and master_emp_id != gha_emp_id:
            # Log change details if Position ID has a different Employee ID
            changes_df = changes_df.append({
                **row[['Position ID', 'Employee ID_master']],
                'Month': month,
                'Description': 'Position-Employee Combination Changed'
            }, ignore_index=True)

        # Check for changes in additional columns
        for col in additional_cols:
            master_value, gha_value = row[f"{col}_master"], row.get(f"{col}_gha")
            if pd.notna(master_value) and pd.notna(gha_value) and master_value != gha_value:
                changes_df = changes_df.append({
                    **row[['Position ID', 'Employee ID_master']],
                    'Month': month,
                    'Description': additional_cols_descriptions[col]
                }, ignore_index=True)

# Drop duplicates to avoid recording repeated changes in subsequent months
changes_df.drop_duplicates(subset=['Position ID', 'Employee ID_master', 'Description'], inplace=True)

# View or save changes_df to verify the output
print(changes_df)


  Position ID Employee ID  Jan 24  Feb 24  Mar 24
0      000123        E001       1       1       0
1      000456        E002       1       0       0
2      000789        E003       0       0       1


In [None]:
# option 2
import pandas as pd

# Define the list of GHA and Open Position files along with months for reference
gha_files = [...]  # List of file paths for GHA MoM files
open_pos_files = [...]  # List of file paths for Open Position MoM files
months = ["Jan 24", "Feb 24", "Mar 24", "Apr 24", ...]  # Months to track for MoM columns

# Load master file
master_df = pd.read_excel('master_file.xlsx')
master_df['Position ID'] = master_df['Position ID'].astype(str).str.zfill(6)

# Initialize a DataFrame to store changes
changes_df = pd.DataFrame(columns=master_df.columns.tolist() + ['Month', 'Description'])

# Define the additional columns to check for changes
additional_cols = ['Global Career Band', 'BF Level 4 Name', 'Work Location Country/Territory Name']

# Define a helper function to standardize Position IDs across all files
def standardize_position_ids(df):
    max_digits = master_df['Position ID'].str.len().max()
    df['Position ID'] = df['Position ID'].astype(str).str.zfill(max_digits)
    return df

# Loop through each month and each file to detect changes
for month, gha_file, open_pos_file in zip(months, gha_files, open_pos_files):
    # Load GHA and Open Position files for the current month
    gha_df = pd.read_excel(gha_file, sheet_name='Headcount - Employee Detail')
    gha_df = standardize_position_ids(gha_df)
    
    # Apply checks only if Position ID is specified
    gha_df_filtered = gha_df[gha_df['Position ID'] != 'unspecified']
    
    # Iterate through each row in gha_df_filtered to check against master_df
    for _, row in gha_df_filtered.iterrows():
        pos_id = row['Position ID']
        emp_id = row['Employee ID'] if 'Employee ID' in row else 'unspecified'

        # Find the master record for this Position ID
        master_record = master_df[master_df['Position ID'] == pos_id]
        
        if not master_record.empty:
            # Check for a change in Employee ID
            master_emp_id = master_record['Employee ID'].values[0]
            if emp_id != 'unspecified' and emp_id != master_emp_id:
                change_desc = f"Employee ID Changed from {master_emp_id} to {emp_id}"
                change_row = row.append(pd.Series({'Month': month, 'Description': change_desc}))
                changes_df = changes_df.append(change_row, ignore_index=True)

            # Check for changes in additional columns
            for col in additional_cols:
                if col in row and master_record[col].values[0] != row[col]:
                    change_desc = f"{col} Changed from {master_record[col].values[0]} to {row[col]}"
                    change_row = row.append(pd.Series({'Month': month, 'Description': change_desc}))
                    changes_df = changes_df.append(change_row, ignore_index=True)
                    
        else:
            # If no master record exists for this Position ID, it's a new addition
            change_desc = "New Position ID Entry"
            change_row = row.append(pd.Series({'Month': month, 'Description': change_desc}))
            changes_df = changes_df.append(change_row, ignore_index=True)

# Final step: Append changes_df to the master file
changes_df = changes_df[master_df.columns]  # Select only master columns
final_master_df = pd.concat([master_df, changes_df], ignore_index=True)

# Save the updated master file
final_master_df.to_excel('updated_master_file.xlsx', index=False)
