In [6]:
import pandas as pd
import numpy as np

In [7]:
import pandas as pd

# Sample compensation rows: one staff member, one pay period, three locations.
# The first row is an on-call shift (scheduled hours, zero rounded hours).
comp_data = {
    'Staff First Name': ['John'] * 3,
    'Staff Last Name': ['Doe'] * 3,
    'Pay Period Start Date': ['2024-06-01'] * 3,
    'Location ID': ['LOC1', 'LOC2', 'LOC3'],
    'Schedule Duration (hours)': [2, 4, 6],
    'Pay Code Label': ['CALL', 'Regular', 'Regular'],
    'Hours Rounded': [0, 4, 6],
}

# Column-oriented dict -> DataFrame (one column per key, insertion order kept).
comp_df = pd.DataFrame.from_dict(comp_data)
display(comp_df)


Unnamed: 0,Staff First Name,Staff Last Name,Pay Period Start Date,Location ID,Schedule Duration (hours),Pay Code Label,Hours Rounded
0,John,Doe,2024-06-01,LOC1,2,CALL,0
1,John,Doe,2024-06-01,LOC2,4,Regular,4
2,John,Doe,2024-06-01,LOC3,6,Regular,6


In [None]:
# Sample bonus rows for the same staff member and month: one bonus tied to a
# specific facility and two with no facility assigned.
bonus_data = {
    'Name': ['John Doe'] * 3,
    'Facility': ['LOC1', None, None],
    'Bonus Type': ['Retention', 'Incentive', 'Relocation'],
    'Effective Date': ['2024-06-01'] * 3,
    'Total Amount': [100, 300, 200],
}

# Column-oriented dict -> DataFrame (one column per key, insertion order kept).
bonus_df = pd.DataFrame.from_dict(bonus_data)
display(bonus_df)


Unnamed: 0,Staff First Name,Staff Last Name,Facility,Bonus Type,Effective Date,Total Amount
0,John,Doe,LOC1,Retention,2024-06-01,100
1,John,Doe,,Incentive,2024-06-01,300


In [None]:
import numpy as np

def distribute_bonus(comp_df, bonus_df):
    """Allocate each staff member's monthly bonuses to the locations they worked.

    Bonuses are joined to compensation rows on full name and calendar month
    (month of ``Pay Period Start Date`` vs. month of ``Effective Date``).
    Two rules are applied:

    1. A bonus with a non-null ``Facility`` is credited in full to the
       matching ``Location ID``.
    2. A bonus with a null ``Facility`` is split equally across the staff
       member's locations for that month, excluding any location already
       credited under rule 1. Each share is rounded to 2 decimals, so the
       shares may not sum exactly to ``Total Amount``.

    Both joins are inner joins: a bonus with no matching compensation rows
    (or whose only locations were all credited under rule 1) does not appear
    in the output.

    Parameters
    ----------
    comp_df : pandas.DataFrame
        Compensation rows. Columns read: ``Staff First Name``,
        ``Staff Last Name``, ``Pay Period Start Date``, ``Location ID``,
        ``Schedule Duration (hours)``, ``Pay Code Label``, ``Hours Rounded``.
    bonus_df : pandas.DataFrame
        Bonus rows. Columns read: ``Name``, ``Facility``, ``Bonus Type``,
        ``Effective Date``, ``Total Amount``.

    Returns
    -------
    pandas.DataFrame
        One row per (bonus, credited location) with columns ``Full Name``,
        ``Month``, ``Location ID``, ``Facility``, ``Bonus Type``,
        ``Location_Hours``, ``Total Amount``, ``Bonus_Distributed`` and
        ``Bonus_Type`` (the rule that produced the row), sorted by
        ``Full Name``, ``Month``, ``Bonus_Type``, ``Location ID``.
        Neither input frame is modified.
    """
    # Work on copies so the caller's frames are left untouched.
    comp_df = comp_df.copy()
    bonus_df = bonus_df.copy()

    # Normalise dates and derive the 'YYYY-MM' month key used for joining.
    comp_df['Pay Period Start Date'] = pd.to_datetime(comp_df['Pay Period Start Date'])
    comp_df['Month'] = comp_df['Pay Period Start Date'].dt.to_period('M').astype(str)
    bonus_df['Effective Date'] = pd.to_datetime(bonus_df['Effective Date'])
    bonus_df['Month'] = bonus_df['Effective Date'].dt.to_period('M').astype(str)

    # Build the name key on both sides (whitespace-trimmed).
    comp_df['Full Name'] = (
        comp_df['Staff First Name'].str.strip() + ' ' + comp_df['Staff Last Name'].str.strip()
    )
    bonus_df['Name'] = bonus_df['Name'].str.strip()

    # Tag every bonus with a unique id so the equal split below can count
    # locations per bonus, not per (name, month, bonus type).
    bonus_df['_bonus_row'] = np.arange(len(bonus_df))

    # CALL pay codes (case-insensitive) count scheduled hours; everything
    # else counts rounded hours.
    comp_df['Effective Hours'] = np.where(
        comp_df['Pay Code Label'].str.upper() == 'CALL',
        comp_df['Schedule Duration (hours)'],
        comp_df['Hours Rounded']
    )

    # Total hours per staff member / month / location.
    location_hours = comp_df.groupby(
        ['Full Name', 'Month', 'Location ID'],
        as_index=False
    )['Effective Hours'].sum().rename(columns={'Effective Hours': 'Location_Hours'})

    # === 1. Exact Facility Match ===
    exact_matches = pd.merge(
        location_hours,
        bonus_df[bonus_df['Facility'].notna()],
        left_on=['Full Name', 'Month', 'Location ID'],
        right_on=['Name', 'Month', 'Facility']
    )
    exact_matches['Bonus_Distributed'] = exact_matches['Total Amount']
    exact_matches['Bonus_Type'] = 'Exact Facility Match'

    # Composite key of every location already credited by an exact match.
    matched_keys = (
        exact_matches['Full Name'] + '|' + exact_matches['Month'] + '|' + exact_matches['Location ID']
    )

    # === 2. Equal Distribution for NULL Facility ===
    null_facility = bonus_df[bonus_df['Facility'].isna()]
    equal_dist = pd.merge(
        location_hours,
        null_facility,
        left_on=['Full Name', 'Month'],
        right_on=['Name', 'Month']
    )
    equal_dist['key'] = equal_dist['Full Name'] + '|' + equal_dist['Month'] + '|' + equal_dist['Location ID']

    # Exclude locations already matched above. The explicit copy keeps the
    # column assignments below from targeting a filtered view.
    equal_dist = equal_dist[~equal_dist['key'].isin(matched_keys)].copy()

    # Number of remaining locations for each individual bonus. Grouping by the
    # bonus id (instead of name/month/bonus type) keeps two bonuses of the same
    # type in the same month from inflating each other's divisor, and still
    # works when 'Bonus Type' is null.
    loc_counts = equal_dist.groupby('_bonus_row')['Location ID'].transform('count')

    equal_dist['Bonus_Distributed'] = (equal_dist['Total Amount'] / loc_counts).round(2)
    equal_dist['Bonus_Type'] = 'Equal Distribution (Facility=null)'

    # Combine both rules and keep only the reporting columns (this also drops
    # the internal '_bonus_row' and 'key' helpers).
    final_output = pd.concat([exact_matches, equal_dist], ignore_index=True)
    final_output = final_output[[
        'Full Name', 'Month', 'Location ID', 'Facility',
        'Bonus Type', 'Location_Hours', 'Total Amount', 'Bonus_Distributed', 'Bonus_Type'
    ]].sort_values(by=['Full Name', 'Month', 'Bonus_Type', 'Location ID'])

    return final_output.reset_index(drop=True)


In [10]:
# Run the allocation on the sample frames and render the per-location result.
final_result = distribute_bonus(comp_df=comp_df, bonus_df=bonus_df)
display(final_result)

Unnamed: 0,Staff First Name,Staff Last Name,Month,Location ID,Facility,Bonus Type,Location_Hours,Total Amount,Bonus_Distributed,Bonus_Type
0,John,Doe,2024-06,LOC2,,Incentive,4,300,150.0,Equal Distribution (Facility=null)
1,John,Doe,2024-06,LOC3,,Incentive,6,300,150.0,Equal Distribution (Facility=null)
2,John,Doe,2024-06,LOC1,LOC1,Retention,2,100,100.0,Exact Facility Match
