In [13]:
import pandas as pd
import numpy as np

In [20]:
import pandas as pd

# Sample compensation input: three rows for the same staff member and pay
# period, one per location. The first row carries the 'CALL' pay code with
# 0 rounded hours but a 2-hour scheduled duration.
comp_data = dict([
    ('Staff First Name', ['John'] * 3),
    ('Staff Last Name', ['Doe'] * 3),
    ('Pay Period Start Date', ['2024-06-01'] * 3),
    ('Location ID', ['LOC1', 'LOC2', 'LOC3']),
    ('Schedule Duration (hours)', [2, 4, 6]),
    ('Pay Code Label', ['CALL', 'Regular', 'Regular']),
    ('Hours Rounded', [0, 4, 6]),
])

comp_df = pd.DataFrame(data=comp_data)
display(comp_df)


Unnamed: 0,Staff First Name,Staff Last Name,Pay Period Start Date,Location ID,Schedule Duration (hours),Pay Code Label,Hours Rounded
0,John,Doe,2024-06-01,LOC1,2,CALL,0
1,John,Doe,2024-06-01,LOC2,4,Regular,4
2,John,Doe,2024-06-01,LOC3,6,Regular,6


In [21]:
# Sample bonus input: two bonuses for the same staff member on the same
# effective date. The first names a facility (LOC1); the second has no
# facility (None).
bonus_data = dict([
    ('Staff First Name', ['John'] * 2),
    ('Staff Last Name', ['Doe'] * 2),
    ('Facility', ['LOC1', None]),
    ('Bonus Type', ['Retention', 'Incentive']),
    ('Effective Date', ['2024-06-01'] * 2),
    ('Total Amount', [100, 300]),
])

bonus_df = pd.DataFrame(data=bonus_data)
display(bonus_df)

Unnamed: 0,Staff First Name,Staff Last Name,Facility,Bonus Type,Effective Date,Total Amount
0,John,Doe,LOC1,Retention,2024-06-01,100
1,John,Doe,,Incentive,2024-06-01,300


In [22]:
import numpy as np

def distribute_bonus(comp_df, bonus_df):
    """Allocate each staff member's monthly bonuses to the locations they worked.

    Bonuses are matched to worked locations per staff member (first + last
    name) and calendar month (derived from 'Pay Period Start Date' on the
    compensation side and 'Effective Date' on the bonus side):

    1. A bonus with a non-null 'Facility' is assigned in full to the worked
       location whose 'Location ID' equals that facility
       (``Bonus_Type == 'Exact Facility Match'``).
    2. A bonus with a null 'Facility' is split equally, rounded to 2
       decimals, across the staff member's locations for that month that did
       not receive an exact-match bonus
       (``Bonus_Type == 'Equal Distribution (Facility=null)'``).

    Parameters
    ----------
    comp_df : pandas.DataFrame
        Compensation rows. Columns read: 'Staff First Name',
        'Staff Last Name', 'Pay Period Start Date', 'Location ID',
        'Schedule Duration (hours)', 'Pay Code Label', 'Hours Rounded'.
    bonus_df : pandas.DataFrame
        Bonus rows. Columns read: 'Staff First Name', 'Staff Last Name',
        'Facility', 'Bonus Type', 'Effective Date', 'Total Amount'.

    Returns
    -------
    pandas.DataFrame
        One row per (location, bonus) allocation with columns
        'Staff First Name', 'Staff Last Name', 'Month', 'Location ID',
        'Facility', 'Bonus Type', 'Location_Hours', 'Total Amount',
        'Bonus_Distributed', 'Bonus_Type', sorted by first name, last name,
        month, allocation type and location, with a fresh 0..n-1 index.
        'Location_Hours' is the summed effective hours for that location and
        month ('Schedule Duration (hours)' for CALL rows, 'Hours Rounded'
        otherwise); it is reported but does not influence the split.

    Notes
    -----
    * The inputs are copied and never modified.
    * Both joins are inner joins: a bonus whose facility matches no worked
      location, or a null-facility bonus with no remaining unmatched
      location, yields no output rows.
    * Each equal share is rounded to 2 decimals independently, so the shares
      of one bonus may not sum exactly to its 'Total Amount'.
    """
    staff_month_keys = ['Staff First Name', 'Staff Last Name', 'Month']
    location_keys = staff_month_keys + ['Location ID']
    output_columns = [
        'Staff First Name', 'Staff Last Name', 'Month', 'Location ID', 'Facility',
        'Bonus Type', 'Location_Hours', 'Total Amount', 'Bonus_Distributed', 'Bonus_Type'
    ]

    comp_df = comp_df.copy()
    bonus_df = bonus_df.copy()

    comp_df['Pay Period Start Date'] = pd.to_datetime(comp_df['Pay Period Start Date'])
    comp_df['Month'] = comp_df['Pay Period Start Date'].dt.to_period('M').astype(str)

    # Effective hours: CALL rows count their scheduled duration, every other
    # pay code counts the rounded hours.
    comp_df['Effective Hours'] = np.where(
        comp_df['Pay Code Label'].str.upper() == 'CALL',
        comp_df['Schedule Duration (hours)'],
        comp_df['Hours Rounded']
    )

    # One row per staff member / month / location with total effective hours.
    location_hours = (
        comp_df.groupby(location_keys, as_index=False)['Effective Hours']
        .sum()
        .rename(columns={'Effective Hours': 'Location_Hours'})
    )

    bonus_df['Effective Date'] = pd.to_datetime(bonus_df['Effective Date'])
    bonus_df['Month'] = bonus_df['Effective Date'].dt.to_period('M').astype(str)

    has_facility = bonus_df['Facility'].notna()

    # 1. Exact facility match: the whole bonus goes to the named location.
    exact_matches = pd.merge(
        location_hours, bonus_df[has_facility],
        left_on=location_keys,
        right_on=staff_month_keys + ['Facility']
    )
    exact_matches['Bonus_Distributed'] = exact_matches['Total Amount']
    exact_matches['Bonus_Type'] = 'Exact Facility Match'

    # 2. Equal distribution for bonuses without a facility.
    # Tag every such bonus with its own id so the split count is computed per
    # bonus; two bonuses of the same 'Bonus Type' in one month must not be
    # pooled into a single group.
    null_facility = bonus_df[~has_facility].copy()
    null_facility['_bonus_row'] = np.arange(len(null_facility))

    equal_dist = pd.merge(location_hours, null_facility, on=staff_month_keys)

    # Drop locations that already received an exact-match bonus. Comparing
    # key tuples (instead of a '|'-joined string) works for non-string IDs
    # and cannot collide when a value itself contains '|'.
    matched_locations = pd.MultiIndex.from_frame(exact_matches[location_keys])
    candidate_locations = pd.MultiIndex.from_frame(equal_dist[location_keys])
    already_matched = candidate_locations.isin(matched_locations)
    # Explicit copy so the column assignments below do not hit a filtered view.
    equal_dist = equal_dist[~already_matched].copy()

    # Number of remaining locations each individual bonus is shared between.
    loc_counts = equal_dist.groupby('_bonus_row')['Location ID'].transform('count')

    equal_dist['Bonus_Distributed'] = (equal_dist['Total Amount'] / loc_counts).round(2)
    equal_dist['Bonus_Type'] = 'Equal Distribution (Facility=null)'

    # Combine both allocation kinds; selecting output_columns also drops the
    # internal '_bonus_row' helper.
    final_output = pd.concat([exact_matches, equal_dist], ignore_index=True)[output_columns]

    # Last name is part of the sort so staff sharing a first name stay grouped.
    final_output = final_output.sort_values(
        by=['Staff First Name', 'Staff Last Name', 'Month', 'Bonus_Type', 'Location ID']
    ).reset_index(drop=True)

    return final_output


In [23]:
# Run the allocation on the sample compensation and bonus frames, then render
# the resulting table.
final_result = distribute_bonus(comp_df=comp_df, bonus_df=bonus_df)
display(final_result)

Unnamed: 0,Staff First Name,Staff Last Name,Month,Location ID,Facility,Bonus Type,Location_Hours,Total Amount,Bonus_Distributed,Bonus_Type
0,John,Doe,2024-06,LOC2,,Incentive,4,300,150.0,Equal Distribution (Facility=null)
1,John,Doe,2024-06,LOC3,,Incentive,6,300,150.0,Equal Distribution (Facility=null)
2,John,Doe,2024-06,LOC1,LOC1,Retention,2,100,100.0,Exact Facility Match
