In [7]:
import pandas as pd
import numpy as np

In [8]:

data = {
    'Staff First Name': [
        'John', 'John', 'John',  # June - John in 3 locations
        'Alice', 'Alice',        # May - Alice in 2 locations
        'Bob',                   # May - Bob one location
        'Carol', 'Carol', 'Carol', # July - Carol in 3 locations
        'Dave', 'Dave',          # May - Dave in 2 locations
        'Eva',                   # July - Eva one location
        'Frank', 'Frank',        # June - Frank in 2 locations
        'Grace'                  # July - Grace one location
    ],
    'Staff Last Name': [
        'Doe', 'Doe', 'Doe',
        'Smith', 'Smith',
        'Brown',
        'Taylor', 'Taylor', 'Taylor',
        'Lee', 'Lee',
        'White',
        'Green', 'Green',
        'Hall'
    ],
    'Pay Period Start Date': [
        '2024-06-01', '2024-06-01', '2024-06-01',
        '2024-05-01', '2024-05-01',
        '2024-05-01',
        '2024-07-01', '2024-07-01', '2024-07-01',
        '2024-05-01', '2024-05-01',
        '2024-07-01',
        '2024-06-01', '2024-06-01',
        '2024-07-01'
    ],
    'Location ID': [
        'LOC1', 'LOC2', 'LOC3',
        'LOC1', 'LOC2',
        'LOC1',
        'LOC1', 'LOC2', 'LOC3',
        'LOC1', 'LOC2',
        'LOC1',
        'LOC2', 'LOC3',
        'LOC1'
    ],
    'Pay Code ID': [
        'Regular', 'CALL', 'Regular',
        'CALL', 'Regular',
        'Regular',
        'Regular', 'CALL', 'Bonus',
        'Regular', 'CALL',
        'Regular',
        'CALL', 'Regular',
        'CALL'
    ],
    'Schedule Duration (hours)': [
        0, 3, 0,
        7, 0,
        4,
        5, 6, 0,
        6, 5,
        6,
        4, 0,
        7
    ],
    'Hours Rounded': [
        4, 0, 3,
        0, 8,
        4,
        5, 0, 0,
        6, 0,
        6,
        0, 7,
        7
    ]
}

df = pd.DataFrame(data)

In [9]:
display(df)

Unnamed: 0,Staff First Name,Staff Last Name,Pay Period Start Date,Location ID,Pay Code ID,Schedule Duration (hours),Hours Rounded
0,John,Doe,2024-06-01,LOC1,Regular,0,4
1,John,Doe,2024-06-01,LOC2,CALL,3,0
2,John,Doe,2024-06-01,LOC3,Regular,0,3
3,Alice,Smith,2024-05-01,LOC1,CALL,7,0
4,Alice,Smith,2024-05-01,LOC2,Regular,0,8
5,Bob,Brown,2024-05-01,LOC1,Regular,4,4
6,Carol,Taylor,2024-07-01,LOC1,Regular,5,5
7,Carol,Taylor,2024-07-01,LOC2,CALL,6,0
8,Carol,Taylor,2024-07-01,LOC3,Bonus,0,0
9,Dave,Lee,2024-05-01,LOC1,Regular,6,6


In [10]:


# 2. Convert Pay Period Start Date to datetime and extract Month
df['Pay Period Start Date'] = pd.to_datetime(df['Pay Period Start Date'])
df['Month'] = df['Pay Period Start Date'].dt.to_period('M').astype(str)

# 3. Filter rows where Pay Code ID not in excluded list
excluded = ['Bonus', 'call back', 'expenses']
df_filtered = df[~df['Pay Code ID'].str.lower().isin([e.lower() for e in excluded])].copy()

# 4. Calculate 'Effective Hours' per row using condition
df_filtered['Effective Hours'] = np.where(
    df_filtered['Pay Code ID'].str.upper() == 'CALL',
    df_filtered['Schedule Duration (hours)'],
    df_filtered['Hours Rounded']
)

# 5. Group by staff, month, and location for location-wise sum
location_hours = df_filtered.groupby(
    ['Staff First Name', 'Staff Last Name', 'Month', 'Location ID'],
    as_index=False
)['Effective Hours'].sum().rename(columns={'Effective Hours': 'Location_Hours'})

# 6. Group by staff and month for total hours (across all locations)
total_hours = location_hours.groupby(
    ['Staff First Name', 'Staff Last Name', 'Month'],
    as_index=False
)['Location_Hours'].sum().rename(columns={'Location_Hours': 'Total_Hours'})

# 7. Merge and calculate % contribution
result = pd.merge(location_hours, total_hours,
                  on=['Staff First Name', 'Staff Last Name', 'Month'])

result['Location_Percentage'] = round(
    (result['Location_Hours'] / result['Total_Hours']) * 100, 2
)

# 8. Display result
display(result)

# 9. Save to Excel
result.to_excel('staff_location_contribution.xlsx', index=False)


Unnamed: 0,Staff First Name,Staff Last Name,Month,Location ID,Location_Hours,Total_Hours,Location_Percentage
0,Alice,Smith,2024-05,LOC1,7,15,46.67
1,Alice,Smith,2024-05,LOC2,8,15,53.33
2,Bob,Brown,2024-05,LOC1,4,4,100.0
3,Carol,Taylor,2024-07,LOC1,5,11,45.45
4,Carol,Taylor,2024-07,LOC2,6,11,54.55
5,Dave,Lee,2024-05,LOC1,6,11,54.55
6,Dave,Lee,2024-05,LOC2,5,11,45.45
7,Eva,White,2024-07,LOC1,6,6,100.0
8,Frank,Green,2024-06,LOC2,4,11,36.36
9,Frank,Green,2024-06,LOC3,7,11,63.64


In [11]:
# Sample Bonus data for staff
bonus_data = {
    'Staff First Name': ['John', 'Alice', 'Bob', 'Carol', 'Dave', 'Eva', 'Frank', 'Grace'],
    'Staff Last Name': ['Doe', 'Smith', 'Brown', 'Taylor', 'Lee', 'White', 'Green', 'Hall'],
    'Bonus': [150, 200, 100, 250, 180, 120, 160, 140]
}

df_bonus = pd.DataFrame(bonus_data)

# Join the aggregated location hours dataframe with bonus data on Staff First and Last Name
final_df = pd.merge(
    result,
    df_bonus,
    on=['Staff First Name', 'Staff Last Name'],
    how='left'  # Use left join to keep all rows from aggregated hours
)

display(final_df)


Unnamed: 0,Staff First Name,Staff Last Name,Month,Location ID,Location_Hours,Total_Hours,Location_Percentage,Bonus
0,Alice,Smith,2024-05,LOC1,7,15,46.67,200
1,Alice,Smith,2024-05,LOC2,8,15,53.33,200
2,Bob,Brown,2024-05,LOC1,4,4,100.0,100
3,Carol,Taylor,2024-07,LOC1,5,11,45.45,250
4,Carol,Taylor,2024-07,LOC2,6,11,54.55,250
5,Dave,Lee,2024-05,LOC1,6,11,54.55,180
6,Dave,Lee,2024-05,LOC2,5,11,45.45,180
7,Eva,White,2024-07,LOC1,6,6,100.0,120
8,Frank,Green,2024-06,LOC2,4,11,36.36,160
9,Frank,Green,2024-06,LOC3,7,11,63.64,160


In [12]:
# Distribute Bonus amount location-wise based on Location_Percentage
final_df['Bonus_Distributed'] = round(
    (final_df['Bonus'] * final_df['Location_Percentage'] / 100), 2
)

display(final_df[['Staff First Name', 'Staff Last Name', 'Month', 'Location ID', 
                'Location_Hours', 'Total_Hours', 'Location_Percentage', 
                'Bonus', 'Bonus_Distributed']])

Unnamed: 0,Staff First Name,Staff Last Name,Month,Location ID,Location_Hours,Total_Hours,Location_Percentage,Bonus,Bonus_Distributed
0,Alice,Smith,2024-05,LOC1,7,15,46.67,200,93.34
1,Alice,Smith,2024-05,LOC2,8,15,53.33,200,106.66
2,Bob,Brown,2024-05,LOC1,4,4,100.0,100,100.0
3,Carol,Taylor,2024-07,LOC1,5,11,45.45,250,113.62
4,Carol,Taylor,2024-07,LOC2,6,11,54.55,250,136.38
5,Dave,Lee,2024-05,LOC1,6,11,54.55,180,98.19
6,Dave,Lee,2024-05,LOC2,5,11,45.45,180,81.81
7,Eva,White,2024-07,LOC1,6,6,100.0,120,120.0
8,Frank,Green,2024-06,LOC2,4,11,36.36,160,58.18
9,Frank,Green,2024-06,LOC3,7,11,63.64,160,101.82
