In [36]:
import pandas as pd

# Compensation (scheduled shift) data, declared column by column.
# Each list position corresponds to one scheduled shift.
df_comp = pd.DataFrame({
    'Staff First Name': ['John', 'Alice', 'Alice', 'Bob'],
    'Staff Last Name': ['Doe', 'Smith', 'Smith', 'Lee'],
    'Location ID': ['LOC1', 'LOC2', 'LOC1', 'LOC3'],
    'Schedule Task Name': ['Task A', 'Task B', 'Task C', 'Task D'],
    'Schedule Start Date': ['2024-06-03', '2024-06-03', '2024-06-04', '2024-06-05'],
    'Schedule Start Time': ['08:00', '09:00', '10:00', '07:00'],
    'Schedule End Date': ['2024-06-03', '2024-06-03', '2024-06-04', '2024-06-05'],
    'Schedule End Time': ['12:00', '13:00', '14:00', '11:00'],
    'Schedule Duration (hours)': [4.0, 4.0, 4.0, 4.0],
})

# Parse the two date columns from ISO strings into datetimes.
# The time-of-day columns are intentionally left as plain strings.
df_comp = df_comp.assign(**{
    date_col: pd.to_datetime(df_comp[date_col])
    for date_col in ('Schedule Start Date', 'Schedule End Date')
})
display(df_comp)


Unnamed: 0,Staff First Name,Staff Last Name,Location ID,Schedule Task Name,Schedule Start Date,Schedule Start Time,Schedule End Date,Schedule End Time,Schedule Duration (hours)
0,John,Doe,LOC1,Task A,2024-06-03,08:00,2024-06-03,12:00,4.0
1,Alice,Smith,LOC2,Task B,2024-06-03,09:00,2024-06-03,13:00,4.0
2,Alice,Smith,LOC1,Task C,2024-06-04,10:00,2024-06-04,14:00,4.0
3,Bob,Lee,LOC3,Task D,2024-06-05,07:00,2024-06-05,11:00,4.0


In [37]:
# MCR data, declared column by column.
# Each list position corresponds to one record; an empty 'PROVIDER 2'
# string means the record lists no second provider.
df_mcr = pd.DataFrame({
    'DOS': ['2024-06-03', '2024-06-03', '2024-06-05'],
    'FACILITY': ['LOC1', 'LOC2', 'LOC3'],
    'PROVIDER': ['John Doe', 'Alice Smith', 'Bob Lee'],
    'PROVIDER 2': ['Alice Smith', '', ''],
    'PROVIDER TYPE': ['Surgeon', 'Surgeon', 'Surgeon'],
    'PATIENT': ['P001', 'P002', 'P003'],
    'START TIME': ['08:00', '09:00', '07:00'],
    'END TIME': ['12:00', '13:00', '11:00'],
    'CALCULATED TIME': [4.0, 4.0, 4.0],
    'FILE NAME': ['f1.pdf', 'f2.pdf', 'f3.pdf'],
    '# OF PAGES': [3, 2, 4],
    'NOTES': ['', '', ''],
})

# Parse DOS from ISO strings into datetimes so the .dt accessor can be used on it.
df_mcr = df_mcr.assign(DOS=pd.to_datetime(df_mcr['DOS']))
display(df_mcr)


Unnamed: 0,DOS,FACILITY,PROVIDER,PROVIDER 2,PROVIDER TYPE,PATIENT,START TIME,END TIME,CALCULATED TIME,FILE NAME,# OF PAGES,NOTES
0,2024-06-03,LOC1,John Doe,Alice Smith,Surgeon,P001,08:00,12:00,4.0,f1.pdf,3,
1,2024-06-03,LOC2,Alice Smith,,Surgeon,P002,09:00,13:00,4.0,f2.pdf,2,
2,2024-06-05,LOC3,Bob Lee,,Surgeon,P003,07:00,11:00,4.0,f3.pdf,4,


In [38]:
# Reconcile MCR records against scheduled shifts.
# Inputs:  df_mcr  (one row per MCR record, up to two providers per row)
#          df_comp (one row per scheduled shift)
# Output:  df_result_final, one row per (provider record, matching scheduled shift).

# Step 1: Unpivot MCR so PROVIDER and PROVIDER 2 each get their own row
df_mcr_long = pd.melt(
    df_mcr,
    id_vars=['DOS', 'FACILITY', 'START TIME', 'END TIME', 'CALCULATED TIME'],
    value_vars=['PROVIDER', 'PROVIDER 2'],
    var_name='Provider Role',
    value_name='PROVIDER_NAME'
)

# Normalise provider names before filtering: missing values (NaN/None) are
# treated as blank and surrounding whitespace is trimmed. Comparing the raw
# column with '' would keep NaN rows, because NaN != '' evaluates to True.
df_mcr_long['PROVIDER_NAME'] = (
    df_mcr_long['PROVIDER_NAME'].fillna('').astype(str).str.strip()
)

# Remove blank providers
df_mcr_long = df_mcr_long[df_mcr_long['PROVIDER_NAME'] != ''].copy()

# Step 2: Split name into First and Last
# Splitting on runs of whitespace (pat=None) tolerates double spaces.
# reindex(columns=[0, 1]) guarantees exactly two columns even when no name
# contains a space or the frame is empty; a missing last name stays null.
name_parts = (
    df_mcr_long['PROVIDER_NAME']
    .str.split(n=1, expand=True)
    .reindex(columns=[0, 1])
)
df_mcr_long['Staff First Name'] = name_parts[0]
df_mcr_long['Staff Last Name'] = name_parts[1]

# Add Month columns for merge ('YYYY-MM' strings on both sides)
df_mcr_long['Month'] = df_mcr_long['DOS'].dt.to_period('M').astype(str)
df_mcr_long['Location ID'] = df_mcr_long['FACILITY']

# This adds a column to the shared df_comp frame. It is kept because later
# cells may rely on it; the assignment is idempotent, so re-running is safe.
df_comp['Month'] = df_comp['Schedule Start Date'].dt.to_period('M').astype(str)

# Step 3: Perform the join
# NOTE(review): the keys are name + location + month, not the exact date, so a
# DOS is paired with every shift that person has at that location in the same
# month, even when the shift falls on a different day. Confirm that
# month-level matching is intended.
df_result = pd.merge(
    df_mcr_long,
    df_comp,
    on=['Staff First Name', 'Staff Last Name', 'Location ID', 'Month'],
    how='left'
)

# Step 4: Final cleanup and rename
df_result_final = (
    df_result[[
        'Staff First Name', 'Staff Last Name', 'Location ID', 'Month',
        'Schedule Task Name', 'Schedule Start Date', 'Schedule Start Time',
        'Schedule End Date', 'Schedule End Time', 'Schedule Duration (hours)',
        'DOS', 'CALCULATED TIME'
    ]]
    .rename(columns={'CALCULATED TIME': 'Actual Worked Hours'})
    .sort_values(by=['Staff First Name', 'Month', 'Location ID', 'DOS'])
    .reset_index(drop=True)
)

# Final output
df_result_final


Unnamed: 0,Staff First Name,Staff Last Name,Location ID,Month,Schedule Task Name,Schedule Start Date,Schedule Start Time,Schedule End Date,Schedule End Time,Schedule Duration (hours),DOS,Actual Worked Hours
0,Alice,Smith,LOC1,2024-06,Task C,2024-06-04,10:00,2024-06-04,14:00,4.0,2024-06-03,4.0
1,Alice,Smith,LOC2,2024-06,Task B,2024-06-03,09:00,2024-06-03,13:00,4.0,2024-06-03,4.0
2,Bob,Lee,LOC3,2024-06,Task D,2024-06-05,07:00,2024-06-05,11:00,4.0,2024-06-05,4.0
3,John,Doe,LOC1,2024-06,Task A,2024-06-03,08:00,2024-06-03,12:00,4.0,2024-06-03,4.0
