Skip to content

Commit

Permalink
Merge pull request #64 from MITLibraries/HRQB-34-base-salary-change-percent
Browse files
Browse the repository at this point in the history

HRQB 34 - Add base salary change percent
  • Loading branch information
ghukill committed Jun 17, 2024
2 parents f0273e8 + a71230f commit cd654e9
Show file tree
Hide file tree
Showing 2 changed files with 109 additions and 16 deletions.
71 changes: 56 additions & 15 deletions hrqb/tasks/employee_salary_history.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,23 +30,9 @@ def requires(self) -> list[luigi.Task]: # pragma: nocover

def get_dataframe(self) -> pd.DataFrame:
dw_salary_df = self.named_inputs["ExtractDWEmployeeSalaryHistory"].read()
qb_emp_appts_df = self.named_inputs["ExtractQBEmployeeAppointments"].read()

# merge with employee appointment data for QB appointment record identifier
qb_emp_appts_df = qb_emp_appts_df[
[
"Record ID#",
"HR Appointment Key",
"Begin Date",
"End Date",
]
].rename(
columns={
"Record ID#": "related_employee_appointment_id",
"Begin Date": "appointment_begin_date",
"End Date": "appointment_end_date",
}
)
qb_emp_appts_df = self._get_employee_appointments()
salary_df = dw_salary_df.merge(
qb_emp_appts_df,
how="left",
Expand All @@ -63,6 +49,9 @@ def get_dataframe(self) -> pd.DataFrame:
salary_df["original_effort"] = salary_df["original_effort"] / 100.0
salary_df["temp_effort"] = salary_df["temp_effort"] / 100.0

# set base salary change percentage from previous record, for same position
salary_df = self._set_base_salary_change_percent(salary_df)

# mint a unique, deterministic value for the merge "Key" field
salary_df["key"] = salary_df.apply(
lambda row: md5_hash_from_values(
Expand Down Expand Up @@ -90,13 +79,65 @@ def get_dataframe(self) -> pd.DataFrame:
"original_base_amount": "Base Salary",
"original_hourly_rate": "Hourly",
"original_effort": "Effort %",
"base_change_percent": "Salary Change %",
"temp_change_base_amount": "Temp Base Salary",
"temp_change_hourly_rate": "Temp Hourly",
"temp_effort": "Temp Effort %",
"key": "Key",
}
return salary_df[fields.keys()].rename(columns=fields)

def _get_employee_appointments(self) -> pd.DataFrame:
    """Return a trimmed, renamed view of the QB employee appointments extract.

    Reads the "ExtractQBEmployeeAppointments" named input, keeps only the record
    id, HR appointment key, and begin/end date columns, and renames the record id
    and date columns ("HR Appointment Key" keeps its name).
    """
    wanted_columns = ["Record ID#", "HR Appointment Key", "Begin Date", "End Date"]
    renamed_columns = {
        "Record ID#": "related_employee_appointment_id",
        "Begin Date": "appointment_begin_date",
        "End Date": "appointment_end_date",
    }
    appointments_df = self.named_inputs["ExtractQBEmployeeAppointments"].read()
    return appointments_df[wanted_columns].rename(columns=renamed_columns)

def _set_base_salary_change_percent(self, salary_df: pd.DataFrame) -> pd.DataFrame:
    """Add the fractional change in base salary relative to the previous record.

    Works on a copy of ``salary_df`` (the input is not modified) and adds two
    columns:

    - ``previous_base_amount``: the ``original_base_amount`` of the preceding row
      within the same ``(mit_id, hr_appt_key)`` group, after ordering rows by
      ``mit_id``, ``appointment_begin_date`` and ``appointment_end_date``; NaN
      for the first row of each group.
    - ``base_change_percent``: ``original_base_amount / previous_base_amount - 1``
      rounded to 3 decimal places (e.g. ``0.03`` for a 3% increase). It is
      ``0.0`` when there is no previous salary, and also when the previous
      salary is zero, where the ratio is undefined and would otherwise surface
      as ``inf`` or ``NaN``.

    Args:
        salary_df: salary rows with the columns ``mit_id``, ``hr_appt_key``,
            ``appointment_begin_date``, ``appointment_end_date`` and
            ``original_base_amount``.

    Returns:
        A new dataframe, in the input's row order, with the two columns added.
    """
    new_salary_df = salary_df.copy()

    ordered_df = new_salary_df.sort_values(
        ["mit_id", "appointment_begin_date", "appointment_end_date"]
    )
    # shift(1) inside each group yields the prior row's salary; assigning the
    # result back aligns on the index, so the input row order is retained
    new_salary_df["previous_base_amount"] = ordered_df.groupby(
        ["mit_id", "hr_appt_key"]
    )["original_base_amount"].shift(1)

    previous_amount = new_salary_df["previous_base_amount"]
    change = (new_salary_df["original_base_amount"] / previous_amount - 1.0).round(3)

    # a missing or zero previous salary has no meaningful ratio: report "no change"
    # rather than letting NaN / inf flow into the output column
    has_usable_previous = previous_amount.notna() & previous_amount.ne(0)
    new_salary_df["base_change_percent"] = change.where(has_usable_previous, 0.0)
    return new_salary_df


class LoadEmployeeSalaryHistory(QuickbaseUpsertTask):
table_name = luigi.Parameter("Employee Salary History")
Expand Down
54 changes: 53 additions & 1 deletion tests/tasks/test_employee_salary_history.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
# ruff: noqa: PLR2004, PD901
# ruff: noqa: PLR2004, PD901, SLF001

import numpy as np
import pandas as pd

from hrqb.utils import md5_hash_from_values

Expand Down Expand Up @@ -60,3 +63,52 @@ def test_task_transform_employee_salary_history_key_expected_from_input_data(
emp_salary_row["End Date"],
]
)


def test_task_transform_employee_salary_history_set_base_change_percent(
    task_transform_employee_salary_history_complete,
):
    """Check previous salary and change values across two appointments of one person."""
    column_names = [
        "mit_id",
        "hr_appt_key",
        "appointment_begin_date",
        "appointment_end_date",
        "original_base_amount",
    ]
    salary_rows = [
        ("123456789", "123", "2020-01-01", "2021-06-30", 10_000),
        ("123456789", "123", "2020-07-01", "2021-12-31", 10_300),
        ("123456789", "123", "2021-01-01", "2021-06-30", 15_000),
        ("123456789", "456", "2021-07-01", "2022-06-30", 20_000),
        ("123456789", "456", "2022-07-01", "2022-12-31", 22_500),
        ("123456789", "456", "2023-01-01", "2999-12-31", 24_750),
    ]
    salary_df = pd.DataFrame(salary_rows, columns=column_names)

    task = task_transform_employee_salary_history_complete
    result_df = task._set_base_salary_change_percent(salary_df)

    expected_previous_amounts = [
        np.nan,  # first position, so no previous salary
        10_000.0,
        10_300.0,
        np.nan,  # new position, so previous salary None
        20_000.0,
        22_500.0,
    ]
    expected_change_percents = [
        0.0,  # first position, so no change
        0.03,
        0.456,
        0.0,  # new position, so no change
        0.125,
        0.1,
    ]
    np.testing.assert_array_equal(
        result_df["previous_base_amount"].values,
        expected_previous_amounts,
    )
    np.testing.assert_array_equal(
        result_df["base_change_percent"].values,
        expected_change_percents,
    )

0 comments on commit cd654e9

Please sign in to comment.