Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 14 additions & 2 deletions src/microplex_us/pipelines/us.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,7 @@
"taxable_interest_income",
"tax_exempt_interest_income",
"capital_gains",
"long_term_capital_gains_before_response",
"long_term_capital_gains",
"short_term_capital_gains",
"non_sch_d_capital_gains",
Expand All @@ -303,6 +304,7 @@
)
PUF_SUPPORT_CLONE_TOP_TAIL_SCALE_VARIABLES: tuple[str, ...] = (
"capital_gains",
"long_term_capital_gains_before_response",
"long_term_capital_gains",
"short_term_capital_gains",
"non_sch_d_capital_gains",
Expand Down Expand Up @@ -5812,7 +5814,8 @@ def add(variable: str) -> bool:
add(variable)

if not add("capital_gains"):
add("long_term_capital_gains")
if not add("long_term_capital_gains_before_response"):
add("long_term_capital_gains")
add("short_term_capital_gains")
add("non_sch_d_capital_gains")

Expand Down Expand Up @@ -5872,10 +5875,19 @@ def _apply_puf_support_clone_top_tail_guard(
return clone, summary

integrated_set = set(integrated_variables)

def is_integrated_or_export_alias(variable: str) -> bool:
    """Return whether ``variable`` may be scaled by the top-tail guard.

    A variable qualifies when it is itself in ``integrated_set``, or when it
    is ``long_term_capital_gains_before_response`` and
    ``long_term_capital_gains`` is in ``integrated_set``.
    """
    directly_integrated = variable in integrated_set
    if not directly_integrated:
        # Only the before-response LTCG column is treated as an alias, and
        # only while the variable it mirrors is integrated.
        names_the_alias = variable == "long_term_capital_gains_before_response"
        return names_the_alias and "long_term_capital_gains" in integrated_set
    return True

scale_variables = [
variable
for variable in self.config.puf_support_clone_top_tail_scale_variables
if variable in clone.columns and variable in integrated_set
if variable in clone.columns and is_integrated_or_export_alias(variable)
]
if not scale_variables:
summary["max_rough_agi_after"] = summary["max_rough_agi_before"]
Expand Down
44 changes: 44 additions & 0 deletions tests/pipelines/test_us.py
Original file line number Diff line number Diff line change
Expand Up @@ -4100,6 +4100,50 @@ def test_puf_support_clone_top_tail_guard_avoids_redundant_income_totals(self):
pd.testing.assert_frame_equal(guarded, clone)
assert summary["affected_rows"] == 0

def test_puf_support_clone_top_tail_guard_scales_exported_ltcg_alias(self):
    """Check that the top-tail guard scales the exported LTCG alias column.

    Only ``long_term_capital_gains`` is passed as an integrated variable,
    while the clone also carries ``long_term_capital_gains_before_response``.
    The test expects the guard to bring rough AGI down to the configured cap,
    to leave ``employment_income`` untouched, and to shrink both LTCG
    columns, with the before-response column reported as the scale basis.
    """
    wages = "employment_income"
    ltcg = "long_term_capital_gains"
    ltcg_alias = "long_term_capital_gains_before_response"
    rough_agi_cap = 78_999_999.0

    config = USMicroplexBuildConfig(
        synthesis_backend="seed",
        puf_support_clone_enabled=True,
        puf_support_clone_top_tail_rough_agi_cap=rough_agi_cap,
    )
    pipeline = USMicroplexPipeline(config)

    # Single-row clone whose before-response LTCG alone exceeds the cap.
    clone = pd.DataFrame(
        {
            wages: [100_000.0],
            ltcg: [70_000_000.0],
            ltcg_alias: [95_000_000.0],
        }
    )

    guarded, summary = pipeline._apply_puf_support_clone_top_tail_guard(
        clone,
        integrated_variables=[ltcg],
    )
    rough_agi, rough_agi_variables = (
        pipeline._puf_support_clone_top_tail_rough_agi(guarded)
    )

    # After guarding, rough AGI sits at the cap and is built from the wage
    # column plus the before-response LTCG column.
    assert rough_agi.iloc[0] == pytest.approx(rough_agi_cap)
    assert rough_agi_variables == [wages, ltcg_alias]

    # Wages are preserved; both LTCG columns are reduced.
    assert guarded[wages].iloc[0] == pytest.approx(100_000.0)
    alias_before = clone[ltcg_alias].iloc[0]
    alias_after = guarded[ltcg_alias].iloc[0]
    assert alias_after < alias_before
    ltcg_before = clone[ltcg].iloc[0]
    ltcg_after = guarded[ltcg].iloc[0]
    assert ltcg_after < ltcg_before

    # The summary records one affected row and the alias as the scale basis.
    assert summary["affected_rows"] == 1
    assert summary["scale_basis_variables"] == [ltcg_alias]
    assert ltcg_alias in summary["scaled_variables"]

def test_puf_support_clone_top_tail_guard_can_be_disabled(self):
pipeline = USMicroplexPipeline(
USMicroplexBuildConfig(
Expand Down
Loading