Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 20 additions & 3 deletions src/microplex_us/variables.py
Original file line number Diff line number Diff line change
Expand Up @@ -642,6 +642,16 @@ def _nonnegative_series(frame: pd.DataFrame, column: str) -> pd.Series:
)


# Share of a dividend total that is treated as qualified when no observed
# qualified/non-qualified breakdown is available (e.g. CPS DIV_VAL, which
# reports only a total). Basis: SOI 2015 PUF E00650/E00600 =
# $204.0B/$260.9B, approximately 0.782; the constant below rounds this to
# two decimal places.
# Splitting an unsplit total by this share avoids zeroing
# qualified_dividend_income on every CPS-native dividend row (which previously
# put 100% of the total into non-qualified and inverted the national
# qualified vs non-qualified split relative to the SOI targets).
UNSPLIT_DIVIDEND_QUALIFIED_SHARE = 0.78


def normalize_dividend_columns(frame: pd.DataFrame) -> pd.DataFrame:
"""Normalize dividends onto an atomic basis, then derive totals."""
result = frame.copy()
Expand All @@ -660,7 +670,14 @@ def normalize_dividend_columns(frame: pd.DataFrame) -> pd.DataFrame:
if has_qualified and has_non_qualified:
component_total = qualified + non_qualified
total_only = component_total.eq(0.0) & total.gt(0.0)
non_qualified = non_qualified.where(~total_only, total)
# Allocate an unsplit total by the SOI qualified share rather than
# defaulting the whole amount to non-qualified.
qualified = qualified.where(
~total_only, total * UNSPLIT_DIVIDEND_QUALIFIED_SHARE
)
non_qualified = non_qualified.where(
~total_only, total * (1.0 - UNSPLIT_DIVIDEND_QUALIFIED_SHARE)
)
component_total = qualified + non_qualified
normalized_total = component_total.where(component_total.ne(0.0), total)
elif has_qualified:
Expand All @@ -686,8 +703,8 @@ def normalize_dividend_columns(frame: pd.DataFrame) -> pd.DataFrame:
normalized_total = pd.Series(normalized_total, index=result.index, dtype=float)
else:
normalized_total = total.astype(float)
non_qualified = normalized_total.copy()
qualified = pd.Series(0.0, index=result.index, dtype=float)
qualified = normalized_total * UNSPLIT_DIVIDEND_QUALIFIED_SHARE
non_qualified = normalized_total * (1.0 - UNSPLIT_DIVIDEND_QUALIFIED_SHARE)

result["qualified_dividend_income"] = qualified.astype(float)
result["non_qualified_dividend_income"] = non_qualified.astype(float)
Expand Down
34 changes: 32 additions & 2 deletions tests/test_variables.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from __future__ import annotations

import pandas as pd
import pytest
from microplex.core import EntityType

from microplex_us.variables import (
Expand Down Expand Up @@ -57,12 +58,41 @@ def test_normalize_dividend_columns_coalesces_sparse_total_aliases_by_row():

normalized = normalize_dividend_columns(frame)

assert normalized["qualified_dividend_income"].tolist() == [0.0, 5.0, 0.0]
assert normalized["non_qualified_dividend_income"].tolist() == [80.0, 25.0, 0.0]
# Row 0 carries only a dividend total (80) with no observed split, so it is
# allocated by the SOI qualified share instead of defaulting 100% to
# non-qualified. Rows 1-2 keep their observed components unchanged.
assert normalized["qualified_dividend_income"].tolist() == pytest.approx(
[62.4, 5.0, 0.0]
)
assert normalized["non_qualified_dividend_income"].tolist() == pytest.approx(
[17.6, 25.0, 0.0]
)
assert normalized["ordinary_dividend_income"].tolist() == [80.0, 30.0, 0.0]
assert normalized["dividend_income"].tolist() == [80.0, 30.0, 0.0]


def test_normalize_dividend_columns_splits_unsplit_total_by_qualified_share():
    """An unsplit dividend total is allocated between the two components.

    The input row carries only a dividend total (as with CPS DIV_VAL) and
    zero qualified / non-qualified components. The normalizer must split the
    total by the qualified share instead of leaving the whole amount in
    non-qualified, which zeroed qualified dividends nationally and inverted
    the split versus the SOI targets.
    """
    unsplit = pd.DataFrame(
        {
            "qualified_dividend_income": [0.0],
            "non_qualified_dividend_income": [0.0],
            "dividend_income": [1_000.0],
        }
    )

    result = normalize_dividend_columns(unsplit)

    # Expected per-column values, checked in the order listed: the 1,000
    # total splits into 780 qualified and 220 non-qualified, and the total
    # itself is preserved.
    expected_by_column = {
        "qualified_dividend_income": [780.0],
        "non_qualified_dividend_income": [220.0],
        "dividend_income": [1_000.0],
    }
    for column, expected in expected_by_column.items():
        assert result[column].tolist() == pytest.approx(expected)


def test_normalize_social_security_columns_tracks_unclassified_residual():
frame = pd.DataFrame(
{
Expand Down
Loading