PolicyEngine · MaxGhenis · Jun 3, 2026 · Jun 3, 2026
diff --git a/src/microplex_us/variables.py b/src/microplex_us/variables.py
@@ -642,6 +642,16 @@ def _nonnegative_series(frame: pd.DataFrame, column: str) -> pd.Series:
     )
 
 
+# Share of a dividend total that is qualified when no observed qualified/
+# non-qualified breakdown is available (e.g. CPS DIV_VAL, which reports only a
+# total). Basis: SOI 2015 PUF E00650/E00600 = $204.0B/$260.9B = 0.782 qualified,
+# rounded to 0.78. Splitting an unsplit total by this share avoids zeroing
+# qualified_dividend_income on every CPS-native dividend row (which previously
+# dumped 100% into non-qualified and inverted the national qualified vs
+# non-qualified split relative to the SOI targets).
+UNSPLIT_DIVIDEND_QUALIFIED_SHARE = 0.78
+
+
 def normalize_dividend_columns(frame: pd.DataFrame) -> pd.DataFrame:
     """Normalize dividends onto an atomic basis, then derive totals."""
     result = frame.copy()
@@ -660,7 +670,14 @@ def normalize_dividend_columns(frame: pd.DataFrame) -> pd.DataFrame:
     if has_qualified and has_non_qualified:
         component_total = qualified + non_qualified
         total_only = component_total.eq(0.0) & total.gt(0.0)
-        non_qualified = non_qualified.where(~total_only, total)
+        # Allocate an unsplit total by the SOI qualified share rather than
+        # defaulting the whole amount to non-qualified.
+        qualified = qualified.where(
+            ~total_only, total * UNSPLIT_DIVIDEND_QUALIFIED_SHARE
+        )
+        non_qualified = non_qualified.where(
+            ~total_only, total * (1.0 - UNSPLIT_DIVIDEND_QUALIFIED_SHARE)
+        )
         component_total = qualified + non_qualified
         normalized_total = component_total.where(component_total.ne(0.0), total)
     elif has_qualified:
@@ -686,8 +703,8 @@ def normalize_dividend_columns(frame: pd.DataFrame) -> pd.DataFrame:
         normalized_total = pd.Series(normalized_total, index=result.index, dtype=float)
     else:
         normalized_total = total.astype(float)
-        non_qualified = normalized_total.copy()
-        qualified = pd.Series(0.0, index=result.index, dtype=float)
+        qualified = normalized_total * UNSPLIT_DIVIDEND_QUALIFIED_SHARE
+        non_qualified = normalized_total * (1.0 - UNSPLIT_DIVIDEND_QUALIFIED_SHARE)
 
     result["qualified_dividend_income"] = qualified.astype(float)
     result["non_qualified_dividend_income"] = non_qualified.astype(float)

diff --git a/tests/test_variables.py b/tests/test_variables.py
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 import pandas as pd
+import pytest
 from microplex.core import EntityType
 
 from microplex_us.variables import (
@@ -57,12 +58,41 @@ def test_normalize_dividend_columns_coalesces_sparse_total_aliases_by_row():
 
     normalized = normalize_dividend_columns(frame)
 
-    assert normalized["qualified_dividend_income"].tolist() == [0.0, 5.0, 0.0]
-    assert normalized["non_qualified_dividend_income"].tolist() == [80.0, 25.0, 0.0]
+    # Row 0 carries only a dividend total (80) with no observed split, so it is
+    # allocated by the SOI qualified share instead of defaulting 100% to
+    # non-qualified. Rows 1-2 keep their observed components unchanged.
+    assert normalized["qualified_dividend_income"].tolist() == pytest.approx(
+        [62.4, 5.0, 0.0]
+    )
+    assert normalized["non_qualified_dividend_income"].tolist() == pytest.approx(
+        [17.6, 25.0, 0.0]
+    )
     assert normalized["ordinary_dividend_income"].tolist() == [80.0, 30.0, 0.0]
     assert normalized["dividend_income"].tolist() == [80.0, 30.0, 0.0]
 
 
+def test_normalize_dividend_columns_splits_unsplit_total_by_qualified_share():
+    # A row with only a dividend total (e.g. CPS DIV_VAL) and no qualified /
+    # non-qualified components must be split by the SOI qualified share, not
+    # left entirely non-qualified (which zeroed qualified dividends nationally
+    # and inverted the split vs the SOI targets).
+    frame = pd.DataFrame(
+        {
+            "qualified_dividend_income": [0.0],
+            "non_qualified_dividend_income": [0.0],
+            "dividend_income": [1_000.0],
+        }
+    )
+
+    normalized = normalize_dividend_columns(frame)
+
+    assert normalized["qualified_dividend_income"].tolist() == pytest.approx([780.0])
+    assert normalized["non_qualified_dividend_income"].tolist() == pytest.approx(
+        [220.0]
+    )
+    assert normalized["dividend_income"].tolist() == pytest.approx([1_000.0])
+
+
 def test_normalize_social_security_columns_tracks_unclassified_residual():
     frame = pd.DataFrame(
         {