diff --git a/src/microplex_us/pipelines/us.py b/src/microplex_us/pipelines/us.py index 99a09e2..fcf9084 100644 --- a/src/microplex_us/pipelines/us.py +++ b/src/microplex_us/pipelines/us.py @@ -502,7 +502,9 @@ def _select_ssi_takeup_by_age_amount( person_ids = person_ids.reindex(index) age_values = pd.to_numeric(ages.reindex(index), errors="coerce").fillna(0.0) weight_values = ( - pd.to_numeric(weights.reindex(index), errors="coerce").fillna(0.0).clip(lower=0.0) + pd.to_numeric(weights.reindex(index), errors="coerce") + .fillna(0.0) + .clip(lower=0.0) ) reported_values = ( pd.to_numeric(reported_ssi.reindex(index), errors="coerce") @@ -555,7 +557,9 @@ def _select_until_amount(candidate_mask: np.ndarray, amount: float) -> None: group_summary[group_name] = { "reported_amount": target_amount, "reported_recipients": float( - weight_values.to_numpy(dtype=float)[group_mask & reported_positive].sum() + weight_values.to_numpy(dtype=float)[ + group_mask & reported_positive + ].sum() ), "formula_all_takeup_amount": float(full_amount[group_mask].sum()), "formula_all_takeup_recipients": float( @@ -3980,9 +3984,7 @@ def _calibrate_policyengine_ssi_takeup_from_reported_amounts( "missing_columns": missing_columns, } reported_ssi = ( - pd.to_numeric(persons["ssi"], errors="coerce") - .fillna(0.0) - .clip(lower=0.0) + pd.to_numeric(persons["ssi"], errors="coerce").fillna(0.0).clip(lower=0.0) ) if not reported_ssi.gt(0.0).any(): persons["takes_up_ssi_if_eligible"] = False @@ -4126,6 +4128,7 @@ def build_policyengine_entity_tables( ).transform("sum") persons = self._augment_policyengine_person_inputs(persons) persons["relationship_to_head"] = self._normalize_relationship_to_head(persons) + persons = self._assign_policyengine_household_head_flag(persons) households = self._build_policyengine_households(persons) tax_units, persons = self._build_policyengine_tax_units(persons) @@ -6270,7 +6273,10 @@ def _build_policyengine_tax_units_from_role_flags( "is_tax_unit_spouse", "is_tax_unit_dependent", } - if not role_columns.issubset(persons.columns) or "person_id" not in persons.columns: + if ( + not role_columns.issubset(persons.columns) + or "person_id" not in persons.columns + ): return None person_rows = persons.copy() @@ -6348,9 +6354,7 @@ def _build_policyengine_tax_units_from_role_flags( "tax_unit_id": global_tax_unit_id, "household_id": int(household_id), "filing_status": filing_status, - "member_ids": [ - int(person_id) for person_id in unit_person_ids - ], + "member_ids": [int(person_id) for person_id in unit_person_ids], "filer_ids": [head_id, *spouse_ids], "dependent_ids": dependent_ids, "n_dependents": len(dependent_ids), @@ -6466,12 +6470,9 @@ def _build_policyengine_tax_units_from_existing_ids( .nunique() ) if bool((households_per_tax_unit > 1).any()): - normalized_tax_unit_id = ( - pd.factorize(pd.MultiIndex.from_frame(tax_unit_key), sort=False)[ - 0 - ].astype(np.int64) - + int(start_tax_unit_id) - ) + normalized_tax_unit_id = pd.factorize( + pd.MultiIndex.from_frame(tax_unit_key), sort=False + )[0].astype(np.int64) + int(start_tax_unit_id) person_rows["tax_unit_id"] = normalized_tax_unit_id else: raw_tax_unit_id = raw_tax_unit_id.astype(np.int64) @@ -6562,9 +6563,7 @@ def _resolve_tax_unit_role_flags( & (~head_flag | dependent_hint | ~head_hint) ) resolved_spouse = ( - spouse_flag - & ~resolved_dependent - & (~head_flag | spouse_hint | ~head_hint) + spouse_flag & ~resolved_dependent & (~head_flag | spouse_hint | ~head_hint) ) resolved_head = head_flag & ~resolved_spouse & ~resolved_dependent return resolved_head, resolved_spouse, resolved_dependent @@ -6687,9 +6686,7 @@ def _cohere_tax_unit_role_flags_for_household( }, index=spouse_pool, ) - .sort_values( - ["source_spouse", "relationship", "age", "person_id"] - ) + .sort_values(["source_spouse", "relationship", "age", "person_id"]) .index[0] ) if spouse_index is not None: @@ -6700,26 +6697,21 @@ def _cohere_tax_unit_role_flags_for_household( available & ( dependent_flag - | ( - dependent_hint - & (age.lt(24) | income.le(0.0)) - ) + | (dependent_hint & (age.lt(24) | income.le(0.0))) | (spouse_hint & income.le(0.0)) ) ] = True available = ~(coherent_head | coherent_spouse | coherent_dependent) - coherent_head.loc[ - available - & age.ge(18) - & (head_flag | income.gt(0.0)) - ] = True + coherent_head.loc[available & age.ge(18) & (head_flag | income.gt(0.0))] = True coherent_dependent.loc[ ~(coherent_head | coherent_spouse | coherent_dependent) & (age.lt(18) | dependent_hint | income.le(0.0)) ] = True - coherent_head.loc[~(coherent_head | coherent_spouse | coherent_dependent)] = True + coherent_head.loc[~(coherent_head | coherent_spouse | coherent_dependent)] = ( + True + ) result["_is_tax_unit_head_flag"] = coherent_head result["_is_tax_unit_spouse_flag"] = coherent_spouse @@ -6803,12 +6795,10 @@ def _assign_role_flag_spouses( head_by_person_number = { int(person_number.loc[index]): int(row["person_id"]) for index, row in household_persons.iterrows() - if int(row["person_id"]) in head_set - and int(person_number.loc[index]) > 0 + if int(row["person_id"]) in head_set and int(person_number.loc[index]) > 0 } row_by_person_id = { - int(row["person_id"]): index - for index, row in household_persons.iterrows() + int(row["person_id"]): index for index, row in household_persons.iterrows() } assigned_spouses: set[int] = set() @@ -6969,9 +6959,7 @@ def _infer_policyengine_aca_takeup_for_tax_unit( marketplace = pd.Series(False, index=unit_persons.index, dtype=bool) for column in observed: marketplace |= ( - pd.to_numeric(unit_persons[column], errors="coerce") - .fillna(0.0) - .ne(0.0) + pd.to_numeric(unit_persons[column], errors="coerce").fillna(0.0).ne(0.0) ) return bool(marketplace.any()) @@ -7339,6 +7327,19 @@ def _coerce_policyengine_status_code(self, value: Any) -> int | None: def _assign_family_and_spm_units(self, persons: pd.DataFrame) -> pd.DataFrame: result = persons.copy() + preserved_family_ids = self._normalized_complete_existing_group_ids( + result, + "family_id", + ) + preserved_spm_unit_ids = self._normalized_complete_existing_group_ids( + result, + "spm_unit_id", + ) + if preserved_family_ids is not None and preserved_spm_unit_ids is not None: + result["family_id"] = preserved_family_ids + result["spm_unit_id"] = preserved_spm_unit_ids + return result + family_ids: dict[int, int] = {} spm_unit_ids: dict[int, int] = {} next_family_id = 0 @@ -7363,8 +7364,16 @@ def _assign_family_and_spm_units(self, persons: pd.DataFrame) -> pd.DataFrame: family_ids[int(row.name)] = next_family_id next_family_id += 1 - result["family_id"] = result.index.map(family_ids).astype(np.int64) - result["spm_unit_id"] = result.index.map(spm_unit_ids).astype(np.int64) + result["family_id"] = ( + preserved_family_ids + if preserved_family_ids is not None + else result.index.map(family_ids).astype(np.int64) + ) + result["spm_unit_id"] = ( + preserved_spm_unit_ids + if preserved_spm_unit_ids is not None + else result.index.map(spm_unit_ids).astype(np.int64) + ) return result def _primary_family_member_mask( @@ -7373,9 +7382,7 @@ def _primary_family_member_mask( ) -> pd.Series: """Identify people who belong to the household's primary family.""" - relationship_primary = household_persons["relationship_to_head"].isin( - {0, 1, 2} - ) + relationship_primary = household_persons["relationship_to_head"].isin({0, 1, 2}) if "family_relationship" not in household_persons.columns: return relationship_primary @@ -7393,6 +7400,14 @@ def _assign_marital_units( persons: pd.DataFrame, ) -> pd.DataFrame: result = persons.copy() + preserved_marital_unit_ids = self._normalized_complete_existing_group_ids( + result, + "marital_unit_id", + ) + if preserved_marital_unit_ids is not None: + result["marital_unit_id"] = preserved_marital_unit_ids + return result + marital_unit_by_person: dict[int, int] = {} next_marital_unit_id = 0 @@ -7421,6 +7436,60 @@ def _assign_marital_units( ) return result + def _assign_policyengine_household_head_flag( + self, + persons: pd.DataFrame, + ) -> pd.DataFrame: + result = persons.copy() + derived = ( + pd.to_numeric(result["relationship_to_head"], errors="coerce") + .fillna(-1) + .eq(0) + ) + if "is_household_head" not in result.columns: + result["is_household_head"] = derived + return result + + existing = pd.to_numeric(result["is_household_head"], errors="coerce") + result["is_household_head"] = existing.where(existing.notna(), derived).gt(0.5) + return result + + def _normalized_complete_existing_group_ids( + self, + persons: pd.DataFrame, + id_column: str, + ) -> pd.Series | None: + if id_column not in persons.columns: + return None + raw_ids = persons[id_column] + if raw_ids.isna().any(): + return None + + raw_key = raw_ids.astype("string") + key = pd.DataFrame( + { + "household_id": persons["household_id"], + id_column: raw_key, + }, + index=persons.index, + ) + raw_numeric = pd.to_numeric(raw_ids, errors="coerce") + households_per_raw_id = key.groupby(id_column, dropna=False)[ + "household_id" + ].nunique() + must_factorize = raw_numeric.isna().any() or bool( + households_per_raw_id.gt(1).any() + ) + if must_factorize: + return pd.Series( + pd.factorize(pd.MultiIndex.from_frame(key), sort=False)[0].astype( + np.int64 + ), + index=persons.index, + name=id_column, + ) + return raw_numeric.astype(np.int64).rename(id_column) + def _collapse_group_table( self, persons: pd.DataFrame, @@ -7742,9 +7811,7 @@ def has_any(*columns: str) -> bool: ) if "is_blind" in result.columns: result["is_blind"] = ( - pd.to_numeric(result["is_blind"], errors="coerce") - .fillna(0.0) - .ne(0.0) + pd.to_numeric(result["is_blind"], errors="coerce").fillna(0.0).ne(0.0) ) elif "difficulty_seeing" in result.columns: result["is_blind"] = first_present("difficulty_seeing").gt(0.0) diff --git a/src/microplex_us/policyengine/us.py b/src/microplex_us/policyengine/us.py index a5ab27d..bc83499 100644 --- a/src/microplex_us/policyengine/us.py +++ b/src/microplex_us/policyengine/us.py @@ -149,7 +149,9 @@ def table_for(self, entity: EntityType) -> pd.DataFrame: "marital_units", ) -_ALLOWED_CHECKPOINT_STAGES: frozenset[str] = frozenset({"post_imputation", "post_microsim"}) +_ALLOWED_CHECKPOINT_STAGES: frozenset[str] = frozenset( + {"post_imputation", "post_microsim"} +) def save_us_pipeline_checkpoint( @@ -217,9 +219,7 @@ def load_us_pipeline_checkpoint( checkpoint_dir = Path(path) metadata_path = checkpoint_dir / "metadata.json" if not metadata_path.exists(): - raise FileNotFoundError( - f"US pipeline checkpoint not found at {checkpoint_dir}" - ) + raise FileNotFoundError(f"US pipeline checkpoint not found at {checkpoint_dir}") metadata = json.loads(metadata_path.read_text()) saved_stage = metadata.get("stage") @@ -234,9 +234,7 @@ def load_us_pipeline_checkpoint( if metadata.get(table_name) is None: tables[table_name] = None continue - tables[table_name] = pd.read_parquet( - checkpoint_dir / f"{table_name}.parquet" - ) + tables[table_name] = pd.read_parquet(checkpoint_dir / f"{table_name}.parquet") return PolicyEngineUSEntityTableBundle(**tables), metadata @@ -557,15 +555,13 @@ class PolicyEngineUSVariableMaterializationResult: } ) -POLICYENGINE_US_DATA_OVERRIDABLE_COMPUTED_EXPORT_VARIABLES: frozenset[str] = ( - frozenset( - { - # policyengine-us-data intentionally persists stronger source-data - # inputs for these fallback formulas. - "fsla_overtime_premium", - "meets_ssi_disability_criteria", - } - ) +POLICYENGINE_US_DATA_OVERRIDABLE_COMPUTED_EXPORT_VARIABLES: frozenset[str] = frozenset( + { + # policyengine-us-data intentionally persists stronger source-data + # inputs for these fallback formulas. + "fsla_overtime_premium", + "meets_ssi_disability_criteria", + } ) POLICYENGINE_US_ALLOWED_COMPUTED_EXPORT_VARIABLES: frozenset[str] = ( @@ -612,7 +608,9 @@ def load_strata( ) -> dict[int, PolicyEngineUSStratum]: """Load strata with constraints and optional ancestor chain.""" if not self.db_path.exists(): - raise FileNotFoundError(f"PolicyEngine targets DB not found: {self.db_path}") + raise FileNotFoundError( + f"PolicyEngine targets DB not found: {self.db_path}" + ) available_columns = self._table_columns("strata") has_definition_hash = "definition_hash" in available_columns @@ -677,7 +675,9 @@ def load_targets( ) -> list[PolicyEngineUSDBTarget]: """Load target rows with attached stratum constraints.""" if not self.db_path.exists(): - raise FileNotFoundError(f"PolicyEngine targets DB not found: {self.db_path}") + raise FileNotFoundError( + f"PolicyEngine targets DB not found: {self.db_path}" + ) if self._has_target_overview_view() and best_period: return self._load_targets_via_target_overview( @@ -700,9 +700,7 @@ def load_targets( or domain_variable_is_null is not None or target_cells ): - raise ValueError( - "domain/geography filters require a target_overview view" - ) + raise ValueError("domain/geography filters require a target_overview view") strata_columns = self._table_columns("strata") definition_hash_select = ( @@ -1032,7 +1030,9 @@ def _group_target_rows( "value": float(row["target_value"]), "active": bool(row["active"]), "tolerance": ( - float(row["tolerance"]) if row["tolerance"] is not None else None + float(row["tolerance"]) + if row["tolerance"] is not None + else None ), "source": row["source"], "notes": row["notes"], @@ -1299,7 +1299,9 @@ def _validate_geographic_consistency( and "congressional_district_geoid" in child_equalities ): parent_state = int(parent_equalities["state_fips"]) - district_state = int(child_equalities["congressional_district_geoid"]) // 100 + district_state = ( + int(child_equalities["congressional_district_geoid"]) // 100 + ) if district_state != parent_state: return ( f"Stratum {child.stratum_id} has congressional_district_geoid=" @@ -1632,7 +1634,9 @@ def materialize_policyengine_us_variables( for start in range(0, n_households, batch_size): end = min(start + batch_size, n_households) chunk_ids = household_ids[start:end] - chunk_tables = subset_policyengine_tables_by_households(tables, chunk_ids) + chunk_tables = subset_policyengine_tables_by_households( + tables, chunk_ids + ) chunk_result, chunk_binding = materialize_policyengine_us_variables( chunk_tables, variables=variables, @@ -1732,16 +1736,18 @@ def materialize_policyengine_us_variables_safely( ) try: - materialized_tables, materialized_bindings = materialize_policyengine_us_variables( - tables, - variables=requested_variables, - period=period, - dataset_year=dataset_year, - simulation_cls=simulation_cls, - microsimulation_kwargs=microsimulation_kwargs, - temp_dir=temp_dir, - direct_override_variables=direct_override_variables, - batch_size=batch_size, + materialized_tables, materialized_bindings = ( + materialize_policyengine_us_variables( + tables, + variables=requested_variables, + period=period, + dataset_year=dataset_year, + simulation_cls=simulation_cls, + microsimulation_kwargs=microsimulation_kwargs, + temp_dir=temp_dir, + direct_override_variables=direct_override_variables, + batch_size=batch_size, + ) ) except Exception: return _materialize_policyengine_us_variables_one_by_one( @@ -1780,15 +1786,17 @@ def _materialize_policyengine_us_variables_one_by_one( for variable in requested_variables: try: - materialized_tables, materialized_bindings = materialize_policyengine_us_variables( - working_tables, - variables=(variable,), - period=period, - dataset_year=dataset_year, - simulation_cls=simulation_cls, - microsimulation_kwargs=microsimulation_kwargs, - temp_dir=temp_dir, - direct_override_variables=direct_override_variables, + materialized_tables, materialized_bindings = ( + materialize_policyengine_us_variables( + working_tables, + variables=(variable,), + period=period, + dataset_year=dataset_year, + simulation_cls=simulation_cls, + microsimulation_kwargs=microsimulation_kwargs, + temp_dir=temp_dir, + direct_override_variables=direct_override_variables, + ) ) except Exception as exc: failed_variables[variable] = f"{type(exc).__name__}: {exc}" @@ -1836,7 +1844,9 @@ def _merge_materialized_policyengine_bindings( continue source_table = source_tables.table_for(binding.entity) destination_table = merged_tables.table_for(binding.entity) - destination_table[binding.column] = source_table[binding.column].to_numpy(copy=True) + destination_table[binding.column] = source_table[binding.column].to_numpy( + copy=True + ) return merged_tables @@ -1849,9 +1859,7 @@ def load_policyengine_us_entity_tables( """Load a PE-US time-period dataset into a multientity table bundle.""" period_key = str(period) requested_variables = ( - None - if variables is None - else {str(variable) for variable in variables} + None if variables is None else {str(variable) for variable in variables} ) try: tax_benefit_system = _resolve_policyengine_us_tax_benefit_system( @@ -2121,11 +2129,7 @@ def compile_supported_policyengine_us_household_linear_constraints( def _policyengine_us_target_required_variables(targets: list[TargetSpec]) -> set[str]: - return { - feature - for target in targets - for feature in target.required_features - } + return {feature for target in targets for feature in target.required_features} def policyengine_us_formula_variables_for_targets( @@ -2140,9 +2144,7 @@ def policyengine_us_formula_variables_for_targets( if not required_variables: return set() if tax_benefit_system is None: - tax_benefit_system = _resolve_policyengine_us_tax_benefit_system( - simulation_cls - ) + tax_benefit_system = _resolve_policyengine_us_tax_benefit_system(simulation_cls) variables = getattr(tax_benefit_system, "variables", {}) direct_overrides = set(direct_override_variables) formula_variables: set[str] = set() @@ -2160,7 +2162,9 @@ def policyengine_us_formula_variables_for_targets( def _policyengine_us_variable_is_calculated(variable_metadata: Any) -> bool: if getattr(variable_metadata, "formulas", {}): return True - if getattr(variable_metadata, "adds", ()) or getattr(variable_metadata, "subtracts", ()): + if getattr(variable_metadata, "adds", ()) or getattr( + variable_metadata, "subtracts", () + ): return True is_input_variable = getattr(variable_metadata, "is_input_variable", None) if callable(is_input_variable): @@ -2278,9 +2282,7 @@ def _infer_policyengine_array_entity( except (KeyError, ValueError): pass matching_entities = [ - entity - for entity, length in entity_lengths.items() - if len(values) == length + entity for entity, length in entity_lengths.items() if len(values) == length ] if len(matching_entities) == 1: return matching_entities[0] @@ -2385,11 +2387,13 @@ def _compile_household_coefficients( target_measure = _target_measure(target) if target_binding.column is None or target_measure is None: - raise ValueError(f"Target '{_policyengine_target_name(target)}' has no source column") + raise ValueError( + f"Target '{_policyengine_target_name(target)}' has no source column" + ) - target_values = pd.to_numeric(target_table[target_binding.column], errors="coerce").fillna( - 0.0 - ) + target_values = pd.to_numeric( + target_table[target_binding.column], errors="coerce" + ).fillna(0.0) row_mask = pd.Series(True, index=target_table.index, dtype=bool) household_constraints: list[PolicyEngineUSConstraint | TargetFilter] = [] for constraint in _target_constraints(target): @@ -2398,12 +2402,12 @@ def _compile_household_coefficients( bindings, tables, ) - if ( - constraint_binding.entity in {target_binding.entity, EntityType.HOUSEHOLD} - or _can_align_constraint_to_target_rows( - target_rows=target_table, - constraint_binding=constraint_binding, - ) + if constraint_binding.entity in { + target_binding.entity, + EntityType.HOUSEHOLD, + } or _can_align_constraint_to_target_rows( + target_rows=target_table, + constraint_binding=constraint_binding, ): row_mask &= _evaluate_constraint_mask( target_rows=target_table, @@ -2462,10 +2466,10 @@ def _resolve_binding( if variable in bindings: return bindings[variable] if variable in tables.households.columns: - return PolicyEngineUSVariableBinding(entity=EntityType.HOUSEHOLD, column=variable) - raise KeyError( - f"No PolicyEngine binding configured for variable '{variable}'" - ) + return PolicyEngineUSVariableBinding( + entity=EntityType.HOUSEHOLD, column=variable + ) + raise KeyError(f"No PolicyEngine binding configured for variable '{variable}'") def _resolve_target_binding( @@ -2541,7 +2545,9 @@ def _evaluate_constraint_mask( bindings: dict[str, PolicyEngineUSVariableBinding], household_id_column: str, ) -> pd.Series: - constraint_binding = _resolve_binding(_constraint_feature(constraint), bindings, tables) + constraint_binding = _resolve_binding( + _constraint_feature(constraint), bindings, tables + ) constraint_column = _require_binding_column( constraint_binding, feature=_constraint_feature(constraint), @@ -2551,7 +2557,9 @@ def _evaluate_constraint_mask( return _apply_constraint_filter(target_rows[constraint_column], constraint) if constraint_binding.entity is EntityType.HOUSEHOLD: - household_values = tables.households.set_index(household_id_column)[constraint_column] + household_values = tables.households.set_index(household_id_column)[ + constraint_column + ] aligned = target_household_ids.map(household_values) return _apply_constraint_filter(aligned, constraint) @@ -2636,7 +2644,10 @@ def _align_related_entity_constraint_mask( if persons is None: return None related_id_column = _entity_primary_id_column(target_binding.entity) - if related_id_column not in target_rows.columns or related_id_column not in persons.columns: + if ( + related_id_column not in target_rows.columns + or related_id_column not in persons.columns + ): return None constraint_column = _require_binding_column( constraint_binding, @@ -2669,11 +2680,17 @@ def _evaluate_constraint_on_households( ) table = tables.table_for(binding.entity) - related_household_ids = _household_ids_for_entity_table(table, binding, household_id_column) + related_household_ids = _household_ids_for_entity_table( + table, binding, household_id_column + ) row_matches = _apply_constraint_filter(table[binding_column], constraint) - return row_matches.groupby(related_household_ids).any().reindex( - household_ids, - fill_value=False, + return ( + row_matches.groupby(related_household_ids) + .any() + .reindex( + household_ids, + fill_value=False, + ) ) @@ -2725,7 +2742,9 @@ def _target_value(target: PolicyEngineUSDBTarget | TargetSpec) -> float: return float(target.value) -def _target_aggregation(target: PolicyEngineUSDBTarget | TargetSpec) -> TargetAggregation: +def _target_aggregation( + target: PolicyEngineUSDBTarget | TargetSpec, +) -> TargetAggregation: if isinstance(target, TargetSpec): return target.aggregation if target.variable in DEFAULT_POLICYENGINE_US_VARIABLE_BINDINGS: @@ -2750,7 +2769,11 @@ def _target_constraints( def _constraint_feature(constraint: PolicyEngineUSConstraint | TargetFilter) -> str: - return constraint.feature if isinstance(constraint, TargetFilter) else constraint.variable + return ( + constraint.feature + if isinstance(constraint, TargetFilter) + else constraint.variable + ) def _constraint_operator(constraint: PolicyEngineUSConstraint | TargetFilter) -> str: @@ -2775,9 +2798,14 @@ def build_policyengine_us_export_variable_maps( variable_metadata, direct_override_variables=direct_override_variables, ) + person_table = _with_policyengine_person_export_derivatives(tables.persons) table_specs = ( - ("household", tables.households, {"household_id", "household_weight", "weight"}), - ("person", tables.persons, {"person_id", "household_id"}), + ( + "household", + tables.households, + {"household_id", "household_weight", "weight"}, + ), + ("person", person_table, {"person_id", "household_id"}), ("tax_unit", tables.tax_units, {"tax_unit_id", "household_id"}), ("spm_unit", tables.spm_units, {"spm_unit_id", "household_id"}), ("family", tables.families, {"family_id", "household_id"}), @@ -2816,8 +2844,9 @@ def build_policyengine_us_time_period_arrays( household_id_column=household_id_column, household_weight_column=household_weight_column, ) + person_table = _with_policyengine_person_export_derivatives(tables.persons) persons = _prepare_person_export_table( - tables.persons, + person_table, person_id_column=person_id_column, household_id_column=household_id_column, household_ids=pd.Index(households[household_id_column]), @@ -2856,8 +2885,20 @@ def build_policyengine_us_time_period_arrays( ) group_specs = ( - ("tax_unit", "tax_unit_id", tables.tax_units, tax_unit_variable_map, "household"), - ("spm_unit", "spm_unit_id", tables.spm_units, spm_unit_variable_map, "household"), + ( + "tax_unit", + "tax_unit_id", + tables.tax_units, + tax_unit_variable_map, + "household", + ), + ( + "spm_unit", + "spm_unit_id", + tables.spm_units, + spm_unit_variable_map, + "household", + ), ("family", "family_id", tables.families, family_variable_map, "household"), ( "marital_unit", @@ -2933,7 +2974,9 @@ def _resolve_policyengine_variable_entity( variables = getattr(tax_benefit_system, "variables", {}) variable_metadata = variables.get(variable) if variable_metadata is None: - raise KeyError(f"PolicyEngine variable '{variable}' not found in tax-benefit system") + raise KeyError( + f"PolicyEngine variable '{variable}' not found in tax-benefit system" + ) entity_key = getattr(getattr(variable_metadata, "entity", None), "key", None) if entity_key not in POLICYENGINE_US_ENTITY_KEY_TO_ENTITY_TYPE: raise ValueError( @@ -3053,7 +3096,9 @@ def _attach_policyengine_variables_to_tables( f"{entity.value}, expected {len(table)}" ) table[variable] = values - bindings[variable] = PolicyEngineUSVariableBinding(entity=entity, column=variable) + bindings[variable] = PolicyEngineUSVariableBinding( + entity=entity, column=variable + ) return ( PolicyEngineUSEntityTableBundle( @@ -3107,7 +3152,9 @@ def project_frame_to_time_period_arrays( for source_column, target_variable in column_map.items(): if source_column not in frame.columns: raise ValueError(f"Projection source column not found: {source_column}") - arrays[target_variable] = {str(period): _normalize_h5_value(frame[source_column])} + arrays[target_variable] = { + str(period): _normalize_h5_value(frame[source_column]) + } return arrays @@ -3211,6 +3258,23 @@ def _prepare_person_export_table( return person_table +def _with_policyengine_person_export_derivatives( + persons: pd.DataFrame | None, +) -> pd.DataFrame | None: + if persons is None or "is_household_head" in persons.columns: + return persons + if "relationship_to_head" not in persons.columns: + return persons + + person_table = persons.copy() + relationship = pd.to_numeric( + person_table["relationship_to_head"], + errors="coerce", + ) + person_table["is_household_head"] = relationship.eq(0).fillna(False) + return person_table + + def _resolve_person_group_ids( *, group_name: str, @@ -3292,9 +3356,10 @@ def _extract_membership_ids_from_group_table( name=id_column, ) - if household_id_column in provided_table.columns and not provided_table[ - household_id_column - ].duplicated().any(): + if ( + household_id_column in provided_table.columns + and not provided_table[household_id_column].duplicated().any() + ): household_map = ( provided_table[[id_column, household_id_column]] .assign( @@ -3347,10 +3412,9 @@ def _resolve_group_export_table( normalized_households = _normalize_id_value(group_table[household_id_column]) group_table[household_id_column] = normalized_households expected = group_table[id_column].map(household_map) - mismatch = ( - expected.notna() - & pd.Series(normalized_households, index=group_table.index).ne(expected) - ) + mismatch = expected.notna() & pd.Series( + normalized_households, index=group_table.index + ).ne(expected) if mismatch.any(): raise ValueError( f"{group_name} export table household links are inconsistent with person memberships" @@ -3359,7 +3423,9 @@ def _resolve_group_export_table( group_table[household_id_column] = group_table[id_column].map(household_map) if group_table[household_id_column].isna().any(): - missing = group_table.loc[group_table[household_id_column].isna(), id_column].tolist() + missing = group_table.loc[ + group_table[household_id_column].isna(), id_column + ].tolist() raise ValueError( f"Could not derive household links for {group_name} ids: {missing}" ) @@ -3419,9 +3485,7 @@ def _project_table_to_time_period_arrays( string_values.notna() & string_values.ne(""), other=default_value, ) - values = _normalize_policyengine_us_export_enum_values( - target_variable, values - ) + values = _normalize_policyengine_us_export_enum_values(target_variable, values) arrays[target_variable] = { period_key: _normalize_h5_value(values), } @@ -3463,6 +3527,9 @@ def _normalize_id_value(values: Any) -> np.ndarray: def _normalize_weight_value(values: Any) -> np.ndarray: - return pd.to_numeric(pd.Series(values), errors="coerce").fillna(0.0).astype( - np.float32 - ).to_numpy() + return ( + pd.to_numeric(pd.Series(values), errors="coerce") + .fillna(0.0) + .astype(np.float32) + .to_numpy() + ) diff --git a/tests/pipelines/test_us.py b/tests/pipelines/test_us.py index 4fd7af0..46f8933 100644 --- a/tests/pipelines/test_us.py +++ b/tests/pipelines/test_us.py @@ -829,7 +829,9 @@ def test_build_policyengine_entity_tables_preserves_household_contract_inputs( ) tables = pipeline.build_policyengine_entity_tables(population) - households = tables.households.sort_values("household_id").reset_index(drop=True) + households = tables.households.sort_values("household_id").reset_index( + drop=True + ) assert households["tenure_type"].tolist() == [ "OWNER_WITH_MORTGAGE", @@ -923,6 +925,54 @@ def test_build_policyengine_entity_tables_uses_family_relationship_for_family_un assert person_rows.loc[:3, "family_id"].nunique() == 1 assert person_rows.loc[4, "family_id"] != person_rows.loc[0, "family_id"] + def test_build_policyengine_entity_tables_preserves_complete_existing_group_ids( + self, + ): + pipeline = USMicroplexPipeline(USMicroplexBuildConfig()) + population = pd.DataFrame( + { + "person_id": [1, 2, 3, 4], + "household_id": [10, 10, 10, 20], + "weight": [1.0, 1.0, 1.0, 2.0], + "age": [45, 12, 30, 70], + "income": [60_000.0, 0.0, 20_000.0, 25_000.0], + "relationship_to_head": [0, 2, 3, 0], + "family_id": [1, 1, 2, 1], + "spm_unit_id": [1, 2, 2, 1], + "marital_unit_id": [1, 2, 3, 1], + } + ) + + tables = pipeline.build_policyengine_entity_tables(population) + person_rows = tables.persons.sort_values("person_id").reset_index(drop=True) + + assert len(tables.families) == 3 + assert len(tables.spm_units) == 3 + assert len(tables.marital_units) == 4 + assert person_rows.loc[0, "family_id"] == person_rows.loc[1, "family_id"] + assert person_rows.loc[0, "family_id"] != person_rows.loc[3, "family_id"] + assert person_rows.loc[1, "spm_unit_id"] == person_rows.loc[2, "spm_unit_id"] + assert person_rows.loc[0, "spm_unit_id"] != person_rows.loc[3, "spm_unit_id"] + assert person_rows["marital_unit_id"].nunique() == 4 + + def test_build_policyengine_entity_tables_derives_is_household_head(self): + pipeline = USMicroplexPipeline(USMicroplexBuildConfig()) + population = pd.DataFrame( + { + "person_id": [1, 2, 3], + "household_id": [10, 10, 20], + "weight": [1.0, 1.0, 2.0], + "age": [45, 12, 70], + "income": [60_000.0, 0.0, 25_000.0], + "relationship_to_head": [0, 2, 0], + } + ) + + tables = pipeline.build_policyengine_entity_tables(population) + persons = tables.persons.sort_values("person_id").reset_index(drop=True) + + assert persons["is_household_head"].tolist() == [True, False, True] + def test_build_policyengine_entity_tables_derives_tax_input_columns(self): pipeline = USMicroplexPipeline(USMicroplexBuildConfig()) population = pd.DataFrame( diff --git a/tests/policyengine/test_us.py b/tests/policyengine/test_us.py index b63eb88..d9565db 100644 --- a/tests/policyengine/test_us.py +++ b/tests/policyengine/test_us.py @@ -156,7 +156,18 @@ def _create_policyengine_targets_db(path: Path) -> None: """, [ (10, "snap", 2024, 1, 0, 114_100_000_000.0, 1, 5.0, "CBO", "National SNAP"), - (11, "snap", 2024, 2, 0, 9_500_000_000.0, 1, 10.0, "CBO", "California senior SNAP"), + ( + 11, + "snap", + 2024, + 2, + 0, + 9_500_000_000.0, + 1, + 10.0, + "CBO", + "California senior SNAP", + ), ], ) conn.commit() @@ -176,7 +187,9 @@ def test_load_targets_includes_constraints(self, tmp_path): assert unconstrained.target_id == 10 assert unconstrained.constraints == () assert constrained.target_id == 11 - assert {(c.variable, c.operation, c.value) for c in constrained.constraints} == { + assert { + (c.variable, c.operation, c.value) for c in constrained.constraints + } == { ("age", ">=", "65"), ("state_fips", "==", "06"), } @@ -797,9 +810,7 @@ def test_compiles_db_targets_to_household_linear_constraints(self): reform_id=0, value=200.0, active=True, - constraints=( - PolicyEngineUSConstraint("state_fips", "==", "06"), - ), + constraints=(PolicyEngineUSConstraint("state_fips", "==", "06"),), ), PolicyEngineUSDBTarget( target_id=21, @@ -887,9 +898,7 @@ def test_amount_targets_apply_same_entity_constraints_before_household_aggregati reform_id=0, value=1_500.0, active=True, - constraints=( - PolicyEngineUSConstraint("age", ">=", "65"), - ), + constraints=(PolicyEngineUSConstraint("age", ">=", "65"),), ), ), tables=tables, @@ -905,9 +914,13 @@ def test_amount_targets_apply_same_entity_constraints_before_household_aggregati }, ) - np.testing.assert_allclose(constraints[0].coefficients, np.array([1_000.0, 0.0])) + np.testing.assert_allclose( + constraints[0].coefficients, np.array([1_000.0, 0.0]) + ) - def test_amount_targets_exclude_negative_rows_under_positive_same_entity_filter(self): + def test_amount_targets_exclude_negative_rows_under_positive_same_entity_filter( + self, + ): households = pd.DataFrame( { "household_id": [1, 2], @@ -986,9 +999,7 @@ def test_compiled_constraints_run_through_calibrator(self): reform_id=0, value=200.0, active=True, - constraints=( - PolicyEngineUSConstraint("state_fips", "==", "06"), - ), + constraints=(PolicyEngineUSConstraint("state_fips", "==", "06"),), ), PolicyEngineUSDBTarget( target_id=31, @@ -998,9 +1009,7 @@ def test_compiled_constraints_run_through_calibrator(self): reform_id=0, value=2.0, active=True, - constraints=( - PolicyEngineUSConstraint("snap", ">", "0"), - ), + constraints=(PolicyEngineUSConstraint("snap", ">", "0"),), ), ), tables=tables, @@ -1078,9 +1087,7 @@ def test_canonical_target_specs_compile_to_household_constraints(self): value=1.0, period=2024, aggregation=TargetAggregation.COUNT, - filters=( - TargetFilter(feature="age", operator=">=", value=65), - ), + filters=(TargetFilter(feature="age", operator=">=", value=65),), ), ), tables=tables, @@ -1107,7 +1114,9 @@ def test_canonical_target_specs_compile_to_household_constraints(self): np.testing.assert_allclose(constraints[0].coefficients, np.array([100.0, 0.0])) np.testing.assert_allclose(constraints[1].coefficients, np.array([1.0, 0.0])) - def test_amount_targets_align_tax_unit_constraints_before_household_aggregation(self): + def test_amount_targets_align_tax_unit_constraints_before_household_aggregation( + self, + ): tables = PolicyEngineUSEntityTableBundle( households=pd.DataFrame( { @@ -1233,7 +1242,9 @@ def test_count_targets_align_person_constraints_to_tax_unit_rows(self): np.array([1.0, 1.0]), ) - def test_materializes_formula_variables_before_compiling_constraints(self, tmp_path): + def test_materializes_formula_variables_before_compiling_constraints( + self, tmp_path + ): households = pd.DataFrame( { "household_id": [1, 2], @@ -1321,13 +1332,15 @@ def calculate(self, variable, period=None, map_to=None): return np.array([120.0, 0.0]) raise KeyError(variable) - materialized_tables, materialized_bindings = materialize_policyengine_us_variables( - tables, - variables=("snap",), - period=2024, - dataset_year=2024, - simulation_cls=FakeSimulation, - temp_dir=tmp_path, + materialized_tables, materialized_bindings = ( + materialize_policyengine_us_variables( + tables, + variables=("snap",), + period=2024, + dataset_year=2024, + simulation_cls=FakeSimulation, + temp_dir=tmp_path, + ) ) assert materialized_bindings["snap"] == PolicyEngineUSVariableBinding( @@ -1359,9 +1372,7 @@ def calculate(self, variable, period=None, map_to=None): reform_id=0, value=2.0, active=True, - constraints=( - PolicyEngineUSConstraint("snap", ">", "0"), - ), + constraints=(PolicyEngineUSConstraint("snap", ">", "0"),), ), ), tables=materialized_tables, @@ -1471,13 +1482,15 @@ def calculate(self, variable, period=None, map_to=None): return np.array([75.0]) raise KeyError(variable) - materialized_tables, materialized_bindings = materialize_policyengine_us_variables( - tables, - variables=("snap",), - period=2024, - dataset_year=2024, - simulation_cls=FakeSimulation, - temp_dir=tmp_path, + materialized_tables, materialized_bindings = ( + materialize_policyengine_us_variables( + tables, + variables=("snap",), + period=2024, + dataset_year=2024, + simulation_cls=FakeSimulation, + temp_dir=tmp_path, + ) ) assert materialized_bindings["snap"] == PolicyEngineUSVariableBinding( @@ -1758,13 +1771,27 @@ def test_builds_structural_time_period_arrays_from_entity_tables(self): } assert expected_keys.issubset(arrays) assert set(arrays["household_id"]) == {"2024"} - np.testing.assert_array_equal(arrays["household_id"]["2024"], np.array([10, 20])) - np.testing.assert_array_equal(arrays["person_household_id"]["2024"], np.array([10, 10, 20])) - np.testing.assert_array_equal(arrays["person_tax_unit_id"]["2024"], np.array([100, 100, 200])) - np.testing.assert_array_equal(arrays["person_spm_unit_id"]["2024"], np.array([1000, 1000, 2000])) - np.testing.assert_array_equal(arrays["person_family_id"]["2024"], np.array([5000, 5000, 6000])) - np.testing.assert_array_equal(arrays["person_marital_unit_id"]["2024"], np.array([7000, 7000, 8000])) - np.testing.assert_allclose(arrays["household_weight"]["2024"], np.array([1.5, 2.5], dtype=np.float32)) + np.testing.assert_array_equal( + arrays["household_id"]["2024"], np.array([10, 20]) + ) + np.testing.assert_array_equal( + arrays["person_household_id"]["2024"], np.array([10, 10, 20]) + ) + np.testing.assert_array_equal( + arrays["person_tax_unit_id"]["2024"], np.array([100, 100, 200]) + ) + np.testing.assert_array_equal( + arrays["person_spm_unit_id"]["2024"], np.array([1000, 1000, 2000]) + ) + np.testing.assert_array_equal( + arrays["person_family_id"]["2024"], np.array([5000, 5000, 6000]) + ) + np.testing.assert_array_equal( + arrays["person_marital_unit_id"]["2024"], np.array([7000, 7000, 8000]) + ) + np.testing.assert_allclose( + arrays["household_weight"]["2024"], np.array([1.5, 2.5], dtype=np.float32) + ) assert "person_weight" not in arrays assert "tax_unit_weight" not in arrays assert "spm_unit_weight" not in arrays @@ -1773,6 +1800,71 @@ def test_builds_structural_time_period_arrays_from_entity_tables(self): np.testing.assert_array_equal(arrays["age"]["2024"], np.array([34, 12, 45])) np.testing.assert_allclose(arrays["snap"]["2024"], np.array([1200.0, 300.0])) + def test_derives_household_head_export_from_relationship_to_head(self): + tables = PolicyEngineUSEntityTableBundle( + households=pd.DataFrame( + { + "household_id": [10, 20], + "household_weight": [1.0, 1.0], + } + ), + persons=pd.DataFrame( + { + "person_id": [1, 2, 3], + "household_id": [10, 10, 20], + "relationship_to_head": [0, 2, 0], + } + ), + ) + + arrays = build_policyengine_us_time_period_arrays( + tables, + period=2024, + person_variable_map={"is_household_head": "is_household_head"}, + ) + + np.testing.assert_array_equal( + arrays["is_household_head"]["2024"], + np.array([True, False, True]), + ) + + def test_export_variable_maps_include_derived_household_head(self): + class FakeEntity: + def __init__(self, key): + self.key = key + + class FakeVariable: + def __init__(self, entity): + self.entity = FakeEntity(entity) + + class FakeSystem: + variables = { + "is_household_head": FakeVariable("person"), + } + + tables = PolicyEngineUSEntityTableBundle( + households=pd.DataFrame( + { + "household_id": [10], + "household_weight": [1.0], + } + ), + persons=pd.DataFrame( + { + "person_id": [1], + "household_id": [10], + "relationship_to_head": [0], + } + ), + ) + + export_maps = build_policyengine_us_export_variable_maps( + tables, + tax_benefit_system=FakeSystem(), + ) + + assert export_maps["person"]["is_household_head"] == "is_household_head" + def test_derives_missing_group_tables_from_person_memberships(self): tables = PolicyEngineUSEntityTableBundle( households=pd.DataFrame( @@ -1798,10 +1890,18 @@ def test_derives_missing_group_tables_from_person_memberships(self): period=2024, ) - np.testing.assert_array_equal(arrays["tax_unit_id"]["2024"], np.array([100, 200])) - np.testing.assert_array_equal(arrays["spm_unit_id"]["2024"], np.array([1000, 2000])) - np.testing.assert_array_equal(arrays["family_id"]["2024"], np.array([5000, 6000])) - np.testing.assert_array_equal(arrays["marital_unit_id"]["2024"], np.array([7000, 8000])) + np.testing.assert_array_equal( + arrays["tax_unit_id"]["2024"], np.array([100, 200]) + ) + np.testing.assert_array_equal( + arrays["spm_unit_id"]["2024"], np.array([1000, 2000]) + ) + np.testing.assert_array_equal( + arrays["family_id"]["2024"], np.array([5000, 6000]) + ) + np.testing.assert_array_equal( + arrays["marital_unit_id"]["2024"], np.array([7000, 8000]) + ) assert "family_weight" not in arrays assert "marital_unit_weight" not in arrays @@ -1855,7 +1955,9 @@ class FakeSystem: "child_support_received": FakeVariable("person"), "disability_benefits": FakeVariable("person"), "employment_income_before_lsr": FakeVariable("person"), - "health_insurance_premiums_without_medicare_part_b": FakeVariable("person"), + "health_insurance_premiums_without_medicare_part_b": FakeVariable( + "person" + ), "is_female": FakeVariable("person"), "medicare_part_b_premiums": FakeVariable("person"), "other_medical_expenses": FakeVariable("person"), @@ -2024,10 +2126,7 @@ class FakeSystem: for name in household_contract_inputs }, **{name: FakeVariable("tax_unit") for name in tax_unit_contract_inputs}, - **{ - name: FakeVariable("spm_unit") - for name in spm_unit_contract_inputs - }, + **{name: FakeVariable("spm_unit") for name in spm_unit_contract_inputs}, "self_employed_health_insurance_ald": FakeVariable("tax_unit"), "self_employed_pension_contribution_ald": FakeVariable("tax_unit"), } @@ -2081,11 +2180,13 @@ class FakeSystem: assert export_maps["tax_unit"] == { name: name for name in tax_unit_contract_inputs } - assert { - name: name for name in spm_unit_contract_inputs - }.items() <= export_maps["spm_unit"].items() + assert {name: name for name in spm_unit_contract_inputs}.items() <= export_maps[ + "spm_unit" + ].items() - def test_build_policyengine_us_export_variable_maps_blocks_computed_direct_overrides(self): + def test_build_policyengine_us_export_variable_maps_blocks_computed_direct_overrides( + self, + ): class FakeEntity: def __init__(self, key): self.key = key @@ -2167,7 +2268,9 @@ class FakeSystem: assert export_maps["tax_unit"] == {} assert "snap" not in export_maps["spm_unit"].values() - def test_build_policyengine_us_export_variable_maps_drops_reported_social_security_retirement_alias(self): + def test_build_policyengine_us_export_variable_maps_drops_reported_social_security_retirement_alias( + self, + ): class FakeEntity: def __init__(self, key): self.key = key @@ -2202,7 +2305,9 @@ class FakeSystem: tax_benefit_system=FakeSystem(), ) - assert "social_security_retirement_reported" not in export_maps["person"].values() + assert ( + "social_security_retirement_reported" not in export_maps["person"].values() + ) def test_build_policyengine_us_export_variable_maps_drops_computed_alias_inputs( self, @@ -2291,7 +2396,9 @@ def test_default_policyengine_us_export_surface_avoids_formula_aggregates(self): "medicare_part_b_premiums_reported" not in SAFE_POLICYENGINE_US_EXPORT_VARIABLES ) - assert "traditional_ira_contributions" not in SAFE_POLICYENGINE_US_EXPORT_VARIABLES + assert ( + "traditional_ira_contributions" not in SAFE_POLICYENGINE_US_EXPORT_VARIABLES + ) assert ( "traditional_ira_contributions_desired" not in SAFE_POLICYENGINE_US_EXPORT_VARIABLES @@ -2301,7 +2408,10 @@ def test_default_policyengine_us_export_surface_avoids_formula_aggregates(self): "roth_ira_contributions_desired" not in SAFE_POLICYENGINE_US_EXPORT_VARIABLES ) - assert "self_employed_pension_contributions" not in SAFE_POLICYENGINE_US_EXPORT_VARIABLES + assert ( + "self_employed_pension_contributions" + not in SAFE_POLICYENGINE_US_EXPORT_VARIABLES + ) assert ( "self_employed_pension_contributions_desired" not in SAFE_POLICYENGINE_US_EXPORT_VARIABLES @@ -2317,10 +2427,18 @@ def test_default_policyengine_us_export_surface_avoids_formula_aggregates(self): assert "takes_up_ssi_if_eligible" in SAFE_POLICYENGINE_US_EXPORT_VARIABLES assert "ssi" not in SAFE_POLICYENGINE_US_EXPORT_VARIABLES assert "ssi_reported" not in SAFE_POLICYENGINE_US_EXPORT_VARIABLES - assert "self_employed_health_insurance_ald" not in SAFE_POLICYENGINE_US_EXPORT_VARIABLES - assert "self_employed_pension_contribution_ald" not in SAFE_POLICYENGINE_US_EXPORT_VARIABLES + assert ( + "self_employed_health_insurance_ald" + not in SAFE_POLICYENGINE_US_EXPORT_VARIABLES + ) + assert ( + "self_employed_pension_contribution_ald" + not in SAFE_POLICYENGINE_US_EXPORT_VARIABLES + ) - def test_resolve_policyengine_excluded_export_variables_preserves_explicit_overrides(self): + def test_resolve_policyengine_excluded_export_variables_preserves_explicit_overrides( + self, + ): class FakeVariable: def __init__(self, adds=None, subtracts=None, formulas=None): self.adds = adds or [] @@ -2346,7 +2464,9 @@ class FakeSystem: assert excluded == {"self_employed_pension_contribution_ald"} - def test_build_policyengine_us_export_variable_maps_supports_exact_pre_sim_names(self): + def test_build_policyengine_us_export_variable_maps_supports_exact_pre_sim_names( + self, + ): class FakeEntity: def __init__(self, key): self.key = key @@ -2389,7 +2509,9 @@ class FakeSystem: "non_sch_d_capital_gains": "non_sch_d_capital_gains", }.items() <= export_maps["person"].items() - def test_build_policyengine_us_export_variable_maps_prefers_exact_pre_sim_names(self): + def test_build_policyengine_us_export_variable_maps_prefers_exact_pre_sim_names( + self, + ): class FakeEntity: def __init__(self, key): self.key = key @@ -2670,9 +2792,7 @@ def test_build_time_period_arrays_defaults_absent_export_inputs(self): assert arrays["takes_up_aca_if_eligible"]["2024"].tolist() == [True] assert arrays["takes_up_eitc"]["2024"].tolist() == [True] assert arrays["would_file_taxes_voluntarily"]["2024"].tolist() == [False] - assert arrays["spm_unit_pre_subsidy_childcare_expenses"]["2024"].tolist() == [ - 0 - ] + assert arrays["spm_unit_pre_subsidy_childcare_expenses"]["2024"].tolist() == [0] assert arrays["spm_unit_tenure_type"]["2024"].tolist() == [b"RENTER"] assert arrays["takes_up_snap_if_eligible"]["2024"].tolist() == [True] @@ -2705,9 +2825,7 @@ def test_build_time_period_arrays_normalizes_numeric_tenure_codes(self): tables, period=2024, household_variable_map={"tenure_type": "tenure_type"}, - spm_unit_variable_map={ - "spm_unit_tenure_type": "spm_unit_tenure_type" - }, + spm_unit_variable_map={"spm_unit_tenure_type": "spm_unit_tenure_type"}, ) assert arrays["tenure_type"]["2024"].tolist() == [