|
61 | 61 | "docstring": "\"Add auto loan balance, interest and net_worth variable.", |
62 | 62 | "id": "add_auto_loan", |
63 | 63 | "kind": "function", |
64 | | - "line": 2782, |
| 64 | + "line": 2775, |
65 | 65 | "metadata": { |
66 | 66 | "api_refs": [ |
67 | 67 | "policyengine_us_data.datasets.cps.cps.add_auto_loan_interest_and_net_worth" |
|
88 | 88 | "docstring": "Populate household-level geography variables used by PolicyEngine US.\n\nArgs:\n cps: Output CPS H5 group receiving derived household variables.\n household: Raw CPS household table.", |
89 | 89 | "id": "add_household_variables", |
90 | 90 | "kind": "function", |
91 | | - "line": 1444, |
| 91 | + "line": 1437, |
92 | 92 | "metadata": { |
93 | 93 | "api_refs": [ |
94 | 94 | "policyengine_us_data.datasets.cps.cps.add_household_variables" |
|
115 | 115 | "docstring": "Add basic ID and weight variables.\n\nArgs:\n cps (h5py.File): The CPS dataset file.\n person (DataFrame): The person table of the ASEC.\n tax_unit (DataFrame): The tax unit table created from the person table\n of the ASEC.\n family (DataFrame): The family table of the ASEC.\n spm_unit (DataFrame): The SPM unit table created from the person table\n of the ASEC.\n household (DataFrame): The household table of the ASEC.", |
116 | 116 | "id": "add_id_variables", |
117 | 117 | "kind": "function", |
118 | | - "line": 913, |
| 118 | + "line": 916, |
119 | 119 | "metadata": { |
120 | 120 | "api_refs": [ |
121 | 121 | "policyengine_us_data.datasets.cps.cps.add_id_variables" |
|
142 | 142 | "docstring": "Impute ORG-derived wage and union inputs onto CPS persons.", |
143 | 143 | "id": "add_org_inputs", |
144 | 144 | "kind": "function", |
145 | | - "line": 2666, |
| 145 | + "line": 2659, |
146 | 146 | "metadata": { |
147 | 147 | "api_refs": [ |
148 | 148 | "policyengine_us_data.datasets.cps.cps.add_org_labor_market_inputs" |
|
169 | 169 | "docstring": "Add income variables.\n\nArgs:\n cps (h5py.File): The CPS dataset file.\n person (DataFrame): The CPS person table.\n year (int): The CPS year", |
170 | 170 | "id": "add_personal_income_variables", |
171 | 171 | "kind": "function", |
172 | | - "line": 1119, |
| 172 | + "line": 1122, |
173 | 173 | "metadata": { |
174 | 174 | "api_refs": [ |
175 | 175 | "policyengine_us_data.datasets.cps.cps.add_personal_income_variables" |
|
196 | 196 | "docstring": "Add personal demographic variables.\n\nArgs:\n cps (h5py.File): The CPS dataset file.\n person (DataFrame): The CPS person table.", |
197 | 197 | "id": "add_personal_variables", |
198 | 198 | "kind": "function", |
199 | | - "line": 975, |
| 199 | + "line": 978, |
200 | 200 | "metadata": { |
201 | 201 | "api_refs": [ |
202 | 202 | "policyengine_us_data.datasets.cps.cps.add_personal_variables" |
|
223 | 223 | "docstring": "", |
224 | 224 | "id": "add_previous_year_income", |
225 | 225 | "kind": "function", |
226 | | - "line": 1486, |
| 226 | + "line": 1479, |
227 | 227 | "metadata": { |
228 | 228 | "api_refs": [ |
229 | 229 | "policyengine_us_data.datasets.cps.cps.add_previous_year_income" |
|
277 | 277 | "docstring": "", |
278 | 278 | "id": "add_spm_variables", |
279 | 279 | "kind": "function", |
280 | | - "line": 1396, |
| 280 | + "line": 1398, |
281 | 281 | "metadata": { |
282 | 282 | "api_refs": [ |
283 | 283 | "policyengine_us_data.datasets.cps.cps.add_spm_variables" |
|
304 | 304 | "docstring": "Assign SSN card type using PRCITSHP, employment status, and ASEC-UA conditions.\nCodes:\n- 0: \"NONE\" - Likely undocumented immigrants\n- 1: \"CITIZEN\" - US citizens (born or naturalized)\n- 2: \"NON_CITIZEN_VALID_EAD\" - Non-citizens with work/study authorization\n- 3: \"OTHER_NON_CITIZEN\" - Non-citizens with indicators of legal status", |
305 | 305 | "id": "add_ssn_card_type", |
306 | 306 | "kind": "function", |
307 | | - "line": 1592, |
| 307 | + "line": 1585, |
308 | 308 | "metadata": { |
309 | 309 | "api_refs": [ |
310 | 310 | "policyengine_us_data.datasets.cps.cps.add_ssn_card_type" |
|
358 | 358 | "docstring": "", |
359 | 359 | "id": "add_tips", |
360 | 360 | "kind": "function", |
361 | | - "line": 2491, |
| 361 | + "line": 2484, |
362 | 362 | "metadata": { |
363 | 363 | "api_refs": [ |
364 | 364 | "policyengine_us_data.datasets.cps.cps.add_tips" |
|
810 | 810 | "docstring": "Replace clone-half person-level feature variables with donor matches.", |
811 | 811 | "id": "clone_features", |
812 | 812 | "kind": "function", |
813 | | - "line": 409, |
| 813 | + "line": 400, |
814 | 814 | "metadata": { |
815 | 815 | "api_refs": [ |
816 | 816 | "policyengine_us_data.datasets.cps.extended_cps._splice_clone_feature_predictions" |
|
936 | 936 | "docstring": "Second-stage QRF: train on CPS, predict for PUF clones.\n\nFor the PUF clone half of the extended CPS we need plausible values\nof CPS-only variables (retirement distributions, transfers, hours,\nSPM components, etc.) that are consistent with the clone's\nPUF-imputed income -- not just naively copied from the CPS donor.\n\nWe train a QRF on CPS person-level data where:\n * predictors = demographics + key income variables\n * outputs = CPS-only variables listed in\n ``CPS_ONLY_IMPUTED_VARIABLES``\n\nFor PUF clone prediction we use the PUF-imputed income values\nfrom the second half of ``data`` (the clone half, which already\nhas PUF-imputed income from stage 1).\n\nUses ``fit_predict()`` with ``max_train_samples`` instead of\nmanual sampling + separate fit/predict.\n\nArgs:\n data: Extended dataset dict after ``puf_clone_dataset()`` --\n already doubled, with PUF-imputed income in the second half.\n time_period: Tax year.\n dataset_path: Path to the CPS h5 file for Microsimulation.\n\nReturns:\n DataFrame with one column per CPS-only variable, containing\n predicted values for the PUF clone half (person-level).", |
937 | 937 | "id": "cps_only", |
938 | 938 | "kind": "function", |
939 | | - "line": 448, |
| 939 | + "line": 439, |
940 | 940 | "metadata": { |
941 | 941 | "api_refs": [ |
942 | 942 | "policyengine_us_data.datasets.cps.extended_cps._impute_cps_only_variables" |
|
1089 | 1089 | "docstring": "Remove variables that are computed by policyengine-us.\n\nVariables with formulas, ``adds``, or ``subtracts`` are\nrecomputed by the simulation engine, so storing them wastes\nspace and can mislead validation.\n\nAggregate variables whose ``adds`` include a behavioral-\nresponse input (e.g. ``employment_income_before_lsr``) are\nrenamed to that input before dropping so the raw data is\npreserved under the correct input-variable name.", |
1090 | 1090 | "id": "formula_drop", |
1091 | 1091 | "kind": "function", |
1092 | | - "line": 1206, |
| 1092 | + "line": 1197, |
1093 | 1093 | "metadata": { |
1094 | 1094 | "api_refs": [ |
1095 | 1095 | "policyengine_us_data.datasets.cps.extended_cps.ExtendedCPS._drop_formula_variables" |
|
1950 | 1950 | "docstring": "Replace PUF clone half of CPS-only variables with QRF predictions.\n\nAfter ``puf_clone_dataset()`` the CPS-only variables in the second\nhalf are naive copies of the CPS donor values. This function\nreplaces them with the second-stage QRF predictions that are\nconsistent with the clone's PUF-imputed income.\n\nArgs:\n data: Extended dataset dict (already doubled).\n predictions: DataFrame from ``_impute_cps_only_variables()``.\n time_period: Tax year.\n dataset_path: Path to CPS h5 file for entity mapping.\n\nReturns:\n Modified data dict with CPS-only variables spliced in.", |
1951 | 1951 | "id": "qrf_pass2", |
1952 | 1952 | "kind": "function", |
1953 | | - "line": 726, |
| 1953 | + "line": 717, |
1954 | 1954 | "metadata": { |
1955 | 1955 | "api_refs": [ |
1956 | 1956 | "policyengine_us_data.datasets.cps.extended_cps._splice_cps_only_predictions" |
|
0 commit comments