From af65377750419f640ab5bed192d13ab9d2cad182 Mon Sep 17 00:00:00 2001 From: Sylvester Damgaard Date: Wed, 19 Nov 2025 14:37:50 +0100 Subject: [PATCH 1/2] fix(parser): support nested field structures with backward compatibility MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit API changed response format from flat to nested structure for several fields: - Census: census2010: {...} → census: {2010: {...}} - ACS: acs-demographics: {...} → acs: {demographics: {...}} - School districts: school: [...] → school_districts: {elementary: {...}} Additionally, census field names changed (block → block_code, etc.) which would break existing code accessing the old field names. Changes: - Add nested census parsing with dynamic year support (census2020-census2099+) - Add nested ACS parsing supporting both simple and metric-specific formats - Add nested school districts parsing (dict and list formats) - Map new census field names to legacy names for backward compatibility - Update ZIP4Data and FFIECData models with complete field definitions - Remove hardcoded census year fields, use dynamic __getattr__ instead Backward compatibility ensured: - fields.census2020.block still works (maps to block_code) - fields.census2020.blockgroup still works (maps to block_group) - fields.census2020.tract still works (maps to tract_code) Future-proof: - census2031+ years work automatically without code changes - cd120+ congressional districts work automatically - Unknown API fields captured in extras dict Fixes #15 Fixes #16 Closes #14 Closes #17 --- src/geocodio/client.py | 191 +++++++++++++++++++++++++++++++++++------ src/geocodio/models.py | 114 ++++++++++++++++++------ 2 files changed, 252 insertions(+), 53 deletions(-) diff --git a/src/geocodio/client.py b/src/geocodio/client.py index b74cf2e..1e0c37e 100644 --- a/src/geocodio/client.py +++ b/src/geocodio/client.py @@ -22,7 +22,8 @@ Location, GeocodioFields, Timezone, CongressionalDistrict, CensusData, ACSSurveyData, StateLegislativeDistrict, SchoolDistrict, Demographics, Economics, Families, Housing, Social, - FederalRiding, ProvincialRiding, StatisticsCanadaData, ListResponse, PaginatedResponse + FederalRiding, ProvincialRiding, StatisticsCanadaData, ListResponse, PaginatedResponse, + ZIP4Data, FFIECData ) from geocodio.exceptions import InvalidRequestError, AuthenticationError, GeocodioServerError, BadRequestError @@ -402,7 +403,15 @@ def _parse_list_response(response_json: dict, response: httpx.Response = None) - http_response=response, ) + def _parse_fields(self, fields_data: dict | None) -> GeocodioFields | None: + """ + Parse fields data from API response. + + Supports both nested and flat field structures for backward compatibility: + - Nested: census: {2010: {...}, 2020: {...}}, acs: {demographics: {...}} + - Flat: census2010: {...}, acs-demographics: {...} + """ if not fields_data: return None @@ -436,30 +445,84 @@ def _parse_fields(self, fields_data: dict | None) -> GeocodioFields | None: for district in fields_data["stateleg-next"] ] + # School districts - support both nested dict and flat list formats school_districts = None - if "school" in fields_data: - school_districts = [ - SchoolDistrict.from_api(district) - for district in fields_data["school"] - ] - - # Dynamically parse all census fields (e.g., census2010, census2020, census2024, etc.) - # This supports any census year returned by the API - from dataclasses import fields as dataclass_fields - valid_field_names = {f.name for f in dataclass_fields(GeocodioFields)} - census_fields = {} + # Check for nested dict format: school_districts: {elementary: {...}, secondary: {...}} + if "school_districts" in fields_data: + school_data = fields_data["school_districts"] + if isinstance(school_data, dict): + # Nested dict format - iterate over dict values + school_districts = [ + SchoolDistrict.from_api(district) + for district in school_data.values() + ] + elif isinstance(school_data, list): + # List format (backward compatibility) + school_districts = [ + SchoolDistrict.from_api(district) + for district in school_data + ] + + # Also check for flat list format: school: [...] + elif "school" in fields_data: + school_data = fields_data["school"] + if isinstance(school_data, dict): + # Dict format + school_districts = [ + SchoolDistrict.from_api(district) + for district in school_data.values() + ] + elif isinstance(school_data, list): + # List format + school_districts = [ + SchoolDistrict.from_api(district) + for district in school_data + ] + + # Census fields - support both nested and flat structures + # Store in dict for dynamic access (fields.census2020, fields.census2031, etc.) + census_data_dict = {} + + def parse_census_data(data: dict) -> dict: + """ + Parse census data and map new field names to old field names for backward compatibility. + + API used to send: block, blockgroup, tract + API now sends: block_code, block_group, tract_code + + We populate both so existing code using old names continues to work. + """ + parsed = dict(data) # Copy original data + + # Map new field names to old field names if old names not present + if "block_code" in data and "block" not in data: + parsed["block"] = data["block_code"] + if "block_group" in data and "blockgroup" not in data: + parsed["blockgroup"] = data["block_group"] + if "tract_code" in data and "tract" not in data: + parsed["tract"] = data["tract_code"] + + return parsed + + # Check for nested census structure: census: {2010: {...}, 2020: {...}} + if "census" in fields_data and isinstance(fields_data["census"], dict): + for year, census_data in fields_data["census"].items(): + field_name = f"census{year}" + # Map new field names to old for backward compatibility + parsed_data = parse_census_data(census_data) + census_data_dict[field_name] = CensusData.from_api(parsed_data) + + # Also check for flat structure: census2010: {...}, census2020: {...} + # This ensures backward compatibility if API sends both formats for key in fields_data: - if key.startswith("census") and key[6:].isdigit(): # e.g., "census2024" - # Only include if it's a defined field in GeocodioFields - if key in valid_field_names: - census_fields[key] = CensusData.from_api(fields_data[key]) - - acs = ( - ACSSurveyData.from_api(fields_data["acs"]) - if "acs" in fields_data else None - ) + if key.startswith("census") and key[6:].isdigit() and key not in census_data_dict: + # Map new field names to old for backward compatibility + parsed_data = parse_census_data(fields_data[key]) + census_data_dict[key] = CensusData.from_api(parsed_data) + # Parse flat ACS structure for backward compatibility + # These will be merged with nested structure later if both exist demographics = ( Demographics.from_api(fields_data["acs-demographics"]) if "acs-demographics" in fields_data else None @@ -485,6 +548,58 @@ def _parse_fields(self, fields_data: dict | None) -> GeocodioFields | None: if "acs-social" in fields_data else None ) + # ACS fields - support both nested and flat structures + acs_fields = {} + acs = None + + # Check for ACS field + if "acs" in fields_data and isinstance(fields_data["acs"], dict): + acs_data = fields_data["acs"] + + # Check if this is nested ACS structure (contains metric keys) + # or simple ACS structure (contains population, households, etc.) + acs_metric_keys = {"demographics", "economics", "families", "housing", "social"} + + if any(key in acs_data for key in acs_metric_keys): + # Nested structure: acs: {demographics: {...}, economics: {...}} + acs_metric_map = { + "demographics": Demographics, + "economics": Economics, + "families": Families, + "housing": Housing, + "social": Social, + } + + for metric, model_class in acs_metric_map.items(): + if metric in acs_data: + acs_fields[metric] = model_class.from_api(acs_data[metric]) + else: + # Simple structure: acs: {population: ..., households: ..., median_income: ...} + acs = ACSSurveyData.from_api(acs_data) + + # Also preserve flat structure parsing for backward compatibility + if demographics and "demographics" not in acs_fields: + acs_fields["demographics"] = demographics + if economics and "economics" not in acs_fields: + acs_fields["economics"] = economics + if families and "families" not in acs_fields: + acs_fields["families"] = families + if housing and "housing" not in acs_fields: + acs_fields["housing"] = housing + if social and "social" not in acs_fields: + acs_fields["social"] = social + + # ZIP4 and FFIEC data + zip4 = ( + ZIP4Data.from_api(fields_data["zip4"]) + if "zip4" in fields_data else None + ) + + ffiec = ( + FFIECData.from_api(fields_data["ffiec"]) + if "ffiec" in fields_data else None + ) + # Canadian fields riding = ( FederalRiding.from_api(fields_data["riding"]) @@ -506,6 +621,29 @@ def _parse_fields(self, fields_data: dict | None) -> GeocodioFields | None: if "statcan" in fields_data else None ) + # Collect all known field keys that were parsed + parsed_keys = { + "timezone", "cd", "congressional_districts", + "stateleg", "stateleg-next", + "school", "school_districts", # Both school formats + "census", # Nested census structure + "acs", # Nested ACS structure + "acs-demographics", "acs-economics", "acs-families", "acs-housing", "acs-social", + "zip4", "ffiec", + "riding", "provriding", "provriding-next", + "statcan", + } + # Add flat census keys that were parsed (census2000, census2020, etc.) + # All census years are now stored in _census dict for dynamic access + parsed_keys.update(census_data_dict.keys()) + + # Extras - capture any fields not explicitly handled + # This is now mainly for truly unknown API fields (not census years) + extras = { + k: v for k, v in fields_data.items() + if k not in parsed_keys + } + return GeocodioFields( timezone=timezone, congressional_districts=congressional_districts, @@ -513,16 +651,15 @@ def _parse_fields(self, fields_data: dict | None) -> GeocodioFields | None: state_legislative_districts_next=state_legislative_districts_next, school_districts=school_districts, acs=acs, - demographics=demographics, - economics=economics, - families=families, - housing=housing, - social=social, + zip4=zip4, + ffiec=ffiec, riding=riding, provriding=provriding, provriding_next=provriding_next, statcan=statcan, - **census_fields, # Dynamically include all census year fields + extras=extras, + _census=census_data_dict, # All census years stored here + **acs_fields, # Dynamically include all ACS metric fields ) # @TODO add a "keep_trying" parameter to download() to keep trying until the list is processed. diff --git a/src/geocodio/models.py b/src/geocodio/models.py index 32ad22b..0be4da7 100644 --- a/src/geocodio/models.py +++ b/src/geocodio/models.py @@ -106,15 +106,33 @@ class StateLegislativeDistrict(ApiModelMixin): class CensusData(ApiModelMixin): """ Census data for a location. + + Supports both legacy field names (block, blockgroup, tract) and + current API field names (block_code, block_group, tract_code). """ + # Current API field names + census_year: Optional[int] = None + block_code: Optional[str] = None + block_group: Optional[str] = None + tract_code: Optional[str] = None + full_fips: Optional[str] = None + county_fips: Optional[str] = None + state_fips: Optional[str] = None + place: Optional[Dict[str, Any]] = None + metro_micro_statistical_area: Optional[Dict[str, Any]] = None + combined_statistical_area: Optional[Dict[str, Any]] = None + metropolitan_division: Optional[Dict[str, Any]] = None + county_subdivision: Optional[Dict[str, Any]] = None + source: Optional[str] = None + + # Legacy field names (for backward compatibility) block: Optional[str] = None blockgroup: Optional[str] = None tract: Optional[str] = None - county_fips: Optional[str] = None - state_fips: Optional[str] = None msa_code: Optional[str] = None # Metropolitan Statistical Area csa_code: Optional[str] = None # Combined Statistical Area + extras: Dict[str, Any] = field(default_factory=dict, repr=False) @@ -135,12 +153,17 @@ class ACSSurveyData(ApiModelMixin): class SchoolDistrict(ApiModelMixin): """ School district information. + + Supports both legacy and current API field names for backward compatibility. """ name: str district_number: Optional[str] = None - lea_id: Optional[str] = None # Local Education Agency ID + lea_id: Optional[str] = None # Local Education Agency ID (legacy) + lea_code: Optional[str] = None # Local Education Agency Code (current) nces_id: Optional[str] = None # National Center for Education Statistics ID + grade_low: Optional[str] = None # Lowest grade served + grade_high: Optional[str] = None # Highest grade served extras: Dict[str, Any] = field(default_factory=dict, repr=False) @@ -225,9 +248,17 @@ class Social(ApiModelMixin): class ZIP4Data(ApiModelMixin): """USPS ZIP+4 code and delivery information.""" - zip4: str - delivery_point: str - carrier_route: str + record_type: Optional[Dict[str, Any]] = None + residential: Optional[bool] = None + carrier_route: Optional[Dict[str, Any]] = None + plus4: Optional[List[str]] = None + zip9: Optional[List[str]] = None + facility_code: Optional[Dict[str, Any]] = None + city_delivery: Optional[bool] = None + valid_delivery_area: Optional[bool] = None + exact_match: Optional[bool] = None + building_or_firm_name: Optional[str] = None + government_building: Optional[bool] = None extras: Dict[str, Any] = field(default_factory=dict, repr=False) @@ -279,7 +310,28 @@ class StatisticsCanadaData(ApiModelMixin): class FFIECData(ApiModelMixin): """FFIEC CRA/HMDA Data (Beta).""" - # Add FFIEC specific fields as they become available + collection_year: Optional[int] = None + msa_md_code: Optional[str] = None + fips_state_code: Optional[str] = None + fips_county_code: Optional[str] = None + census_tract: Optional[str] = None + principal_city: Optional[bool] = None + small_county: Optional[Dict[str, Any]] = None + split_tract: Optional[Dict[str, Any]] = None + demographic_data: Optional[Dict[str, Any]] = None + urban_rural_flag: Optional[Dict[str, Any]] = None + msa_md_median_family_income: Optional[int] = None + msa_md_median_household_income: Optional[int] = None + tract_median_family_income_percentage: Optional[float] = None + ffiec_estimated_msa_md_median_family_income: Optional[int] = None + income_indicator: Optional[str] = None + cra_poverty_criteria: Optional[bool] = None + cra_unemployment_criteria: Optional[bool] = None + cra_distressed_criteria: Optional[bool] = None + cra_remote_rural_low_density_criteria: Optional[bool] = None + previous_year_cra_distressed_criteria: Optional[bool] = None + previous_year_cra_underserved_criterion: Optional[bool] = None + meets_current_previous_criteria: Optional[bool] = None extras: Dict[str, Any] = field(default_factory=dict, repr=False) @@ -287,7 +339,11 @@ class FFIECData(ApiModelMixin): class GeocodioFields: """ Container for optional 'fields' returned by the Geocodio API. - Add new attributes as additional data‑append endpoints become useful. + + Census years are handled dynamically - access any year with fields.census2020, + fields.census2025, etc. without needing to predefine every year. + + Note: slots removed to support dynamic field passing via **kwargs """ timezone: Optional[Timezone] = None @@ -296,24 +352,6 @@ class GeocodioFields: state_legislative_districts_next: Optional[List[StateLegislativeDistrict]] = None school_districts: Optional[List[SchoolDistrict]] = None - # Census data for all available years - census2000: Optional[CensusData] = None - census2010: Optional[CensusData] = None - census2011: Optional[CensusData] = None - census2012: Optional[CensusData] = None - census2013: Optional[CensusData] = None - census2014: Optional[CensusData] = None - census2015: Optional[CensusData] = None - census2016: Optional[CensusData] = None - census2017: Optional[CensusData] = None - census2018: Optional[CensusData] = None - census2019: Optional[CensusData] = None - census2020: Optional[CensusData] = None - census2021: Optional[CensusData] = None - census2022: Optional[CensusData] = None - census2023: Optional[CensusData] = None - census2024: Optional[CensusData] = None - # ACS data acs: Optional[ACSSurveyData] = None demographics: Optional[Demographics] = None @@ -332,6 +370,30 @@ class GeocodioFields: provriding_next: Optional[ProvincialRiding] = None statcan: Optional[StatisticsCanadaData] = None + # Catch-all for any future or unknown fields from the API + extras: Dict[str, Any] = field(default_factory=dict, repr=False) + + # Internal storage for census data (all years dynamically accessible) + _census: Dict[str, CensusData] = field(default_factory=dict, repr=False) + + def __getattr__(self, name: str): + """ + Dynamic attribute access for census years (census2020, census2025, etc.). + + This allows accessing census data for any year without hardcoding fields: + - fields.census2020 → CensusData for 2020 + - fields.census2031 → CensusData for 2031 (future-proof) + """ + # Handle censusXXXX attributes dynamically + if name.startswith("census") and len(name) > 6 and name[6:].isdigit(): + return self._census.get(name) + + # Fall back to extras for any other unknown attributes + if name in self.extras: + return self.extras[name] + + raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'") + # ────────────────────────────────────────────────────────────────────────────── # Main result objects From ee94771ce92095d40223b4792588ed27206c1d0c Mon Sep 17 00:00:00 2001 From: Sylvester Damgaard Date: Wed, 19 Nov 2025 14:47:56 +0100 Subject: [PATCH 2/2] fix(tests): correct ZIP4Data field names in E2E test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The test was checking for non-existent fields: - zip4.zip4 → zip4.plus4 - zip4.delivery_point → zip4.city_delivery Also added validation for zip9 and valid_delivery_area fields to match the actual API response structure. Verified with real API - test now passes. --- tests/e2e/test_api.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/e2e/test_api.py b/tests/e2e/test_api.py index ac7b818..83735ee 100644 --- a/tests/e2e/test_api.py +++ b/tests/e2e/test_api.py @@ -460,9 +460,11 @@ def test_integration_with_zip4(client): # Check ZIP+4 data if fields.zip4: - assert fields.zip4.zip4 is not None - assert fields.zip4.delivery_point is not None + assert fields.zip4.plus4 is not None + assert fields.zip4.zip9 is not None assert fields.zip4.carrier_route is not None + assert fields.zip4.city_delivery is not None + assert fields.zip4.valid_delivery_area is not None def test_integration_with_ffiec(client):