In [1]:
# Imports

import os

from zipfile import ZipFile

import arcpy

In [2]:
# Vars
# Module-level configuration shared by every cell below. The functions in this
# notebook read these names as globals rather than taking them as parameters.

# Project root; the zipped/unzipped folders and the output geodatabase are all
# resolved relative to this directory.
base_dir = r"C:\Users\Misti\OneDrive\PROJECTS\2025_08_Multi-Year_Census_Counties"

# Name of the output file geodatabase (created inside base_dir if missing).
output_gdb = r"2025_08_Multi-Year_Census_Counties.gdb"

# Name of the output feature class inside output_gdb.
output_fc = "multi_year_census_counties"

# Sub-folders of base_dir: downloaded archives live in the first, and
# extract_all_zips() unpacks them into the second.
zip_dir_name = r"Zipped"
unzip_dir_name = r"Unzipped"

# Working state: found_fcs is filled by get_counties() with 'County' feature
# class paths; year_fcs_dict is replaced by create_dict() with a year -> path map.
found_fcs = []
year_fcs_dict = {}

# Derived vars
# Full paths built from the names above.

full_output_gdb = os.path.join(base_dir, output_gdb)
full_output_fc = os.path.join(full_output_gdb, output_fc)


In [3]:
def extract_all_zips():
    """Unpack every ``.zip`` archive from the zipped folder into the unzipped folder.

    Archives are read from ``<base_dir>/<zip_dir_name>`` and each one is fully
    extracted into ``<base_dir>/<unzip_dir_name>``. Directory entries whose
    names do not end in ``.zip`` are ignored.
    """
    source_dir = os.path.join(base_dir, zip_dir_name)
    target_dir = os.path.join(base_dir, unzip_dir_name)

    archive_names = [name for name in os.listdir(source_dir) if name.endswith(".zip")]

    for archive_name in archive_names:
        with ZipFile(os.path.join(source_dir, archive_name), 'r') as archive:
            archive.extractall(target_dir)

# extract_all_zips()

In [4]:
def get_counties():
    """Collect the path of the 'County' feature class in every unzipped geodatabase.

    Scans ``<base_dir>/<unzip_dir_name>`` for entries ending in ``.gdb``, points
    ``arcpy.env.workspace`` at each one in turn, and records the full path to its
    ``County`` feature class when one is listed. Geodatabases without a
    ``County`` feature class are reported in a single summary message.

    Side effects:
        * The module-level ``found_fcs`` list is cleared and refilled in place.
        * ``arcpy.env.workspace`` is left pointing at the last geodatabase scanned.

    Returns:
        list[str]: the (module-level) ``found_fcs`` list of 'County' paths.
    """
    unfound_gdbs = []

    # Reset list in case of multiple script/block runs (duplicate paths not needed)
    found_fcs.clear()

    unzip_dir = os.path.join(base_dir, unzip_dir_name)

    for file in os.listdir(unzip_dir):

        if not file.endswith(".gdb"):
            continue

        gdb = os.path.join(unzip_dir, file)
        arcpy.env.workspace = gdb

        # NOTE(review): ListFeatureClasses() has been observed to return None
        # (rather than an empty list) when the workspace cannot be read, e.g. a
        # folder that merely ends in ".gdb". Fall back to an empty list so such
        # a folder is reported as "not found" instead of raising TypeError.
        fc_names = arcpy.ListFeatureClasses() or []

        if "County" not in fc_names:
            unfound_gdbs.append(gdb)
        else:
            found_fcs.append(os.path.join(gdb, "County"))

    if unfound_gdbs:
        print(f"Heads up! Didn't find a 'County' FC in these GDBs: {unfound_gdbs}")
    else:
        print("Yay! Found a 'County' FC in all gdbs!")

    return found_fcs

found_fcs = get_counties()


Yay! Found a 'County' FC in all gdbs!


In [5]:
def create_dict(year_fcs_dict):
    """Build a ``{year: feature class path}`` mapping, newest year first.

    The year is the text found between ``"tlgdb_"`` and ``"_a_"`` in each path
    of the module-level ``found_fcs`` list; it is kept as a string. If two paths
    yield the same year, the later one in ``found_fcs`` wins.

    Args:
        year_fcs_dict: an existing dict; it is emptied in place (handy while
            re-running cells during development). The rebuilt mapping is
            returned as a NEW dict, so callers must use the return value.

    Returns:
        dict: year string -> feature class path, ordered by year descending.

    Raises:
        IndexError: if a path does not contain ``"tlgdb_"``.
    """
    year_fcs_dict.clear()

    path_by_year = {}
    for path in found_fcs:
        after_prefix = path.split("tlgdb_")[1]
        path_by_year[after_prefix.split("_a_")[0]] = path

    return {year: path_by_year[year] for year in sorted(path_by_year, reverse=True)}

year_fcs_dict = create_dict(year_fcs_dict)

In [6]:
def create_output():
    """Make sure the output geodatabase and output feature class exist.

    The feature class with the greatest year key in ``year_fcs_dict`` is used as
    the schema template and as the source of the spatial reference. Author's
    rationale: a field was added to the source data by the 2025 release, and an
    insert cursor built from an older schema failed; fields are more likely to
    be added than removed over time, so the NEWEST schema is the safest template.

    Nothing is recreated or altered when the geodatabase / feature class is
    already present.

    Returns:
        str: full path of the output feature class (``full_output_fc``).
    """
    newest_year = max(year_fcs_dict.keys())
    template_fc = year_fcs_dict[newest_year]

    spatial_ref = arcpy.Describe(template_fc).spatialReference

    if arcpy.Exists(full_output_gdb):
        print("Yay! Output GDB already exists.")
    else:
        print("Creating output GDB...")
        arcpy.management.CreateFileGDB(base_dir, output_gdb)

    if arcpy.Exists(full_output_fc):
        print("Yay! Output feature class already exists.")
    else:
        print("Creating output feature class...")
        arcpy.management.CreateFeatureclass(
            full_output_gdb,
            output_fc,
            "POLYGON",
            template=template_fc,
            spatial_reference=spatial_ref,
        )

    return full_output_fc

create_output()

Yay! Output GDB already exists.
Yay! Output feature class already exists.


'C:\\Users\\Misti\\OneDrive\\PROJECTS\\2025_08_Multi-Year_Census_Counties\\2025_08_Multi-Year_Census_Counties.gdb\\multi_year_census_counties'

In [7]:
def add_year_field():
    """Add a SHORT ``Census_Year`` field to the output feature class if it is missing.

    The explicit existence check is there to give a clear message on re-runs;
    the field is only added when no field of that name is listed.
    """
    existing_names = {field.name for field in arcpy.ListFields(full_output_fc)}

    if "Census_Year" in existing_names:
        print("Field 'Census_Year' alread exists! Moving on...")
        return

    print("Adding 'Census_Year' field")
    arcpy.management.AddField(
        full_output_fc,
        "Census_Year",
        "SHORT",
        field_length=4,
        field_alias="Census Year",
    )

add_year_field()

Field 'Census_Year' alread exists! Moving on...


In [None]:
def insert_counties(year_fcs_dict, length_tolerance=.00001):
    """Copy counties into the output feature class, skipping unchanged shapes.

    Years are processed in the iteration order of ``year_fcs_dict`` (built
    newest-first by ``create_dict``). For each year, a county is written to the
    output only when its GEOID is not yet present there, or when its perimeter
    (``SHAPE@LENGTH``) differs from the GEOID's perimeter already in the output
    by at least ``length_tolerance``. Each written row gets the year in the
    ``Census_Year`` field.

    Args:
        year_fcs_dict: mapping of year -> path of that year's County feature class.
        length_tolerance: how different (in ``SHAPE@LENGTH`` units) a county's
            perimeter must be before it is stored as a new, distinct record.
            Author's measurements for a 2025-2013 dataset, roughly:
                .005: 3945 counties    | .001: 4897 counties
                .0005: 5454 counties   | .0001: 6877 counties
                .00001: 8646 counties
            Per the author's note the units are degrees, so the default of
            .00001 is extremely small: two counties that close in perimeter
            are treated as identical.

    Notes:
        * One insert cursor is opened per year instead of one per inserted row,
          which avoids thousands of cursor open/close cycles.
        * The snapshot of the output is taken once at the start of each year, so
          rows inserted during a year are not compared against each other.
    """
    for position, (year, fc) in enumerate(year_fcs_dict.items()):

        # The "found a new county" message is only useful after the first
        # (newest) year, where every county is expected to be inserted.
        is_first_year = position == 0

        # Snapshot GEOID -> perimeter of what is already in the output. When a
        # GEOID occurs more than once, the last row read wins. The cursor is
        # closed before any insert cursor is opened on the same feature class.
        with arcpy.da.SearchCursor(full_output_fc, ["GEOID", "SHAPE@LENGTH"]) as existing_rows:
            gid_shape_dict = {existing[0]: existing[1] for existing in existing_rows}

        fields = [f.name for f in arcpy.ListFields(fc)]
        fields.append("SHAPE@")
        fields.append("SHAPE@LENGTH")

        ifields = fields.copy()
        ifields.append("Census_Year")

        # Column positions are fixed for the whole year; look them up once.
        gid_index = fields.index("GEOID")
        length_index = fields.index("SHAPE@LENGTH")

        with arcpy.da.SearchCursor(fc, fields) as scursor:
            with arcpy.da.InsertCursor(full_output_fc, ifields) as icursor:
                for row in scursor:

                    gid = row[gid_index]
                    shape = row[length_index]

                    if gid in gid_shape_dict:
                        # Same GEOID and (near-)identical perimeter: treat as
                        # the same county shape as the later year and skip it.
                        if abs(shape - gid_shape_dict[gid]) < length_tolerance:
                            continue

                    if not is_first_year:
                        print(f"Hey found a new county to add in! Year: {year}; GEOID: {gid}")

                    icursor.insertRow(row + (year,))

insert_counties(year_fcs_dict)

Hey found a new county to add in! Year: 2024; GEOID: 01075
Hey found a new county to add in! Year: 2024; GEOID: 01107
Hey found a new county to add in! Year: 2024; GEOID: 02060
Hey found a new county to add in! Year: 2024; GEOID: 02066
Hey found a new county to add in! Year: 2024; GEOID: 02100
Hey found a new county to add in! Year: 2024; GEOID: 02105
Hey found a new county to add in! Year: 2024; GEOID: 02164
Hey found a new county to add in! Year: 2024; GEOID: 02198
Hey found a new county to add in! Year: 2024; GEOID: 02220
Hey found a new county to add in! Year: 2024; GEOID: 02240
Hey found a new county to add in! Year: 2024; GEOID: 04013
Hey found a new county to add in! Year: 2024; GEOID: 04015
Hey found a new county to add in! Year: 2024; GEOID: 04019
Hey found a new county to add in! Year: 2024; GEOID: 04021
Hey found a new county to add in! Year: 2024; GEOID: 06011
Hey found a new county to add in! Year: 2024; GEOID: 06013
Hey found a new county to add in! Year: 2024; GEOID: 060

In [None]:
# EVERYTHING BELOW THIS BLOCK IS NOT PART OF THE ACTUAL SCRIPT!! TESTING / DEV BLOCKS ONLY!!

In [29]:
# FOR USE WHILE BUILDING SCRIPT AND TROUBLESHOOTING ONLY!!!

def truncate_table():
    """Run arcpy's TruncateTable tool on the output feature class (``full_output_fc``).

    Development helper only: intended for resetting the output between test
    runs of the insert step, not as part of the actual script.
    """
    
    arcpy.management.TruncateTable(full_output_fc)

truncate_table()