In [4]:
import pandas as pd
import os

# --- Configuration ---
# Address file produced by the address cleaning script.
INPUT_FILE_PATH = "Data/Test-Datasets/WayneHealthpatientDEMOGRAPHICSAddressCorrected.csv"

# Destination of the cleaned file that is ready for upload.
OUTPUT_FILE_PATH = "Data/addresses_for_geocoding.csv"

# Only the first MAX_INPUT_ROWS data rows of the input are read; any rows
# after that are ignored. Use None to read the whole file.
# NOTE(review): presumably chosen to match the geocoder's per-file batch
# limit -- confirm before raising it.
MAX_INPUT_ROWS = 10000

# Cleaned input column -> header written to the output file, in output order.
GEOCODER_COLUMNS = {
    'cleaned_street_address': 'Street Address',
    'cleaned_city': 'City',
    'cleaned_state': 'State',
    'cleaned_zip': 'ZIP',
}


def select_geocodable_rows(df):
    """Return a copy of *df* without the rows whose street address is blank.

    A street address counts as blank when it is empty or whitespace only.
    The original index labels are kept on the surviving rows.

    Args:
        df: DataFrame of string values containing every key of
            GEOCODER_COLUMNS as a column.

    Returns:
        A new DataFrame; *df* itself is not modified.

    Raises:
        ValueError: if any required cleaned column is missing from *df*.
    """
    missing = [name for name in GEOCODER_COLUMNS if name not in df.columns]
    if missing:
        # A bare KeyError would only print the column name in quotes, which
        # gives no hint about what went wrong.
        raise ValueError(
            f"Input file is missing required column(s): {', '.join(missing)}"
        )
    has_street = df['cleaned_street_address'].str.strip() != ''
    return df[has_street].copy()


def build_geocoder_frame(df):
    """Return the upload-ready frame built from the cleaned columns of *df*.

    The result holds a leading 'Unique ID' column followed by the renamed
    address columns, in the order given by GEOCODER_COLUMNS.

    Args:
        df: DataFrame containing every key of GEOCODER_COLUMNS as a column.

    Returns:
        A new DataFrame; *df* itself is not modified.
    """
    frame = df[list(GEOCODER_COLUMNS)].rename(columns=GEOCODER_COLUMNS)
    # The index labels of *df* become the IDs so the geocoder results can be
    # merged back onto the source rows later.
    frame.insert(0, 'Unique ID', df.index)
    return frame


# --- Main Script ---
try:
    print(f"Reading your corrected address file from: {INPUT_FILE_PATH}")
    # Read all data as strings and replace any nulls with empty strings
    df = pd.read_csv(INPUT_FILE_PATH, dtype=str, nrows=MAX_INPUT_ROWS).fillna('')

    print("Preparing a clean file formatted for the Census Geocoder...")

    # 1. Remove rows with no street address.
    #    The geocoder will fail if this column is empty.
    original_rows = len(df)
    df = select_geocodable_rows(df)
    print(f"Removed {original_rows - len(df)} rows that had an empty street address.")

    # 2. Keep only the cleaned columns, rename them, and prepend 'Unique ID'.
    geocoder_df = build_geocoder_frame(df)

    # 3. Save the new DataFrame to a new CSV file.
    #    os.path.dirname() returns '' for a bare file name and os.makedirs('')
    #    raises FileNotFoundError, which the handler below would misreport as
    #    a missing input file -- so only create a directory when one is named.
    output_dir = os.path.dirname(OUTPUT_FILE_PATH)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    geocoder_df.to_csv(OUTPUT_FILE_PATH, index=False)

    print("\nSuccess! ✨")
    print(f"A new, clean file is ready for upload at: {OUTPUT_FILE_PATH}")

except FileNotFoundError:
    print(f"Error: Input file not found at '{INPUT_FILE_PATH}'. Please make sure the file exists.")
except Exception as e:
    # Top-level boundary of the script: report the problem instead of
    # surfacing a traceback.
    print(f"An error occurred: {e}")

Reading your corrected address file from: Data/Test-Datasets/WayneHealthpatientDEMOGRAPHICSAddressCorrected.csv
Preparing a clean file formatted for the Census Geocoder...
Removed 479 rows that had an empty street address.

Success! ✨
A new, clean file is ready for upload at: Data/addresses_for_geocoding.csv
