# Healthcare Desert - Data Collection (MS & AL)

This notebook collects tract-level socio-economic metrics (IGS/ACS) and healthcare facility locations for Mississippi and Alabama, and prepares raw tables for downstream analysis.

- **Inputs:** IGS CSVs (tract-level), ACS via API, facility CSVs/GeoJSON, tract boundaries.
- **Outputs:** Raw tables in `data/raw/` and merged attributes for modeling.


In [None]:
import os
import pandas as pd
import geopandas as gpd
from pathlib import Path

# Study scope: state abbreviations and the range of years under consideration.
TARGET_STATES = ['MS', 'AL']
YEARS = [*range(2018, 2025)]  # 2018 through 2024 inclusive; adjust later if needed

# Raw-data directory, created up front (including parents) if it does not exist yet.
DATA_RAW = Path('data/raw')
DATA_RAW.mkdir(parents=True, exist_ok=True)

print('Configured:', TARGET_STATES, YEARS)


## Placeholders for data loaders

- IGS: manual CSV upload for now (tract FIPS, metrics)
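- ACS: Census API fetch function (planned tables: B27020, B19083, B19013, B17001; the fetch cell below currently requests only B19013_001E as `median_income` and B19083_001E as `gini`)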
- Facilities: CMS/HRSA CSVs with lat/long
- Boundaries: TIGER tract GeoJSON for MS/AL


In [None]:
from src.data.fetch_census_acs import fetch_acs_tract_data, normalize_acs_columns
from pathlib import Path
import pandas as pd

# Same raw-data directory as the setup cell; repeated here so this cell does
# not depend on the earlier one having been run.
DATA_RAW = Path('data/raw')
DATA_RAW.mkdir(parents=True, exist_ok=True)

# ACS release to pull. Defined once so the API request and the output
# filename cannot drift apart when the vintage is changed.
ACS_DATASET = "acs/acs5"
ACS_YEAR = 2023

# FIPS: Alabama=01, Mississippi=28
STATE_FIPS = {"AL": "01", "MS": "28"}

# ACS variable code -> friendly name (handed to normalize_acs_columns below).
ACS_VARS = {
    "B19013_001E": "median_income",
    "B19083_001E": "gini",
}

# One fetch per state; each result is tagged with its state abbreviation
# before the per-state tables are stacked into a single frame.
frames = []
for abbr, fips in STATE_FIPS.items():
    df = fetch_acs_tract_data(
        state_fips=fips,
        variables=list(ACS_VARS.keys()),
        dataset=ACS_DATASET,
        year=ACS_YEAR,
    )
    # NOTE(review): presumably renames the ACS codes to the names in ACS_VARS —
    # confirm against src.data.fetch_census_acs.
    df = normalize_acs_columns(df, ACS_VARS)
    df["state_abbr"] = abbr
    frames.append(df)

acs = pd.concat(frames, ignore_index=True)

# With ACS_YEAR = 2023 this writes data/raw/acs_ms_al_2023.csv, as before.
acs.to_csv(DATA_RAW / f'acs_ms_al_{ACS_YEAR}.csv', index=False)
acs.head()
