In [None]:
import urllib.request, re

CDC_ONECOL_URL = "https://www.cdc.gov/brfss/annual_data/2024/llcp_varlayout_24_onecolumn.html"

def fetch_text(url: str) -> str:
    """Download ``url`` and return the response body as text.

    The body is decoded as strict UTF-8 first. If the bytes are not valid
    UTF-8 they are decoded as Latin-1 instead, which maps every byte to a
    character and therefore always succeeds.

    Args:
        url: Address handed to ``urllib.request.urlopen``.

    Returns:
        The decoded response body.

    Raises:
        Any exception raised by ``urllib.request.urlopen`` or by reading the
        response propagates unchanged.
    """
    with urllib.request.urlopen(url) as response:
        payload = response.read()
    try:
        # Decode strictly: with errors="ignore" this call could never raise,
        # so the Latin-1 fallback below was unreachable and undecodable bytes
        # were silently dropped from the text.
        return payload.decode("utf-8")
    except UnicodeDecodeError:
        return payload.decode("latin-1")

def parse_layout(text: str):
    """Extract (start, name, length) triples from layout page markup.

    Tags are replaced by spaces, HTML entities are decoded, whitespace is
    collapsed, and the remaining text is scanned for the sequence
    ``<integer> <NAME> <integer>``. ``NAME`` consists of upper-case letters,
    digits and underscores and must start with a letter or an underscore.

    Args:
        text: Markup (or plain text) of the layout page.

    Returns:
        A tuple ``(starts, names, lens)`` of three parallel lists: the first
        integers, the names, and the second integers, in order of appearance.

    Raises:
        RuntimeError: If no triple is found in ``text``.
    """
    import html  # imported here because the notebook's import line lacks it

    # ``[^>]*`` also strips tags whose attributes wrap across lines, which
    # ``.*?`` (without DOTALL) left behind for the triple scan to trip over.
    stripped = re.sub(r"<[^>]*>", " ", text)
    # Decode entities (e.g. ``&nbsp;``) only after the tags are gone, so an
    # escaped ``&lt;`` is never treated as markup.
    stripped = html.unescape(stripped)
    stripped = re.sub(r"\s+", " ", stripped)

    # The name must not be purely numeric; otherwise any three consecutive
    # numbers would be accepted as a triple. Word boundaries keep the scan
    # from starting or ending in the middle of a longer token.
    triple_re = re.compile(r"\b(\d+)\s+([A-Z_][A-Z0-9_]*)\s+(\d+)\b")
    triples = triple_re.findall(stripped)

    if not triples:
        raise RuntimeError("No (start, name, length) triples found")

    starts = [int(start) for start, _, _ in triples]
    names = [name for _, name, _ in triples]
    lens = [int(length) for _, _, length in triples]
    return starts, names, lens

def make_colspecs(starts_1based, names, lens):
    """Build 0-based, half-open column spans from 1-based field positions.

    The fields are ordered by their 1-based start (ties keep input order).
    When a name appears more than once, only its first occurrence in that
    order is kept.

    Args:
        starts_1based: 1-based start position of each field.
        names: Field names, parallel to ``starts_1based``.
        lens: Field widths, parallel to ``starts_1based``.

    Returns:
        A tuple ``(colspecs, colnames)``: a list of ``(start, end)`` pairs
        with a 0-based inclusive start and exclusive end, and the matching
        list of names.
    """
    ordered = list(zip(starts_1based, names, lens))
    ordered.sort(key=lambda field: field[0])

    # Dicts preserve insertion order, so this doubles as an ordered
    # "first occurrence wins" de-duplication keyed by field name.
    span_by_name = {}
    for begin, label, width in ordered:
        if label not in span_by_name:
            zero_based = begin - 1
            span_by_name[label] = (zero_based, zero_based + width)

    return list(span_by_name.values()), list(span_by_name.keys())

# Download the layout page, pull out the (start, name, length) triples, and
# convert them into 0-based half-open column spans with matching names.
layout_text = fetch_text(CDC_ONECOL_URL)
starts, names, lens = parse_layout(layout_text)
colspecs, colnames = make_colspecs(starts, names, lens)

In [None]:
import pandas as pd

# Read the fixed-width file with the column spans and names built earlier,
# keeping every field as a string (dtype=str).
df = pd.read_fwf("../../Data/LLCP2024.ASC", colspecs=colspecs, names=colnames, dtype=str)

# Save the parsed table as CSV without writing the row index.
df.to_csv("../../Data/BRFSS_2024.csv", index=False)

In [None]:
# Reload the exported CSV with every column read as a string, then show the
# first rows.
df = pd.read_csv("../../Data/BRFSS_2024.csv", dtype=str)
df.head()

Unnamed: 0,_STATE,FMONTH,IDATE,IMONTH,IDAY,IYEAR,DISPCODE,SEQNO,_PSU,CTELENM1,...,_LCSELIG,_LCSCTSN,DRNKANY6,DROCDY4_,_RFBING6,_DRNKWK3,_RFDRHV9,_FLSHOT7,_PNEUMO3,_AIDTST4
0,1,2,2282024,2,28,2024,1100,2024000001,2024000001,1,...,2,0,0,10,0,11,2.0,2,1,
1,1,2,2212024,2,21,2024,1100,2024000002,2024000002,1,...,2,0,0,10,0,11,1.0,2,1,
2,1,2,2212024,2,21,2024,1100,2024000003,2024000003,1,...,1,1,0,20,1,4001,,2,1,
3,1,2,2282024,2,28,2024,1100,2024000004,2024000004,1,...,2,0,0,10,0,11,1.0,2,1,
4,1,2,2212024,2,21,2024,1100,2024000005,2024000005,1,...,2,0,0,10,0,1,,2,1,
