In [1]:
import pandas as pd
from pathlib import Path

# Location of the merged extract, relative to the working directory.
csv_path = Path("tech", "merged3_with_puma_counties.csv")
df = pd.read_csv(csv_path)

# Normalise headers: trim surrounding whitespace, lower-case, and turn
# spaces into underscores so columns can be referenced uniformly below.
df.columns = (
    df.columns
      .str.strip()
      .str.lower()
      .str.replace(" ", "_")
)

# Quick orientation: dimensions, every column name, and the first 5 rows.
print(df.shape)
print(list(df.columns))
print(df.iloc[:5].to_string(index=False))

# Convert the analysis columns to numeric dtype when they are present;
# values that cannot be parsed become NaN (errors="coerce").
for c in ("annual_income", "hs_graduation_rate"):
    if c not in df.columns:
        continue
    df[c] = pd.to_numeric(df[c], errors="coerce")

# Ten most frequent county values (value_counts sorts descending by count).
if "county" in df.columns:
    print(df["county"].value_counts().iloc[:10])

# Summary statistics for the graduation-rate column.
if "hs_graduation_rate" in df.columns:
    print(df.loc[:, "hs_graduation_rate"].describe())

# Summarise annual income and bucket it into labelled brackets
# (stored in df["income_bin"] for the cross-tabulations further down).
if "annual_income" in df.columns:
    print(df["annual_income"].describe())
    # Outer edges are open-ended. With the previous finite edges (-1 and
    # 10**12) pd.cut returned NaN for any income <= -1 (e.g. a reported net
    # loss), silently removing those rows from every income_bin breakdown
    # even though the first label reads "≤25k".
    # Intervals are right-closed: (-inf, 25k], (25k, 50k], ..., (150k, inf).
    bins   = [float("-inf"), 25_000, 50_000, 75_000, 100_000, 150_000, float("inf")]
    labels = ["≤25k","25–50k","50–75k","75–100k","100–150k","≥150k"]
    # Rows whose income is NaN (missing or coerced) stay NaN in income_bin.
    df["income_bin"] = pd.cut(df["annual_income"], bins=bins, labels=labels)

# Per-county row count and mean graduation rate, highest mean first.
if {"county", "hs_graduation_rate"}.issubset(df.columns):
    by_county = df.groupby("county", as_index=False).agg(
        n=("hs_graduation_rate", "size"),          # rows per county
        grad_mean=("hs_graduation_rate", "mean"),  # mean rate per county
    )
    by_county = by_county.sort_values("grad_mean", ascending=False)
    print(by_county.iloc[:10].to_string(index=False))

# Share of each device_ownership value within every income bracket
# (normalize="index" makes each row sum to 1), rounded to 3 decimals.
if {"income_bin", "device_ownership"}.issubset(df.columns):
    device_ct = pd.crosstab(
        index=df["income_bin"],
        columns=df["device_ownership"],
        normalize="index",
    )
    device_ct = device_ct.round(3)
    print(device_ct)

# Share of each internet_access value within every income bracket
# (normalize="index" makes each row sum to 1), rounded to 3 decimals.
if {"income_bin", "internet_access"}.issubset(df.columns):
    net_ct = pd.crosstab(
        index=df["income_bin"],
        columns=df["internet_access"],
        normalize="index",
    )
    net_ct = net_ct.round(3)
    print(net_ct)


(62031, 303)
['rt', 'serialno', 'division', 'sporder', 'puma', 'region', 'state', 'adjinc', 'pwgtp', 'agep', 'cit', 'citwp', 'cow', 'ddrs', 'dear', 'deye', 'dout', 'dphy', 'drat', 'dratx', 'drem', 'eng', 'fer', 'gcl', 'gcm', 'gcr', 'himrks', 'hins1', 'hins2', 'hins3', 'hins4', 'hins5', 'hins6', 'hins7', 'intp', 'jwmnp', 'jwrip', 'jwtrns', 'lanx', 'mar', 'marhd', 'marhm', 'marht', 'marhw', 'marhyp', 'mig', 'mil', 'mlpa', 'mlpb', 'mlpcd', 'mlpe', 'mlpfg', 'mlph', 'mlpik', 'mlpj', 'nwab', 'nwav', 'nwla', 'nwlk', 'nwre', 'oip', 'pap', 'relshipp', 'retp', 'sch', 'schg', 'schl', 'semp', 'sex', 'ssip', 'ssp', 'wagp', 'wkhp', 'wkl', 'wkwn', 'wrk', 'yoep', 'anc', 'anc1p', 'anc2p', 'decade', 'dis', 'drivesp', 'esp', 'esr', 'fod1p', 'fod2p', 'hicov', 'hisp', 'indp', 'jwap', 'jwdp', 'lanp', 'migpuma', 'migsp', 'msp', 'naicsp', 'nativity', 'nop', 'oc', 'occp', 'paoc', 'pernp', 'pincp', 'pobp', 'povpip', 'powpuma', 'powsp', 'privcov', 'pubcov', 'qtrbir', 'rac1p', 'rac2p', 'rac3p', 'racaian', 'racasn