In [None]:
import os

import pandas as pd
import requests

# The Census API key is read from the environment so the credential never lives
# in the notebook or its version history. Export CENSUS_API_KEY before starting
# the kernel.
# NOTE(review): a key was previously hardcoded in this cell; treat it as
# compromised and rotate it.
API_KEY = os.environ.get("CENSUS_API_KEY")
if not API_KEY:
    raise RuntimeError(
        "CENSUS_API_KEY is not set; export your Census API key before running this cell."
    )

# Query string: the requested PUMS variables for every state, authenticated with the key.
url = (
    "https://api.census.gov/data/2023/acs/acs5/pums?"
    "get=AGEP,SEX,SCHL,OCCP,INDP,PINCP,RAC3P,WKHP,ESR,STATE"
    f"&for=state:*&key={API_KEY}"
)

# Fetch Data
# timeout=(connect, read) in seconds: without it, requests waits forever on a
# stalled connection and the cell never finishes.
response = requests.get(url, timeout=(10, 300))
print("Status:", response.status_code)
print("Content-Type:", response.headers.get("Content-Type"))

if response.status_code != 200:
    # Show the start of the body for diagnosis, then stop with a descriptive
    # error instead of a bare SystemExit so the reason appears in the traceback.
    # The URL is deliberately left out of the message because it contains the key.
    print("Error:", response.text[:500])
    raise RuntimeError(
        f"Census API request failed with HTTP status {response.status_code}"
    )

# Parsed JSON payload; the following steps turn it into a DataFrame.
data = response.json()

# Build the DataFrame: the first element of the payload supplies the column
# names, every remaining element is one record.
columns = data[0]
rows = data[1:]
df = pd.DataFrame(data=rows, columns=columns)

# Readable snake_case names for the Census variable codes.
CENSUS_TO_READABLE = {
    "AGEP": "age",
    "SEX": "gender_code",
    "SCHL": "educational_attainment",
    "OCCP": "occupation_code",
    "INDP": "industry_code",
    "PINCP": "income",
    "RAC3P": "race_code",
    "WKHP": "hours_per_week",
    "ESR": "employment_status_code",
    "STATE": "state_code",
}
df = df.rename(columns=CENSUS_TO_READABLE)

# Coerce every renamed column to a numeric dtype; values that cannot be parsed
# become NaN (errors="coerce").
# NOTE(review): numeric coercion presumably drops leading zeros from the code
# columns (occupation / industry / state) — confirm that is acceptable downstream.
num_cols = list(CENSUS_TO_READABLE.values())
for col in num_cols:
    df[col] = df[col].pipe(pd.to_numeric, errors="coerce")

# Human-readable labels for the coded columns; codes missing from a lookup map to NaN.
GENDER_LABELS = {1: "Male", 2: "Female"}
EMPLOYMENT_STATUS_LABELS = {
    1: "Civilian, employed, at work",
    2: "Civilian, employed, with a job but not at work",
    3: "Unemployed",
    4: "Armed forces, at work",
    5: "Armed forces, with a job but not at work",
    6: "Not in labor force",
}
df["gender"] = df["gender_code"].map(GENDER_LABELS)
df["employment_status"] = df["employment_status_code"].map(EMPLOYMENT_STATUS_LABELS)


Status: 200
Content-Type: application/json;charset=utf-8
   age  gender_code  educational_attainment  occupation_code  industry_code  \
0   35            1                      17           6250.0          770.0   
1   43            1                      12              NaN            NaN   
2   58            2                      18           9142.0         6190.0   
3   25            1                      12              NaN            NaN   
4   21            1                      16           5850.0         7870.0   
5   45            1                      12           7640.0         7790.0   
6   21            2                      19           4740.0         7870.0   
7   68            2                      15              NaN            NaN   
8   66            1                      16              NaN            NaN   
9   49            1                      21            110.0         8680.0   

   income  race_code  hours_per_week  employment_status_code  state_code 

In [2]:
# Preview: report how many records were loaded, then show the first ten.
print("Total rows:", df.shape[0])
df.head(n=10)

Total rows: 15912393


Unnamed: 0,age,gender_code,educational_attainment,occupation_code,industry_code,income,race_code,hours_per_week,employment_status_code,state_code,state,gender,employment_status
0,35,1,17,6250.0,770.0,63000,1,80,6,18,18,Male,Not in labor force
1,43,1,12,,,0,2,0,6,28,28,Male,Not in labor force
2,58,2,18,9142.0,6190.0,23100,2,20,3,6,6,Female,Unemployed
3,25,1,12,,,0,1,0,6,48,48,Male,Not in labor force
4,21,1,16,5850.0,7870.0,17000,1,40,1,25,25,Male,"Civilian, employed, at work"
5,45,1,12,7640.0,7790.0,7700,1,50,6,45,45,Male,Not in labor force
6,21,2,19,4740.0,7870.0,2000,10,15,1,18,18,Female,"Civilian, employed, at work"
7,68,2,15,,,16800,1,0,6,39,39,Female,Not in labor force
8,66,1,16,,,0,1,0,6,6,6,Male,Not in labor force
9,49,1,21,110.0,8680.0,0,1,0,6,4,4,Male,Not in labor force
