# US Census Dataset

In [1]:
import censusdata as cd
import pandas as pd
from tqdm import tqdm

In [2]:
state_fips_list = [
    '01','02','04','05','06','08','09','10','12','13','15','16','17','18','19','20',
    '21','22','23','24','25','26','27','28','29','30','31','32','33','34','35','36',
    '37','38','39','40','41','42','44','45','46','47','48','49','50','51','53','54',
    '55','56'
]

In [3]:
def get_state_tracts(state_fips, year=2022, dataset='acs5'):
    geo = cd.censusgeo([('state', state_fips), ('county', '*'), ('tract', '*')])
    vars = [
        'B19013_001E',   # Median household income
        'B01003_001E',   # Total population
        'B17001_002E',   # Below-poverty population
        'B15003_001E',   # Total education count
        'B15003_022E','B15003_023E','B15003_024E','B15003_025E'  # Bachelor's+
    ]
    df = cd.download(dataset, year, geo, vars).reset_index()
    df['state_fips']  = state_fips
    df['county_fips'] = df['index'].apply(lambda x: x.geo[1][1])
    df['tract_id']    = df['index'].apply(lambda x: x.geo[2][1])
    return df


In [4]:
frames = []
for s in tqdm(state_fips_list, desc="Downloading ACS data by state"):
    try:
        temp = get_state_tracts(s)
        frames.append(temp)
        print(f"✅ State {s}: {temp.shape[0]} tracts fetched")
    except Exception as e:
        print(f"⚠️ State {s} failed: {e}")

Downloading ACS data by state:   2%|█                                                   | 1/50 [00:01<01:12,  1.48s/it]

✅ State 01: 1437 tracts fetched


Downloading ACS data by state:   4%|██                                                  | 2/50 [00:02<00:46,  1.03it/s]

✅ State 02: 177 tracts fetched


Downloading ACS data by state:   6%|███                                                 | 3/50 [00:03<00:49,  1.04s/it]

✅ State 04: 1765 tracts fetched


Downloading ACS data by state:   8%|████▏                                               | 4/50 [00:03<00:41,  1.10it/s]

✅ State 05: 823 tracts fetched


Downloading ACS data by state:  10%|█████▏                                              | 5/50 [00:06<00:59,  1.33s/it]

✅ State 06: 9129 tracts fetched


Downloading ACS data by state:  12%|██████▏                                             | 6/50 [00:07<00:53,  1.22s/it]

✅ State 08: 1447 tracts fetched


Downloading ACS data by state:  14%|███████▎                                            | 7/50 [00:07<00:46,  1.07s/it]

✅ State 09: 884 tracts fetched


Downloading ACS data by state:  16%|████████▎                                           | 8/50 [00:08<00:40,  1.02it/s]

✅ State 10: 262 tracts fetched


Downloading ACS data by state:  18%|█████████▎                                          | 9/50 [00:10<00:47,  1.15s/it]

✅ State 12: 5160 tracts fetched


Downloading ACS data by state:  20%|██████████▏                                        | 10/50 [00:11<00:46,  1.15s/it]

✅ State 13: 2796 tracts fetched


Downloading ACS data by state:  22%|███████████▏                                       | 11/50 [00:12<00:40,  1.05s/it]

✅ State 15: 461 tracts fetched


Downloading ACS data by state:  24%|████████████▏                                      | 12/50 [00:12<00:37,  1.03it/s]

✅ State 16: 456 tracts fetched


Downloading ACS data by state:  26%|█████████████▎                                     | 13/50 [00:14<00:40,  1.09s/it]

✅ State 17: 3265 tracts fetched


Downloading ACS data by state:  28%|██████████████▎                                    | 14/50 [00:15<00:38,  1.08s/it]

✅ State 18: 1696 tracts fetched


Downloading ACS data by state:  30%|███████████████▎                                   | 15/50 [00:16<00:36,  1.03s/it]

✅ State 19: 896 tracts fetched


Downloading ACS data by state:  32%|████████████████▎                                  | 16/50 [00:17<00:32,  1.04it/s]

✅ State 20: 829 tracts fetched


Downloading ACS data by state:  34%|█████████████████▎                                 | 17/50 [00:17<00:31,  1.06it/s]

✅ State 21: 1306 tracts fetched


Downloading ACS data by state:  36%|██████████████████▎                                | 18/50 [00:18<00:29,  1.10it/s]

✅ State 22: 1388 tracts fetched


Downloading ACS data by state:  38%|███████████████████▍                               | 19/50 [00:19<00:26,  1.19it/s]

✅ State 23: 407 tracts fetched


Downloading ACS data by state:  40%|████████████████████▍                              | 20/50 [00:20<00:26,  1.14it/s]

✅ State 24: 1475 tracts fetched


Downloading ACS data by state:  42%|█████████████████████▍                             | 21/50 [00:21<00:26,  1.08it/s]

✅ State 25: 1620 tracts fetched


Downloading ACS data by state:  44%|██████████████████████▍                            | 22/50 [00:22<00:29,  1.05s/it]

✅ State 26: 3017 tracts fetched


Downloading ACS data by state:  46%|███████████████████████▍                           | 23/50 [00:23<00:26,  1.00it/s]

✅ State 27: 1505 tracts fetched


Downloading ACS data by state:  48%|████████████████████████▍                          | 24/50 [00:24<00:23,  1.09it/s]

✅ State 28: 878 tracts fetched


Downloading ACS data by state:  50%|█████████████████████████▌                         | 25/50 [00:25<00:22,  1.09it/s]

✅ State 29: 1654 tracts fetched


Downloading ACS data by state:  52%|██████████████████████████▌                        | 26/50 [00:25<00:20,  1.18it/s]

✅ State 30: 319 tracts fetched


Downloading ACS data by state:  54%|███████████████████████████▌                       | 27/50 [00:26<00:17,  1.29it/s]

✅ State 31: 553 tracts fetched


Downloading ACS data by state:  56%|████████████████████████████▌                      | 28/50 [00:27<00:17,  1.28it/s]

✅ State 32: 779 tracts fetched


Downloading ACS data by state:  58%|█████████████████████████████▌                     | 29/50 [00:28<00:15,  1.36it/s]

✅ State 33: 350 tracts fetched


Downloading ACS data by state:  60%|██████████████████████████████▌                    | 30/50 [00:28<00:16,  1.24it/s]

✅ State 34: 2181 tracts fetched


Downloading ACS data by state:  62%|███████████████████████████████▌                   | 31/50 [00:29<00:14,  1.28it/s]

✅ State 35: 612 tracts fetched


Downloading ACS data by state:  64%|████████████████████████████████▋                  | 32/50 [00:31<00:18,  1.02s/it]

✅ State 36: 5411 tracts fetched


Downloading ACS data by state:  66%|█████████████████████████████████▋                 | 33/50 [00:32<00:17,  1.04s/it]

✅ State 37: 2672 tracts fetched


Downloading ACS data by state:  68%|██████████████████████████████████▋                | 34/50 [00:33<00:15,  1.02it/s]

✅ State 38: 228 tracts fetched


Downloading ACS data by state:  70%|███████████████████████████████████▋               | 35/50 [00:34<00:15,  1.04s/it]

✅ State 39: 3168 tracts fetched


Downloading ACS data by state:  72%|████████████████████████████████████▋              | 36/50 [00:35<00:13,  1.03it/s]

✅ State 40: 1205 tracts fetched


Downloading ACS data by state:  74%|█████████████████████████████████████▋             | 37/50 [00:35<00:12,  1.08it/s]

✅ State 41: 1001 tracts fetched


Downloading ACS data by state:  76%|██████████████████████████████████████▊            | 38/50 [00:37<00:12,  1.06s/it]

✅ State 42: 3446 tracts fetched


Downloading ACS data by state:  78%|███████████████████████████████████████▊           | 39/50 [00:38<00:10,  1.07it/s]

✅ State 44: 250 tracts fetched


Downloading ACS data by state:  80%|████████████████████████████████████████▊          | 40/50 [00:38<00:09,  1.10it/s]

✅ State 45: 1323 tracts fetched


Downloading ACS data by state:  82%|█████████████████████████████████████████▊         | 41/50 [00:39<00:07,  1.17it/s]

✅ State 46: 242 tracts fetched


Downloading ACS data by state:  84%|██████████████████████████████████████████▊        | 42/50 [00:40<00:07,  1.13it/s]

✅ State 47: 1701 tracts fetched


Downloading ACS data by state:  86%|███████████████████████████████████████████▊       | 43/50 [00:42<00:08,  1.17s/it]

✅ State 48: 6896 tracts fetched


Downloading ACS data by state:  88%|████████████████████████████████████████████▉      | 44/50 [00:43<00:06,  1.04s/it]

✅ State 49: 716 tracts fetched


Downloading ACS data by state:  90%|█████████████████████████████████████████████▉     | 45/50 [00:43<00:04,  1.06it/s]

✅ State 50: 193 tracts fetched


Downloading ACS data by state:  92%|██████████████████████████████████████████████▉    | 46/50 [00:45<00:04,  1.01s/it]

✅ State 51: 2198 tracts fetched


Downloading ACS data by state:  94%|███████████████████████████████████████████████▉   | 47/50 [00:46<00:03,  1.03s/it]

✅ State 53: 1784 tracts fetched


Downloading ACS data by state:  96%|████████████████████████████████████████████████▉  | 48/50 [00:46<00:01,  1.05it/s]

✅ State 54: 546 tracts fetched


Downloading ACS data by state:  98%|█████████████████████████████████████████████████▉ | 49/50 [00:47<00:00,  1.08it/s]

✅ State 55: 1542 tracts fetched


Downloading ACS data by state: 100%|███████████████████████████████████████████████████| 50/50 [00:48<00:00,  1.03it/s]

✅ State 56: 160 tracts fetched





In [5]:
census_raw = pd.concat(frames, ignore_index=True)
print(f"\nTotal tracts fetched: {len(census_raw):,}")


Total tracts fetched: 84,209


In [6]:
census_raw = census_raw.rename(columns={
    'B19013_001E':'median_income',
    'B01003_001E':'population',
    'B17001_002E':'poverty_population',
    'B15003_001E':'total_education',
    'B15003_022E':'bachelor',
    'B15003_023E':'master',
    'B15003_024E':'professional',
    'B15003_025E':'doctorate'
})

census_raw['bachelor_or_higher'] = (
    census_raw[['bachelor','master','professional','doctorate']].sum(axis=1)
)
census_raw['education_rate'] = census_raw['bachelor_or_higher'] / census_raw['total_education']
census_raw['poverty_rate']   = census_raw['poverty_population'] / census_raw['population']

# Create full GEOID (state + county + tract)
census_raw['geoid'] = census_raw['state_fips'] + census_raw['county_fips'] + census_raw['tract_id']

In [7]:
census_all = census_raw[['geoid','state_fips','county_fips',
                         'median_income','population','education_rate','poverty_rate']].copy()

In [8]:
state_name_map = {
    '01':'Alabama','02':'Alaska','04':'Arizona','05':'Arkansas','06':'California',
    '08':'Colorado','09':'Connecticut','10':'Delaware','12':'Florida','13':'Georgia',
    '15':'Hawaii','16':'Idaho','17':'Illinois','18':'Indiana','19':'Iowa','20':'Kansas',
    '21':'Kentucky','22':'Louisiana','23':'Maine','24':'Maryland','25':'Massachusetts',
    '26':'Michigan','27':'Minnesota','28':'Mississippi','29':'Missouri','30':'Montana',
    '31':'Nebraska','32':'Nevada','33':'New Hampshire','34':'New Jersey','35':'New Mexico',
    '36':'New York','37':'North Carolina','38':'North Dakota','39':'Ohio','40':'Oklahoma',
    '41':'Oregon','42':'Pennsylvania','44':'Rhode Island','45':'South Carolina',
    '46':'South Dakota','47':'Tennessee','48':'Texas','49':'Utah','50':'Vermont',
    '51':'Virginia','53':'Washington','54':'West Virginia','55':'Wisconsin','56':'Wyoming'
}
census_all['state_name'] = census_all['state_fips'].map(state_name_map)


In [9]:
census_all = census_all[['geoid','state_fips','state_name','county_fips',
                         'median_income','population','education_rate','poverty_rate']]

In [10]:
census_all = census_all.dropna(subset=['median_income','population'])

In [11]:
census_all.to_csv("census_USA_2022.csv", index=False)
print(f"\n✅ Saved final ACS 2022 dataset → census_USA_2022.csv ({len(census_all):,} rows)")
print("✅ Columns:", list(census_all.columns))
print("📍 Example preview:")
print(census_all.head())


✅ Saved final ACS 2022 dataset → census_USA_2022.csv (84,209 rows)
✅ Columns: ['geoid', 'state_fips', 'state_name', 'county_fips', 'median_income', 'population', 'education_rate', 'poverty_rate']
📍 Example preview:
         geoid state_fips state_name county_fips  median_income  population  \
0  01001020100         01    Alabama         001          60563        1865   
1  01001020200         01    Alabama         001          57460        1861   
2  01001020300         01    Alabama         001          77371        3492   
3  01001020400         01    Alabama         001          73191        3987   
4  01001020501         01    Alabama         001          79953        4121   

   education_rate  poverty_rate  
0        0.233017      0.153351  
1        0.185085      0.056421  
2        0.153746      0.100802  
3        0.343841      0.102333  
4        0.395199      0.078379  


# BLS Wage Data


In [None]:
import requests
import pandas as pd
from tqdm import tqdm
import time