In [59]:
import os
import requests
import pathlib
import getpass
import pandas as pd

# Login name of the OS user running this notebook.
user = getpass.getuser()

In [60]:
# Project folder inside the Box-Box cloud-storage directory of the current
# user's home. Path.home() resolves the real home directory instead of
# assuming it is /Users/<login name>.
work_dir = (
    pathlib.Path.home()
    / "Library"
    / "CloudStorage"
    / "Box-Box"
    / "DataViz Projects"
    / "Bay_Area_Census_Website"
    / "census_decennial_download"
)
# Destination CSV for the merged 2020 race and age/sex table.
out_file = work_dir / "census_2020_race_sex_age_jc.csv"

In [61]:
def fetch_census_data(url, timeout=30):
    """Download a Census API table and return it as a DataFrame.

    The response body is expected to be a JSON list of lists whose first
    element holds the column headers and whose remaining elements are the
    data rows. Values are kept exactly as decoded from the JSON; no dtype
    conversion is applied.

    Parameters
    ----------
    url : str
        Fully formed API request URL.
    timeout : float or tuple, optional
        Seconds to wait for the server, forwarded to ``requests.get``.
        Without a timeout a stalled connection would block the notebook
        indefinitely.

    Returns
    -------
    pandas.DataFrame or None
        One row per data record with the header row as column names, or
        ``None`` (after printing a message) when the HTTP status code is
        not 200 or the payload is empty.
    """
    response = requests.get(url, timeout=timeout)

    # Guard clause: anything other than 200 is reported and signalled with None.
    if response.status_code != 200:
        print(f"Failed to retrieve data. HTTP Status code: {response.status_code}")
        return None

    data = response.json()

    # An empty payload has no header row, so data[0] would raise IndexError.
    if not data:
        print("Failed to retrieve data. The response contained no rows.")
        return None

    # First element is the header row; everything after it is data.
    columns = data[0]
    rows = data[1:]
    return pd.DataFrame(rows, columns=columns)

In [62]:
# Both requests target the same nine counties, so the ucgid list is written
# once and shared between the two endpoint URLs.
_bay_area_ucgids = ",".join(
    [
        "0500000US06001",  # Alameda
        "0500000US06013",  # Contra Costa
        "0500000US06041",  # Marin
        "0500000US06055",  # Napa
        "0500000US06075",  # San Francisco
        "0500000US06081",  # San Mateo
        "0500000US06085",  # Santa Clara
        "0500000US06095",  # Solano
        "0500000US06097",  # Sonoma
    ]
)
_decennial_2020_root = "https://api.census.gov/data/2020/dec"

# Group P2 from the "pl" dataset (race / Hispanic origin columns).
race_url = f"{_decennial_2020_root}/pl?get=group(P2)&ucgid={_bay_area_ucgids}"

# Group P12 from the "dhc" dataset (sex by age columns).
age_url = f"{_decennial_2020_root}/dhc?get=group(P12)&ucgid={_bay_area_ucgids}"


In [63]:
# Download both tables. fetch_census_data returns None when a request fails.
race_df = fetch_census_data(race_url)
age_df = fetch_census_data(age_url)

# Stop here with a clear message instead of letting a later cell fail with an
# AttributeError on None.
if race_df is None or age_df is None:
    raise RuntimeError(
        "Census API download failed "
        f"(race table ok: {race_df is not None}, age table ok: {age_df is not None})"
    )

In [64]:
# Readable labels for the P2 columns, listed in variable order: the label at
# 1-based position i belongs to code P2_{i:03d}N (P2_001N through P2_011N).
race_labels = [
    'Race Total Population',
    'Hispanic or Latino',
    'Not Hispanic or Latino',
    'Population of One Race',
    'White',
    'Black or African American',
    'American Indian and Alaska Native',
    'Asian',
    'Native Hawaiian and Other Pacific Islander',
    'Some Other Race',
    'Two or More Races',
]
race_rename_dict = {
    f"P2_{position:03d}N": label
    for position, label in enumerate(race_labels, start=1)
}
# Swap the P2 variable codes for readable labels; other columns keep their names.
race_df = race_df.rename(columns=race_rename_dict)

In [65]:
# Age brackets of the P12 group, in variable order. The same 23 brackets are
# repeated for each sex.
age_brackets = [
    "Under 5",
    "5 to 9",
    "10 to 14",
    "15 to 17",
    "18 to 19",
    "20",
    "21",
    "22 to 24",
    "25 to 29",
    "30 to 34",
    "35 to 39",
    "40 to 44",
    "45 to 49",
    "50 to 54",
    "55 to 59",
    "60 to 61",
    "62 to 64",
    "65 to 66",
    "67 to 69",
    "70 to 74",
    "75 to 79",
    "80 to 84",
    "85 and over",
]

# Column layout: overall total, then for each sex its subtotal followed by the
# per-bracket counts (P12_001N, P12_002N-P12_025N male, P12_026N-P12_049N female).
age_labels = ["Age Total Population"] + [
    label
    for sex in ("Male", "Female")
    for label in (f"Total {sex}", *(f"{sex} {bracket}" for bracket in age_brackets))
]

age_rename_dict = {
    f"P12_{position:03d}N": label
    for position, label in enumerate(age_labels, start=1)
}
# Swap the P12 variable codes for readable labels; other columns keep their names.
age_df = age_df.rename(columns=age_rename_dict)

In [66]:
# Columns to keep from the race table: the two identifier columns first,
# then every renamed P2 column in dictionary order.
race_out = ["GEO_ID", "NAME", *race_rename_dict.values()]

race_df[race_out]

Unnamed: 0,GEO_ID,NAME,Race Total Population,Hispanic or Latino,Not Hispanic or Latino,Population of One Race,White,Black or African American,American Indian and Alaska Native,Asian,Native Hawaiian and Other Pacific Islander,Some Other Race,Two or More Races
0,0500000US06001,"Alameda County, California",1682353,393749,1288604,1200067,472277,159499,4131,540511,13209,10440,88537
1,0500000US06013,"Contra Costa County, California",1165927,314900,851027,784574,455421,97994,2553,214520,5720,8366,66453
2,0500000US06041,"Marin County, California",262321,49410,212911,198496,173149,6120,555,16175,457,2040,14415
3,0500000US06055,"Napa County, California",138019,48829,89190,83462,68909,2300,507,10520,316,910,5728
4,0500000US06075,"San Francisco County, California",873965,136761,737204,691758,341306,45071,1570,294220,3244,6347,45446
5,0500000US06081,"San Mateo County, California",764442,191386,573056,534087,275902,14701,1021,227783,8840,5840,38969
6,0500000US06085,"Santa Clara County, California",1936259,487357,1448902,1370635,555708,42148,3240,753399,5945,10195,78267
7,0500000US06095,"Solano County, California",453491,128155,325336,294516,155125,60051,1624,70953,3775,2988,30820
8,0500000US06097,"Sonoma County, California",488863,141438,347425,322826,285792,7125,3053,22239,1708,2909,24599


In [67]:
# Columns to keep from the age table: GEO_ID followed by every renamed P12
# column in dictionary order. NAME is not selected here (the race selection
# already includes it) - presumably to avoid a duplicate NAME column when the
# two tables are joined.
age_out = ["GEO_ID", *age_rename_dict.values()]

age_df[age_out]

Unnamed: 0,GEO_ID,Age Total Population,Total Male,Male Under 5,Male 5 to 9,Male 10 to 14,Male 15 to 17,Male 18 to 19,Male 20,Male 21,...,Female 50 to 54,Female 55 to 59,Female 60 to 61,Female 62 to 64,Female 65 to 66,Female 67 to 69,Female 70 to 74,Female 75 to 79,Female 80 to 84,Female 85 and over
0,0500000US06001,1682353,824426,45635,49200,51397,29725,22513,11407,11379,...,53682,53906,20296,29019,17995,24177,34736,23072,15328,18428
1,0500000US06013,1165927,568066,31538,36717,40960,25276,15044,7347,6671,...,40687,41410,15991,22546,13900,18985,28511,19264,12612,14456
2,0500000US06041,262321,128593,5933,7218,8577,5422,2930,1280,1201,...,10380,9936,3934,5536,3907,5579,8742,6103,3882,4271
3,0500000US06055,138019,67777,3129,3757,4384,2811,1725,793,820,...,4718,4975,2000,2856,1864,2701,3867,2806,1916,2451
4,0500000US06075,873965,446144,17726,15530,15309,9029,6362,3739,3628,...,25034,24054,9557,13810,9193,12901,19476,12526,9919,12947
5,0500000US06081,764442,377546,20593,21931,22988,13734,8376,4118,3846,...,26428,26381,10014,14167,8963,11874,17752,12428,8897,10763
6,0500000US06085,1936259,975313,52488,57103,61741,37671,24802,12093,11031,...,64880,61227,22923,31337,19017,24903,35506,26547,19209,22196
7,0500000US06095,453491,224784,12794,13696,15020,8866,5995,2956,3116,...,14476,16079,6583,9384,5998,7794,11057,6937,4514,4911
8,0500000US06097,488863,238535,11542,13354,14859,9099,6069,2892,2817,...,15633,17904,7431,11107,7449,10514,15649,9918,5942,7058


In [68]:
# Join the race and age/sex columns on GEO_ID. validate="one_to_one" makes
# pandas raise MergeError if GEO_ID is duplicated on either side instead of
# silently multiplying rows.
final_df = pd.merge(
    race_df[race_out],
    age_df[age_out],
    on="GEO_ID",
    validate="one_to_one",
)

# The default inner join drops any GEO_ID missing from one side; surface that
# instead of writing an incomplete file.
if not (len(final_df) == len(race_df) == len(age_df)):
    raise ValueError(
        "Race and age tables do not cover the same GEO_IDs: "
        f"{len(race_df)} race rows, {len(age_df)} age rows, "
        f"{len(final_df)} rows after the join"
    )

In [69]:
# Write the joined table to out_file; index=False leaves the row index out of the CSV.
final_df.to_csv(path_or_buf=out_file, index=False)