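# US Census - Extraction

Extracts American Community Survey table B25040 for a single geography from data.census.gov and reshapes it into a tidy dataframe with one row per category and separate estimate / margin-of-error columns.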

### Import Libraries

In [1]:
import requests
import pandas as pd

### Parameters

In [2]:
# Identifiers of the table being extracted. They are defined once so that the
# three URLs below cannot drift out of sync; change them here to target a
# different dataset, table, or geography.
BASE_URL = "https://data.census.gov"
DATASET_ID = "ACSDT1Y2021"      # dataset part of the table id
TABLE_CODE = "B25040"           # table code, also used as the `q` search term
GEO_ID = "160XX00US0643000"     # geography passed in the `g` query parameter
TABLE_ID = f"{DATASET_ID}.{TABLE_CODE}"

# Browser-facing page for the table (kept for reference).
URL = f"{BASE_URL}/table?q={TABLE_CODE}&g={GEO_ID}&tid={TABLE_ID}"
# JSON endpoint describing the table (dataset info and id -> label categories).
API_METADATA = f"{BASE_URL}/api/search/metadata/table?id={TABLE_ID}&g={GEO_ID}"
# JSON endpoint returning the table values themselves.
API_TABLE = f"{BASE_URL}/api/access/data/table?id={TABLE_ID}&g={GEO_ID}"

In [3]:
# Browser-like request headers sent with every call to data.census.gov.
#
# Two deliberate omissions compared with a raw copy from browser devtools:
#   * "authority", "method", "path" and "scheme" are HTTP/2 pseudo-headers
#     (":authority", ...) that the HTTP client derives from the URL itself;
#     sending them as ordinary headers has no meaning.
#   * "br" is not advertised in Accept-Encoding, because brotli responses can
#     only be decoded when an optional brotli package is installed.
headers = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
    "Accept-Encoding": "gzip, deflate",
    "Accept-Language": "en-US,en;q=0.9",
    "Cache-Control": "max-age=0",
    "Sec-Ch-Ua": "\"Not.A/Brand\";v=\"8\", \"Chromium\";v=\"114\", \"Google Chrome\";v=\"114\"",
    "Sec-Ch-Ua-Mobile": "?0",
    "Sec-Ch-Ua-Platform": "\"Windows\"",
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "same-origin",
    "Sec-Fetch-User": "?1",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
}

In [4]:
def get_metadata_content(API_METADATA, headers, timeout=30):
    """
    Gets metadata content from API

    Parameters
    ----------
    API_METADATA : str
        URL of the metadata endpoint.
    headers : dict
        HTTP headers sent with the request.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30), so a
        stalled connection cannot hang the notebook indefinitely.

    Returns
    -------
    The decoded JSON body of the response.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    """
    r = requests.get(API_METADATA, headers=headers, timeout=timeout)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    r.raise_for_status()

    return r.json()

In [5]:
def get_mapping_dict(metadata_content):
    """
    Gets mapping dictionary from metadata content

    Builds a dict that maps every column id listed under a category's
    'item_mapping' to "<category label>-Estimate" or
    "<category label>-Margin of Error".

    Parameters
    ----------
    metadata_content : dict
        Decoded metadata response; must contain
        ['response']['metadataContent']['dimensions'][2]['categories'], each
        category having a 'label' and an 'item_mapping' list of ids.

    Returns
    -------
    dict
        Column id -> "<label>-<metric>".
    """
    # NOTE(review): the categories are read from the third dimension (index 2);
    # this position is hard-coded — confirm it holds for other tables.
    mapping_content = metadata_content['response']['metadataContent']['dimensions'][2]['categories']
    # Note - M is for margin of error and E is for estimate
    mapping_dict = {}

    for item in mapping_content:
        label = item['label']

        for corresponding_id in item['item_mapping']:
            # Only look at the part after the last underscore: the table code
            # itself may contain an "E" (e.g. "B19013E_001M"), which must not
            # turn a margin-of-error id into an estimate.
            suffix = corresponding_id.rsplit('_', 1)[-1]
            metric = "Estimate" if 'E' in suffix else "Margin of Error"
            mapping_dict[corresponding_id] = f"{label}-{metric}"

    return mapping_dict

In [6]:
def _pivot_estimates(raw, mapping_dict):
    """
    Turns the raw two-column frame ('labels', 'data') into one row per
    category label with 'estimate' and 'margin of error' columns.
    """
    # Keep only the mapped ids and translate them to "<label>-<metric>".
    # Only the 'labels' column is translated, so a data value can never be
    # rewritten by accident; .assign() returns new frames, which avoids
    # chained-assignment warnings on the filtered slice.
    selected = (
        raw.loc[raw['labels'].isin(mapping_dict.keys())]
        .assign(labels=lambda frame: frame['labels'].map(mapping_dict))
        .dropna()
    )
    if selected.empty:
        raise ValueError("No data column matched the mapping dictionary")

    # Split 'labels' into 'label' and 'metric' on the LAST dash, because the
    # category label itself may contain dashes.
    parts = selected['labels'].str.rsplit('-', n=1, expand=True)
    tidy = selected.assign(label=parts[0], metric=parts[1])

    # Pivot to get 'Estimate' and 'Margin of Error' as separate columns
    df_pivot = tidy.pivot(index='label', columns='metric', values='data').reset_index()
    df_pivot = df_pivot.rename(columns={'Estimate': 'estimate', 'Margin of Error': 'margin of error'})
    # Blank out the columns-axis name left behind by the pivot.
    df_pivot.columns.name = ''

    return df_pivot


def get_data_table(API_TABLE, headers, mapping_dict, timeout=30):
    """
    Gets data table from API and parses the data
    into a clean dataframe

    Parameters
    ----------
    API_TABLE : str
        URL of the data endpoint. The JSON body must hold, under
        ['response']['data'], a list whose first element is the list of
        column ids and whose second element is the matching list of values.
    headers : dict
        HTTP headers sent with the request.
    mapping_dict : dict
        Column id -> "<label>-Estimate" / "<label>-Margin of Error".
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).

    Returns
    -------
    pandas.DataFrame
        Columns 'label', 'estimate', 'margin of error'; one row per label.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    ValueError
        If none of the returned columns is present in mapping_dict.

    Side effect: prints the value of the 'NAME' column as "Location: ...".
    """
    r = requests.get(API_TABLE, headers=headers, timeout=timeout)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    r.raise_for_status()
    table_content = r.json()['response']

    labels = table_content['data'][0]
    data = table_content['data'][1]
    raw = pd.DataFrame({"labels": labels, "data": data})

    # Report the geography name; tolerate a response without a NAME column.
    name_values = raw.loc[raw['labels'] == 'NAME', 'data']
    location = name_values.iloc[0] if not name_values.empty else None
    print(f"Location: {location}")

    return _pivot_estimates(raw, mapping_dict)

In [7]:
def complete_df(df, dataset_info):
    """
    Adds the dataset description to every row of the dataframe.

    The columns 'year', 'name', 'program' and 'subprogram' are filled from
    the 'vintage', 'name', 'program' and 'subProgram' entries of
    dataset_info. The dataframe is modified in place and also returned.
    """
    # (column added to df, key read from dataset_info), in insertion order
    column_sources = (
        ('year', 'vintage'),
        ('name', 'name'),
        ('program', 'program'),
        ('subprogram', 'subProgram'),
    )
    for column, info_key in column_sources:
        df[column] = dataset_info[info_key]

    return df

### Tests

In [8]:
# One metadata request supplies both the dataset description and the
# column-id -> label mapping.
metadata_content = get_metadata_content(API_METADATA, headers)
dataset_info = metadata_content['response']['metadataContent']['dataset']
mapping_dict = get_mapping_dict(metadata_content)

# Download and reshape the table, then tag every row with the dataset info.
df = complete_df(
    get_data_table(API_TABLE, headers, mapping_dict),
    dataset_info,
)

Location: Long Beach city, California


In [9]:
# Display the assembled table as the cell output.
df

Unnamed: 0,label,estimate,margin of error,year,name,program,subprogram
0,"Bottled, tank, or LP gas",1688,522,2021,ACS 1-Year Estimates Detailed Tables,American Community Survey,1-Year Estimates
1,Coal or coke,0,218,2021,ACS 1-Year Estimates Detailed Tables,American Community Survey,1-Year Estimates
2,Electricity,49934,3409,2021,ACS 1-Year Estimates Detailed Tables,American Community Survey,1-Year Estimates
3,"Fuel oil, kerosene, etc.",106,128,2021,ACS 1-Year Estimates Detailed Tables,American Community Survey,1-Year Estimates
4,No fuel used,11943,1910,2021,ACS 1-Year Estimates Detailed Tables,American Community Survey,1-Year Estimates
5,Other fuel,207,186,2021,ACS 1-Year Estimates Detailed Tables,American Community Survey,1-Year Estimates
6,Solar energy,708,428,2021,ACS 1-Year Estimates Detailed Tables,American Community Survey,1-Year Estimates
7,Total:,172599,4170,2021,ACS 1-Year Estimates Detailed Tables,American Community Survey,1-Year Estimates
8,Utility gas,107957,4059,2021,ACS 1-Year Estimates Detailed Tables,American Community Survey,1-Year Estimates
9,Wood,56,96,2021,ACS 1-Year Estimates Detailed Tables,American Community Survey,1-Year Estimates


### Testing Class

In [10]:
from USCensus import USCensus

In [19]:
# Run the extraction through the USCensus class imported above.
# NOTE(review): 2018 is presumably the survey year to fetch (the output below
# reports year 2018) — confirm against the USCensus constructor.
census_scraper = USCensus(2018)
census_scraper.extract_data()

Obtained metadata content
Obtained mapping dictionary
Obtained data table
Completed dataframe


In [20]:
# Display the scraper's parsed_data attribute (presumably populated by
# extract_data() — confirm in USCensus).
census_scraper.parsed_data

Unnamed: 0,label,estimate,margin of error,year,name,program,subprogram,location
0,"Bottled, tank, or LP gas",2062,594,2018,ACS 1-Year Estimates Detailed Tables,American Community Survey,1-Year Estimates,"Long Beach city, California"
1,Coal or coke,0,211,2018,ACS 1-Year Estimates Detailed Tables,American Community Survey,1-Year Estimates,"Long Beach city, California"
2,Electricity,43003,2771,2018,ACS 1-Year Estimates Detailed Tables,American Community Survey,1-Year Estimates,"Long Beach city, California"
3,"Fuel oil, kerosene, etc.",46,76,2018,ACS 1-Year Estimates Detailed Tables,American Community Survey,1-Year Estimates,"Long Beach city, California"
4,No fuel used,11943,1872,2018,ACS 1-Year Estimates Detailed Tables,American Community Survey,1-Year Estimates,"Long Beach city, California"
5,Other fuel,66,109,2018,ACS 1-Year Estimates Detailed Tables,American Community Survey,1-Year Estimates,"Long Beach city, California"
6,Solar energy,558,318,2018,ACS 1-Year Estimates Detailed Tables,American Community Survey,1-Year Estimates,"Long Beach city, California"
7,Total:,173432,4300,2018,ACS 1-Year Estimates Detailed Tables,American Community Survey,1-Year Estimates,"Long Beach city, California"
8,Utility gas,115574,4820,2018,ACS 1-Year Estimates Detailed Tables,American Community Survey,1-Year Estimates,"Long Beach city, California"
9,Wood,180,220,2018,ACS 1-Year Estimates Detailed Tables,American Community Survey,1-Year Estimates,"Long Beach city, California"
