# US Census - Extraction

### Import Libraries

In [36]:
import requests
import pandas as pd


### Parameters

In [19]:
# Identifiers shared by every endpoint below. Edit these to target another
# table or geography instead of patching three URLs by hand.
TABLE_CODE = "B25040"                   # short table code, used by the `q=` parameter
TABLE_ID = f"ACSDT1Y2021.{TABLE_CODE}"  # dataset-qualified table id, used by `tid=` / `id=`
GEO_ID = "160XX00US0643000"             # geography filter, passed as `g=`

# Human-facing table page; not requested by any cell visible in this notebook.
URL = f"https://data.census.gov/table?q={TABLE_CODE}&g={GEO_ID}&tid={TABLE_ID}"
# JSON endpoint returning the table's metadata.
API_METADATA = f"https://data.census.gov/api/search/metadata/table?id={TABLE_ID}&g={GEO_ID}"
# JSON endpoint returning the table's values.
API_TABLE = f"https://data.census.gov/api/access/data/table?id={TABLE_ID}&g={GEO_ID}"

In [3]:
# Browser-like request headers sent with every call to data.census.gov.
#
# Two kinds of entries from the original browser capture are intentionally absent:
#   * "authority" / "method" / "path" / "scheme" - these are the HTTP/2
#     pseudo-headers (":authority", ":method", ...) shown by browser dev tools.
#     Copied without the colon they were only sent as unknown ordinary headers,
#     and "path" named the HTML page rather than the API endpoints.
#   * the "br" token in Accept-Encoding - the HTTP client can only decode Brotli
#     when an optional Brotli package is installed, so advertising it risks
#     receiving a body that `.json()` cannot parse.
headers = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
    "Accept-Encoding": "gzip, deflate",
    "Accept-Language": "en-US,en;q=0.9",
    "Cache-Control": "max-age=0",
    "Sec-Ch-Ua": "\"Not.A/Brand\";v=\"8\", \"Chromium\";v=\"114\", \"Google Chrome\";v=\"114\"",
    "Sec-Ch-Ua-Mobile": "?0",
    "Sec-Ch-Ua-Platform": "\"Windows\"",
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "same-origin",
    "Sec-Fetch-User": "?1",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
}

In [29]:
def get_metadata_content(API_METADATA, headers, timeout=30):
    """
    Gets metadata content from API

    Parameters
    ----------
    API_METADATA : str
        URL of the metadata endpoint.
    headers : dict
        HTTP headers sent with the request.
    timeout : float or tuple, optional
        Seconds to wait for the server before giving up (default 30).
        Without a timeout the request could block indefinitely.

    Returns
    -------
    The decoded JSON body of the response.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    """
    r = requests.get(API_METADATA, headers=headers, timeout=timeout)
    # Fail loudly on an error status instead of handing an error page
    # (or an error JSON document) to the caller as if it were metadata.
    r.raise_for_status()
    metadata_content = r.json()

    return metadata_content

In [45]:
def get_mapping_dict(metadata_content):
    """
    Builds a lookup from variable id to a readable "<label>-<metric>" name.

    The categories are read from the third entry (index 2) of the metadata's
    'dimensions' list. Each category contributes its 'label' and the ids in
    its 'item_mapping'. An id containing the letter 'E' is tagged as an
    estimate; any other id is tagged as a margin of error.
    """
    categories = metadata_content['response']['metadataContent']['dimensions'][2]['categories']

    id_to_name = {}
    for category in categories:
        category_label = category['label']
        # 'E' marks an estimate id, otherwise the id is treated as a margin of error ('M').
        id_to_name.update(
            (
                variable_id,
                f"{category_label}-Estimate" if 'E' in variable_id
                else f"{category_label}-Margin of Error",
            )
            for variable_id in category['item_mapping']
        )

    return id_to_name

In [61]:
def get_data_table(API_TABLE, headers, mapping_dict, timeout=30):
    """
    Gets data table from API and parses the data
    into a clean dataframe

    Parameters
    ----------
    API_TABLE : str
        URL of the data endpoint.
    headers : dict
        HTTP headers sent with the request.
    mapping_dict : dict
        Maps variable ids to "<label>-<metric>" strings, where metric is
        'Estimate' or 'Margin of Error' (as produced by get_mapping_dict).
    timeout : float or tuple, optional
        Seconds to wait for the server before giving up (default 30).

    Returns
    -------
    pandas.DataFrame
        One row per label with the columns 'label', 'estimate' and
        'margin of error'. Values are passed through as returned by the API.
        If none of the returned variable ids appear in mapping_dict, an empty
        frame with those three columns is returned.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    """
    r = requests.get(API_TABLE, headers=headers, timeout=timeout)
    # Surface HTTP errors here rather than failing later on a missing key.
    r.raise_for_status()
    table_content = r.json()['response']

    # Row 0 of 'data' holds the variable ids, row 1 the matching values.
    labels = table_content['data'][0]
    data = table_content['data'][1]

    raw = pd.DataFrame({"labels": labels, "data": data})

    # Keep only the variables we know how to name, then translate the ids.
    # Mapping just the 'labels' column (instead of a frame-wide replace)
    # guarantees that values in 'data' are never rewritten, and building new
    # frames avoids mutating a slice of `raw` in place.
    known = raw.loc[raw['labels'].isin(list(mapping_dict))]
    df = known.assign(labels=known['labels'].map(mapping_dict)).dropna()

    if df.empty:
        # Nothing matched: return the expected schema instead of crashing
        # while splitting an empty column.
        return pd.DataFrame(
            columns=pd.Index(['label', 'estimate', 'margin of error'], name='')
        )

    # Split 'labels' into 'label' and 'metric' on the LAST dash, so labels
    # that themselves contain a dash stay intact.
    parts = df['labels'].str.rsplit('-', n=1, expand=True)
    df = df.assign(label=parts[0], metric=parts[1])

    # Pivot to get 'Estimate' and 'Margin of Error' as separate columns
    df_pivot = df.pivot(index='label', columns='metric', values='data').reset_index()

    # Rename columns and blank out the leftover 'metric' axis name
    df_pivot = df_pivot.rename(
        columns={'Estimate': 'estimate', 'Margin of Error': 'margin of error'}
    )
    df_pivot.columns.name = ''

    return df_pivot

### Tests

In [46]:
# Fetch the metadata document for the configured table from the metadata endpoint.
metadata_content = get_metadata_content(API_METADATA=API_METADATA, headers=headers)

In [52]:
# Keep the 'dataset' section of the metadata response at hand;
# it is not referenced by any other cell visible in this notebook.
dataset_info = metadata_content['response']['metadataContent']['dataset']

In [62]:
# Build the variable-id -> "<label>-<metric>" lookup from the fetched metadata.
mapping_dict = get_mapping_dict(metadata_content=metadata_content)

In [64]:
# Download the table values and reshape them into one row per label.
df = get_data_table(API_TABLE=API_TABLE, headers=headers, mapping_dict=mapping_dict)

In [65]:
# Bare last expression: renders the resulting frame as this cell's output.
df

Unnamed: 0,label,estimate,margin of error
0,"Bottled, tank, or LP gas",1688,522
1,Coal or coke,0,218
2,Electricity,49934,3409
3,"Fuel oil, kerosene, etc.",106,128
4,No fuel used,11943,1910
5,Other fuel,207,186
6,Solar energy,708,428
7,Total:,172599,4170
8,Utility gas,107957,4059
9,Wood,56,96
