# Census Housing by Age and State ETL

In this notebook, data is extracted from the Census Bureau's American Community Survey 5-Year Data and loaded into a MongoDB database.

We will be using the Subject Tables, extracting variables from the DEMOGRAPHIC CHARACTERISTICS FOR OCCUPIED HOUSING UNITS group (S2502). The variables in question are percentages of Total Occupied Homes, Owner Occupied Homes, and Renter Occupied Homes by age and state over the years 2010-2019.




In [1]:
#dependencies
import pandas as pd
import numpy as np
import pymongo
from census import Census
from config import census_key
# Local alias for the key imported from config; it is handed to the Census client when the API calls are made.
api_key = census_key

### Extract and Transform

In [2]:
#transformation function to apply to every dataset 

def transform_census_age_data(census_data, year):
    """Turn raw Census API records into a DataFrame with readable age-bracket columns.

    Parameters
    ----------
    census_data
        Anything ``pd.DataFrame`` accepts; in this notebook, the list of
        per-geography dicts returned by the Census client.
    year
        Value stored in the added ``year`` column for every row.

    Returns
    -------
    pandas.DataFrame
        Columns whose name ends in ``011E`` .. ``017E`` are renamed to the
        matching age-bracket label, all other columns keep their names, and a
        ``year`` column is appended.
    """
    # The last four characters of a variable code select the age bracket.
    age_labels = {
        "011E": "<35",
        "012E": "35-44",
        "013E": "45-54",
        "014E": "55-64",
        "015E": "65-74",
        "016E": "75-84",
        "017E": ">85",
    }

    frame = pd.DataFrame(census_data)

    # Only columns with a recognised suffix are renamed.
    new_names = {
        column: age_labels[column[-4:]]
        for column in frame.columns
        if column[-4:] in age_labels
    }
    frame = frame.rename(columns=new_names)

    frame["year"] = year
    return frame

In [2]:
# API calls and transformation

# ACS 5-year vintage requested from the API; also written to the `year`
# column of every transformed frame.
ACS_YEAR = 2018

# Row codes of the age-bracket variables requested from table S2502
# (the same codes the transformation function maps to labels).
AGE_ROW_CODES = ("011", "012", "013", "014", "015", "016", "017")


def s2502_age_fields(column_group):
    """Return the field tuple for one S2502 column group.

    For ``column_group="C04"`` this is
    ``("NAME", "S2502_C04_011E", ..., "S2502_C04_017E")``.
    """
    return ("NAME",) + tuple(
        "S2502_{}_{}E".format(column_group, row) for row in AGE_ROW_CODES
    )


c = Census(api_key, year=ACS_YEAR)

# Column group C04 is used for the owner-occupied requests, C06 for the
# renter-occupied ones; each is fetched for the nation and for every state.
owner_us_occupied_data = c.acs5st.get(s2502_age_fields("C04"), {'for': 'us:*'})

renter_us_occupied_data = c.acs5st.get(s2502_age_fields("C06"), {'for': 'us:*'})

owner_state_occupied_data = c.acs5st.get(s2502_age_fields("C04"), {'for': 'state:*'})

renter_state_occupied_data = c.acs5st.get(s2502_age_fields("C06"), {'for': 'state:*'})

#dictionary to hold output: one list of transformed DataFrames per tenure type
census_extract = {
    "owner_occupied" : [],
    "renter_occupied" : []
}

#apply transformation function and append to output dictionary
# (national frame first, then the state-level frame, for each tenure type)
for tenure, raw_records in (
    ("owner_occupied", owner_us_occupied_data),
    ("owner_occupied", owner_state_occupied_data),
    ("renter_occupied", renter_us_occupied_data),
    ("renter_occupied", renter_state_occupied_data),
):
    census_extract[tenure].append(transform_census_age_data(raw_records, ACS_YEAR))


In [5]:
# Preview the first few records only; the full response holds one dict per
# geography returned by the state-level request.
owner_state_occupied_data[:5]


[{'NAME': 'Wisconsin',
  'S2502_C04_011E': 10.9,
  'S2502_C04_012E': 15.8,
  'S2502_C04_013E': 21.0,
  'S2502_C04_014E': 23.6,
  'S2502_C04_015E': 16.9,
  'S2502_C04_016E': 8.7,
  'S2502_C04_017E': 3.1,
  'state': '55'},
 {'NAME': 'Wyoming',
  'S2502_C04_011E': 13.8,
  'S2502_C04_012E': 15.5,
  'S2502_C04_013E': 18.1,
  'S2502_C04_014E': 23.7,
  'S2502_C04_015E': 17.8,
  'S2502_C04_016E': 8.2,
  'S2502_C04_017E': 2.9,
  'state': '56'},
 {'NAME': 'Puerto Rico',
  'S2502_C04_011E': 5.6,
  'S2502_C04_012E': 13.2,
  'S2502_C04_013E': 18.8,
  'S2502_C04_014E': 22.4,
  'S2502_C04_015E': 22.3,
  'S2502_C04_016E': 13.2,
  'S2502_C04_017E': 4.4,
  'state': '72'},
 {'NAME': 'Mississippi',
  'S2502_C04_011E': 10.0,
  'S2502_C04_012E': 15.2,
  'S2502_C04_013E': 19.7,
  'S2502_C04_014E': 22.8,
  'S2502_C04_015E': 18.7,
  'S2502_C04_016E': 10.3,
  'S2502_C04_017E': 3.4,
  'state': '28'},
 {'NAME': 'Missouri',
  'S2502_C04_011E': 11.6,
  'S2502_C04_012E': 15.4,
  'S2502_C04_013E': 19.5,
  'S2502_C04_

### Load

In [6]:
#establish MongoDB connection
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)
db = client.housing_db


def reload_collection(collection, records):
    """Replace the contents of ``collection`` with ``records``.

    Parameters
    ----------
    collection
        PyMongo collection to drop and refill.
    records
        Iterable of documents (dicts) to insert.

    The records are materialised before the drop, so a failure while reading
    them leaves the existing collection untouched. All documents are written
    with a single ``insert_many`` call instead of one ``insert_one`` per record.
    """
    documents = list(records)
    collection.drop()
    # insert_many rejects an empty batch, so skip the write when there is nothing to load.
    if documents:
        collection.insert_many(documents)


# NOTE(review): `census_as_dictionaries` is not defined in any cell visible in
# this notebook, so this cell raises NameError on a fresh kernel. From the
# original comments, index 0 is expected to hold the totals records, index 1
# the owner-occupied records and index 2 the renter-occupied records -- TODO
# restore the cell that builds it before running the load.

#load in totals
reload_collection(db.total_housing_by_age, census_as_dictionaries[0])

#load in owner-occupied data
reload_collection(db.owner_occupied_housing_by_age, census_as_dictionaries[1])

#load in renter-occupied data
reload_collection(db.renter_occupied_housing_by_age, census_as_dictionaries[2])