# Data USA API Pulls - Statewide

To pull the median property value, median income, and age breakdown for every small town in Montana, I need to query the Data USA API for each town by its unique identifier. 

The unique identifiers for each "place" in Data USA's database are stored in the table on this page: https://datausa.io/about/classifications/Geography/Place

I used a VLOOKUP in Excel to match the IDs from the API to the towns for which I pulled listings (those with populations under 2,500). 

In [1]:
# imports
import requests
import pandas as pd
from pandas.io.json import json_normalize
import csv
import json          

In [2]:
# Load the lookup table that pairs each town name with its Data USA place ID
towns = pd.read_csv(filepath_or_buffer='placeIDs.towns.csv')
towns.head(n=5)

Unnamed: 0,Town,ID Look up
0,"East Missoula, MT",16000US3023200
1,"Conrad, MT",16000US3017275
2,"Red Lodge, MT",16000US3061525
3,"Pablo, MT",16000US3056350
4,"Colstrip, MT",16000US3016525


In [3]:
# Extract the place-ID column as a plain Python list to loop over,
# then show how many IDs there are
IDs = towns.loc[:, 'ID Look up'].tolist()
len(IDs)

273

In [None]:
# Median home values (and their margin of error) for every town.
#
# NOTE: not all towns are officially "census-designated areas", so some are
# not available from the API; those towns end up with no entry in the dict.

home_values = {}

for ID in IDs:

    # Build the query for this place ID, restricted to the latest year
    url = f'https://datausa.io/api/data?measure=Property%20Value,Property%20Value%20Moe&Geography={ID}&year=latest'

    # Call the API. A failed request or an unparseable body is reported and
    # the town is skipped, so one bad ID cannot abort the whole pull.
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        results = response.json()
    except (requests.RequestException, ValueError):
        print(f"error for {ID}")
        continue

    # 'data' holds the rows returned for this ID; treat a missing or empty
    # list as "nothing available" instead of raising.
    rows = results.get('data') or []
    if rows:
        # If several rows come back, the last one wins
        home_values[ID] = rows[-1]
 

In [None]:
# Turn the {place ID: row} dict into a frame (one column per ID), then
# transpose it so that each town becomes a row
home_values = pd.DataFrame(data=home_values)
median_home_values = home_values.transpose()

median_home_values.head(n=5)

In [None]:
# Write the per-town home values to disk (the place-ID index is included)
median_home_values.to_csv(path_or_buf='statewide_median_home_values.csv')

In [None]:
# median household income values collected below

income_values = {}

for ID in IDs :
    
    # identify the URL for each unique ID
    # this specifies the ID and that I only want the latest year
    url = (f'https://datausa.io/api/data?measure=Household%20Income%20by%20Race&Geography={ID}&year=latest')
    
    # call API
    results = requests.get(url).json()
    
    items = []
    
    for item in results['data'] :
        income_info = item
        income_values[ID] = income_info

In [None]:
# Turn the {place ID: row} dict into a frame (one column per ID), then
# transpose it so that each town becomes a row
income_values = pd.DataFrame(data=income_values)
median_income_values = income_values.transpose()

median_income_values.head(n=5)

In [None]:
# Write the per-town income values to disk (the place-ID index is included)
median_income_values.to_csv(path_or_buf='statewide_median_income_values.csv')

In [4]:
# The code below grabs the age distribution of all towns.
#
# NOTE(review): the query drills down by Age, so the API can return several
# rows per town (one per age bracket), but only ONE row is stored per ID --
# the last one returned. If the full distribution is wanted, every row needs
# to be kept (e.g. keyed by ID and age bracket); left as-is here so the
# {ID: row} shape that later cells rely on does not change.

age = {}

for ID in IDs:

    # Build the query for this place ID, restricted to the latest year
    url = f'https://datausa.io/api/data?Geography={ID}&measures=Birthplace&drilldowns=Age&year=latest'

    # Call the API. On failure, report the ID and move on to the next town.
    # Without the `continue`, the loop would fall through and store the
    # PREVIOUS town's response under this ID (or raise NameError if the
    # very first call failed).
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        results = response.json()
    except (requests.RequestException, ValueError):
        print(f"error for {ID}")
        continue

    # 'data' holds the rows returned for this ID; treat a missing or empty
    # list as "nothing available" instead of raising.
    rows = results.get('data') or []
    if rows:
        # Several rows may come back; the last one wins (see NOTE above)
        age[ID] = rows[-1]


error for 16000US3003475


In [5]:
# Turn the {place ID: row} dict into a frame (one column per ID), then
# transpose it so that each town becomes a row
ages = pd.DataFrame(data=age)
age_data = ages.transpose()

age_data.head(n=5)

Unnamed: 0,ID Age,Age,ID Year,Year,Birthplace,Geography,ID Geography,Slug Geography
16000US3023200,10,75 Years & Over,2019,2019,110,"East Missoula, MT",16000US3023200,east-missoula-mt
16000US3017275,10,75 Years & Over,2019,2019,304,"Conrad, MT",16000US3017275,conrad-mt
16000US3061525,10,75 Years & Over,2019,2019,287,"Red Lodge, MT",16000US3061525,red-lodge-mt
16000US3056350,10,75 Years & Over,2019,2019,92,"Pablo, MT",16000US3056350,pablo-mt
16000US3016525,10,75 Years & Over,2019,2019,20,"Colstrip, MT",16000US3016525,colstrip-mt


In [None]:
# Write the per-town age rows to disk (the place-ID index is included)
age_data.to_csv(path_or_buf='statewide_age_data.csv')

In [6]:
# The code below pulls population data for all towns.
# Towns the API has no data for end up with no entry in the dict.

population = {}

for ID in IDs:

    # Build the query for this place ID, restricted to the latest year
    url = f'https://datausa.io/api/data?Geography={ID}&measure=Population&year=latest'

    # Call the API. On failure, report the ID and move on to the next town.
    # Without the `continue`, the loop would fall through and store the
    # PREVIOUS town's response under this ID (or raise NameError if the
    # very first call failed).
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        results = response.json()
    except (requests.RequestException, ValueError):
        print(f"error for {ID}")
        continue

    # 'data' holds the rows returned for this ID; treat a missing or empty
    # list as "nothing available" instead of raising.
    rows = results.get('data') or []
    if rows:
        # If several rows come back, the last one wins
        population[ID] = rows[-1]

In [7]:
# Turn the {place ID: row} dict into a frame (one column per ID), then
# transpose it so that each town becomes a row
populations = pd.DataFrame(data=population)
population_data = populations.transpose()

population_data.head(n=5)

Unnamed: 0,ID Year,Year,Population,Geography,ID Geography,Slug Geography
16000US3023200,2019,2019,2111,"East Missoula, MT",16000US3023200,east-missoula-mt
16000US3017275,2019,2019,2633,"Conrad, MT",16000US3017275,conrad-mt
16000US3061525,2019,2019,2212,"Red Lodge, MT",16000US3061525,red-lodge-mt
16000US3056350,2019,2019,2274,"Pablo, MT",16000US3056350,pablo-mt
16000US3016525,2019,2019,2448,"Colstrip, MT",16000US3016525,colstrip-mt


In [8]:
# Write the per-town population rows to disk (the place-ID index is included)
population_data.to_csv(path_or_buf='statewide_population_data.csv')