### Resources
#### https://catalog.data.gov/dataset/census-api-by-coordinates
#### https://www.programmableweb.com/category/all/apis?keyword=zip%20code
#### https://www.census.gov/geo/maps-data/data/geocoder.html
#### https://www.census.gov/data/developers/data-sets/popest-popproj.html
#### https://www.census.gov/content/dam/Census/data/developers/api-user-guide/api-guide.pdf

In [1]:
#import libraries and api keys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import csv
import os
import requests
import json
from urllib import 
#from keys.gitignore import 
#from keys import 

## Gather ZIP codes within a 20-mile radius of each city

In [233]:
# Map each city's name to the ZIP code used as the centre of its radius search
cities = {'Cleveland':44114,
          'LA': 90071,
          'Austin': 78705,
          'NYC': 10019,
          'Denver': 80202
         }
# API key for https://www.zipcodeapi.com/API
# NOTE(review): this key is committed in plain text; consider loading it from
# an untracked config file or an environment variable instead.
api = "F1Q5equ8jewDKQnEnJ5o2SYzmUj0WJr7wjbvYlHLG1xlGqrc5XNzVYYRC4RB3cT1"

# The per-city CSV files are written here; create the folder if it is missing
os.makedirs('Zipcodes', exist_ok=True)

# Loop through the cities and save the ZIP codes within 20 miles of each one
for city, center_zip in cities.items():
    # Request the radius search as CSV text, using the key defined above
    # instead of a second hard-coded copy of it
    data_1 = requests.get(
        f"https://www.zipcodeapi.com/rest/{api}/radius.csv/{center_zip}/20/mile",
        timeout=30,
    )
    # Fail here with a clear HTTP error rather than later on a malformed table
    data_1.raise_for_status()

    # Split the response text into rows and each row into comma-separated fields.
    # If the text ends with a newline, the last row is empty; it is kept so the
    # saved CSV has the same layout as before.
    text = data_1.text
    zipcode_data = [row.split(',') for row in text.split('\n')]

    # Build a DataFrame from the parsed rows
    df = pd.DataFrame(zipcode_data)
    # Name the columns. Plain assignment is used because the `inplace`
    # keyword of set_axis was removed in pandas 2.0.
    df.columns = ['zip_code', 'distance', 'city', 'state']
    # Drop the first row, which holds the header text from the response
    df = df.drop(0)
    # Convert the distance column from text to numbers
    df['distance'] = pd.to_numeric(df['distance'])
    # Sort by distance, nearest first
    df = df.sort_values(by='distance', ascending=True)
    # Save the result to a CSV file named after the city
    df.to_csv(f'Zipcodes/{city}.csv')

## Merge zip codes, ZCTA and median age data into single dataframe

In [None]:
#define city function
def _pad_code(codes):
    """Return *codes* as strings, left-padded with zeros to five characters."""
    return codes.astype(str).str.zfill(5)


def _load_city_zips(city_name):
    """Load ``Zipcodes/<city_name>.csv`` with ZIP codes as five-character strings."""
    city_df = pd.read_csv("Zipcodes/" + city_name + ".csv")
    # The first column is dropped unconditionally (presumably the index that
    # was saved along with the data -- it is not used below).
    city_df = city_df.drop(city_df.columns[0], axis=1)

    # read_csv parses ZIP codes as numbers: floats when the file contains a
    # blank row, integers otherwise, and in both cases without leading zeros.
    # Going through a numeric conversion handles both layouts, and padding
    # restores codes such as 07002 so they can match the crosswalk table.
    zip_numbers = pd.to_numeric(city_df["zip_code"], errors="coerce")
    has_zip = zip_numbers.notna()
    # Rows without a usable ZIP code could never match in the merge; drop them.
    city_df = city_df[has_zip].copy()
    city_df["zip_code"] = _pad_code(zip_numbers[has_zip].astype(int))
    return city_df


def city_age(city_name, max_rows=5):
    """Join a city's ZIP codes with their ZCTA and median-age data.

    Reads three CSV files from the ``Zipcodes`` folder: ``<city_name>.csv``,
    the ZIP-to-ZCTA crosswalk ``zip_to_zcta_2018.csv``
    (https://www.udsmapper.org/zcta-crosswalk.cfm) and the age table
    ``DEC_10_SF1_QTP1_with_ann.csv``. The city rows are inner-joined to the
    crosswalk on ``zip_code`` and then to the age table on ``zcta``; rows
    whose ``median age`` equals 0 are removed.

    Args:
        city_name: Base name of the city CSV file, e.g. ``"Austin"``.
        max_rows: Maximum number of rows to return. Defaults to 5, which
            matches the previous behaviour; pass ``None`` for every row.

    Returns:
        A DataFrame with a fresh 0-based index holding the merged rows.

    Raises:
        FileNotFoundError: If one of the three CSV files is missing.
        KeyError: If an expected column (``zip_code``, ``zcta``,
            ``median age``) is absent.
    """
    #load in csv file for the city
    city_df = _load_city_zips(city_name)

    #load in csv for ZCTA info and add back leading zeroes
    zip_df = pd.read_csv("Zipcodes/zip_to_zcta_2018.csv")
    zip_df["zip_code"] = _pad_code(zip_df["zip_code"])
    zip_df["zcta"] = _pad_code(zip_df["zcta"])

    #load in csv for median age info and add back leading zeroes
    age_df = pd.read_csv("Zipcodes/DEC_10_SF1_QTP1_with_ann.csv")
    age_df["zcta"] = _pad_code(age_df["zcta"])

    #join ZCTA data to city df, then age data to the result
    df_merge = pd.merge(city_df, zip_df, on="zip_code")
    df_final = pd.merge(df_merge, age_df, on="zcta")

    #discard rows with a median age of 0; reset_index returns a new frame,
    #so no in-place change is made on a filtered slice
    clean_df = df_final[df_final["median age"] != 0].reset_index(drop=True)

    if max_rows is None:
        return clean_df
    return clean_df.head(max_rows)

## Create Dataframe for each City

In [None]:
# Build one merged ZIP/ZCTA/median-age dataframe per city, in the order
# Austin, Cleveland, Denver, NYC, LA.
aus_df, cle_df, den_df, nyc_df, la_df = [
    city_age(name) for name in ("Austin", "Cleveland", "Denver", "NYC", "LA")
]

# Analysis