In [1]:
# Dependencies
import pandas as pd
import requests
from census import Census
import json

# Census & gmaps API Keys
from config import (api_key,gkey)

# Google Geocoding endpoint; query parameters are appended after the trailing '?'
base_url = 'https://maps.googleapis.com/maps/api/geocode/json?'

# Destination of the cleaned city table written at the end of the notebook
csv_path = '../Resources/census_city.csv'

# Census API client pinned to the 2018 data year
c = Census(api_key, year=2018)

In [2]:
# Get Census Population Data for cities
# (requests NAME and B01003_001E for every Census "place")
census_data = c.acs5.get(("NAME", "B01003_001E"), {'for': 'place:*'})

# Convert to DataFrame
census_df = pd.DataFrame(census_data)

# Rename the raw Census field names to readable column names
census_df = census_df.rename(columns={"B01003_001E": "Population",
                                      "NAME": "City", "place": "Place"})

# The Census API delivers values as JSON strings and not every release of the
# `census` wrapper casts them. Force a numeric dtype so that later numeric
# comparisons on Population never compare strings against integers.
census_df["Population"] = pd.to_numeric(census_df["Population"])

print(f'There are {len(census_df)} cities in the census_df dataset.')

There are 29573 cities in the census_df dataset.


In [3]:
# keep only the places whose population exceeds 500000
big_city_df = census_df[census_df['Population'].gt(500000)]

print(f'There are {len(big_city_df)} cities in the big city dataset.')
big_city_df.head()

There are 34 cities in the big city dataset.


Unnamed: 0,City,Population,state,Place
2838,"Las Vegas city, Nevada",626637.0,32,40000
3411,"Albuquerque city, New Mexico",559202.0,35,2000
4321,"Charlotte city, North Carolina",841611.0,37,12000
4439,"New York city, New York",8443713.0,36,51000
6654,"Columbus city, Ohio",867628.0,39,18000


In [4]:
# Create a df with City and State values separated.
#
# Census place names have the form "<name> <designation>, <state>", e.g.
# "Las Vegas city, Nevada". Splitting on the literal " city," only works when the
# designation is exactly "city": names such as "... city (balance), <state>" or
# "... metropolitan government (balance), <state>" never match, which left the
# state empty and produced City_State values ending in ",None".
# Split on the LAST comma instead, then strip the trailing designation.
place_suffix = (
    r"\s+(?:city|town|village|borough|municipality|CDP"
    r"|(?:metropolitan|metro|consolidated|unified) government|urban county)"
    r"(?:\s+\(balance\))?$"
)
City_State = big_city_df["City"].str.rsplit(",", n=1, expand=True)
# column 0 -> place name without its designation ("Oklahoma City city" -> "Oklahoma City")
City_State[0] = City_State[0].str.replace(place_suffix, "", regex=True).str.strip()
# column 1 -> state name without the space that followed the comma
City_State[1] = City_State[1].str.strip()

# merge the cleaned columns into the big city df (both frames share the same index);
# rename() returns a copy, so City_State itself keeps its 0/1 column labels
city_merge_df = pd.merge(
    big_city_df,
    City_State.rename(columns={0: 'Clean_City', 1: 'Clean_State'}),
    left_index=True,
    right_index=True,
)

# create a City_State column formatted as "City, State"
city_merge_df['City_State'] = city_merge_df['Clean_City'] + ', ' + city_merge_df['Clean_State']

# create columns for lat and lng; start as NaN so the columns stay numeric and a
# city that cannot be geocoded is visibly missing instead of an empty string
city_merge_df["Lat"] = float("nan")
city_merge_df["Lng"] = float("nan")

# loop through city merge df and set lat and lng for each record
# (one HTTP request per city, so this takes several seconds)
for index, row in city_merge_df.iterrows():

    # Look the address up by column name: positional row[0] on a label-indexed
    # Series is deprecated in pandas. Passing the values through `params` lets
    # requests URL-encode the spaces and commas in the place name.
    query = {"address": row["City"], "key": gkey}
    response = requests.get(base_url, params=query, timeout=10).json()

    # An unknown address or a rejected request comes back with no results;
    # report it and keep going rather than aborting the loop with IndexError.
    results = response.get("results") or []
    if not results:
        print(f"No geocoding result for {row['City']!r} (status: {response.get('status')})")
        continue

    location = results[0]["geometry"]["location"]
    city_merge_df.loc[index, "Lat"] = location["lat"]
    city_merge_df.loc[index, "Lng"] = location["lng"]

# create final city df (copy so later edits never write through to city_merge_df)
final_city_df = city_merge_df[['City_State','Clean_City','Population','Lat','Lng']].copy()

# show the first rows
final_city_df.head()

Unnamed: 0,City_State,Clean_City,Population,Lat,Lng
2838,"Las Vegas, Nevada",Las Vegas,626637.0,36.1699,-115.14
3411,"Albuquerque, New Mexico",Albuquerque,559202.0,35.0844,-106.65
4321,"Charlotte, North Carolina",Charlotte,841611.0,35.2271,-80.8431
4439,"New York, New York",New York,8443713.0,40.7128,-74.006
6654,"Columbus, Ohio",Columbus,867628.0,39.9612,-82.9988


In [5]:
# save the final city table as a csv file in the Resources folder
final_city_df.to_csv(path_or_buf=csv_path)