In [None]:
# Dependencies
import numpy as np
import pandas as pd
import requests
import pymongo
import json
from tqdm import tqdm
from sqlalchemy import create_engine
from census import Census

# Google API Key
from config import gkey

# Census API Key
from config import census_key
# Census client created with year=2017; the ACS 5-year query below goes through `c.acs5`.
c = Census(census_key, year=2017)

In [None]:
# Pull ACS 5-year estimates for every ZIP Code Tabulation Area (ZCTA).
# Requested variables (renamed to readable column names below):
#   B19013_001E -> Household Income     B01003_001E -> Population
#   B01002_001E -> Median Age           B19301_001E -> Per Capita Income
#   B17001_002E -> Poverty Count        B23025_005E -> Unemployment Count
census_data = c.acs5.get(("NAME", "B19013_001E", "B01003_001E", "B01002_001E",
                          "B19301_001E",
                          "B17001_002E",
                          "B23025_005E",
                         ), {'for': 'zip code tabulation area:*'})

# Convert the list of per-ZCTA records to a DataFrame
census_pd = pd.DataFrame(census_data)

# Rename the raw ACS variable codes to readable column names
census_pd = census_pd.rename(columns={"B01003_001E": "Population",
                                      "B01002_001E": "Median Age",
                                      "B19013_001E": "Household Income",
                                      "B19301_001E": "Per Capita Income",
                                      "B17001_002E": "Poverty Count",
                                      "B23025_005E": "Unemployment Count",
                                      "NAME": "Name",
                                      "zip code tabulation area": "Zipcode"})

# A median age or an income can never be negative. The Census API reports
# estimates it could not compute as large negative placeholder values
# (e.g. -666666666), so treat any negative value in these columns as missing
# instead of letting it flow into the CSV / database as if it were real data.
for estimate_column in ("Median Age", "Household Income", "Per Capita Income"):
    estimate = pd.to_numeric(census_pd[estimate_column], errors="coerce")
    census_pd[estimate_column] = estimate.where(estimate >= 0)

# Shared denominator for both rates. ZCTAs with zero population have no
# meaningful rate: masking the zero makes the result NaN instead of inf.
population = census_pd["Population"].astype(int)
rate_denominator = population.where(population > 0)

# Add in Poverty Rate (Poverty Count / Population), as a percentage
census_pd["Poverty Rate"] = (
    100 * census_pd["Poverty Count"].astype(int) / rate_denominator
)

# Add in Unemployment Rate (Unemployment Count / Population), as a percentage
census_pd["Unemployment Rate"] = (
    100 * census_pd["Unemployment Count"].astype(int) / rate_denominator
)

# Data set snapshot: row count, then the first rows
print(len(census_pd))
census_pd.head()

In [None]:
# Clean the data: keep the first row for each Zipcode and index the frame by Zipcode.
# The guard keeps the cell re-runnable: once Zipcode has been moved into the
# index it is no longer a column, and repeating the step would raise a KeyError.
if "Zipcode" in census_pd.columns:
    census_pd = census_pd.drop_duplicates("Zipcode").set_index("Zipcode")

# info() prints its summary immediately; head() comes last because only the
# final expression of a cell is rendered.
census_pd.info()
census_pd.head()

In [None]:
# Move Zipcode from the index back into a regular column; the following
# cells work on census_df.
census_df = census_pd.reset_index(drop=False)

# Preview the flattened frame
census_df.head()

In [None]:
# Cache the census pull on disk so the API does not have to be queried again.
# UTF-8 is requested explicitly to avoid encoding issues when the file is read back.
census_df.to_csv(
    path_or_buf="Resources/census_data.csv",
    index=False,
    encoding="utf-8",
)

In [None]:
# Print the column dtypes and non-null counts of the frame that was just saved
census_df.info()

In [None]:
# Add placeholder columns for the geocoded coordinates.
# Every row starts with an empty string in both columns.
for coordinate_column in ("Lat", "Lng"):
    census_df[coordinate_column] = ""

census_df.head()

In [None]:
# Number of rows in census_df; the geocoding loop below makes one request per row
len(census_df)

In [None]:
# Geocode every Zipcode with the Google Geocoding API and store the
# coordinates of the first match in the Lat / Lng columns.
GEOCODE_URL = "https://maps.googleapis.com/maps/api/geocode/json"
# Seconds to wait for a response; without a timeout a single stalled
# connection would hang the whole run.
GEOCODE_TIMEOUT = 10

# Base query parameters shared by every request (the address is added per row)
params = {"key": gkey}

# Zipcodes whose request failed (network/HTTP error, unreadable body, or an
# API-level error status). Their Lat / Lng stay empty.
failed_zipcodes = []

# One Session reuses the underlying connection across all requests
with requests.Session() as session:
    for index, row in tqdm(census_df.iterrows(), total=len(census_df)):
        # Named `zipcode`, not `zip`, so the builtin zip() is not shadowed
        # for the rest of the kernel session.
        zipcode = row["Zipcode"]
        request_params = {**params, "address": str(zipcode)}

        try:
            response = session.get(
                GEOCODE_URL, params=request_params, timeout=GEOCODE_TIMEOUT
            )
            response.raise_for_status()
            payload = response.json()
        except (requests.RequestException, ValueError):
            # Keep going: one bad response must not discard the rows already
            # geocoded. The exception text is deliberately not printed because
            # it can contain the request URL, which includes the API key.
            failed_zipcodes.append(zipcode)
            continue

        results = payload.get("results", [])
        if not results:
            # An empty result list is only a genuine "no match" when the API
            # says ZERO_RESULTS; any other status (quota exceeded, request
            # denied, ...) is a failure worth reporting.
            if payload.get("status") != "ZERO_RESULTS":
                failed_zipcodes.append(zipcode)
            continue

        location = results[0]["geometry"]["location"]
        census_df.loc[index, "Lat"] = location["lat"]
        census_df.loc[index, "Lng"] = location["lng"]

if failed_zipcodes:
    print(f"{len(failed_zipcodes)} zipcodes could not be geocoded "
          f"(request or API error); see failed_zipcodes")

# Snap view of data to confirm lat & lng applied
census_df.head()

In [None]:
# Persist the geocoded frame so the geocoding requests do not have to be repeated
census_df.to_csv(
    path_or_buf="Resources/censusdata_geo.csv",
    index=False,
    encoding="utf-8",
)

In [None]:
# Preview the frame that is about to be loaded into MongoDB
census_df.head()

In [None]:
# Serialise the frame to table-oriented JSON, parse it back, and keep only the
# "data" entry as the list of records to load into MongoDB.
table_json = census_df.to_json(orient="table")
table_payload = json.loads(table_json)
records = table_payload["data"]

In [None]:
# Setup connection to mongodb (local instance by default).
# To target a hosted cluster, import its connection URI from the config module,
# as is done for the API keys -- never paste a URI containing a username and
# password into the notebook, not even in a comment.
conn = "mongodb://localhost:27017"
client = pymongo.MongoClient(conn)

# Select database and collection to use
db = client.US_Accidents
census_db = db.census_db

# Drop the collection first so re-running the notebook does not duplicate documents
census_db.drop()

# insert_many() rejects an empty list, so only call it when there is data
if records:
    insert_result = census_db.insert_many(records)
    print(f"Inserted {len(insert_result.inserted_ids)} census records")
else:
    print("No census records to insert; the census_db collection was left empty")