In [1]:
#import Dependencies

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import gmaps
import requests

from pprint import pprint
# Import API key
from api_keys import g_key
from api_keys import w_key
!jupyter nbextension enable --py gmaps 
from sklearn import preprocessing

%matplotlib inline

Enabling notebook extension jupyter-gmaps/extension...
      - Validating: ok


In [2]:
# Path to the source CSV, relative to the notebook
csv_file = "Resources/AQ_byCBSA_19.csv"

# Load the file into a DataFrame with pandas.
# The encoding is passed explicitly; not every CSV needs one, but this keeps the read unambiguous.
csv_df = pd.read_csv(
    csv_file,
    encoding="ISO-8859-1",
)

# Show the first two rows as a quick sanity check
csv_df.head(2)

Unnamed: 0,CBSA,CBSA Code,Year,Days with AQI,Good Days,Moderate Days,Unhealthy for Sensitive Groups Days,Unhealthy Days,Very Unhealthy Days,Hazardous Days,Max AQI,90th Percentile AQI,Median AQI,Days CO,Days NO2,Days Ozone,Days SO2,Days PM2.5,Days PM10
0,"Aberdeen, SD",10100,2019,60,52,8,0,0,0,0,77,56,24,0,0,0,0,54,6
1,"Aberdeen, WA",10140,2019,199,199,0,0,0,0,0,40,30,18,0,0,0,0,199,0


In [3]:
# Check for duplicate records; an empty array below means there are none.
# A row is flagged when its CBSA name OR its CBSA code has already appeared
# in an earlier row. (Listing 'CBSA' twice in a single subset only ever
# compares the name column, so repeated codes would go unnoticed.)
is_duplicate = (
    csv_df.duplicated(subset=['CBSA'])
    | csv_df.duplicated(subset=['CBSA Code'])
)

# Distinct CBSA codes belonging to the flagged rows
duplicate_data = csv_df.loc[is_duplicate, 'CBSA Code'].unique()
duplicate_data


array([], dtype=int64)

In [4]:
# Remove the 'Year' column; the result is a new frame and csv_df is left untouched
clean_data = csv_df.drop(columns=['Year'])
clean_data.head(2)

Unnamed: 0,CBSA,CBSA Code,Days with AQI,Good Days,Moderate Days,Unhealthy for Sensitive Groups Days,Unhealthy Days,Very Unhealthy Days,Hazardous Days,Max AQI,90th Percentile AQI,Median AQI,Days CO,Days NO2,Days Ozone,Days SO2,Days PM2.5,Days PM10
0,"Aberdeen, SD",10100,60,52,8,0,0,0,0,77,56,24,0,0,0,0,54,6
1,"Aberdeen, WA",10140,199,199,0,0,0,0,0,40,30,18,0,0,0,0,199,0


In [5]:

# Create x, where x holds the 'Days with AQI' column's values as floats
#x = clean_data[['Days with AQI']].values.astype(float)

#Create a minimum and maximum processor object
#min_max_scaler = preprocessing.MinMaxScaler()

# Fit the min-max processor to the data and transform it
#x_scaled = min_max_scaler.fit_transform(x)

#clean_data_normalized = pd.DataFrame(x_scaled)

#clean_data_normalized

In [6]:

#plt.figure(figsize=(10,5))

#clean_data_normalized.plot(kind = 'bar')


In [7]:
# Preview the first five rows of the cleaned table
clean_data.head(n=5)

Unnamed: 0,CBSA,CBSA Code,Days with AQI,Good Days,Moderate Days,Unhealthy for Sensitive Groups Days,Unhealthy Days,Very Unhealthy Days,Hazardous Days,Max AQI,90th Percentile AQI,Median AQI,Days CO,Days NO2,Days Ozone,Days SO2,Days PM2.5,Days PM10
0,"Aberdeen, SD",10100,60,52,8,0,0,0,0,77,56,24,0,0,0,0,54,6
1,"Aberdeen, WA",10140,199,199,0,0,0,0,0,40,30,18,0,0,0,0,199,0
2,"Adjuntas, PR",10260,57,53,4,0,0,0,0,58,44,18,0,0,0,0,57,0
3,"Adrian, MI",10300,181,145,36,0,0,0,0,90,57,40,0,0,94,0,87,0
4,"Akron, OH",10420,273,203,67,3,0,0,0,115,62,44,0,0,162,1,110,0


In [8]:
#clean_data.set_index('State')

In [9]:
# Geocode every CBSA name with the Google Geocoding API and collect the
# rounded coordinates together with the air-quality columns used for mapping.

# lists to hold responses (one entry per CBSA that geocoded successfully)
lat = []
lng = []
CBSA = []
max_AQI = []
PM2_5 = []

# Build the Query URL
base_url =  "https://maps.googleapis.com/maps/api/geocode/json?"
# Pre-built concatenation prefix, kept with its original value for compatibility.
# The loop below no longer appends to it: the query string is built by requests
# so that the address is fully URL-encoded (commas, ampersands, accents, ...).
query_url = f"{base_url}&key={g_key}&address="

# Seconds to wait for the geocoder before giving up on a request, so a stalled
# connection raises instead of hanging the cell forever.
REQUEST_TIMEOUT = 10

# start iteration for API calls
# Iterating over the column values directly avoids label-based lookups such as
# clean_data['CBSA'][i], which only work while the frame has a default RangeIndex.
for name, aqi, pm_days in zip(clean_data['CBSA'],
                              clean_data['Max AQI'],
                              clean_data['Days PM2.5']):
    try:
        # call API and store a response
        response = requests.get(
            base_url,
            params={"address": name, "key": g_key},
            timeout=REQUEST_TIMEOUT,
        ).json()

        # Pull the coordinates out of the first match.
        # KeyError  -> the payload is missing an expected field
        # IndexError -> 'results' is an empty list (no match for this address)
        location = response['results'][0]['geometry']['location']
        latitude = round(location['lat'], 2)
        longitude = round(location['lng'], 2)

    except (KeyError, IndexError):
        print(name + " not found!")
        continue

    # append response data to respective lists; done only after both
    # coordinates were extracted so the lists always stay the same length
    CBSA.append(name)
    lat.append(latitude)
    lng.append(longitude)
    max_AQI.append(aqi)
    PM2_5.append(pm_days)

print("Processing Complete")



Processing Complete


In [10]:
# Combine the parallel result lists into one tuple per geocoded CBSA
zipped_latlng = [*zip(CBSA, lat, lng, max_AQI, PM2_5)]

In [11]:
# Assemble the mapping table from the zipped records, one named column per list
dataMap_df = pd.DataFrame(
    zipped_latlng,
    columns=['CBSA', 'Latitude', 'Longitude', 'Max AQI', 'PM2_5'],
)
dataMap_df

Unnamed: 0,CBSA,Latitude,Longitude,Max AQI,PM2_5
0,"Aberdeen, SD",45.46,-98.49,77,54
1,"Aberdeen, WA",46.98,-123.82,40,199
2,"Adjuntas, PR",18.16,-66.72,58,57
3,"Adrian, MI",41.90,-84.04,90,87
4,"Akron, OH",41.08,-81.52,115,110
...,...,...,...,...,...
515,"Yakima, WA",46.60,-120.51,114,211
516,"York-Hanover, PA",39.80,-76.98,93,45
517,"Youngstown-Warren-Boardman, OH-PA",41.29,-80.54,100,74
518,"Yuba City, CA",39.14,-121.62,110,144


In [12]:

# Hand the Google API key to gmaps
gmaps.configure(api_key=g_key)

# Coordinates of every geocoded CBSA
locations = dataMap_df[["Latitude", "Longitude"]]

# Candidate weight series; only pm2_5 is passed to the heat layer below
max_aqi = dataMap_df["Max AQI"]
pm2_5 = dataMap_df["PM2_5"]

# Build the heat layer, weighted by the PM2_5 column
heat_layer = gmaps.heatmap_layer(
    locations,
    weights=pm2_5,
    dissipating=False,
    max_intensity=300,
    point_radius=2,
)

# Create the gmaps figure and attach the layer to it
fig = gmaps.figure()
fig.add_layer(heat_layer)

# Display figure
fig


Figure(layout=FigureLayout(height='420px'))

In [13]:

# HTML template for one info box: CBSA name, Max AQI and PM2.5 value.
# Each {placeholder} is a column name of dataMap_df.
info_box_template = """
<dl>
<dt>CBSA</dt><dd>{CBSA}</dd>
<dt>MAX AQI</dt><dd>{Max AQI}</dd>
<dt>PM2.5</dt><dd>{PM2_5}</dd>
</dl>
"""

# Render the template once per DataFrame row, in row order;
# columns without a placeholder are simply ignored by format()
info = [
    info_box_template.format(**record)
    for record in dataMap_df.to_dict(orient="records")
]

# Coordinates, in the same row order as the rendered info boxes
locations = dataMap_df[["Latitude", "Longitude"]]

In [14]:
# Build a marker layer for the locations, one info box per marker,
# and add it to the existing figure on top of the heat map
markers = gmaps.marker_layer(
    locations,
    info_box_content=info,
)
fig.add_layer(markers)

# Display Map
fig


Figure(layout=FigureLayout(height='420px'))