In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
import scipy.stats as st
import gmaps
import os

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Import API key
from api_keys import gkey
from api_keys import weather_key
# Register the Google API key with gmaps.
gmaps.configure(api_key=gkey)

# Base endpoint of the Google Geocoding API (JSON responses).
map_url = "https://maps.googleapis.com/maps/api/geocode/json?"

# Partial query URL; the address to geocode is appended by the caller.
# map_url already ends in "?", so the first parameter must not begin with "&".
map_query_url = f"{map_url}key={gkey}&address="

# Input files (CSV).
# The data directory can be overridden with the AQI_DATA_DIR environment
# variable so the notebook is not tied to a single machine; when the variable
# is unset the original location is used.
data_dir = os.environ.get("AQI_DATA_DIR", r"C:\Users\micha\Documents\AirQuality")
input_data_file_city = os.path.join(data_dir, "annual_aqi_by_cbsa_2019.csv")
input_data_file_county = os.path.join(data_dir, "annual_aqi_by_county_2019.csv")

# Load both AQI tables (one per CSV file).
aqi_city = pd.read_csv(input_data_file_city)
aqi_county = pd.read_csv(input_data_file_county)

ModuleNotFoundError: No module named 'api_key'

In [None]:
# STATE-LEVEL SUMMARY
# Average each county-level day count over all county rows of a state.

aqi_county_groups = aqi_county.set_index('State').groupby(['State'])

# Per-state means of the AQI category day counts
good_days, moderate_days, unhealthy_days, very_unhealthy_days, hazardous_days = (
    aqi_county_groups[column].mean()
    for column in ("Good Days", "Moderate Days", "Unhealthy Days",
                   "Very Unhealthy Days", "Hazardous Days")
)

# Per-state means of the pollutant day counts
days_co, days_no2, days_ozone, days_so2 = (
    aqi_county_groups[column].mean()
    for column in ("Days CO", "Days NO2", "Days Ozone", "Days SO2")
)

# One column per averaged metric; each Series is named after its source
# column, so the names double as the summary's column labels.
aqi_state_summary = pd.DataFrame({
    series.name: series
    for series in (good_days, moderate_days, unhealthy_days,
                   very_unhealthy_days, hazardous_days,
                   days_co, days_no2, days_ozone, days_so2)
})

aqi_state_summary.head()

In [None]:
# Top 5 States with Hazardous Days
# Rank on the "Hazardous Days" column so the table matches this heading,
# the same way every other ranking cell sorts on the column it names.
aqi_state_summary.sort_values(by='Hazardous Days', ascending=False).head()

In [None]:
# Five states with the largest state-average "Days CO" value
aqi_state_summary.sort_values('Days CO', ascending=False).head(5)

In [None]:
# Five states with the largest state-average "Days NO2" value
aqi_state_summary.sort_values('Days NO2', ascending=False).head(5)

In [None]:
# Five states with the largest state-average "Days Ozone" value
aqi_state_summary.sort_values('Days Ozone', ascending=False).head(5)

In [None]:
# Five states with the largest state-average "Days SO2" value
aqi_state_summary.sort_values('Days SO2', ascending=False).head(5)

In [None]:
# CITY-LEVEL DATA
#
# Not every city reports an AQI on all 365 days, so the figures below cannot
# give a fully certain picture; keep that in mind when reading the results.

# Share of the year that has an AQI reading, and share of those measured days
# that are counted under "Days Ozone"
aqi_city["Year Coverage"] = aqi_city["Days with AQI"].div(365)
aqi_city["Days Ozone Percent"] = aqi_city["Days Ozone"].div(aqi_city["Days with AQI"])

# Keep only cities whose readings cover at least 70% of the 365 days
coverage = aqi_city[aqi_city["Year Coverage"].ge(.70)]
coverage_df = pd.DataFrame(coverage)

# The five well-covered cities with the highest "Days Ozone" count
coverage_df.sort_values('Days Ozone', ascending=False).head(5)

In [None]:
# The imported AQI dataset has no latitude/longitude for its cities, so the
# coordinates needed for mapping are looked up with the Google Geocoding API.

# Parallel result lists: position k in every list describes the same city.
available_cities = []
state = []
lats = []
lngs = []
medianAQI = []
days_ozone = []
hazardous_days = []
percentile = []

print("----------Pulling City Lat and Lng data------------")

# Walk the dataset row by row; iterrows() does not assume a 0..n-1 index.
for _, city_row in aqi_city.iterrows():
    city_name = city_row['CBSA']
    try:
        # Let requests URL-encode the address (spaces, commas, ...) and give
        # up after 10 seconds instead of hanging on a stalled connection.
        response = requests.get(
            map_url,
            params={"key": gkey, "address": city_name},
            timeout=10,
        ).json()

        # Resolve every value BEFORE touching the result lists: if any lookup
        # fails, nothing has been appended and the lists stay the same length.
        location = response['results'][0]['geometry']['location']
        lat = round(location['lat'], 2)
        lng = round(location['lng'], 2)

        # The state part is whatever follows the last ", " in the CBSA name.
        city_state = city_name.rsplit(", ", 1)[1]
    except (requests.RequestException, LookupError, ValueError):
        # Request failed, response was not JSON / had no usable result, or the
        # name has no ", " separator: report the city and move on.
        print(city_name + " not found!")
        continue

    # Record the city together with its metrics from the AQI dataset.
    available_cities.append(city_name)
    state.append(city_state)
    lats.append(lat)
    lngs.append(lng)
    medianAQI.append(city_row['Median AQI'])
    days_ozone.append(city_row['Days Ozone'])
    hazardous_days.append(city_row['Hazardous Days'])
    percentile.append(city_row['90th Percentile AQI'])

    # Print city data as it is acquired.
    print(city_name + ", Lat:" + str(lat) + ", Lng:" + str(lng))

print("-------------Data Read Completed-------------")

# Column name -> list of values, ready to be turned into a DataFrame.
available_city_data = {
    "City":available_cities,
    "State": state,
    "Lat":lats,
    "Lng":lngs,
    "AQI": medianAQI,
    "Days Ozone": days_ozone,
    "Hazardous Days": hazardous_days,
    "90th Percentile AQI": percentile
}

In [None]:
# Turn the collected per-city lists into a DataFrame and preview the first rows
map_ready_data = pd.DataFrame(data=available_city_data)
map_ready_data.head(5)

In [None]:
# Combine five sequences position by position into a list of 5-tuples.
# NOTE(review): CBSA, max_AQI and PM2_5 are not defined in any cell visible
# in this notebook, and `lat` / `lng` are last assigned as single rounded
# floats inside the geocoding loop. Confirm where these sequences are meant
# to come from; as written this cell appears to fail on a fresh kernel.
zipped_latlng = list(zip(CBSA,lat,lng,max_AQI,PM2_5))

In [None]:
# Build a DataFrame from zipped_latlng, one column per tuple position.
# NOTE(review): the fourth column is labelled 'Days Ozone', but the fourth
# value zipped into zipped_latlng is named max_AQI. Confirm which label is
# intended.
dataMap_df = pd.DataFrame(zipped_latlng, columns = ['CBSA' ,'Latitude','Longitude','Days Ozone','PM2_5'])
# Display the whole frame as the cell output.
dataMap_df

In [None]:
# Keep only the Arizona rows of the geocoded city table
AZ_data = map_ready_data[map_ready_data["State"] == 'AZ']

# Bar chart: one bar per Arizona city, bar height = its "Days Ozone" value
x_axis = np.arange(len(AZ_data))
tick_locations = list(x_axis)
plt.bar(x_axis, AZ_data["Days Ozone"], align="center")
plt.xticks(tick_locations, AZ_data["City"], rotation="vertical")
plt.title('Recorded Days Ozone in AZ - 2019')
plt.xlabel('Cities')
plt.show()

# Show the underlying rows beneath the chart
AZ_data

# Conclusion: The Phoenix metropolitan area does experience Medium to Good ozone exposure levels.
# Payson, AZ and the Phoenix metropolitan area are both in the Medium to Good ozone range due to high pollutants in the air.
# Flagstaff and Prescott, AZ have the higher number of Good Ozone days with low median AQI, while Lake Havasu didn't have enough sample data.

In [None]:
# Line plot of year coverage, ozone days and measured days for the cities
# with good data coverage.

# Share of the year with an AQI reading, and share of the measured days that
# are counted under "Days Ozone". The ozone share is computed from the
# "Days Ozone" column so that the value matches the column's name.
aqi_city["Year Coverage"] = aqi_city["Days with AQI"] / 365
aqi_city["Days Ozone Percent"] = aqi_city["Days Ozone"]/aqi_city["Days with AQI"]

# Cities with best data coverage for evaluation (at least 70% of 365 days)
coverage = aqi_city.loc[aqi_city["Year Coverage"] >= .70]
coverage_df = pd.DataFrame(coverage)

# Renumber the filtered rows 0..n-1 so the x-axis is a plain row position,
# then plot the three selected columns as one line each.
line_plot_df = coverage_df.reset_index()
line_plot_final = line_plot_df[["Year Coverage", "Days Ozone", "Days with AQI"]]
lines = line_plot_final.plot.line()

In [None]:
# Show the "goodair" reference image inline.
# Image and display are taken from the public IPython.display module;
# importing display from IPython.core.display is deprecated.
from IPython.display import Image, display
display(Image(r'C:\Users\micha\OneDrive\Desktop\img\goodair.png', width=1900, unconfined=True))

In [None]:
# Hypothesis: An assumption can be made that, in general, as concentrations of ground-level ozone increase,
# both the number of people affected and the seriousness of the health effects increase.
# Also, more people with lung disease visit doctors or emergency rooms and are admitted to the hospital.
# When ozone levels are very high, everyone should be concerned about ozone exposure.
# A congested metropolitan city like Phoenix, with high traffic volume and more industries, would show Moderate to Good Days of Ozone
# (AQI between 0-50) compared with cities that have lower population, low traffic, and no pollution-creating industry.