In [None]:
# Import Dependencies
from pathlib import Path
import matplotlib.pyplot as plt
import pandas as pd
import requests
import json
import hvplot.pandas

# Import the API key
from config import api_key

In [None]:
# Read in our csv file with city info and put it into a dataframe.
# The path is relative, so it is resolved against the current working directory.
file_path = Path("worldcities.csv")
city_df = pd.read_csv(file_path)

# Last expression of the cell: shows the loaded dataframe as the cell output
city_df

In [None]:
# Keep only United States cities with at least 100,000 residents, expose the
# "admin_name" column under the name "state", and store population as int64.
usa_df = (
    city_df.loc[
        (city_df["country"] == "United States") & (city_df["population"] >= 100000)
    ]
    .rename(columns={"admin_name": "state"})
    .astype({"population": "int64"})
)

# Show the filtered dataframe as the cell output
usa_df

In [None]:
# Get a statistical overview of our columns
# (with default arguments, describe() summarises the numeric columns only)
usa_df.describe()

In [None]:
# Ensure that all columns have the proper data-types
# (population is expected to show int64 after the cast in the filtering cell)
usa_df.dtypes

In [None]:
# Add new Air Quality Index Level columns for each city in dataframe.
# The columns start out as NaN rather than a blank string so they keep a
# numeric dtype: a city whose lookup later fails is then simply "missing"
# and does not break sorting or plotting of these columns.
pollution_columns = ["AQI", "CO", "NO", "NO2", "O3", "SO2", "PM2.5", "PM10", "NH3"]

for column in pollution_columns:
    usa_df[column] = float("nan")

# Show the dataframe with the new, still-empty columns
usa_df

In [None]:
# Set base URL and params to search for current air pollution data.
# HTTPS is used so the API key in the query string is not sent in clear text.
base_url = "https://api.openweathermap.org/data/2.5/air_pollution?"

params = {"appid":api_key}

# Seconds to wait for the API before giving up on a city, so that a single
# stalled connection cannot hang the whole notebook.
request_timeout = 10

# Dataframe column -> key under "components" in the API response
component_keys = {
    "CO": "co",
    "NO": "no",
    "NO2": "no2",
    "O3": "o3",
    "SO2": "so2",
    "PM2.5": "pm2_5",
    "PM10": "pm10",
    "NH3": "nh3",
}

# Print a message to follow up the air pollution search
print("Starting city air pollution details search:")

# Iterate through the usa_df dataframe
for index, row in usa_df.iterrows():

    # Get the city name for log messages
    city = row["city"]

    # Build the query for this city from the shared params plus its coordinates
    query = {**params, "lat": row["lat"], "lon": row["lng"]}

    # Make the API request; a network failure skips this city instead of
    # aborting the whole loop.
    try:
        response = requests.get(base_url, params=query, timeout=request_timeout)
    except requests.RequestException as e:
        # Only the exception type is logged: the exception text may include
        # the request URL, which carries the API key.
        print(f"Request failed for {city}: {type(e).__name__}")
        continue

    if not response.ok:
        print(f"HTTP {response.status_code} received for {city}")
        continue

    # Drill down to get the values we are looking for in the json. Everything
    # is collected first so a malformed response never leaves a row half-filled.
    try:
        reading = response.json()["list"][0]
        values = {"AQI": reading["main"]["aqi"]}
        components = reading["components"]
        for column, key in component_keys.items():
            values[column] = components[key]
    except (ValueError, KeyError, IndexError, TypeError) as e:
        # ValueError covers a body that is not valid JSON; the others cover
        # a JSON document that lacks the expected structure.
        print(f"{e.args} not found for {city}")
        continue

    # Store the values in our dataframe
    for column, value in values.items():
        usa_df.loc[index, column] = value

    # Reported only when the row was actually updated
    print(f"Appending results for {city}")

##### Air Quality Index (AQI) possible values: 1, 2, 3, 4, 5, where 1 = Good, 2 = Fair, 3 = Moderate, 4 = Poor, and 5 = Very Poor.

In [None]:
# Take the first 20 rows of the updated dataframe, then order just those rows
# from highest to lowest AQI (the sort does not look beyond the first 20 rows).
preview_df = usa_df.head(20)
preview_df.sort_values(by="AQI", ascending=False)

In [None]:
# Build scatter plot for city population vs. carbon monoxide (CO) level
x = usa_df["population"]

# The CO column can hold non-numeric placeholders for cities whose lookup
# failed; coercing turns those into NaN so they are left off the plot instead
# of breaking the numeric axis.
y = pd.to_numeric(usa_df["CO"], errors="coerce")

plt.scatter(x, y, edgecolors="black")

# Incorporate the other graph properties
plt.title("Population vs. Carbon Monoxide Levels")
plt.xlabel("Population")
plt.ylabel("Carbon Monoxide (CO)")

plt.grid()

# Save the figure (create the output folder first so savefig does not fail
# when the folder is missing)
Path("output_data").mkdir(parents=True, exist_ok=True)
plt.savefig("output_data/Fig1.png")

# Show plot
plt.show()

In [None]:
# Build scatter plot for city latitude vs. Air Quality Index (AQI)
x = usa_df["lat"]

# The AQI column can hold non-numeric placeholders for cities whose lookup
# failed; coercing turns those into NaN so they are left off the plot instead
# of breaking the numeric axis.
y = pd.to_numeric(usa_df["AQI"], errors="coerce")

plt.scatter(x, y, edgecolors="black")

# Incorporate the other graph properties
plt.title("City Latitude vs. AQI")
plt.xlabel("Latitude")
plt.ylabel("Air Quality Index")

plt.grid()

# Save the figure (create the output folder first so savefig does not fail
# when the folder is missing)
Path("output_data").mkdir(parents=True, exist_ok=True)
plt.savefig("output_data/Fig2.png")

# Show plot
plt.show()

In [None]:
%%capture --no-display

# Configure the map plot
map_plot_1 = usa_df.hvplot.points("lng", "lat",  geo = True, tiles = "OSM", color = "city",
                                  hover_cols = ["city", "state", "population"], frame_width=700, frame_height=500)

# Display the map
map_plot_1

In [None]:
# Disabled experiment: the whole cell body is a bare string literal, so none of
# the code inside it runs; the cell only evaluates (and displays) the string.
# The text sketches a loop over usa_df that queries the air_pollution/history
# endpoint for a fixed start/end window and prints each JSON reply.
# NOTE(review): before re-enabling, check the try block — it indexes the
# `response` object rather than the parsed `data`, and writes to / reads from
# city_df although the loop iterates over usa_df.
""" base_url = "http://api.openweathermap.org/data/2.5/air_pollution/history?"

start = 1672560000
end = 1689776640

params = {"start":start, 
          "end": end,
          "appid":api_key}

counter = 0

for index, row in usa_df.iterrows():
    lat = row["lat"]
    lng = row["lng"]
    
    params["lat"] = lat
    params["lon"] = lng
    
    response = requests.get(base_url, params=params)
    data = response.json()
    
    print(json.dumps(data, indent=4))
    
    
    try:
        city_df.loc[index, "CO"] = response["stations"][0]["CO"]
        
    except Exception as e:
        print(f"{e.args} not found for {city_df.loc[index, 'city']}") """