In [5]:
# Importing libraries
import pandas as pd
from pathlib import Path
import csv
from config import api_key
import requests
from pprint import pprint
import random
import matplotlib.pyplot as plt
from datetime import datetime, timedelta


In [6]:
csv_path = Path("../Resources/global air pollution dataset.csv")
global_df = pd.read_csv(csv_path)
global_df.head()

Unnamed: 0,Country,City,AQI Value,AQI Category,CO AQI Value,CO AQI Category,Ozone AQI Value,Ozone AQI Category,NO2 AQI Value,NO2 AQI Category,PM2.5 AQI Value,PM2.5 AQI Category
0,Russian Federation,Praskoveya,51,Moderate,1,Good,36,Good,0,Good,51,Moderate
1,Brazil,Presidente Dutra,41,Good,1,Good,5,Good,1,Good,41,Good
2,Italy,Priolo Gargallo,66,Moderate,1,Good,39,Good,2,Good,66,Moderate
3,Poland,Przasnysz,34,Good,1,Good,34,Good,0,Good,20,Good
4,France,Punaauia,22,Good,0,Good,22,Good,0,Good,6,Good


In [7]:
global_df.columns

Index(['Country', 'City', 'AQI Value', 'AQI Category', 'CO AQI Value',
       'CO AQI Category', 'Ozone AQI Value', 'Ozone AQI Category',
       'NO2 AQI Value', 'NO2 AQI Category', 'PM2.5 AQI Value',
       'PM2.5 AQI Category'],
      dtype='object')

In [8]:
NorthAmerica_df = global_df.loc[global_df['Country'].isin(['United States of America', 'Canada', 'Mexico'])]
NorthAmerica_df.head()
NorthAmerica_df.count()

Country               3601
City                  3601
AQI Value             3601
AQI Category          3601
CO AQI Value          3601
CO AQI Category       3601
Ozone AQI Value       3601
Ozone AQI Category    3601
NO2 AQI Value         3601
NO2 AQI Category      3601
PM2.5 AQI Value       3601
PM2.5 AQI Category    3601
dtype: int64

In [9]:
unique_cities = NorthAmerica_df[['Country', 'City']].drop_duplicates().reset_index(drop=True)
cities_list = unique_cities['City']
cities_list.to_csv('../Resources/cities_list.csv')
unique_cities.to_csv('../Resources/citiesandcountry_list.csv')

# Making a list of cities. This is the entire list of 3601 countries 
list_cities = cities_list.to_list()

# Randomized list of 500 cities. This is probably better for plotting and to avoid hitting API limit
random_cities = random.sample(list_cities,50) # change to 500 or any number you want

# Shortened list of cities. Use this for testing code to avoid hitting API limit
short_cities = cities_list.head(3).tolist()


In [10]:
random_cities

['Toms River',
 'Asheboro',
 'Huetamo',
 'Willowbrook',
 'Cicero',
 'Bethel Park',
 'Houston',
 'Cincinnati',
 'Keene',
 'Omaha',
 'West Caldwell',
 'Merced',
 'Van Wert',
 'Sugar Hill',
 'Westerly',
 'Bexley',
 'Santa Rosalia',
 'Hempstead',
 'Shively',
 'Aguascalientes',
 'Flint',
 'Elizabeth City',
 'University Park',
 'Bergenfield',
 'Silao',
 'Atotonilco El Alto',
 'Zapotlanejo',
 'Yakima',
 'Ahualulco',
 'Dolores Hidalgo',
 'Bay City',
 'Hilo',
 'Metepec',
 'Anenecuilco',
 'Manassas Park',
 'Rossmoor',
 'Murrieta',
 'Riverton',
 'Burton',
 'Sunland Park',
 'Shrewsbury',
 'Dobbs Ferry',
 'Zacapu',
 'Cottonwood',
 'Durant',
 'Tarrytown',
 'Rapid City',
 'Salmon Creek',
 'Santa Cruz Del Valle',
 'Fairview Park']

In [11]:
# Acessing the open weather API to get lattitude and longitude values

url = "http://api.openweathermap.org/data/2.5/weather?"
units = "metric"

# Build partial query URL
query_url = f"{url}appid={api_key}&units={units}&q="

# Making API calls
lat = []
lon = []

# loop through list of cities
for city in random_cities:
    #print (city)
    
    response = requests.get(query_url + city).json()
    try:
        #print (response)
        lat.append(response['coord']['lat'])
        lon.append(response['coord']['lon'])
    except Exception as e:
        print(f'city not found or {e}')
        pass

city not found or 'coord'


In [12]:
# Accessing the open weather API to get air pollution values

air_url = 'http://api.openweathermap.org/data/2.5/air_pollution?' # current air pollution values

coord = [] # latitude and longitude coordinates
aqi = [] # air quality index
co = [] # Concentration of CO (Carbon monoxide), μg/m3
nh3 = [] # Concentration of NO (Nitrogen monoxide), μg/m3
no = [] # Сoncentration of NO2 (Nitrogen dioxide), μg/m3
no2 = [] # Сoncentration of O3 (Ozone), μg/m3
o3 = [] # Сoncentration of SO2 (Sulphur dioxide), μg/m3
pm10 = [] # Сoncentration of PM2.5 (Fine particles matter),
pm2_5 = [] # Сoncentration of PM10 (Coarse particulate matter), μg/m3
so2 = [] # Сoncentration of NH3 (Ammonia), μg/m3


for lt,ln in zip(lat,lon):
    response = requests.get(f'{air_url}lat={lt}&lon={ln}&appid={api_key}').json()
    #pprint (response)
    try:
        coord.append(response['coord'])
        aqi.append(response['list'][0]['main']['aqi'])
        co.append(response['list'][0]['components']['co'])
        nh3.append(response['list'][0]['components']['nh3'])
        no.append(response['list'][0]['components']['no'])
        no2.append(response['list'][0]['components']['no2'])
        o3.append(response['list'][0]['components']['o3'])
        pm10.append(response['list'][0]['components']['pm10'])
        pm2_5.append(response['list'][0]['components']['pm2_5'])
        so2.append(response['list'][0]['components']['so2'])
    except Exception as e:
        print (f'failed to get values for lat:{lt} and lon:{ln}. Error is: {e}')

In [21]:
coord


[{'lon': -74.1979, 'lat': 39.9537},
 {'lon': -79.8136, 'lat': 35.7079},
 {'lon': -118.2551, 'lat': 33.917},
 {'lon': -87.7539, 'lat': 41.8456},
 {'lon': -80.0395, 'lat': 40.3276},
 {'lon': -95.358, 'lat': 29.76},
 {'lon': -84.4569, 'lat': 39.162},
 {'lon': -72.2781, 'lat': 42.9337},
 {'lon': -95.9378, 'lat': 41.2586},
 {'lon': -74.3018, 'lat': 40.8409},
 {'lon': -120.7516, 'lat': 37.1666},
 {'lon': -84.5841, 'lat': 40.8695},
 {'lon': -84.0335, 'lat': 34.1065},
 {'lon': -71.8273, 'lat': 41.3776},
 {'lon': -82.9377, 'lat': 39.9689},
 {'lon': -112.2833, 'lat': 27.3167},
 {'lon': -73.619, 'lat': 40.706},
 {'lon': -85.8227, 'lat': 38.2001},
 {'lon': -102.3, 'lat': 21.8833},
 {'lon': -83.6875, 'lat': 43.0125},
 {'lon': -76.2511, 'lat': 36.2946},
 {'lon': -80.3676, 'lat': 25.7465},
 {'lon': -73.9974, 'lat': 40.9276},
 {'lon': -101.4333, 'lat': 20.9333},
 {'lon': -102.5167, 'lat': 20.55},
 {'lon': -103.0667, 'lat': 20.6333},
 {'lon': -120.6678, 'lat': 46.4665},
 {'lon': -101.1667, 'lat': 22.4}

In [13]:

# OpenWeatherMap API endpoint for getting weather data
url = "http://api.openweathermap.org/data/2.5/weather?"

# Parameters for the API request
units = "metric"
city = "New York"

# Build the complete query URL
query_url = f"{url}appid={api_key}&units={units}&q={city}"

# Make the API call to get weather data for New York
response = requests.get(query_url).json()

# Extract latitude and longitude if the city is found
try:
    lat = response['coord']['lat']
    lon = response['coord']['lon']
    print(f"Coordinates of New York: Latitude = {lat}, Longitude = {lon}")
except KeyError:
    print("City not found or invalid response from the API")


Coordinates of New York: Latitude = 40.7143, Longitude = -74.006


In [14]:
# OpenWeatherMap API endpoint for getting weather data
url = "http://api.openweathermap.org/data/2.5/weather?"

# Parameters for the API request
units = "metric"
city = "Los Angeles"

# Build the complete query URL
query_url = f"{url}appid={api_key}&units={units}&q={city}"

# Make the API call to get weather data for New York
response = requests.get(query_url).json()

# Extract latitude and longitude if the city is found
try:
    lat = response['coord']['lat']
    lon = response['coord']['lon']
    print(f"Coordinates of Los Angeles: Latitude = {lat}, Longitude = {lon}")
except KeyError:
    print("City not found or invalid response from the API")


Coordinates of Los Angeles: Latitude = 34.0522, Longitude = -118.2437


In [15]:
# OpenWeatherMap API endpoint for getting weather data
url = "http://api.openweathermap.org/data/2.5/weather?"

# Parameters for the API request
units = "metric"
city = "Chicago"

# Build the complete query URL
query_url = f"{url}appid={api_key}&units={units}&q={city}"

# Make the API call to get weather data for New York
response = requests.get(query_url).json()

# Extract latitude and longitude if the city is found
try:
    lat = response['coord']['lat']
    lon = response['coord']['lon']
    print(f"Coordinates of Chicago: Latitude = {lat}, Longitude = {lon}")
except KeyError:
    print("City not found or invalid response from the API")


Coordinates of Chicago: Latitude = 41.85, Longitude = -87.65


In [16]:
# OpenWeatherMap API endpoint for getting weather data
url = "http://api.openweathermap.org/data/2.5/weather?"

# Parameters for the API request
units = "metric"
city = "Houston"

# Build the complete query URL
query_url = f"{url}appid={api_key}&units={units}&q={city}"

# Make the API call to get weather data for New York
response = requests.get(query_url).json()

# Extract latitude and longitude if the city is found
try:
    lat = response['coord']['lat']
    lon = response['coord']['lon']
    print(f"Coordinates of Houston: Latitude = {lat}, Longitude = {lon}")
except KeyError:
    print("City not found or invalid response from the API")


Coordinates of Houston: Latitude = 29.7633, Longitude = -95.3633


In [17]:
# OpenWeatherMap API endpoint for getting weather data
url = "http://api.openweathermap.org/data/2.5/weather?"

# Parameters for the API request
units = "metric"
city = "Phoenix"

# Build the complete query URL
query_url = f"{url}appid={api_key}&units={units}&q={city}"

# Make the API call to get weather data for New York
response = requests.get(query_url).json()

# Extract latitude and longitude if the city is found
try:
    lat = response['coord']['lat']
    lon = response['coord']['lon']
    print(f"Coordinates of Phoenix: Latitude = {lat}, Longitude = {lon}")
except KeyError:
    print("City not found or invalid response from the API")


Coordinates of Phoenix: Latitude = 33.4484, Longitude = -112.074


In [18]:
#Do not use this code

from bokeh.plotting import figure, show, output_file
from bokeh.models import ColumnDataSource
from bokeh.io import output_notebook

# Activate inline plotting for Bokeh
output_notebook()


# New York coordinates
lat = 40.7143
lon = -74.006

# Time settings for the past five years (convert to Unix timestamps)
end_date = datetime.utcnow()
start_date = end_date - timedelta(days=5*365) # 5 years ago

# Historical Air Pollution URL
air_history_url = "http://api.openweathermap.org/data/2.5/air_pollution/history"

# Placeholder lists to collect data
timestamps = []
aqi_values = []
co_values = []
no2_values = []
o3_values = []
pm10_values = []
pm2_5_values = []
so2_values = []

# Loop through time, getting data in monthly chunks
current_date = start_date
while current_date < end_date:
    # Set start and end of the month
    start_time_unix = int(current_date.timestamp())
    next_month = current_date + timedelta(days=30)
    end_time_unix = int(next_month.timestamp())
    
    # API Request
    response = requests.get(
        f"{air_history_url}?lat={lat}&lon={lon}&start={start_time_unix}&end={end_time_unix}&appid={api_key}"
    ).json()
    
    # Extract data if available
    try:
        for record in response['list']:
            timestamps.append(datetime.utcfromtimestamp(record['dt']))
            aqi_values.append(record['main']['aqi'])
            co_values.append(record['components']['co'])
            no2_values.append(record['components']['no2'])
            o3_values.append(record['components']['o3'])
            pm10_values.append(record['components']['pm10'])
            pm2_5_values.append(record['components']['pm2_5'])
            so2_values.append(record['components']['so2'])
    except KeyError:
        print(f"No data available for {current_date.strftime('%Y-%m')}")

    # Move to the next month
    current_date = next_month

# Create a DataFrame
data = pd.DataFrame({
    'timestamp': timestamps,
    'AQI': aqi_values,
    'CO': co_values,
    'NO2': no2_values,
    'O3': o3_values,
    'PM10': pm10_values,
    'PM2.5': pm2_5_values,
    'SO2': so2_values
})

# Plot using Bokeh
source = ColumnDataSource(data)

# Create a new plot
p = figure(
    title="Air Quality in New York Over 5 Years",
    x_axis_label='Date',
    y_axis_label='Concentration (μg/m3)',
    x_axis_type='datetime',
    width=900,  # Change from plot_width to width
    height=500  # Change from plot_height to height
)

# Add lines for different pollutants
p.line(x='timestamp', y='AQI', line_width=2, source=source, color='red', legend_label='AQI')
p.line(x='timestamp', y='CO', line_width=2, source=source, color='blue', legend_label='CO')
p.line(x='timestamp', y='NO2', line_width=2, source=source, color='green', legend_label='NO2')
p.line(x='timestamp', y='O3', line_width=2, source=source, color='purple', legend_label='O3')
p.line(x='timestamp', y='PM10', line_width=2, source=source, color='orange', legend_label='PM10')
p.line(x='timestamp', y='PM2.5', line_width=2, source=source, color='brown', legend_label='PM2.5')
p.line(x='timestamp', y='SO2', line_width=2, source=source, color='gray', legend_label='SO2')

# Customize legend
p.legend.location = "top_left"
p.legend.click_policy = "hide"

# Show plot
show(p)


In [19]:
import panel as pn

# Create widgets for interaction
pollutant_selector = pn.widgets.Select(name='Pollutant', options=['AQI', 'CO', 'NO2', 'O3', 'PM10', 'PM2.5', 'SO2'])

@pn.depends(pollutant_selector)
def update_plot(selected_pollutant):
    p_interactive = figure(
        title=f"{selected_pollutant} Over Time",
        x_axis_label='Date',
        y_axis_label='Concentration (μg/m3)',
        x_axis_type='datetime',
        width=900,
        height=500
    )
    p_interactive.line(x='timestamp', y=selected_pollutant, source=data, line_width=2)
    return p_interactive

# Create dashboard layout
dashboard = pn.Column(pollutant_selector, update_plot)
pn.serve(dashboard)

# Display the dashboard
dashboard


Launching server at http://localhost:55767


In [7]:
pip install jupyter_bokeh

Collecting jupyter_bokeh
  Downloading jupyter_bokeh-4.0.5-py3-none-any.whl.metadata (7.1 kB)
Collecting ipywidgets==8.* (from jupyter_bokeh)
  Downloading ipywidgets-8.1.5-py3-none-any.whl.metadata (2.3 kB)
Collecting comm>=0.1.3 (from ipywidgets==8.*->jupyter_bokeh)
  Downloading comm-0.2.2-py3-none-any.whl.metadata (3.7 kB)
Collecting widgetsnbextension~=4.0.12 (from ipywidgets==8.*->jupyter_bokeh)
  Downloading widgetsnbextension-4.0.13-py3-none-any.whl.metadata (1.6 kB)
Collecting jupyterlab-widgets~=3.0.12 (from ipywidgets==8.*->jupyter_bokeh)
  Downloading jupyterlab_widgets-3.0.13-py3-none-any.whl.metadata (4.1 kB)
Downloading jupyter_bokeh-4.0.5-py3-none-any.whl (148 kB)
   ---------------------------------------- 0.0/148.6 kB ? eta -:--:--
   ---------------- ----------------------- 61.4/148.6 kB 1.6 MB/s eta 0:00:01
   ---------------------------------------- 148.6/148.6 kB 2.2 MB/s eta 0:00:00
Downloading ipywidgets-8.1.5-py3-none-any.whl (139 kB)
   -----------------------

In [10]:
from datetime import datetime, timedelta
import panel as pn
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource
from bokeh.io import output_notebook
import numpy as np
import pandas as pd
import requests
import time
from config import api_key

output_notebook()

# Enable Panel inline plotting in notebooks
pn.extension()

# Cities to analyze along with their latitude and longitude
cities_info = {
    'New York': {'lat': 40.7143, 'lon': -74.006},
    'Los Angeles': {'lat': 34.0522, 'lon': -118.2437},
    'Chicago': {'lat': 41.85, 'lon': -87.65},
    'Houston': {'lat': 29.7633, 'lon': -95.3633},
    'Phoenix': {'lat': 33.4484, 'lon': -112.074}
}


# Function to fetch air quality data from OpenWeather API
def fetch_air_quality_data(city, lat, lon, start_date, end_date):
    start_timestamp = int(datetime.strptime(start_date, '%Y-%m-%d').timestamp())
    end_timestamp = int(datetime.strptime(end_date, '%Y-%m-%d').timestamp())
    url = f"http://api.openweathermap.org/data/2.5/air_pollution/history"
    
    # OpenWeather API allows fetching data in hourly intervals
    params = {
        'lat': lat,
        'lon': lon,
        'start': start_timestamp,
        'end': end_timestamp,
        'appid': api_key
    }
    
    response = requests.get(url, params=params)
    if response.status_code == 200:
        data = response.json().get('list', [])
        return data
    else:
        print(f"Failed to fetch data for {city}: {response.status_code}")
        return []

# Function to get air quality data for all cities and aggregate it into a DataFrame
def get_air_quality_data(cities_info, start_date, end_date):
    all_data = []
    for city, coords in cities_info.items():
        lat, lon = coords['lat'], coords['lon']
        city_data = fetch_air_quality_data(city, lat, lon, start_date, end_date)
        for entry in city_data:
            dt = datetime.utcfromtimestamp(entry['dt'])
            components = entry['components']
            all_data.append({
                'City': city,
                'Date': dt,
                'AQI': entry.get('main', {}).get('aqi', np.nan),  # You may need to calculate this from components
                'CO': components.get('co', np.nan),
                'NO2': components.get('no2', np.nan),
                'PM10': components.get('pm10', np.nan),
                'PM2.5': components.get('pm2_5', np.nan),
                'SO2': components.get('so2', np.nan)
            })
        time.sleep(1)  # To avoid exceeding the API rate limit
    return pd.DataFrame(all_data)

# Generate data for the given date range
start_date = '2019-01-01'
end_date = '2023-12-31'
air_quality_data = get_air_quality_data(cities_info, start_date, end_date)

# Function to create a Bokeh plot for multiple cities
def create_bokeh_plot(data, selected_cities):
    # Filter data for the selected cities
    city_data = data[data['City'].isin(selected_cities)]

    # Convert date column to datetime
    city_data['Date'] = pd.to_datetime(city_data['Date'])

    # Create a new plot
    p = figure(
        title=f"Air Quality Over 5 Years for Selected Cities",
        x_axis_label='Date',
        y_axis_label='Concentration (μg/m3)',
        x_axis_type='datetime',
        width=900,
        height=500
    )

    # Add lines for different pollutants for each selected city
    colors = ['blue', 'red', 'purple', 'orange', 'brown', 'gray']
    pollutants = ['CO', 'NO2','PM10', 'PM2.5', 'SO2']

    for i, city in enumerate(selected_cities):
        city_specific_data = city_data[city_data['City'] == city]
        source = ColumnDataSource(city_specific_data)
        
        for j, pollutant in enumerate(pollutants):
            p.line(
                x='Date', 
                y=pollutant, 
                line_width=2, 
                source=source, 
                color=colors[j % len(colors)], 
                legend_label=f"{pollutant} ({city})"
            )

    # Customize legend
    p.legend.location = "top_left"
    p.legend.click_policy = "hide"

    return p

# Panel widget for multiple city selection
city_selector = pn.widgets.MultiSelect(name='Cities', options=list(cities_info.keys()), size=5)

# Function to update the plot based on city selection
@pn.depends(city_selector.param.value)
def update_plot(selected_cities):
    if selected_cities:
        return create_bokeh_plot(air_quality_data, selected_cities)
    else:
        return "Please select at least one city."

# Create the dashboard layout
dashboard2 = pn.Column(city_selector, update_plot)

# Display the dashboard
dashboard2.show()

Launching server at http://localhost:53000


<panel.io.server.Server at 0x126938ecc50>

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  city_data['Date'] = pd.to_datetime(city_data['Date'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  city_data['Date'] = pd.to_datetime(city_data['Date'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  city_data['Date'] = pd.to_datetime(city_data['Date'])
A value is trying to be set on a copy of a 

In [3]:
pip install folium

Collecting folium
  Downloading folium-0.17.0-py2.py3-none-any.whl.metadata (3.8 kB)
Collecting branca>=0.6.0 (from folium)
  Downloading branca-0.8.0-py3-none-any.whl.metadata (1.5 kB)
Downloading folium-0.17.0-py2.py3-none-any.whl (108 kB)
   ---------------------------------------- 0.0/108.4 kB ? eta -:--:--
   ---------------------------------------- 0.0/108.4 kB ? eta -:--:--
   --- ------------------------------------ 10.2/108.4 kB ? eta -:--:--
   ----------- --------------------------- 30.7/108.4 kB 325.1 kB/s eta 0:00:01
   --------------------------------- ----- 92.2/108.4 kB 655.4 kB/s eta 0:00:01
   -------------------------------------- 108.4/108.4 kB 625.3 kB/s eta 0:00:00
Downloading branca-0.8.0-py3-none-any.whl (25 kB)
Installing collected packages: branca, folium
Successfully installed branca-0.8.0 folium-0.17.0
Note: you may need to restart the kernel to use updated packages.


In [5]:
import requests
import folium
from folium.plugins import HeatMap
from config import api_key

# Step 1: Set up the API and parameters

base_url = 'http://api.openweathermap.org/data/2.5/air_pollution'

# List of cities with their coordinates
cities = [
    {'name': 'New York', 'lat': 40.7143, 'lon': -74.006},
    {'name': 'Los Angeles', 'lat': 34.0522, 'lon': -118.2437},
    {'name': 'Chicago', 'lat': 41.85, 'lon': -87.65},
    {'name': 'Houston', 'lat': 29.7633, 'lon': -95.3633},
    {'name': 'Phoenix', 'lat': 33.4484, 'lon': -112.074}
]

# Step 2: Fetch air pollution data for each city
pollution_data = []

for city in cities:
    params = {
        'lat': city['lat'],
        'lon': city['lon'],
        'appid': api_key
    }
    response = requests.get(base_url, params=params)
    data = response.json()
    
    if response.status_code == 200 and 'list' in data:
        # Get PM2.5 concentration
        pm25 = data['list'][0]['components']['pm2_5']
        pollution_data.append([city['lat'], city['lon'], pm25])
    else:
        print(f"Failed to get data for {city['name']}")

# Step 3: Create a folium map
map_center = [39.8283, -98.5795]  # Center of the USA
pollution_map = folium.Map(location=map_center, zoom_start=4)

# Step 4: Add pollution data as a heat map
HeatMap(pollution_data).add_to(pollution_map)

# Step 5: Save the map to an HTML file
pollution_map.save('air_pollution_heat_map.html')

print("Air pollution heat map created and saved as 'air_pollution_heat_map.html'")


Air pollution heat map created and saved as 'air_pollution_heat_map.html'


In [22]:
unique_cities = NorthAmerica_df[['Country', 'City']].drop_duplicates().reset_index(drop=True)
cities_list = unique_cities['City']
cities_list.to_csv('../Resources/cities_list.csv')
unique_cities.to_csv('../Resources/citiesandcountry_list.csv')

# Making a list of cities. This is the entire list of 3601 countries 
list_cities = cities_list.to_list()

# Randomized list of 500 cities. This is probably better for plotting and to avoid hitting API limit
random_cities = random.sample(list_cities,500) # change to 500 or any number you want

# Shortened list of cities. Use this for testing code to avoid hitting API limit
short_cities = cities_list.head(3).tolist()

In [None]:
random_cities

['Gautier',
 'Westland',
 'Quartz Hill',
 'Pinewood',
 'Lowell',
 'Berea',
 'Groves',
 'Ahome',
 'Tenancingo',
 'Revere',
 'Riverdale',
 'Susanville',
 'Detroit',
 'Alton',
 'Bethpage',
 'Moorhead',
 'Vermilion',
 'Sun City',
 'Pearland',
 'Fredericton',
 'Oaxaca',
 'Hilliard',
 'Marshalltown',
 'Lakeside',
 'Winslow',
 'Fairhaven',
 'Artesia',
 'Hecelchakan',
 'Poplar Bluff',
 'Hammonton',
 'Friendswood',
 'Vernon',
 'Prairie Village',
 'Atenco',
 'Rohnert Park',
 'Commerce City',
 'Calkini',
 'Inkster',
 'Pittsfield',
 'Fort Madison',
 'Valparaiso',
 'Waltham',
 'Langley Park',
 'Ann Arbor',
 'Arlington',
 'Little Rock',
 'Pine Bluff',
 'Escondido',
 'Cheektowaga',
 'Alamo',
 'Tepatitlan',
 'Acaponeta',
 'Glasgow',
 'Sussex',
 'Wrentham',
 'San Sebastian El Grande',
 'Hercules',
 'Fair Oaks',
 'Ellicott City',
 'Allen Park',
 'Paradise Valley',
 'Tyler',
 'West Chester',
 'Progreso',
 'Gadsden',
 'Saint Louis Park',
 'South Hadley',
 'Bloomington',
 'Calabasas',
 'Wilbraham',
 'Reyno

In [24]:
import requests
import pandas as pd
import random
from config import api_key

# Function to get the AQI, latitude, and longitude from the OpenWeather API for a given city
def get_aqi_data(city, api_key):
    try:
        # Step 1: Get Latitude and Longitude of the city
        geocoding_url = f'http://api.openweathermap.org/geo/1.0/direct?q={city}&limit=1&appid={api_key}'
        response = requests.get(geocoding_url)
        data = response.json()

        if len(data) > 0:
            lat = data[0]['lat']
            lon = data[0]['lon']

            # Step 2: Use Latitude and Longitude to get AQI value
            aqi_url = f'http://api.openweathermap.org/data/2.5/air_pollution?lat={lat}&lon={lon}&appid={api_key}'
            response = requests.get(aqi_url)
            aqi_data = response.json()

            aqi_value = aqi_data['list'][0]['main']['aqi']
            return city, lat, lon, aqi_value
        else:
            print(f"No data found for city: {city}")
            return city, None, None, None

    except Exception as e:
        print(f"Error fetching data for city: {city}. Error: {str(e)}")
        return city, None, None, None

# Load the cities list
cities_list = pd.read_csv('../Resources/cities_list.csv')['City'].tolist()

# Randomize the list of cities
random_cities = random.sample(cities_list, 500)



# List to store city, latitude, longitude, and AQI data
city_aqi_data = []

# Fetch data for each city in the random_cities list
for city in random_cities:
    city, lat, lon, aqi = get_aqi_data(city, api_key)
    city_aqi_data.append([city, lat, lon, aqi])

# Create a DataFrame from the collected data
aqi_df = pd.DataFrame(city_aqi_data, columns=['City', 'Latitude', 'Longitude', 'AQI'])

# Save the DataFrame to a CSV file
aqi_df.to_csv('../Resources/cities_aqi_data.csv', index=False)

print("Data fetching complete. Saved to cities_aqi_data.csv")


No data found for city: Doctor Phillips
No data found for city: Tequixquitla
No data found for city: Amherst Center
No data found for city: Fort Saint John
Data fetching complete. Saved to cities_aqi_data.csv


In [None]:
pip install pandas folium

In [31]:
import pandas as pd
import folium
from folium.plugins import HeatMap

# Load the CSV data
file_path = ("../Resources/cities_aqi_data.csv")
data = pd.read_csv(file_path)

# Drop any rows with missing data
data_clean = data.dropna()

# Create a base map centered around the mean latitude and longitude
mean_lat = data_clean['Latitude'].mean()
mean_lng = data_clean['Longitude'].mean()
m = folium.Map(location=[mean_lat, mean_lng], zoom_start=5)

# Prepare the heatmap data (latitude, longitude, AQI)
heat_data = [[row['Latitude'], row['Longitude'], row['AQI']] for index, row in data_clean.iterrows()]
HeatMap(heat_data).add_to(m)

# Define a function to generate HTML for the legend
def add_legend(map_obj):
    legend_html = '''
     <div style="position: fixed;
                 bottom: 50px; right: 50px; width: 150px; height: 150px;
                 background-color: white; z-index:9999; font-size:14px;
                 border:2px solid grey;">
     <h4 style="text-align:center; margin-top: 10px;">AQI Legend</h4>
     <i style="background: green; width: 18px; height: 18px; display: inline-block;"></i> Good <br>
     <i style="background: yellow; width: 18px; height: 18px; display: inline-block;"></i> Fair <br>
     <i style="background: orange; width: 18px; height: 18px; display: inline-block;"></i> Moderate <br>
     <i style="background: red; width: 18px; height: 18px; display: inline-block;"></i> Poor <br>
     <i style="background: purple; width: 18px; height: 18px; display: inline-block;"></i> Very Poor <br>
     </div>
     '''
    map_obj.get_root().html.add_child(folium.Element(legend_html))

# Call the function to add the legend
add_legend(m)

# Save the heatmap with the legend to an HTML file
output_path = 'aqi_heatmap_with_legend.html'
m.save(output_path)

print(f"Heatmap saved to {output_path}")


Heatmap saved to aqi_heatmap_with_legend.html


In [1]:

# Do not use the code
# Get Latitude and Longtitude of 5 most polluted cities and least polluted cities in United States of America

import requests
from config import api_key

def get_city_coordinates(city_name, api_key):
    """
    Fetches the latitude and longitude of a city using the OpenWeather Geocoding API.
    """
    url = f"http://api.openweathermap.org/geo/1.0/direct?q={city_name}&limit=1&appid={api_key}"
    response = requests.get(url)
    if response.status_code == 200:
        data = response.json()
        if data:
            lat = data[0]['lat']
            lon = data[0]['lon']
            return lat, lon
        else:
            print(f"Could not find coordinates for {city_name}")
            return None, None
    else:
        print(f"Error: {response.status_code}")
        return None, None

def main():
    
    
    # List of cities
    cities = [
        "Garland", "Gordonville", "Pomona", "Long Beach", 
        "Signal Hill", "Escondido", "Palmdale", 
        "Lakewood", "Oceanside", "Paradise"
    ]
    
    # Dictionary to store city coordinates
    city_coordinates = {}

    # Fetch coordinates for each city
    for city in cities:
        lat, lon = get_city_coordinates(city, api_key)
        if lat is not None and lon is not None:
            city_coordinates[city] = {"latitude": lat, "longitude": lon}
            print(f"{city}: Latitude = {lat}, Longitude = {lon}")

    # Print the final results
    print("\nCity Coordinates:")
    for city, coords in city_coordinates.items():
        print(f"{city}: {coords}")

if __name__ == "__main__":
    main()


Garland: Latitude = 32.912624, Longitude = -96.6388833
Gordonville: Latitude = 37.3111628, Longitude = -89.6792607
Pomona: Latitude = 34.0553813, Longitude = -117.7517496
Long Beach: Latitude = 33.7690164, Longitude = -118.191604
Signal Hill: Latitude = 33.804826, Longitude = -118.168259
Escondido: Latitude = 33.1216751, Longitude = -117.0814849
Palmdale: Latitude = 34.5793131, Longitude = -118.1171108
Lakewood: Latitude = 39.7085736, Longitude = -105.0846694
Oceanside: Latitude = 33.1958696, Longitude = -117.3794834
Paradise: Latitude = 36.1150858, Longitude = -115.173414

City Coordinates:
Garland: {'latitude': 32.912624, 'longitude': -96.6388833}
Gordonville: {'latitude': 37.3111628, 'longitude': -89.6792607}
Pomona: {'latitude': 34.0553813, 'longitude': -117.7517496}
Long Beach: {'latitude': 33.7690164, 'longitude': -118.191604}
Signal Hill: {'latitude': 33.804826, 'longitude': -118.168259}
Escondido: {'latitude': 33.1216751, 'longitude': -117.0814849}
Palmdale: {'latitude': 34.579

In [3]:
# Do not use the code

import requests
import folium
from config import api_key


def get_air_quality(lat, lon, api_key):
    """
    Fetches the PM2.5 air quality data for the given latitude and longitude.
    """
    url = f"http://api.openweathermap.org/data/2.5/air_pollution?lat={lat}&lon={lon}&appid={api_key}"
    response = requests.get(url)
    if response.status_code == 200:
        data = response.json()
        pm2_5 = data['list'][0]['components']['pm2_5']
        return pm2_5
    else:
        print(f"Error fetching air quality data: {response.status_code}")
        return None

def create_heat_map(city_data):
    """
    Creates a heat map with markers where the marker size is based on PM2.5 levels.
    """
    # Create a base map
    heat_map = folium.Map(location=[37.0902, -95.7129], zoom_start=4)

    # Add markers to the map
    for city, data in city_data.items():
        lat = data['latitude']
        lon = data['longitude']
        pm2_5 = data['pm2_5']
        
        # Set the marker size based on PM2.5 level
        marker_size = min(max(pm2_5 * 2, 5), 30)  # Scale marker size for visibility

        # Add a circle marker to the map
        folium.CircleMarker(
            location=(lat, lon),
            radius=marker_size,
            color='red' if pm2_5 > 12 else 'green',
            fill=True,
            fill_color='red' if pm2_5 > 12 else 'green',
            fill_opacity=0.7,
            tooltip=f"{city}: PM2.5 = {pm2_5} µg/m³"
        ).add_to(heat_map)
    
    # Save the map as an HTML file
    heat_map.save("pollution_heat_map.html")
    print("Heat map saved as 'pollution_heat_map.html'")

def main():
    

    # List of cities with their coordinates
    cities = {
        "Garland": {"latitude": 32.9126, "longitude": -96.6389}
        "Gordonville": {"latitude": 33.8304, "longitude": -96.8208},
        "Pomona": {"latitude": 34.0553, "longitude": -117.7523},
        "Long Beach": {"latitude": 33.7701, "longitude": -118.1937},
        "Signal Hill": {"latitude": 33.8045, "longitude": -118.1678},
        "Escondido": {"latitude": 33.1192, "longitude": -117.0864},
        "Palmdale": {"latitude": 34.5794, "longitude": -118.1165},
        "Lakewood": {"latitude": 39.7047, "longitude": -105.0814},
        "Oceanside": {"latitude": 33.1959, "longitude": -117.3795},
        "Paradise": {"latitude": 36.0972, "longitude": -115.1467}
    }

    # Fetch air quality data and add to the city data
    for city, coords in cities.items():
        lat, lon = coords['latitude'], coords['longitude']
        pm2_5 = get_air_quality(lat, lon, api_key)
        if pm2_5 is not None:
            cities[city]['pm2_5'] = pm2_5
            print(f"{city}: PM2.5 = {pm2_5} µg/m³")

    # Create the heat map
    create_heat_map(cities)

if __name__ == "__main__":
    main()


Garland: PM2.5 = 2.2 µg/m³
Gordonville: PM2.5 = 0.55 µg/m³
Pomona: PM2.5 = 37.55 µg/m³
Long Beach: PM2.5 = 5.84 µg/m³
Signal Hill: PM2.5 = 18.36 µg/m³
Escondido: PM2.5 = 7.26 µg/m³
Palmdale: PM2.5 = 66.87 µg/m³
Lakewood: PM2.5 = 6.07 µg/m³
Oceanside: PM2.5 = 3 µg/m³
Paradise: PM2.5 = 6.34 µg/m³
Heat map saved as 'pollution_heat_map.html'


In [1]:
pip install requests pandas openpyxl

Note: you may need to restart the kernel to use updated packages.


In [3]:
import requests
import pandas as pd
from config import api_key



# List of cities with their corresponding latitude and longitude
cities = [
    {"name": "New York", "lat": 40.7128, "lon": -74.0060},
    {"name": "Los Angeles", "lat": 34.0522, "lon": -118.2437},
    {"name": "Chicago", "lat": 41.8781, "lon": -87.6298},
    {"name": "Houston", "lat": 29.7604, "lon": -95.3698},
    {"name": "Phoenix", "lat": 33.4484, "lon": -112.0740}
]

# Function to get air quality data for a city
def get_air_quality(lat, lon):
    url = f'http://api.openweathermap.org/data/2.5/air_pollution?lat={lat}&lon={lon}&appid={api_key}'
    response = requests.get(url)
    data = response.json()
    if response.status_code == 200 and "list" in data:
        air_data = data["list"][0]["components"]
        aqi = data["list"][0]["main"]["aqi"]
        return {
            "AQI": aqi,
            "CO": air_data.get("co"),
            "NO2": air_data.get("no2"),
            "O3": air_data.get("o3"),
            "PM10": air_data.get("pm10"),
            "PM2.5": air_data.get("pm2_5"),
            "SO2": air_data.get("so2")
        }
    else:
        print(f"Failed to retrieve data for lat: {lat}, lon: {lon}")
        return None

# List to store air quality data
data_list = []

# Loop through cities and fetch air quality data
for city in cities:
    air_quality = get_air_quality(city["lat"], city["lon"])
    if air_quality:
        air_quality["City"] = city["name"]
        air_quality["Latitude"] = city["lat"]
        air_quality["Longitude"] = city["lon"]
        data_list.append(air_quality)

# Create a DataFrame
df = pd.DataFrame(data_list, columns=["City", "Latitude", "Longitude", "AQI", "CO", "NO2", "O3", "PM10", "PM2.5", "SO2"])

df.to_csv('../Resources/top_5_cities.csv', index=False)

print("Data saved to ../Resources/top_5_cities.csv")



Data saved to ../Resources/top_5_cities.csv
