In [1]:
# Importing libraries
import pandas as pd
from pathlib import Path
import csv
from config import api_key
import requests
from pprint import pprint
import random
import matplotlib.pyplot as plt
from datetime import datetime, timedelta


In [2]:
csv_path = Path("../Resources/global air pollution dataset.csv")
global_df = pd.read_csv(csv_path)
global_df.head()

Unnamed: 0,Country,City,AQI Value,AQI Category,CO AQI Value,CO AQI Category,Ozone AQI Value,Ozone AQI Category,NO2 AQI Value,NO2 AQI Category,PM2.5 AQI Value,PM2.5 AQI Category
0,Russian Federation,Praskoveya,51,Moderate,1,Good,36,Good,0,Good,51,Moderate
1,Brazil,Presidente Dutra,41,Good,1,Good,5,Good,1,Good,41,Good
2,Italy,Priolo Gargallo,66,Moderate,1,Good,39,Good,2,Good,66,Moderate
3,Poland,Przasnysz,34,Good,1,Good,34,Good,0,Good,20,Good
4,France,Punaauia,22,Good,0,Good,22,Good,0,Good,6,Good


In [3]:
global_df.columns

Index(['Country', 'City', 'AQI Value', 'AQI Category', 'CO AQI Value',
       'CO AQI Category', 'Ozone AQI Value', 'Ozone AQI Category',
       'NO2 AQI Value', 'NO2 AQI Category', 'PM2.5 AQI Value',
       'PM2.5 AQI Category'],
      dtype='object')

In [4]:
NorthAmerica_df = global_df.loc[global_df['Country'].isin(['United States of America', 'Canada', 'Mexico'])]
NorthAmerica_df.head()
NorthAmerica_df.count()

Country               3601
City                  3601
AQI Value             3601
AQI Category          3601
CO AQI Value          3601
CO AQI Category       3601
Ozone AQI Value       3601
Ozone AQI Category    3601
NO2 AQI Value         3601
NO2 AQI Category      3601
PM2.5 AQI Value       3601
PM2.5 AQI Category    3601
dtype: int64

In [5]:
unique_cities = NorthAmerica_df[['Country', 'City']].drop_duplicates().reset_index(drop=True)
cities_list = unique_cities['City']
cities_list.to_csv('../Resources/cities_list.csv')
unique_cities.to_csv('../Resources/citiesandcountry_list.csv')

# Making a list of cities. This is the entire list of 3601 countries 
list_cities = cities_list.to_list()

# Randomized list of 500 cities. This is probably better for plotting and to avoid hitting API limit
random_cities = random.sample(list_cities,50) # change to 500 or any number you want

# Shortened list of cities. Use this for testing code to avoid hitting API limit
short_cities = cities_list.head(3).tolist()


In [6]:
random_cities

['Highland Village',
 'West Melbourne',
 'Gretna',
 'Ripon',
 'Woburn',
 'Santa Fe',
 'Folsom',
 'Grove City',
 'Boca Raton',
 'Tequisquiapan',
 'Wahiawa',
 'South Laurel',
 'Kingsburg',
 'Horsham',
 'Billings',
 'Maumee',
 'Wilbraham',
 'Newberg',
 'Immokalee',
 'Ruiz',
 'Waianae',
 'Bloomingdale',
 'Mountain View',
 'North Saint Paul',
 'North Plainfield',
 'Dunmore',
 'Streator',
 'Buffalo',
 'Pittsfield',
 'Moyotzingo',
 'Peterborough',
 'Gardendale',
 'Huetamo',
 'Hackensack',
 'Arriaga',
 'Richmond Heights',
 'East Massapequa',
 'Forest Park',
 'Tlaxcalancingo',
 'Mount Juliet',
 'Pleasant Hill',
 'Valtierrilla',
 'Post Falls',
 'Milwaukie',
 'Wickliffe',
 'Clawson',
 'Moncton',
 'Laconia',
 'The Colony',
 'Glenn Dale']

In [7]:
# Acessing the open weather API to get lattitude and longitude values

url = "http://api.openweathermap.org/data/2.5/weather?"
units = "metric"

# Build partial query URL
query_url = f"{url}appid={api_key}&units={units}&q="

# Making API calls
lat = []
lon = []

# loop through list of cities
for city in random_cities:
    #print (city)
    
    response = requests.get(query_url + city).json()
    try:
        #print (response)
        lat.append(response['coord']['lat'])
        lon.append(response['coord']['lon'])
    except Exception as e:
        print(f'city not found or {e}')
        pass

city not found or 'coord'


In [8]:
# Accessing the open weather API to get air pollution values

air_url = 'http://api.openweathermap.org/data/2.5/air_pollution?' # current air pollution values

coord = [] # latitude and longitude coordinates
aqi = [] # air quality index
co = [] # Concentration of CO (Carbon monoxide), μg/m3
nh3 = [] # Concentration of NO (Nitrogen monoxide), μg/m3
no = [] # Сoncentration of NO2 (Nitrogen dioxide), μg/m3
no2 = [] # Сoncentration of O3 (Ozone), μg/m3
o3 = [] # Сoncentration of SO2 (Sulphur dioxide), μg/m3
pm10 = [] # Сoncentration of PM2.5 (Fine particles matter),
pm2_5 = [] # Сoncentration of PM10 (Coarse particulate matter), μg/m3
so2 = [] # Сoncentration of NH3 (Ammonia), μg/m3


for lt,ln in zip(lat,lon):
    response = requests.get(f'{air_url}lat={lt}&lon={ln}&appid={api_key}').json()
    #pprint (response)
    try:
        coord.append(response['coord'])
        aqi.append(response['list'][0]['main']['aqi'])
        co.append(response['list'][0]['components']['co'])
        nh3.append(response['list'][0]['components']['nh3'])
        no.append(response['list'][0]['components']['no'])
        no2.append(response['list'][0]['components']['no2'])
        o3.append(response['list'][0]['components']['o3'])
        pm10.append(response['list'][0]['components']['pm10'])
        pm2_5.append(response['list'][0]['components']['pm2_5'])
        so2.append(response['list'][0]['components']['so2'])
    except Exception as e:
        print (f'failed to get values for lat:{lt} and lon:{ln}. Error is: {e}')

In [15]:

# OpenWeatherMap API endpoint for getting weather data
url = "http://api.openweathermap.org/data/2.5/weather?"

# Parameters for the API request
units = "metric"
city = "New York"

# Build the complete query URL
query_url = f"{url}appid={api_key}&units={units}&q={city}"

# Make the API call to get weather data for New York
response = requests.get(query_url).json()

# Extract latitude and longitude if the city is found
try:
    lat = response['coord']['lat']
    lon = response['coord']['lon']
    print(f"Coordinates of New York: Latitude = {lat}, Longitude = {lon}")
except KeyError:
    print("City not found or invalid response from the API")


Coordinates of New York: Latitude = 40.7143, Longitude = -74.006


In [16]:
# OpenWeatherMap API endpoint for getting weather data
url = "http://api.openweathermap.org/data/2.5/weather?"

# Parameters for the API request
units = "metric"
city = "Los Angeles"

# Build the complete query URL
query_url = f"{url}appid={api_key}&units={units}&q={city}"

# Make the API call to get weather data for New York
response = requests.get(query_url).json()

# Extract latitude and longitude if the city is found
try:
    lat = response['coord']['lat']
    lon = response['coord']['lon']
    print(f"Coordinates of Los Angeles: Latitude = {lat}, Longitude = {lon}")
except KeyError:
    print("City not found or invalid response from the API")


Coordinates of Los Angeles: Latitude = 34.0522, Longitude = -118.2437


In [17]:
# OpenWeatherMap API endpoint for getting weather data
url = "http://api.openweathermap.org/data/2.5/weather?"

# Parameters for the API request
units = "metric"
city = "Chicago"

# Build the complete query URL
query_url = f"{url}appid={api_key}&units={units}&q={city}"

# Make the API call to get weather data for New York
response = requests.get(query_url).json()

# Extract latitude and longitude if the city is found
try:
    lat = response['coord']['lat']
    lon = response['coord']['lon']
    print(f"Coordinates of Chicago: Latitude = {lat}, Longitude = {lon}")
except KeyError:
    print("City not found or invalid response from the API")


Coordinates of Chicago: Latitude = 41.85, Longitude = -87.65


In [18]:
# OpenWeatherMap API endpoint for getting weather data
url = "http://api.openweathermap.org/data/2.5/weather?"

# Parameters for the API request
units = "metric"
city = "Houston"

# Build the complete query URL
query_url = f"{url}appid={api_key}&units={units}&q={city}"

# Make the API call to get weather data for New York
response = requests.get(query_url).json()

# Extract latitude and longitude if the city is found
try:
    lat = response['coord']['lat']
    lon = response['coord']['lon']
    print(f"Coordinates of Houston: Latitude = {lat}, Longitude = {lon}")
except KeyError:
    print("City not found or invalid response from the API")


Coordinates of Houston: Latitude = 29.7633, Longitude = -95.3633


In [19]:
# OpenWeatherMap API endpoint for getting weather data
url = "http://api.openweathermap.org/data/2.5/weather?"

# Parameters for the API request
units = "metric"
city = "Phoenix"

# Build the complete query URL
query_url = f"{url}appid={api_key}&units={units}&q={city}"

# Make the API call to get weather data for New York
response = requests.get(query_url).json()

# Extract latitude and longitude if the city is found
try:
    lat = response['coord']['lat']
    lon = response['coord']['lon']
    print(f"Coordinates of Phoenix: Latitude = {lat}, Longitude = {lon}")
except KeyError:
    print("City not found or invalid response from the API")


Coordinates of Phoenix: Latitude = 33.4484, Longitude = -112.074


In [10]:

from bokeh.plotting import figure, show, output_file
from bokeh.models import ColumnDataSource
from bokeh.io import output_notebook

# Activate inline plotting for Bokeh
output_notebook()


# New York coordinates
lat = 40.7143
lon = -74.006

# Time settings for the past five years (convert to Unix timestamps)
end_date = datetime.utcnow()
start_date = end_date - timedelta(days=5*365) # 5 years ago

# Historical Air Pollution URL
air_history_url = "http://api.openweathermap.org/data/2.5/air_pollution/history"

# Placeholder lists to collect data
timestamps = []
aqi_values = []
co_values = []
no2_values = []
o3_values = []
pm10_values = []
pm2_5_values = []
so2_values = []

# Loop through time, getting data in monthly chunks
current_date = start_date
while current_date < end_date:
    # Set start and end of the month
    start_time_unix = int(current_date.timestamp())
    next_month = current_date + timedelta(days=30)
    end_time_unix = int(next_month.timestamp())
    
    # API Request
    response = requests.get(
        f"{air_history_url}?lat={lat}&lon={lon}&start={start_time_unix}&end={end_time_unix}&appid={api_key}"
    ).json()
    
    # Extract data if available
    try:
        for record in response['list']:
            timestamps.append(datetime.utcfromtimestamp(record['dt']))
            aqi_values.append(record['main']['aqi'])
            co_values.append(record['components']['co'])
            no2_values.append(record['components']['no2'])
            o3_values.append(record['components']['o3'])
            pm10_values.append(record['components']['pm10'])
            pm2_5_values.append(record['components']['pm2_5'])
            so2_values.append(record['components']['so2'])
    except KeyError:
        print(f"No data available for {current_date.strftime('%Y-%m')}")

    # Move to the next month
    current_date = next_month

# Create a DataFrame
data = pd.DataFrame({
    'timestamp': timestamps,
    'AQI': aqi_values,
    'CO': co_values,
    'NO2': no2_values,
    'O3': o3_values,
    'PM10': pm10_values,
    'PM2.5': pm2_5_values,
    'SO2': so2_values
})

# Plot using Bokeh
source = ColumnDataSource(data)

# Create a new plot
p = figure(
    title="Air Quality in New York Over 5 Years",
    x_axis_label='Date',
    y_axis_label='Concentration (μg/m3)',
    x_axis_type='datetime',
    width=900,  # Change from plot_width to width
    height=500  # Change from plot_height to height
)

# Add lines for different pollutants
p.line(x='timestamp', y='AQI', line_width=2, source=source, color='red', legend_label='AQI')
p.line(x='timestamp', y='CO', line_width=2, source=source, color='blue', legend_label='CO')
p.line(x='timestamp', y='NO2', line_width=2, source=source, color='green', legend_label='NO2')
p.line(x='timestamp', y='O3', line_width=2, source=source, color='purple', legend_label='O3')
p.line(x='timestamp', y='PM10', line_width=2, source=source, color='orange', legend_label='PM10')
p.line(x='timestamp', y='PM2.5', line_width=2, source=source, color='brown', legend_label='PM2.5')
p.line(x='timestamp', y='SO2', line_width=2, source=source, color='gray', legend_label='SO2')

# Customize legend
p.legend.location = "top_left"
p.legend.click_policy = "hide"

# Show plot
show(p)


In [11]:
import panel as pn

# Create widgets for interaction
pollutant_selector = pn.widgets.Select(name='Pollutant', options=['AQI', 'CO', 'NO2', 'O3', 'PM10', 'PM2.5', 'SO2'])

@pn.depends(pollutant_selector)
def update_plot(selected_pollutant):
    p_interactive = figure(
        title=f"{selected_pollutant} Over Time",
        x_axis_label='Date',
        y_axis_label='Concentration (μg/m3)',
        x_axis_type='datetime',
        width=900,
        height=500
    )
    p_interactive.line(x='timestamp', y=selected_pollutant, source=data, line_width=2)
    return p_interactive

# Create dashboard layout
dashboard = pn.Column(pollutant_selector, update_plot)
pn.serve(dashboard)

# Display the dashboard
dashboard


Launching server at http://localhost:58179


AssertionError: 

In [1]:
from datetime import datetime, timedelta
import panel as pn
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource
from bokeh.io import output_notebook
import numpy as np
import pandas as pd
import requests
import time
from config import api_key

output_notebook()

# Enable Panel inline plotting in notebooks
pn.extension()

# Cities to analyze along with their latitude and longitude
cities_info = {
    'New York': {'lat': 40.7143, 'lon': -74.006},
    'Los Angeles': {'lat': 34.0522, 'lon': -118.2437},
    'Chicago': {'lat': 41.85, 'lon': -87.65},
    'Houston': {'lat': 29.7633, 'lon': -95.3633},
    'Phoenix': {'lat': 33.4484, 'lon': -112.074}
}


# Function to fetch air quality data from OpenWeather API
def fetch_air_quality_data(city, lat, lon, start_date, end_date):
    start_timestamp = int(datetime.strptime(start_date, '%Y-%m-%d').timestamp())
    end_timestamp = int(datetime.strptime(end_date, '%Y-%m-%d').timestamp())
    url = f"http://api.openweathermap.org/data/2.5/air_pollution/history"
    
    # OpenWeather API allows fetching data in hourly intervals
    params = {
        'lat': lat,
        'lon': lon,
        'start': start_timestamp,
        'end': end_timestamp,
        'appid': api_key
    }
    
    response = requests.get(url, params=params)
    if response.status_code == 200:
        data = response.json().get('list', [])
        return data
    else:
        print(f"Failed to fetch data for {city}: {response.status_code}")
        return []

# Function to get air quality data for all cities and aggregate it into a DataFrame
def get_air_quality_data(cities_info, start_date, end_date):
    all_data = []
    for city, coords in cities_info.items():
        lat, lon = coords['lat'], coords['lon']
        city_data = fetch_air_quality_data(city, lat, lon, start_date, end_date)
        for entry in city_data:
            dt = datetime.utcfromtimestamp(entry['dt'])
            components = entry['components']
            all_data.append({
                'City': city,
                'Date': dt,
                'AQI': entry.get('main', {}).get('aqi', np.nan),  # You may need to calculate this from components
                'CO': components.get('co', np.nan),
                'NO2': components.get('no2', np.nan),
                'O3': components.get('o3', np.nan),
                'PM10': components.get('pm10', np.nan),
                'PM2.5': components.get('pm2_5', np.nan),
                'SO2': components.get('so2', np.nan)
            })
        time.sleep(1)  # To avoid exceeding the API rate limit
    return pd.DataFrame(all_data)

# Generate data for the given date range
start_date = '2019-01-01'
end_date = '2023-12-31'
air_quality_data = get_air_quality_data(cities_info, start_date, end_date)

# Function to create a Bokeh plot for multiple cities
def create_bokeh_plot(data, selected_cities):
    # Filter data for the selected cities
    city_data = data[data['City'].isin(selected_cities)]

    # Convert date column to datetime
    city_data['Date'] = pd.to_datetime(city_data['Date'])

    # Create a new plot
    p = figure(
        title=f"Air Quality Over 5 Years for Selected Cities",
        x_axis_label='Date',
        y_axis_label='Concentration (μg/m3)',
        x_axis_type='datetime',
        width=900,
        height=500
    )

    # Add lines for different pollutants for each selected city
    colors = ['blue', 'red', 'green', 'purple', 'orange', 'brown', 'gray']
    pollutants = ['CO', 'NO2', 'O3', 'PM10', 'PM2.5', 'SO2']

    for i, city in enumerate(selected_cities):
        city_specific_data = city_data[city_data['City'] == city]
        source = ColumnDataSource(city_specific_data)
        
        for j, pollutant in enumerate(pollutants):
            p.line(
                x='Date', 
                y=pollutant, 
                line_width=2, 
                source=source, 
                color=colors[j % len(colors)], 
                legend_label=f"{pollutant} ({city})"
            )

    # Customize legend
    p.legend.location = "top_right"
    p.legend.click_policy = "hide"

    return p

# Panel widget for multiple city selection
city_selector = pn.widgets.MultiSelect(name='Cities', options=list(cities_info.keys()), size=5)

# Function to update the plot based on city selection
@pn.depends(city_selector.param.value)
def update_plot(selected_cities):
    if selected_cities:
        return create_bokeh_plot(air_quality_data, selected_cities)
    else:
        return "Please select at least one city."

# Create the dashboard layout
dashboard2 = pn.Column(city_selector, update_plot)

# Display the dashboard
dashboard2.show()


   pip install jupyter_bokeh

or:
    conda install jupyter_bokeh

and try again.
  pn.extension()


Launching server at http://localhost:62211


AssertionError: 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  city_data['Date'] = pd.to_datetime(city_data['Date'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  city_data['Date'] = pd.to_datetime(city_data['Date'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  city_data['Date'] = pd.to_datetime(city_data['Date'])
A value is trying to be set on a copy of a 

In [3]:
pip install folium

Collecting folium
  Downloading folium-0.17.0-py2.py3-none-any.whl.metadata (3.8 kB)
Collecting branca>=0.6.0 (from folium)
  Downloading branca-0.8.0-py3-none-any.whl.metadata (1.5 kB)
Downloading folium-0.17.0-py2.py3-none-any.whl (108 kB)
   ---------------------------------------- 0.0/108.4 kB ? eta -:--:--
   ---------------------------------------- 0.0/108.4 kB ? eta -:--:--
   --- ------------------------------------ 10.2/108.4 kB ? eta -:--:--
   ----------- --------------------------- 30.7/108.4 kB 325.1 kB/s eta 0:00:01
   --------------------------------- ----- 92.2/108.4 kB 655.4 kB/s eta 0:00:01
   -------------------------------------- 108.4/108.4 kB 625.3 kB/s eta 0:00:00
Downloading branca-0.8.0-py3-none-any.whl (25 kB)
Installing collected packages: branca, folium
Successfully installed branca-0.8.0 folium-0.17.0
Note: you may need to restart the kernel to use updated packages.


In [4]:
import requests
import folium
from folium.plugins import HeatMap
from config import api_key

# Step 1: Set up the API and parameters

base_url = 'http://api.openweathermap.org/data/2.5/air_pollution'

# List of cities with their coordinates
cities = [
    {'name': 'New York', 'lat': 40.7128, 'lon': -74.0060},
    {'name': 'Los Angeles', 'lat': 34.0522, 'lon': -118.2437},
    {'name': 'Chicago', 'lat': 41.8781, 'lon': -87.6298},
    {'name': 'Houston', 'lat': 29.7604, 'lon': -95.3698},
    {'name': 'Miami', 'lat': 25.7617, 'lon': -80.1918}
]

# Step 2: Fetch air pollution data for each city
pollution_data = []

for city in cities:
    params = {
        'lat': city['lat'],
        'lon': city['lon'],
        'appid': api_key
    }
    response = requests.get(base_url, params=params)
    data = response.json()
    
    if response.status_code == 200 and 'list' in data:
        # Get PM2.5 concentration
        pm25 = data['list'][0]['components']['pm2_5']
        pollution_data.append([city['lat'], city['lon'], pm25])
    else:
        print(f"Failed to get data for {city['name']}")

# Step 3: Create a folium map
map_center = [39.8283, -98.5795]  # Center of the USA
pollution_map = folium.Map(location=map_center, zoom_start=4)

# Step 4: Add pollution data as a heat map
HeatMap(pollution_data).add_to(pollution_map)

# Step 5: Save the map to an HTML file
pollution_map.save('air_pollution_heat_map.html')

print("Air pollution heat map created and saved as 'air_pollution_heat_map.html'")


Air pollution heat map created and saved as 'air_pollution_heat_map.html'
