In [23]:
import pandas as pd
import matplotlib.pyplot  as plt
import requests
import time

# Impor the OpenWeatherMap API key
from api_keys import weather_api_key

In [24]:
# Load the CSV file
file_path = 'Resources/average_air_quality_health.csv'
data = pd.read_csv(file_path)

In [25]:
# First, save the first city name and aggregate other numeric columns
country_aggregated = data.groupby('Country').agg({
    'City': 'first',   #  To keep the first city name beacuse the API is based on city name and if we just pass country code it may return wrong value: Golnaz
    'PM2.5': 'sum',    
    'PM10': 'sum',
    'NO2': 'sum',
    'O3': 'sum',
    'SO2': 'sum',
    'CO': 'sum',
    'NH3': 'sum',
    'MortalityRate_per_100k': 'sum'
}).reset_index()

# Round all numeric columns to 0 decimal places (excluding 'City' column)
country_aggregated.iloc[:, 2:] = country_aggregated.iloc[:, 2:].round(0)

# Display the result
country_aggregated.head()


Unnamed: 0,Country,City,PM2.5,PM10,NO2,O3,SO2,CO,NH3,MortalityRate_per_100k
0,AO,namibe,13.0,52.0,0.0,64.0,0.0,214.0,0.0,22.0
1,AR,el calafate,18.0,29.0,6.0,319.0,1.0,2267.0,6.0,86.0
2,AU,alice springs,79.0,201.0,30.0,1192.0,32.0,5362.0,5.0,70.0
3,BE,knokke,2.0,2.0,8.0,51.0,1.0,217.0,3.0,6.0
4,BO,chimore,46.0,48.0,9.0,85.0,1.0,2690.0,2.0,56.0


In [27]:
# Base URL for OpenWeather Geocoding API
geo_url = "http://api.openweathermap.org/geo/1.0/direct"

latitudes = []
longitudes = []

# Loop through each country and get lat/lon
for city in country_aggregated['City']:
    try:
        # Fetch country coordinates using the geocoding endpoint
        query_url = f"{geo_url}?q={city}&limit=1&appid={weather_api_key}"
        response = requests.get(query_url)

        if response.status_code != 200:
            print(f"City {city} not found (Status Code: {response.status_code}). Skipping...")
            latitudes.append(None)
            longitudes.append(None)
            continue

        data = response.json()
        if len(data) > 0:
            lat = data[0]['lat']
            lon = data[0]['lon']
            
            # Append lat and lon to lists
            latitudes.append(lat)
            longitudes.append(lon)
            print(f"Retrieved coordinates for {city}: ({lat}, {lon})")
        else:
            print(f"No data found for {city}. Skipping...")
            latitudes.append(None)
            longitudes.append(None)

    except KeyError as key_err:
        print(f"KeyError: {key_err} for {city}. Skipping...")
        latitudes.append(None)
        longitudes.append(None)
    except requests.exceptions.RequestException as req_err:
        print(f"Request error: {req_err} for {city}. Skipping...")
        latitudes.append(None)
        longitudes.append(None)

    # Pause to avoid rate limiting
    time.sleep(1)

# Add latitude and longitude lists directly to the dataframe
country_aggregated['Latitude'] = latitudes
country_aggregated['Longitude'] = longitudes

country_aggregated.head()

Retrieved coordinates for namibe: (-15.195064, 12.1458085)
Retrieved coordinates for el calafate: (-50.3388712, -72.2720843)
Retrieved coordinates for alice springs: (-23.6983884, 133.8812885)
Retrieved coordinates for knokke: (51.3464863, 3.2876314)
Retrieved coordinates for chimore: (-16.9939202, -65.1504578)
Retrieved coordinates for acarau: (-2.8869514, -40.1194713)
Retrieved coordinates for amos: (48.5718519, -78.1160856)
Retrieved coordinates for mongoumba: (3.6382754, 18.5948982)
Retrieved coordinates for bondoukou: (8.0397992, -2.7984258)
Retrieved coordinates for ancud: (-41.8682162, -73.8287225)
Retrieved coordinates for garoua boulai: (5.8948457, 14.5471591)
Retrieved coordinates for fuling: (29.7065405, 107.3910809)
Retrieved coordinates for cartagena: (10.4195841, -75.5271224)
Retrieved coordinates for cidade velha: (-1.4619451, -48.5027503)
Retrieved coordinates for colonia: (50.938361, 6.959974)
Retrieved coordinates for cheria: (35.2706778, 7.7527115)
Retrieved coordina

Unnamed: 0,Country,City,PM2.5,PM10,NO2,O3,SO2,CO,NH3,MortalityRate_per_100k,Latitude,Longitude
0,AO,namibe,13.0,52.0,0.0,64.0,0.0,214.0,0.0,22.0,-15.195064,12.145808
1,AR,el calafate,18.0,29.0,6.0,319.0,1.0,2267.0,6.0,86.0,-50.338871,-72.272084
2,AU,alice springs,79.0,201.0,30.0,1192.0,32.0,5362.0,5.0,70.0,-23.698388,133.881289
3,BE,knokke,2.0,2.0,8.0,51.0,1.0,217.0,3.0,6.0,51.346486,3.287631
4,BO,chimore,46.0,48.0,9.0,85.0,1.0,2690.0,2.0,56.0,-16.99392,-65.150458


In [28]:
length = len(country_aggregated)
print(f"The length of the dataframe is: {length}")

The length of the dataframe is: 86


In [29]:
print(country_aggregated.isnull().sum())

Country                   0
City                      0
PM2.5                     0
PM10                      0
NO2                       0
O3                        0
SO2                       0
CO                        0
NH3                       0
MortalityRate_per_100k    0
Latitude                  4
Longitude                 4
dtype: int64


In [30]:
# Drop rows where Latitude or Longitude are null
country_aggregated_cleaned = country_aggregated.dropna(subset=['Latitude', 'Longitude'])

In [31]:
import hvplot.pandas

In [32]:
# Configure the map
map_plot_2 = country_aggregated_cleaned.hvplot.points(
    "Longitude",
    "Latitude",
    geo = True,
    tiles = "EsriImagery",
    frame_width = 800,
    frame_height = 600,
    size = "MortalityRate_per_100k",
    scale = 0.5,
    color = "Country"
)

# Display the map plot
map_plot_2