In [2]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress

# Impor the OpenWeatherMap API key
from api_keys import weather_api_key

# Import citipy to determine the cities based on latitude and longitude
from citipy import citipy

In [3]:
# Generate random latitude and longitude
lats = np.random.uniform(-90, 90, size=1000)
lngs = np.random.uniform(-180, 180, size=1000)


cities = []

# Use citipy to determine the nearest city for each lat-lng combination
for lat, lng in zip(lats, lngs):
    city = citipy.nearest_city(lat, lng).city_name
    if city not in cities:
        cities.append(city)

print(f"Generated {len(cities)} unique cities.")

Generated 437 unique cities.


In [4]:
# Base URL for OpenWeatherMap
air_quality_url = "http://api.openweathermap.org/data/2.5/air_pollution"

valid_cities = []

for city in cities:
    try:
        # Fetch city coordinates using the weather endpoint
        query_url = f"http://api.openweathermap.org/data/2.5/weather?q={city}&appid={weather_api_key}"
        response = requests.get(query_url)

        # Check if the request was successful
        if response.status_code != 200:
            print(f"City {city} not found (Status Code: {response.status_code}). Skipping...")
            continue

        # Parse JSON response
        data = response.json()
        lat = data['coord']['lat']
        lon = data['coord']['lon']

        # Make an API call to the Air Pollution endpoint
        pollution_response = requests.get(
            f"{air_quality_url}?lat={lat}&lon={lon}&appid={weather_api_key}"
        )

        # Check pollution API response
        if pollution_response.status_code == 200:
            valid_cities.append(city)
        else:
            print(f"Air pollution data not found for {city}. Skipping...")

    except KeyError as key_err:
        print(f"KeyError: {key_err} for {city}. Skipping...")
    except requests.exceptions.RequestException as req_err:
        print(f"Request error: {req_err} for {city}. Skipping...")

    # Pause to avoid rate limiting
    time.sleep(1)

print(f"Valid cities with air quality data: {len(valid_cities)}")


City ytyk-kyuyel' not found (Status Code: 404). Skipping...
City gueoul not found (Status Code: 404). Skipping...
City port glaud not found (Status Code: 404). Skipping...
City vingt cinq not found (Status Code: 404). Skipping...
City dar naim not found (Status Code: 404). Skipping...
City taiohae not found (Status Code: 404). Skipping...
City cumaribo not found (Status Code: 404). Skipping...
City seogwipo not found (Status Code: 404). Skipping...
City muzayri' not found (Status Code: 404). Skipping...
Request error: HTTPConnectionPool(host='api.openweathermap.org', port=80): Read timed out. (read timeout=None) for flying fish cove. Skipping...
City telaga batu not found (Status Code: 404). Skipping...
City bayan nur not found (Status Code: 404). Skipping...
City ouellah not found (Status Code: 404). Skipping...
Request error: HTTPConnectionPool(host='api.openweathermap.org', port=80): Max retries exceeded with url: /data/2.5/weather?q=tolanaro&appid=670f2a326654e8e4ee66af45094f3c93 (

In [7]:
print(valid_cities)

['tiksi', 'waitangi', 'thompson', 'punta arenas', 'carnarvon', 'bilibino', 'puerto natales', 'kulhudhuffushi', 'albany', 'ribeira grande', 'touros', 'chonchi', 'port-aux-francais', 'avarua', 'hadibu', 'arraial do cabo', 'nguigmi', 'ushuaia', 'invercargill', 'tura', 'vernon', 'blackmans bay', 'malanje', 'bandarbeyla', 'iqaluit', 'mbaiki', 'narasannapeta', 'adamstown', 'baley', 'vilyuysk', 'taulaga', "vrangel'", 'san fernando del valle de catamarca', 'holualoa', 'grytviken', 'beau vallon', 'papatowai', 'polyarnyy', 'rio grande', 'west island', 'menongue', 'margaret river', 'utrik', 'harper', 'mar del plata', 'yatou', 'sitka', 'hermanus', 'kodiak', 'petropavlovsk-kamchatsky', 'happy valley-goose bay', 'thunder bay', 'bredasdorp', 'aasiaat', 'basco', 'sao joao da barra', 'aktau', 'xinyuan', 'talnakh', 'chipata', 'hasaki', 'ilulissat', 'puerto ayora', 'geraldton', 'humaita', 'isla aguada', 'safford', 'skeldon', 'ust-nera', 'abong mbang', 'yellowknife', 'olonkinbyen', 'uturoa', 'quimili', 'g

In [26]:
# Create a dictionary to store city-country pairs
city_country_map = {}

for city in valid_cities:
    try:
        query_url = f"http://api.openweathermap.org/data/2.5/weather?q={city}&appid={weather_api_key}"
        response = requests.get(query_url).json()
        
        # Store the city and corresponding country code
        country = response['sys']['country']
        city_country_map[city] = country

        # Print city and country
        print(f"City: {city}, Country: {country}")

    except KeyError as key_err:
        print(f"KeyError: {key_err} for {city}. Skipping...")
    except requests.exceptions.RequestException as req_err:
        print(f"Request error: {req_err} for {city}. Skipping...")

    # Pause to avoid rate limiting
    time.sleep(1)

print(city_country_map) 


City: tiksi, Country: RU
City: waitangi, Country: NZ
City: thompson, Country: CA
City: punta arenas, Country: CL
City: carnarvon, Country: AU
City: bilibino, Country: RU
City: puerto natales, Country: CL
City: kulhudhuffushi, Country: MV
City: albany, Country: US
City: ribeira grande, Country: PT
City: touros, Country: BR
City: chonchi, Country: CL
City: port-aux-francais, Country: TF
City: avarua, Country: CK
City: hadibu, Country: YE
City: arraial do cabo, Country: BR
City: nguigmi, Country: NE
City: ushuaia, Country: AR
City: invercargill, Country: NZ
City: tura, Country: IN
City: vernon, Country: CA
City: blackmans bay, Country: AU
City: malanje, Country: AO
City: bandarbeyla, Country: SO
City: iqaluit, Country: CA
City: mbaiki, Country: CF
City: narasannapeta, Country: IN
City: adamstown, Country: PN
City: baley, Country: RU
City: vilyuysk, Country: RU
City: taulaga, Country: AS
City: vrangel', Country: RU
City: san fernando del valle de catamarca, Country: AR
City: holualoa, Coun

In [27]:
print(f"Total cities: {len(city_country_map)}")

Total cities: 418


In [28]:
data = list(city_country_map.items())

# Create the DataFrame
df = pd.DataFrame(data, columns=['City', 'Country'])

# Save the DataFrame to a CSV file
df.to_csv('Resources/city_country_map.csv', index=False, encoding='utf-8')

In [30]:
# Get all the unique countries from the city-country map
unique_countries = set(city_country_map.values())

# Print the count and the list of unique countries
print(f"Total unique countries: {len(unique_countries)}")
print(unique_countries)


Total unique countries: 116
{'RE', 'DE', 'IR', 'SJ', 'OM', 'MX', 'CN', 'PT', 'LR', 'ID', 'ZA', 'SN', 'EG', 'UZ', 'AS', 'PY', 'GE', 'BW', 'MM', 'CF', 'PE', 'CU', 'ET', 'PA', 'IS', 'IE', 'NA', 'PG', 'WS', 'TH', 'KG', 'EC', 'KZ', 'ST', 'NO', 'BR', 'BO', 'AF', 'JM', 'FR', 'MU', 'GS', 'SB', 'TM', 'GY', 'US', 'AO', 'CD', 'KR', 'KI', 'CI', 'GB', 'AR', 'RO', 'TD', 'PF', 'SD', 'CA', 'TC', 'VN', 'SO', 'BF', 'CM', 'MR', 'ZM', 'MA', 'CL', 'HN', 'SE', 'JP', 'MH', 'VE', 'IN', 'LY', 'RU', 'SH', 'TR', 'TK', 'FI', 'BI', 'AU', 'GU', 'SV', 'MY', 'KE', 'UA', 'DZ', 'LV', 'CK', 'FJ', 'MV', 'GR', 'CO', 'MN', 'IT', 'TF', 'GL', 'VU', 'FO', 'UY', 'WF', 'TZ', 'CC', 'MW', 'GH', 'NE', 'YE', 'BD', 'MP', 'GF', 'PN', 'MG', 'NC', 'PH', 'FM', 'NZ'}


In [83]:
import pycountry

# WHO API: Get all available countries and their alpha-3 codes
who_url = "https://ghoapi.azureedge.net/api/DIMENSION/COUNTRY/DimensionValues"
who_response = requests.get(who_url).json()

# Convert WHO countries from alpha-3 to alpha-2 codes to be matched withOpenWeather Country Code
who_countries_alpha2 = {}

for entry in who_response['value']:
    alpha_3 = entry['Code']
    country_name = entry['Title']

    # Convert alpha-3 to alpha-2 code using pycountry
    try:
        alpha_2 = pycountry.countries.get(alpha_3=alpha_3).alpha_2
        who_countries_alpha2[alpha_2] = country_name
    except AttributeError:
        print(f"Skipping {alpha_3} - No matching alpha-2 code found.")

print(f"WHO Countries (alpha-2): {who_countries_alpha2}")

who_country_codes = who_countries_alpha2.keys()
print(f"WHO Country Codes: {list(who_country_codes)}")


Skipping CHI - No matching alpha-2 code found.
Skipping ME1 - No matching alpha-2 code found.
Skipping SDN736 - No matching alpha-2 code found.
Skipping XKX - No matching alpha-2 code found.
WHO Countries (alpha-2): {'AW': 'Aruba', 'AF': 'Afghanistan', 'AO': 'Angola', 'AI': 'Anguilla', 'AL': 'Albania', 'AD': 'Andorra', 'AE': 'United Arab Emirates', 'AR': 'Argentina', 'AM': 'Armenia', 'AS': 'American Samoa', 'AG': 'Antigua and Barbuda', 'AU': 'Australia', 'AT': 'Austria', 'AZ': 'Azerbaijan', 'BI': 'Burundi', 'BE': 'Belgium', 'BJ': 'Benin', 'BQ': 'Bonaire, Saint Eustatius and Saba', 'BF': 'Burkina Faso', 'BD': 'Bangladesh', 'BG': 'Bulgaria', 'BH': 'Bahrain', 'BS': 'Bahamas', 'BA': 'Bosnia and Herzegovina', 'BY': 'Belarus', 'BZ': 'Belize', 'BM': 'Bermuda', 'BO': 'Bolivia (Plurinational State of)', 'BR': 'Brazil', 'BB': 'Barbados', 'BN': 'Brunei Darussalam', 'BT': 'Bhutan', 'BW': 'Botswana', 'CF': 'Central African Republic', 'CA': 'Canada', 'CH': 'Switzerland', 'CL': 'Chile', 'CN': 'China'

In [86]:
# Filter cities where the country code exists in the WHO data
filtered_cities = {
    city: code for city, code in city_country_map.items() if code in who_country_codes
}

print(f"Cities with matching WHO data: {len(filtered_cities)}")
print(filtered_cities)


Cities with matching WHO data: 412
{'tiksi': 'RU', 'waitangi': 'NZ', 'thompson': 'CA', 'punta arenas': 'CL', 'carnarvon': 'AU', 'bilibino': 'RU', 'puerto natales': 'CL', 'kulhudhuffushi': 'MV', 'albany': 'US', 'ribeira grande': 'PT', 'touros': 'BR', 'chonchi': 'CL', 'avarua': 'CK', 'hadibu': 'YE', 'arraial do cabo': 'BR', 'nguigmi': 'NE', 'ushuaia': 'AR', 'invercargill': 'NZ', 'tura': 'IN', 'vernon': 'CA', 'blackmans bay': 'AU', 'malanje': 'AO', 'bandarbeyla': 'SO', 'iqaluit': 'CA', 'mbaiki': 'CF', 'narasannapeta': 'IN', 'baley': 'RU', 'vilyuysk': 'RU', 'taulaga': 'AS', "vrangel'": 'RU', 'san fernando del valle de catamarca': 'AR', 'holualoa': 'US', 'beau vallon': 'MU', 'papatowai': 'NZ', 'polyarnyy': 'RU', 'rio grande': 'BR', 'menongue': 'AO', 'margaret river': 'AU', 'utrik': 'MH', 'harper': 'LR', 'mar del plata': 'AR', 'yatou': 'CN', 'sitka': 'US', 'hermanus': 'ZA', 'kodiak': 'US', 'petropavlovsk-kamchatsky': 'RU', 'happy valley-goose bay': 'CA', 'thunder bay': 'CA', 'bredasdorp': 'Z

In [87]:
# Store air quality data for cities
air_quality_data = []

# Loop through each city to get pollution data
for city, country in filtered_cities.items():
    try:
        # Query the Air Pollution API using the city's coordinates
        pollution_url = f"http://api.openweathermap.org/data/2.5/air_pollution?lat={lat}&lon={lon}&appid={weather_api_key}"
        pollution_response = requests.get(pollution_url).json()

        # Extract all pollution components
        components = pollution_response['list'][0]['components']
        pm25 = components.get('pm2_5', None)
        pm10 = components.get('pm10', None)
        no2 = components.get('no2', None)
        o3 = components.get('o3', None)
        so2 = components.get('so2', None)
        co = components.get('co', None)
        nh3 = components.get('nh3', None)

        # Store the data for each city
        air_quality_data.append({
            'City': city,
            'Country': country,
            'PM2.5': pm25,
            'PM10': pm10,
            'NO2': no2,
            'O3': o3,
            'SO2': so2,
            'CO': co,
            'NH3': nh3
        })

        print(f"Retrieved pollution data for {city}, {country}")

    except Exception as e:
        print(f"Error retrieving data for {city}: {e}")

    # Pause to avoid API rate limits
    time.sleep(1)

# Convert the collected data into a DataFrame
air_quality_df = pd.DataFrame(air_quality_data)
print(air_quality_df.head())


Retrieved pollution data for tiksi, RU
Retrieved pollution data for waitangi, NZ
Retrieved pollution data for thompson, CA
Retrieved pollution data for punta arenas, CL
Retrieved pollution data for carnarvon, AU
Retrieved pollution data for bilibino, RU
Retrieved pollution data for puerto natales, CL
Retrieved pollution data for kulhudhuffushi, MV
Retrieved pollution data for albany, US
Retrieved pollution data for ribeira grande, PT
Retrieved pollution data for touros, BR
Retrieved pollution data for chonchi, CL
Retrieved pollution data for avarua, CK
Retrieved pollution data for hadibu, YE
Retrieved pollution data for arraial do cabo, BR
Retrieved pollution data for nguigmi, NE
Retrieved pollution data for ushuaia, AR
Retrieved pollution data for invercargill, NZ
Retrieved pollution data for tura, IN
Retrieved pollution data for vernon, CA
Retrieved pollution data for blackmans bay, AU
Retrieved pollution data for malanje, AO
Retrieved pollution data for bandarbeyla, SO
Retrieved pol

In [88]:
air_quality_df.to_csv('Resources/air_quality_data.csv', index=False)

In [125]:
# WHO API: Air pollution attributable DALYs
who_url = "https://ghoapi.azureedge.net/api/AIR_35"
response = requests.get(who_url)

# Check for valid response
if response.status_code == 200:
    data = response.json()['value']
    # Parse the data into a DataFrame
    who_data = [
        {
            'Country': entry['SpatialDim'],
            'Year': entry['TimeDim'],
            'DALYs_per_100k': entry['NumericValue']
        }
        for entry in data
    ]
    
    df_who = pd.DataFrame(who_data)
    print(df_who.head())
    
    df_who.to_csv('Resources/who_air_pollution_dalys.csv', index=False)
else:
    print(f"Failed to retrieve data. Status code: {response.status_code}")


  Country  Year  DALYs_per_100k
0     BTN  2012         289.341
1     COG  2018        2063.071
2     KGZ  2017         175.166
3     CHN  2018       44691.668
4     PAN  2011          25.873


In [126]:
import pycountry

# Function to convert ISO-3 to ISO-2
def iso3_to_iso2(iso3_code):
    try:
        return pycountry.countries.get(alpha_3=iso3_code).alpha_2
    except AttributeError:
        return None  

df_who = pd.read_csv('Resources/who_air_pollution_dalys.csv')

# Convert WHO's ISO-3 codes to ISO-2
df_who['Country_ISO2'] = df_who['Country'].apply(iso3_to_iso2)

df_who_clean = df_who.dropna(subset=['Country_ISO2'])

df_weather = pd.read_csv('Resources/city_country_map.csv')

merged_df = pd.merge(air_quality_df, df_who_clean, left_on='Country', right_on='Country_ISO2', how='inner')

print(merged_df.head())

merged_df.to_csv('Resources/merged_air_quality_health.csv', index=False)


    City Country_x  PM2.5  PM10   NO2     O3   SO2     CO   NH3 Country_y  \
0  tiksi        RU    1.7  5.99  0.62  98.71  0.38  198.6  0.17       RUS   
1  tiksi        RU    1.7  5.99  0.62  98.71  0.38  198.6  0.17       RUS   
2  tiksi        RU    1.7  5.99  0.62  98.71  0.38  198.6  0.17       RUS   
3  tiksi        RU    1.7  5.99  0.62  98.71  0.38  198.6  0.17       RUS   
4  tiksi        RU    1.7  5.99  0.62  98.71  0.38  198.6  0.17       RUS   

   Year  DALYs_per_100k Country_ISO2  
0  2010      187804.732           RU  
1  2011      117145.137           RU  
2  2010        4091.222           RU  
3  2010       99488.703           RU  
4  2017       88567.167           RU  


In [127]:
count = len(merged_df)
count

69480

In [128]:
# Drop redundant columns (dropping 'Country_ISO2' and 'Country_y')
merged_df = merged_df.drop(['Country_ISO2', 'Country_y'], axis=1)

# Rename columns for clarity
merged_df = merged_df.rename(columns={'Country_x': 'Country'})

In [129]:
merged_df.to_csv('Resources/merged_air_quality_health.csv', index=False)

In [131]:
# Load the merged dataset
df = pd.read_csv('Resources/merged_air_quality_health.csv')

# Group by City and Country to calculate average values across all years
grouped_df = df.groupby(['City', 'Country'], as_index=False).agg({
    'PM2.5': 'mean',
    'PM10': 'mean',
    'NO2': 'mean',
    'O3': 'mean',
    'SO2': 'mean',
    'CO': 'mean',
    'NH3': 'mean',
    'DALYs_per_100k': 'mean'
})

grouped_df['DALYs_per_100k'] = grouped_df['DALYs_per_100k'].round(1)

grouped_df.to_csv('Resources/average_air_quality_health.csv', index=False)

print(grouped_df.head(10))



          City Country  PM2.5  PM10   NO2     O3   SO2     CO   NH3  \
0  abong mbang      CM    1.7  5.99  0.62  98.71  0.38  198.6  0.17   
1       acarau      BR    1.7  5.99  0.62  98.71  0.38  198.6  0.17   
2    ad dindar      SD    1.7  5.99  0.62  98.71  0.38  198.6  0.17   
3        aioun      MR    1.7  5.99  0.62  98.71  0.38  198.6  0.17   
4        aketi      CD    1.7  5.99  0.62  98.71  0.38  198.6  0.17   
5        aktau      KZ    1.7  5.99  0.62  98.71  0.38  198.6  0.17   
6     akureyri      IS    1.7  5.99  0.62  98.71  0.38  198.6  0.17   
7    al bawiti      EG    1.7  5.99  0.62  98.71  0.38  198.6  0.17   
8     alaghsas      NE    1.7  5.99  0.62  98.71  0.38  198.6  0.17   
9       albany      US    1.7  5.99  0.62  98.71  0.38  198.6  0.17   

   DALYs_per_100k  
0          5871.4  
1         15847.8  
2          7159.5  
3           627.5  
4         20697.8  
5          3645.7  
6            15.0  
7         14541.2  
8          5487.8  
9         22347.7 