In [185]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress

# Impor the OpenWeatherMap API key
from api_keys import weather_api_key

# Import citipy to determine the cities based on latitude and longitude
from citipy import citipy

In [186]:
# Generate random latitude and longitude
lats = np.random.uniform(-90, 90, size=1000)
lngs = np.random.uniform(-180, 180, size=1000)


cities = []

# Use citipy to determine the nearest city for each lat-lng combination
for lat, lng in zip(lats, lngs):
    city = citipy.nearest_city(lat, lng).city_name
    if city not in cities:
        cities.append(city)

print(f"Generated {len(cities)} unique cities.")

Generated 435 unique cities.


In [187]:
# Base URLs
weather_url = "http://api.openweathermap.org/data/2.5/weather"
air_quality_url = "http://api.openweathermap.org/data/2.5/air_pollution"

# Dictionary to store valid city data with coordinates
valid_cities = {}

for city in cities:
    try:
        # Fetch city coordinates using the weather endpoint
        query_url = f"{weather_url}?q={city}&appid={weather_api_key}"
        response = requests.get(query_url)

        if response.status_code != 200:
            print(f"City {city} not found (Status Code: {response.status_code}). Skipping...")
            continue

        # Parse JSON response for coordinates
        data = response.json()
        lat = data['coord']['lat']
        lon = data['coord']['lon']
        country = data['sys']['country']

        # Store in filtered_cities dictionary
        valid_cities[city] = {'city': city, 'lat': lat, 'lon': lon, 'country': country}

        print(f"Retrieved coordinates for {city}: ({lat}, {lon})")

    except KeyError as key_err:
        print(f"KeyError: {key_err} for {city}. Skipping...")
    except requests.exceptions.RequestException as req_err:
        print(f"Request error: {req_err} for {city}. Skipping...")

    # Pause to avoid rate limiting
    time.sleep(1)

print(f"Total valid cities with coordinates: {len(valid_cities)}")

Retrieved coordinates for port-aux-francais: (-49.35, 70.2167)
Retrieved coordinates for ribeira grande: (38.5167, -28.7)
Retrieved coordinates for korenovsk: (45.4686, 39.4519)
Retrieved coordinates for srandakan: (-7.9386, 110.2506)
City 'ohonua not found (Status Code: 404). Skipping...
Retrieved coordinates for rio grande: (-32.035, -52.0986)
Retrieved coordinates for al muwayh: (22.4333, 41.7583)
City taiohae not found (Status Code: 404). Skipping...
Retrieved coordinates for thompson: (55.7435, -97.8558)
Retrieved coordinates for midvagur: (62.0511, -7.1939)
Retrieved coordinates for grytviken: (-54.2811, -36.5092)
Retrieved coordinates for aleksandrovskoye: (44.7142, 43.0008)
Retrieved coordinates for petropavlivka: (48.4564, 36.4367)
Retrieved coordinates for adamstown: (-25.066, -130.1015)
Retrieved coordinates for haiku-pauwela: (20.9219, -156.3051)
Retrieved coordinates for port elizabeth: (-33.918, 25.5701)
Retrieved coordinates for christchurch: (-43.5333, 172.6333)
Retriev

In [141]:
print(valid_cities)

{'tiksi': {'city': 'tiksi', 'lat': 71.6872, 'lon': 128.8694, 'country': 'RU'}, 'waitangi': {'city': 'waitangi', 'lat': -43.9535, 'lon': -176.5597, 'country': 'NZ'}, 'thompson': {'city': 'thompson', 'lat': 55.7435, 'lon': -97.8558, 'country': 'CA'}, 'punta arenas': {'city': 'punta arenas', 'lat': -53.15, 'lon': -70.9167, 'country': 'CL'}, 'carnarvon': {'city': 'carnarvon', 'lat': -24.8667, 'lon': 113.6333, 'country': 'AU'}, 'bilibino': {'city': 'bilibino', 'lat': 68.0546, 'lon': 166.4372, 'country': 'RU'}, 'puerto natales': {'city': 'puerto natales', 'lat': -51.7236, 'lon': -72.4875, 'country': 'CL'}, 'kulhudhuffushi': {'city': 'kulhudhuffushi', 'lat': 6.6221, 'lon': 73.07, 'country': 'MV'}, 'albany': {'city': 'albany', 'lat': 42.6001, 'lon': -73.9662, 'country': 'US'}, 'ribeira grande': {'city': 'ribeira grande', 'lat': 38.5167, 'lon': -28.7, 'country': 'PT'}, 'touros': {'city': 'touros', 'lat': -5.1989, 'lon': -35.4608, 'country': 'BR'}, 'chonchi': {'city': 'chonchi', 'lat': -42.6128,

In [142]:
city_names = list(valid_cities.keys())
print(city_names)

['tiksi', 'waitangi', 'thompson', 'punta arenas', 'carnarvon', 'bilibino', 'puerto natales', 'kulhudhuffushi', 'albany', 'ribeira grande', 'touros', 'chonchi', 'port-aux-francais', 'avarua', 'hadibu', 'arraial do cabo', 'nguigmi', 'ushuaia', 'invercargill', 'tura', 'vernon', 'blackmans bay', 'malanje', 'bandarbeyla', 'iqaluit', 'mbaiki', 'narasannapeta', 'adamstown', 'baley', 'vilyuysk', 'taulaga', "vrangel'", 'san fernando del valle de catamarca', 'holualoa', 'grytviken', 'beau vallon', 'papatowai', 'polyarnyy', 'rio grande', 'west island', 'menongue', 'margaret river', 'utrik', 'harper', 'mar del plata', 'yatou', 'sitka', 'hermanus', 'kodiak', 'petropavlovsk-kamchatsky', 'happy valley-goose bay', 'thunder bay', 'bredasdorp', 'aasiaat', 'basco', 'sao joao da barra', 'aktau', 'xinyuan', 'talnakh', 'chipata', 'hasaki', 'ilulissat', 'puerto ayora', 'geraldton', 'humaita', 'isla aguada', 'safford', 'skeldon', 'ust-nera', 'abong mbang', 'yellowknife', 'olonkinbyen', 'uturoa', 'quimili', 'g

In [144]:
# List to store air quality data
air_quality_data = []

# Loop through each city to get pollution data
for city, info in valid_cities.items():
    lat = info['lat']
    lon = info['lon']

    try:
        # Query the Air Pollution API
        pollution_url = f"{air_quality_url}?lat={lat}&lon={lon}&appid={weather_api_key}"
        pollution_response = requests.get(pollution_url).json()

        # Extract pollution components
        components = pollution_response['list'][0]['components']
        pm25 = components.get('pm2_5', None)
        pm10 = components.get('pm10', None)
        no2 = components.get('no2', None)
        o3 = components.get('o3', None)
        so2 = components.get('so2', None)
        co = components.get('co', None)
        nh3 = components.get('nh3', None)

        # Store the pollution data
        air_quality_data.append({
            'City': city,
            'Country': info['country'],
            'PM2.5': pm25,
            'PM10': pm10,
            'NO2': no2,
            'O3': o3,
            'SO2': so2,
            'CO': co,
            'NH3': nh3
        })

        print(f"Retrieved pollution data for {city}, {info['country']}")

    except Exception as e:
        print(f"Error retrieving data for {city}: {e}")

    # Pause to avoid rate limiting
    time.sleep(1)

# Convert the collected data into a DataFrame
air_quality_df = pd.DataFrame(air_quality_data)
print(air_quality_df.head())


Retrieved pollution data for tiksi, RU, PM2.5: 0.5
Retrieved pollution data for waitangi, NZ, PM2.5: 0.83
Retrieved pollution data for thompson, CA, PM2.5: 0.5
Retrieved pollution data for punta arenas, CL, PM2.5: 0.5
Retrieved pollution data for carnarvon, AU, PM2.5: 0.72
Retrieved pollution data for bilibino, RU, PM2.5: 0.5
Retrieved pollution data for puerto natales, CL, PM2.5: 0.5
Retrieved pollution data for kulhudhuffushi, MV, PM2.5: 0.89
Retrieved pollution data for albany, US, PM2.5: 0.5
Retrieved pollution data for ribeira grande, PT, PM2.5: 3.32
Retrieved pollution data for touros, BR, PM2.5: 1.58
Retrieved pollution data for chonchi, CL, PM2.5: 0.92
Retrieved pollution data for port-aux-francais, TF, PM2.5: 0.5
Retrieved pollution data for avarua, CK, PM2.5: 1.56
Retrieved pollution data for hadibu, YE, PM2.5: 7.94
Retrieved pollution data for arraial do cabo, BR, PM2.5: 2.51
Retrieved pollution data for nguigmi, NE, PM2.5: 56.58
Retrieved pollution data for ushuaia, AR, PM2

In [145]:
# Create a dictionary to store city-country pairs
city_country_map = {}

for city in city_names:
    try:
        query_url = f"http://api.openweathermap.org/data/2.5/weather?q={city}&appid={weather_api_key}"
        response = requests.get(query_url).json()
        
        # Store the city and corresponding country code
        country = response['sys']['country']
        city_country_map[city] = country

        # Print city and country
        print(f"City: {city}, Country: {country}")

    except KeyError as key_err:
        print(f"KeyError: {key_err} for {city}. Skipping...")
    except requests.exceptions.RequestException as req_err:
        print(f"Request error: {req_err} for {city}. Skipping...")

    # Pause to avoid rate limiting
    time.sleep(1)

print(city_country_map) 


City: tiksi, Country: RU
City: waitangi, Country: NZ
City: thompson, Country: CA
City: punta arenas, Country: CL
City: carnarvon, Country: AU
City: bilibino, Country: RU
City: puerto natales, Country: CL
City: kulhudhuffushi, Country: MV
City: albany, Country: US
City: ribeira grande, Country: PT
City: touros, Country: BR
City: chonchi, Country: CL
City: port-aux-francais, Country: TF
City: avarua, Country: CK
City: hadibu, Country: YE
City: arraial do cabo, Country: BR
City: nguigmi, Country: NE
City: ushuaia, Country: AR
City: invercargill, Country: NZ
City: tura, Country: IN
City: vernon, Country: CA
City: blackmans bay, Country: AU
City: malanje, Country: AO
City: bandarbeyla, Country: SO
City: iqaluit, Country: CA
City: mbaiki, Country: CF
City: narasannapeta, Country: IN
City: adamstown, Country: PN
City: baley, Country: RU
City: vilyuysk, Country: RU
City: taulaga, Country: AS
City: vrangel', Country: RU
City: san fernando del valle de catamarca, Country: AR
City: holualoa, Coun

In [147]:
air_quality_df.to_csv('Resources/air_quality_data.csv', index=False)

In [148]:
print(f"Total cities: {len(city_country_map)}")

Total cities: 421


In [149]:
data = list(city_country_map.items())

# Create the DataFrame
df = pd.DataFrame(data, columns=['City', 'Country'])

# Save the DataFrame to a CSV file
df.to_csv('Resources/city_country_map.csv', index=False, encoding='utf-8')

In [150]:
# Get all the unique countries from the city-country map
unique_countries = set(city_country_map.values())

# Print the count and the list of unique countries
print(f"Total unique countries: {len(unique_countries)}")
print(unique_countries)


Total unique countries: 117
{'RE', 'DE', 'IR', 'SJ', 'OM', 'MX', 'CN', 'PT', 'LR', 'ID', 'ZA', 'SN', 'EG', 'UZ', 'AS', 'PY', 'GE', 'BW', 'MM', 'CF', 'PE', 'CU', 'ET', 'PA', 'IS', 'IE', 'NA', 'PG', 'WS', 'TH', 'KG', 'EC', 'KZ', 'ST', 'NO', 'BR', 'BO', 'AF', 'JM', 'FR', 'MU', 'GS', 'SB', 'TM', 'GY', 'US', 'AO', 'CD', 'KR', 'KI', 'CI', 'GB', 'AR', 'RO', 'TD', 'PF', 'SD', 'CA', 'TC', 'VN', 'SO', 'BF', 'CM', 'MR', 'ZM', 'MA', 'CL', 'HN', 'SE', 'JP', 'MH', 'VE', 'IN', 'LY', 'RU', 'SH', 'TR', 'TK', 'FI', 'BI', 'AU', 'GU', 'SV', 'MY', 'KE', 'UA', 'DZ', 'LV', 'CK', 'FJ', 'MV', 'GR', 'CO', 'MN', 'IT', 'TF', 'GL', 'VU', 'FO', 'UY', 'WF', 'TZ', 'CC', 'MW', 'GH', 'CX', 'NE', 'YE', 'BD', 'MP', 'GF', 'PN', 'MG', 'NC', 'PH', 'FM', 'NZ'}


In [83]:
import pycountry

# WHO API: Get all available countries and their alpha-3 codes
who_url = "https://ghoapi.azureedge.net/api/DIMENSION/COUNTRY/DimensionValues"
who_response = requests.get(who_url).json()

# Convert WHO countries from alpha-3 to alpha-2 codes to be matched withOpenWeather Country Code
who_countries_alpha2 = {}

for entry in who_response['value']:
    alpha_3 = entry['Code']
    country_name = entry['Title']

    # Convert alpha-3 to alpha-2 code using pycountry
    try:
        alpha_2 = pycountry.countries.get(alpha_3=alpha_3).alpha_2
        who_countries_alpha2[alpha_2] = country_name
    except AttributeError:
        print(f"Skipping {alpha_3} - No matching alpha-2 code found.")

print(f"WHO Countries (alpha-2): {who_countries_alpha2}")

who_country_codes = who_countries_alpha2.keys()
print(f"WHO Country Codes: {list(who_country_codes)}")


Skipping CHI - No matching alpha-2 code found.
Skipping ME1 - No matching alpha-2 code found.
Skipping SDN736 - No matching alpha-2 code found.
Skipping XKX - No matching alpha-2 code found.
WHO Countries (alpha-2): {'AW': 'Aruba', 'AF': 'Afghanistan', 'AO': 'Angola', 'AI': 'Anguilla', 'AL': 'Albania', 'AD': 'Andorra', 'AE': 'United Arab Emirates', 'AR': 'Argentina', 'AM': 'Armenia', 'AS': 'American Samoa', 'AG': 'Antigua and Barbuda', 'AU': 'Australia', 'AT': 'Austria', 'AZ': 'Azerbaijan', 'BI': 'Burundi', 'BE': 'Belgium', 'BJ': 'Benin', 'BQ': 'Bonaire, Saint Eustatius and Saba', 'BF': 'Burkina Faso', 'BD': 'Bangladesh', 'BG': 'Bulgaria', 'BH': 'Bahrain', 'BS': 'Bahamas', 'BA': 'Bosnia and Herzegovina', 'BY': 'Belarus', 'BZ': 'Belize', 'BM': 'Bermuda', 'BO': 'Bolivia (Plurinational State of)', 'BR': 'Brazil', 'BB': 'Barbados', 'BN': 'Brunei Darussalam', 'BT': 'Bhutan', 'BW': 'Botswana', 'CF': 'Central African Republic', 'CA': 'Canada', 'CH': 'Switzerland', 'CL': 'Chile', 'CN': 'China'

In [151]:
# Filter cities where the country code exists in the WHO data
filtered_cities = {
    city: code for city, code in city_country_map.items() if code in who_country_codes
}

print(f"Cities with matching WHO data: {len(filtered_cities)}")
print(filtered_cities)


Cities with matching WHO data: 414
{'tiksi': 'RU', 'waitangi': 'NZ', 'thompson': 'CA', 'punta arenas': 'CL', 'carnarvon': 'AU', 'bilibino': 'RU', 'puerto natales': 'CL', 'kulhudhuffushi': 'MV', 'albany': 'US', 'ribeira grande': 'PT', 'touros': 'BR', 'chonchi': 'CL', 'avarua': 'CK', 'hadibu': 'YE', 'arraial do cabo': 'BR', 'nguigmi': 'NE', 'ushuaia': 'AR', 'invercargill': 'NZ', 'tura': 'IN', 'vernon': 'CA', 'blackmans bay': 'AU', 'malanje': 'AO', 'bandarbeyla': 'SO', 'iqaluit': 'CA', 'mbaiki': 'CF', 'narasannapeta': 'IN', 'baley': 'RU', 'vilyuysk': 'RU', 'taulaga': 'AS', "vrangel'": 'RU', 'san fernando del valle de catamarca': 'AR', 'holualoa': 'US', 'beau vallon': 'MU', 'papatowai': 'NZ', 'polyarnyy': 'RU', 'rio grande': 'BR', 'menongue': 'AO', 'margaret river': 'AU', 'utrik': 'MH', 'harper': 'LR', 'mar del plata': 'AR', 'yatou': 'CN', 'sitka': 'US', 'hermanus': 'ZA', 'kodiak': 'US', 'petropavlovsk-kamchatsky': 'RU', 'happy valley-goose bay': 'CA', 'thunder bay': 'CA', 'bredasdorp': 'Z

In [179]:
# WHO API: Air pollution attributable DALYs
who_url = "https://ghoapi.azureedge.net/api/AIR_42"
response = requests.get(who_url)

# Check for valid response
if response.status_code == 200:
    data = response.json()['value']
    # Parse the data into a DataFrame
    who_data = [
        {
            'Country': entry['SpatialDim'],
            'Year': entry['TimeDim'],
            'MortalityRate_per_100k': entry['NumericValue']
        }
        for entry in data
    ]
    
    df_who = pd.DataFrame(who_data)
    print(df_who.head())
    
    df_who.to_csv('Resources/who_air_pollution_dalys.csv', index=False)
else:
    print(f"Failed to retrieve data. Status code: {response.status_code}")


  Country  Year  MortalityRate_per_100k
0     UGA  2016                   5.384
1     BIH  2017                   1.519
2     TCD  2012                 113.317
3     SVK  2016                   4.787
4     CIV  2010                  43.841


In [180]:
# Function to convert ISO-3 to ISO-2
def iso3_to_iso2(iso3_code):
    try:
        return pycountry.countries.get(alpha_3=iso3_code).alpha_2
    except AttributeError:
        return None  

df_who = pd.read_csv('Resources/who_air_pollution_dalys.csv')

# Convert WHO's ISO-3 codes to ISO-2
df_who['Country_ISO2'] = df_who['Country'].apply(iso3_to_iso2)

df_who_clean = df_who.dropna(subset=['Country_ISO2'])

df_weather = pd.read_csv('Resources/city_country_map.csv')

merged_df = pd.merge(air_quality_df, df_who_clean, left_on='Country', right_on='Country_ISO2', how='inner')

print(merged_df.head())

merged_df.to_csv('Resources/merged_air_quality_health.csv', index=False)


    City Country_x  PM2.5  PM10   NO2     O3   SO2      CO  NH3 Country_y  \
0  tiksi        RU    0.5  1.78  0.18  58.65  0.06  243.66  0.0       RUS   
1  tiksi        RU    0.5  1.78  0.18  58.65  0.06  243.66  0.0       RUS   
2  tiksi        RU    0.5  1.78  0.18  58.65  0.06  243.66  0.0       RUS   
3  tiksi        RU    0.5  1.78  0.18  58.65  0.06  243.66  0.0       RUS   
4  tiksi        RU    0.5  1.78  0.18  58.65  0.06  243.66  0.0       RUS   

   Year  MortalityRate_per_100k Country_ISO2  
0  2011                   0.289           RU  
1  2014                  11.601           RU  
2  2012                   1.040           RU  
3  2014                  54.595           RU  
4  2014                   3.638           RU  


In [181]:
count = len(merged_df)
count

70020

In [182]:
# Drop redundant columns (dropping 'Country_ISO2' and 'Country_y')
merged_df = merged_df.drop(['Country_ISO2', 'Country_y'], axis=1)

# Rename columns for clarity
merged_df = merged_df.rename(columns={'Country_x': 'Country'})

In [183]:
merged_df.to_csv('Resources/merged_air_quality_health.csv', index=False)

In [184]:
# Load the merged dataset
df = pd.read_csv('Resources/merged_air_quality_health.csv')

# Group by City and Country to calculate average values across all years
grouped_df = df.groupby(['City', 'Country'], as_index=False).agg({
    'PM2.5': 'mean',
    'PM10': 'mean',
    'NO2': 'mean',
    'O3': 'mean',
    'SO2': 'mean',
    'CO': 'mean',
    'NH3': 'mean',
    'MortalityRate_per_100k': 'mean'
})

grouped_df = grouped_df.round(1)

grouped_df.to_csv('Resources/average_air_quality_health.csv', index=False)

print(grouped_df.head(10))



          City Country  PM2.5   PM10  NO2    O3  SO2     CO  NH3  \
0  abong mbang      CM    0.6    0.7  0.0   1.6  0.0  260.4  0.0   
1       acarau      BR    3.2   15.2  0.2  37.9  0.2  390.5  0.0   
2    ad dindar      SD   16.7   42.7  0.3  39.7  0.3  201.9  0.8   
3        aioun      MR   49.0  227.2  0.1  34.0  0.0  210.3  0.2   
4        aketi      CD    0.5    0.7  0.1   0.0  0.0  170.2  0.0   
5        aktau      KZ   17.5   90.2  2.5  67.2  4.5  227.0  0.1   
6     akureyri      IS    0.6    1.7  1.2  32.9  0.4  220.3  0.6   
7    al bawiti      EG   18.5   84.1  1.0  68.7  0.2  200.3  0.0   
8     alaghsas      NE   64.2  266.0  0.1  46.5  0.0  176.9  0.0   
9       albany      US    0.5    0.6  2.5  79.4  0.1  217.0  0.2   

   MortalityRate_per_100k  
0                    38.3  
1                     9.6  
2                    24.8  
3                    21.3  
4                    26.2  
5                    30.5  
6                     3.6  
7                    36.2  