In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress

# Impor the OpenWeatherMap API key
from api_keys import weather_api_key

# Import citipy to determine the cities based on latitude and longitude
from citipy import citipy

In [2]:
# Generate random latitude and longitude
lats = np.random.uniform(-90, 90, size=1000)
lngs = np.random.uniform(-180, 180, size=1000)


cities = []

# Use citipy to determine the nearest city for each lat-lng combination
for lat, lng in zip(lats, lngs):
    city = citipy.nearest_city(lat, lng).city_name
    if city not in cities:
        cities.append(city)

print(f"Generated {len(cities)} unique cities.")

Generated 463 unique cities.


In [51]:
# Base URL for OpenWeatherMap
air_quality_url = "http://api.openweathermap.org/data/2.5/air_pollution"

valid_cities = []

for city in cities:
    try:
        # Fetch city coordinates using the weather endpoint
        query_url = f"http://api.openweathermap.org/data/2.5/weather?q={city}&appid={weather_api_key}"
        response = requests.get(query_url)

        # Check if the request was successful
        if response.status_code != 200:
            print(f"City {city} not found (Status Code: {response.status_code}). Skipping...")
            continue

        # Parse JSON response
        data = response.json()
        lat = data['coord']['lat']
        lon = data['coord']['lon']
        country = data['sys']['country']
        # Make an API call to the Air Pollution endpoint
        pollution_response = requests.get(
            f"{air_quality_url}?lat={lat}&lon={lon}&appid={weather_api_key}"
        )

        # Append a dictionary with detailed information


        # Check pollution API response
        if pollution_response.status_code == 200:
            valid_cities.append({
                'City': city,
                'Latitude': lat,
                'Longitude': lon,
                'Country': country
            })
        else:
            print(f"Air pollution data not found for {city}. Skipping...")

    except KeyError as key_err:
        print(f"KeyError: {key_err} for {city}. Skipping...")
    except requests.exceptions.RequestException as req_err:
        print(f"Request error: {req_err} for {city}. Skipping...")

    # Pause to avoid rate limiting
    time.sleep(1)

print(f"Valid cities with air quality data: {len(valid_cities)}")


City cable beach not found (Status Code: 404). Skipping...
City nkurenkuru not found (Status Code: 404). Skipping...
City taiohae not found (Status Code: 404). Skipping...
City hayma' not found (Status Code: 404). Skipping...
City tranovaho not found (Status Code: 404). Skipping...
City nalerigu not found (Status Code: 404). Skipping...
City fuvahmulah not found (Status Code: 404). Skipping...
City ain beni mathar not found (Status Code: 404). Skipping...
City kulia village not found (Status Code: 404). Skipping...
City 'ohonua not found (Status Code: 404). Skipping...
City fonadhoo not found (Status Code: 404). Skipping...
City ho not found (Status Code: 404). Skipping...
City ahau not found (Status Code: 404). Skipping...
City puerto san carlos not found (Status Code: 404). Skipping...
City gueltat zemmour not found (Status Code: 404). Skipping...
City kataragama not found (Status Code: 404). Skipping...
City devinuwara not found (Status Code: 404). Skipping...
City satupa'itea not f

In [71]:
print(valid_cities)

[{'City': 'san vincenzo', 'Latitude': 43.0928, 'Longitude': 10.5408, 'Country': 'IT'}, {'City': 'san patricio', 'Latitude': 28.017, 'Longitude': -97.5169, 'Country': 'US'}, {'City': 'beira', 'Latitude': -19.8436, 'Longitude': 34.8389, 'Country': 'MZ'}, {'City': 'yeppoon', 'Latitude': -23.1333, 'Longitude': 150.7333, 'Country': 'AU'}, {'City': 'anak', 'Latitude': 38.5108, 'Longitude': 125.4942, 'Country': 'KP'}, {'City': 'constantia', 'Latitude': 44.1833, 'Longitude': 28.65, 'Country': 'RO'}, {'City': 'kodiak', 'Latitude': 57.79, 'Longitude': -152.4072, 'Country': 'US'}, {'City': 'guerrero negro', 'Latitude': 27.9769, 'Longitude': -114.0611, 'Country': 'MX'}, {'City': 'tiksi', 'Latitude': 71.6872, 'Longitude': 128.8694, 'Country': 'RU'}, {'City': 'margaret river', 'Latitude': -33.95, 'Longitude': 115.0667, 'Country': 'AU'}, {'City': 'port-aux-francais', 'Latitude': -49.35, 'Longitude': 70.2167, 'Country': 'TF'}, {'City': 'bilibino', 'Latitude': 68.0546, 'Longitude': 166.4372, 'Country':

In [72]:
# Create a dictionary to store city-country pairs
city_country_map = {}

# Use existing data from valid_cities
for city_info in valid_cities:
    try:
        # Extract the city name and country code from the existing data
        city = city_info['City']
        country = city_info['Country']

        # Store the city and corresponding country code
        city_country_map[city] = country

        # Print city and country
        print(f"City: {city}, Country: {country}")

    except KeyError as key_err:
        print(f"KeyError: {key_err} for {city_info}. Skipping...")

print(city_country_map)


City: san vincenzo, Country: IT
City: san patricio, Country: US
City: beira, Country: MZ
City: yeppoon, Country: AU
City: anak, Country: KP
City: constantia, Country: RO
City: kodiak, Country: US
City: guerrero negro, Country: MX
City: tiksi, Country: RU
City: margaret river, Country: AU
City: port-aux-francais, Country: TF
City: bilibino, Country: RU
City: taunggyi, Country: MM
City: aykhal, Country: RU
City: pacific grove, Country: US
City: inongo, Country: CD
City: college, Country: US
City: ushuaia, Country: AR
City: hawkesbury, Country: CA
City: knyaze-volkonskoye, Country: RU
City: selkirk, Country: CA
City: west end, Country: BS
City: ust-nera, Country: RU
City: eraan, Country: PH
City: puerto natales, Country: CL
City: punta arenas, Country: CL
City: san antonio de pale, Country: GQ
City: bethel, Country: US
City: santa rosa de copan, Country: HN
City: caleta de carquin, Country: PE
City: adamstown, Country: PN
City: haiku-pauwela, Country: US
City: olonkinbyen, Country: SJ
Cit

In [73]:
print(f"Total cities: {len(city_country_map)}")

Total cities: 445


In [74]:
data = list(city_country_map.items())

# Create the DataFrame
df = pd.DataFrame(data, columns=['City', 'Country'])

# Save the DataFrame to a CSV file
df.to_csv('Resources/city_country_map.csv', index=False, encoding='utf-8')

In [57]:
# Get all the unique countries from the city-country map
unique_countries = set(city_country_map.values())

# Print the count and the list of unique countries
print(f"Total unique countries: {len(unique_countries)}")
print(unique_countries)


Total unique countries: 124
{'PG', 'MN', 'MA', 'CL', 'PF', 'VI', 'IT', 'SS', 'GD', 'YT', 'CI', 'BS', 'EG', 'MU', 'KP', 'BW', 'TK', 'ES', 'MY', 'GY', 'GF', 'NU', 'OM', 'PT', 'BO', 'SJ', 'CD', 'ET', 'PH', 'LK', 'RU', 'NI', 'JM', 'NG', 'NE', 'AU', 'PN', 'GR', 'CX', 'UY', 'IN', 'UA', 'MH', 'TO', 'FJ', 'GS', 'NA', 'SE', 'AR', 'MR', 'SY', 'CK', 'SO', 'BB', 'NZ', 'ID', 'CO', 'ZA', 'CV', 'ZW', 'GL', 'DE', 'MX', 'PW', 'AO', 'SH', 'DZ', 'FR', 'MM', 'NC', 'CA', 'ZM', 'BT', 'TM', 'KR', 'RE', 'MZ', 'CR', 'JP', 'IR', 'TR', 'IS', 'GH', 'TC', 'RW', 'MG', 'MV', 'KZ', 'GW', 'CN', 'YE', 'PL', 'SL', 'WF', 'PE', 'ML', 'GQ', 'SA', 'IE', 'RO', 'TD', 'FO', 'LY', 'TF', 'BA', 'VU', 'SC', 'EC', 'CC', 'CF', 'GU', 'US', 'NP', 'MP', 'FM', 'PK', 'FI', 'HN', 'BR', 'NO', 'GB', 'TH', 'LR', 'KI'}


In [58]:
import pycountry

# WHO API: Get all available countries and their alpha-3 codes
who_url = "https://ghoapi.azureedge.net/api/DIMENSION/COUNTRY/DimensionValues"
who_response = requests.get(who_url).json()

# Convert WHO countries from alpha-3 to alpha-2 codes to be matched withOpenWeather Country Code
who_countries_alpha2 = {}

for entry in who_response['value']:
    alpha_3 = entry['Code']
    country_name = entry['Title']

    # Convert alpha-3 to alpha-2 code using pycountry
    try:
        alpha_2 = pycountry.countries.get(alpha_3=alpha_3).alpha_2
        who_countries_alpha2[alpha_2] = country_name
    except AttributeError:
        print(f"Skipping {alpha_3} - No matching alpha-2 code found.")

print(f"WHO Countries (alpha-2): {who_countries_alpha2}")

who_country_codes = who_countries_alpha2.keys()
print(f"WHO Country Codes: {list(who_country_codes)}")


Skipping CHI - No matching alpha-2 code found.
Skipping ME1 - No matching alpha-2 code found.
Skipping SDN736 - No matching alpha-2 code found.
Skipping XKX - No matching alpha-2 code found.
WHO Countries (alpha-2): {'AW': 'Aruba', 'AF': 'Afghanistan', 'AO': 'Angola', 'AI': 'Anguilla', 'AL': 'Albania', 'AD': 'Andorra', 'AE': 'United Arab Emirates', 'AR': 'Argentina', 'AM': 'Armenia', 'AS': 'American Samoa', 'AG': 'Antigua and Barbuda', 'AU': 'Australia', 'AT': 'Austria', 'AZ': 'Azerbaijan', 'BI': 'Burundi', 'BE': 'Belgium', 'BJ': 'Benin', 'BQ': 'Bonaire, Saint Eustatius and Saba', 'BF': 'Burkina Faso', 'BD': 'Bangladesh', 'BG': 'Bulgaria', 'BH': 'Bahrain', 'BS': 'Bahamas', 'BA': 'Bosnia and Herzegovina', 'BY': 'Belarus', 'BZ': 'Belize', 'BM': 'Bermuda', 'BO': 'Bolivia (Plurinational State of)', 'BR': 'Brazil', 'BB': 'Barbados', 'BN': 'Brunei Darussalam', 'BT': 'Bhutan', 'BW': 'Botswana', 'CF': 'Central African Republic', 'CA': 'Canada', 'CH': 'Switzerland', 'CL': 'Chile', 'CN': 'China'

In [66]:
# Filter cities where the country code exists in the WHO data
filtered_cities = {
    city: code for city, code in city_country_map.items() if code in who_country_codes
}

print(f"Cities with matching WHO data: {len(filtered_cities)}")
print(filtered_cities)


Cities with matching WHO data: 438
{'san vincenzo': 'IT', 'san patricio': 'US', 'beira': 'MZ', 'yeppoon': 'AU', 'anak': 'KP', 'constantia': 'RO', 'kodiak': 'US', 'guerrero negro': 'MX', 'tiksi': 'RU', 'margaret river': 'AU', 'bilibino': 'RU', 'taunggyi': 'MM', 'aykhal': 'RU', 'pacific grove': 'US', 'inongo': 'CD', 'college': 'US', 'ushuaia': 'AR', 'hawkesbury': 'CA', 'knyaze-volkonskoye': 'RU', 'selkirk': 'CA', 'west end': 'BS', 'ust-nera': 'RU', 'eraan': 'PH', 'puerto natales': 'CL', 'punta arenas': 'CL', 'san antonio de pale': 'GQ', 'bethel': 'US', 'santa rosa de copan': 'HN', 'caleta de carquin': 'PE', 'haiku-pauwela': 'US', 'puerto ayora': 'EC', "podporozh'ye": 'RU', 'massenya': 'TD', 'gwadar': 'PK', 'blackmans bay': 'AU', 'invercargill': 'NZ', 'papatowai': 'NZ', 'hermanus': 'ZA', 'qaqortoq': 'GL', 'port moresby': 'PG', 'port elizabeth': 'ZA', 'thompson': 'CA', 'lapy': 'PL', 'galveston': 'US', 'ponta delgada': 'PT', 'carloforte': 'IT', "st. john's": 'CA', 'nagrota': 'IN', 'hamilton

In [70]:
# Store air quality data for cities
air_quality_data = []

# Loop through each city to get pollution data
for city_info in filtered_cities:
    city = city_info['City']
    country = city_info['Country']
    lat = city_info['Latitude']
    lon = city_info['Longitude']
    
    try:
        # Query the Air Pollution API using the city's coordinates
        pollution_url = f"http://api.openweathermap.org/data/2.5/air_pollution?lat={lat}&lon={lon}&appid={weather_api_key}"
        pollution_response = requests.get(pollution_url).json()

        # Check if the response contains expected data
        if 'list' in pollution_response and pollution_response['list']:
            # Extract all pollution components
            components = pollution_response['list'][0].get('components', {})
            pm25 = components.get('pm2_5', None)
            pm10 = components.get('pm10', None)
            no2 = components.get('no2', None)
            o3 = components.get('o3', None)
            so2 = components.get('so2', None)
            co = components.get('co', None)
            nh3 = components.get('nh3', None)

            # Store the data for each city
            air_quality_data.append({
                'City': city,
                'Country': country,
                'PM2.5': pm25,
                'PM10': pm10,
                'NO2': no2,
                'O3': o3,
                'SO2': so2,
                'CO': co,
                'NH3': nh3
            })

            print(f"Retrieved pollution data for {city}, {country}")
        else:
            print(f"No data found for {city}, {country}")

    except Exception as e:
        print(f"Error retrieving data for {city}: {e}")

    # Pause to avoid API rate limits
    time.sleep(1)

# Convert the collected data into a DataFrame
air_quality_df = pd.DataFrame(air_quality_data)
print(air_quality_df.head())


TypeError: string indices must be integers

In [62]:
san_vincenzo_data = air_quality_df[air_quality_df['City'].str.lower() == 'san vincenzo']
for index, row in san_vincenzo_data.iterrows():
    lat = row.get('Latitude')
    lon = row.get('Longitude')
    print(f"{row['City']} (Lat: {lat}, Lon: {lon}): PM2.5 = {row['PM2.5']}, PM10 = {row['PM10']}")


san vincenzo (Lat: None, Lon: None): PM2.5 = 0.5, PM10 = 1.52


In [88]:
air_quality_df.to_csv('Resources/air_quality_data.csv', index=False)

In [125]:
# WHO API: Air pollution attributable DALYs
who_url = "https://ghoapi.azureedge.net/api/AIR_35"
response = requests.get(who_url)

# Check for valid response
if response.status_code == 200:
    data = response.json()['value']
    # Parse the data into a DataFrame
    who_data = [
        {
            'Country': entry['SpatialDim'],
            'Year': entry['TimeDim'],
            'DALYs_per_100k': entry['NumericValue']
        }
        for entry in data
    ]
    
    df_who = pd.DataFrame(who_data)
    print(df_who.head())
    
    df_who.to_csv('Resources/who_air_pollution_dalys.csv', index=False)
else:
    print(f"Failed to retrieve data. Status code: {response.status_code}")


  Country  Year  DALYs_per_100k
0     BTN  2012         289.341
1     COG  2018        2063.071
2     KGZ  2017         175.166
3     CHN  2018       44691.668
4     PAN  2011          25.873


In [126]:
import pycountry

# Function to convert ISO-3 to ISO-2
def iso3_to_iso2(iso3_code):
    try:
        return pycountry.countries.get(alpha_3=iso3_code).alpha_2
    except AttributeError:
        return None  

df_who = pd.read_csv('Resources/who_air_pollution_dalys.csv')

# Convert WHO's ISO-3 codes to ISO-2
df_who['Country_ISO2'] = df_who['Country'].apply(iso3_to_iso2)

df_who_clean = df_who.dropna(subset=['Country_ISO2'])

df_weather = pd.read_csv('Resources/city_country_map.csv')

merged_df = pd.merge(air_quality_df, df_who_clean, left_on='Country', right_on='Country_ISO2', how='inner')

print(merged_df.head())

merged_df.to_csv('Resources/merged_air_quality_health.csv', index=False)


    City Country_x  PM2.5  PM10   NO2     O3   SO2     CO   NH3 Country_y  \
0  tiksi        RU    1.7  5.99  0.62  98.71  0.38  198.6  0.17       RUS   
1  tiksi        RU    1.7  5.99  0.62  98.71  0.38  198.6  0.17       RUS   
2  tiksi        RU    1.7  5.99  0.62  98.71  0.38  198.6  0.17       RUS   
3  tiksi        RU    1.7  5.99  0.62  98.71  0.38  198.6  0.17       RUS   
4  tiksi        RU    1.7  5.99  0.62  98.71  0.38  198.6  0.17       RUS   

   Year  DALYs_per_100k Country_ISO2  
0  2010      187804.732           RU  
1  2011      117145.137           RU  
2  2010        4091.222           RU  
3  2010       99488.703           RU  
4  2017       88567.167           RU  


In [127]:
count = len(merged_df)
count

69480

In [128]:
# Drop redundant columns (dropping 'Country_ISO2' and 'Country_y')
merged_df = merged_df.drop(['Country_ISO2', 'Country_y'], axis=1)

# Rename columns for clarity
merged_df = merged_df.rename(columns={'Country_x': 'Country'})

In [129]:
merged_df.to_csv('Resources/merged_air_quality_health.csv', index=False)

In [131]:
# Load the merged dataset
df = pd.read_csv('Resources/merged_air_quality_health.csv')

# Group by City and Country to calculate average values across all years
grouped_df = df.groupby(['City', 'Country'], as_index=False).agg({
    'PM2.5': 'mean',
    'PM10': 'mean',
    'NO2': 'mean',
    'O3': 'mean',
    'SO2': 'mean',
    'CO': 'mean',
    'NH3': 'mean',
    'DALYs_per_100k': 'mean'
})

grouped_df['DALYs_per_100k'] = grouped_df['DALYs_per_100k'].round(1)

grouped_df.to_csv('Resources/average_air_quality_health.csv', index=False)

print(grouped_df.head(10))



          City Country  PM2.5  PM10   NO2     O3   SO2     CO   NH3  \
0  abong mbang      CM    1.7  5.99  0.62  98.71  0.38  198.6  0.17   
1       acarau      BR    1.7  5.99  0.62  98.71  0.38  198.6  0.17   
2    ad dindar      SD    1.7  5.99  0.62  98.71  0.38  198.6  0.17   
3        aioun      MR    1.7  5.99  0.62  98.71  0.38  198.6  0.17   
4        aketi      CD    1.7  5.99  0.62  98.71  0.38  198.6  0.17   
5        aktau      KZ    1.7  5.99  0.62  98.71  0.38  198.6  0.17   
6     akureyri      IS    1.7  5.99  0.62  98.71  0.38  198.6  0.17   
7    al bawiti      EG    1.7  5.99  0.62  98.71  0.38  198.6  0.17   
8     alaghsas      NE    1.7  5.99  0.62  98.71  0.38  198.6  0.17   
9       albany      US    1.7  5.99  0.62  98.71  0.38  198.6  0.17   

   DALYs_per_100k  
0          5871.4  
1         15847.8  
2          7159.5  
3           627.5  
4         20697.8  
5          3645.7  
6            15.0  
7         14541.2  
8          5487.8  
9         22347.7 