## 💡 Import Libraries

In [16]:
import requests
import config
import time
import pandas as pd
import numpy as np
from geopy.geocoders import Nominatim
import openmeteo_requests
from retry_requests import retry
import requests_cache


pd.set_option('display.max_columns', None)

## 📊 Getting the Data 

### 🗺️ Parishes + Municipalities + Districts

#### Scrape Lisbon's Parishes

In [3]:
url = "https://pt.wikipedia.org/wiki/Lista_de_freguesias_de_Lisboa"

response = requests.get(url)
response

<Response [200]>

In [4]:
# Scrape wiki's table with Lisbon's Parishes
wiki_table = pd.read_html(url)
parish_table = wiki_table[0]
lisbon_parishes = parish_table.iloc[:, 2]
lisbon_parishes

0                           Ajuda (Ocidental)
1                       Alcântara (Ocidental)
2                           Alvalade (Centro)
3                           Alvalade (Centro)
4                           Alvalade (Centro)
5                            Areeiro (Centro)
6                            Areeiro (Centro)
7                            Arroios (Centro)
8                            Arroios (Centro)
9                            Arroios (Centro)
10                    Avenidas Novas (Centro)
11                    Avenidas Novas (Centro)
12                           Beato (Oriental)
13                          Belém (Ocidental)
14                          Belém (Ocidental)
15                            Benfica (Norte)
16        Campo de Ourique (Centro Historico)
17        Campo de Ourique (Centro Historico)
18                         Campolide (Centro)
19                            Carnide (Norte)
20                 Estrela (Centro Histórico)
21                 Estrela (Centro

In [5]:
# Dropping duplicates as the Pandas Series returns duplicated values 
lisbon_parishes.drop_duplicates(inplace=True)

# Extracting only the Parish name and dropping the index
lisbon_parishes = lisbon_parishes.str.extract(r"^(.*?)(?:\s*[\(\[]|$)")[0]
lisbon_parishes.reset_index(drop=True, inplace=True)
lisbon_parishes


0                       Ajuda
1                   Alcântara
2                    Alvalade
3                     Areeiro
4                     Arroios
5              Avenidas Novas
6                       Beato
7                       Belém
8                     Benfica
9            Campo de Ourique
10                  Campolide
11                    Carnide
12                    Estrela
13                     Lumiar
14                    Marvila
15               Misericórdia
16                    Olivais
17          Parque das Nações
18            Penha de França
19                Santa Clara
20          Santa Maria Maior
21              Santo António
22    São Domingos de Benfica
23                São Vicente
Name: 0, dtype: object

#### Fetching the coordinates for each Parish

In [6]:
parish_coords = {}  # Dictionary to store coordinates
geolocator = Nominatim(user_agent="parishes", timeout=10)

for parish in lisbon_parishes:
    location = geolocator.geocode(f"{parish}, Lisboa, Portugal")
    if location:
        parish_coords[parish] = (location.latitude, location.longitude)

In [7]:
parish_coords

{'Ajuda': (38.7046757, -9.1996035),
 'Alcântara': (38.7038836, -9.1823883),
 'Alvalade': (38.7473212, -9.1395885),
 'Areeiro': (38.74225775, -9.13348308807447),
 'Arroios': (38.731258600000004, -9.139435800538248),
 'Avenidas Novas': (38.73643625, -9.149824916390864),
 'Beato': (38.7317082, -9.1074964),
 'Belém': (38.705541499999995, -9.21322651478078),
 'Benfica': (38.7481712, -9.1994775),
 'Campo de Ourique': (38.7153262, -9.1679353),
 'Campolide': (38.727895, -9.1646135),
 'Carnide': (38.7598932, -9.1896195),
 'Estrela': (38.7108223, -9.1596848),
 'Lumiar': (38.7685509, -9.1624841),
 'Marvila': (38.7460112, -9.1056191),
 'Misericórdia': (38.7114084, -9.1507477),
 'Olivais': (38.7683121, -9.1174618),
 'Parque das Nações': (38.7638712, -9.0953729),
 'Penha de França': (38.7261609, -9.1269126),
 'Santa Clara': (38.78605975, -9.153361346130197),
 'Santa Maria Maior': (38.7122372, -9.135738),
 'Santo António': (38.7220648, -9.1514289),
 'São Domingos de Benfica': (38.7466028, -9.1765868)

#### Scrape Municipalities

In [121]:
url = "https://pt.wikipedia.org/wiki/Distrito_de_Lisboa"

response = requests.get(url)
response

<Response [200]>

In [126]:
# Scrape wiki's table with Lisbon's Municipalities
wiki_table = pd.read_html(url)
mun_table = wiki_table[2]
lisbon_mun = mun_table.iloc[:, 1]
lisbon_mun

0                   Alenquer
1                    Amadora
2          Arruda dos Vinhos
3                   Azambuja
4                    Cadaval
5                    Cascais
6                     Lisboa
7                     Loures
8                   Lourinhã
9                      Mafra
10                  Odivelas
11                    Oeiras
12                    Sintra
13    Sobral de Monte Agraço
14             Torres Vedras
15       Vila Franca de Xira
Name: Município, dtype: object

#### Fetching the coordinates for each Municipality

In [127]:
mun_coords = {}  # Dictionary to store coordinates
geolocator = Nominatim(user_agent="mun", timeout=10)

for mun in lisbon_mun:
    location = geolocator.geocode(f"{mun}, Lisboa, Portugal")
    if location:
        mun_coords[mun] = (location.latitude, location.longitude)

In [128]:
mun_coords

{'Alenquer': (39.056633, -9.0076057),
 'Amadora': (38.758959, -9.2365233),
 'Arruda dos Vinhos': (38.98303, -9.07763),
 'Azambuja': (39.0689884, -8.8684921),
 'Cadaval': (39.242859, -9.1026628),
 'Cascais': (38.6968919, -9.4204495),
 'Lisboa': (38.7077507, -9.1365919),
 'Loures': (38.8308741, -9.1684512),
 'Lourinhã': (39.2433932, -9.3119326),
 'Mafra': (38.9369782, -9.3282374),
 'Odivelas': (38.7904338, -9.1792617),
 'Oeiras': (38.6925777, -9.3123076),
 'Sintra': (38.79846, -9.3881),
 'Sobral de Monte Agraço': (39.01835, -9.15171),
 'Torres Vedras': (39.0930856, -9.260741),
 'Vila Franca de Xira': (38.91206415, -8.988923439153135)}

#### Scrape Districts

In [8]:
url = r"https://pt.wikipedia.org/wiki/Lista_de_distritos_e_regi%C3%B5es_aut%C3%B3nomas_de_Portugal_ordenados_por_%C3%A1rea"

response = requests.get(url)
response

<Response [200]>

In [9]:
# Scrape wiki's table with Lisbon's Parishes
wiki_table = pd.read_html(url)
parish_table = wiki_table[1]
districts = parish_table.iloc[:, 1]
districts 

0                 Beja
1                Évora
2             Santarém
3       Castelo Branco
4             Bragança
5           Portalegre
6               Guarda
7              Setúbal
8                Viseu
9                 Faro
10           Vila Real
11             Coimbra
12              Leiria
13              Aveiro
14              Lisboa
15               Braga
16               Porto
17              Açores
18    Viana do Castelo
19             Madeira
20            PORTUGAL
Name: Distrito / região autónoma, dtype: object

In [None]:
# Removing Portugal since that's the country we're referring to
districts = districts[~districts.isin(["PORTUGAL", "Madeira", "Açores"])]

In [82]:
# Dropping duplicates as the Pandas Series returns duplicated values 
districts.drop_duplicates(inplace=True)

# Extracting only the District name and dropping the index
districts = districts.str.extract(r"^(.*?)(?:\s*[\(\[]|$)")[0]
districts.reset_index(drop=True, inplace=True)
districts

0                 Beja
1                Évora
2             Santarém
3       Castelo Branco
4             Bragança
5           Portalegre
6               Guarda
7              Setúbal
8                Viseu
9                 Faro
10           Vila Real
11             Coimbra
12              Leiria
13              Aveiro
14              Lisboa
15               Braga
16               Porto
17    Viana do Castelo
Name: 0, dtype: object

#### Fetching the coordinates for each District

In [83]:
district_coords = {}  # Dictionary to store coordinates
geolocator = Nominatim(user_agent="districts", timeout=10)

for district in districts:
    location = geolocator.geocode(f"{district}, Portugal")
    if location:
        district_coords[district] = (location.latitude, location.longitude)

In [84]:
district_coords

{'Beja': (38.0154479, -7.8650368),
 'Évora': (38.5707742, -7.9092808),
 'Santarém': (39.2363637, -8.6867081),
 'Castelo Branco': (39.97675825, -7.446059929966704),
 'Bragança': (41.5084468, -6.773302360533066),
 'Portalegre': (39.2076447, -7.721513354015343),
 'Guarda': (40.7046066, -7.195139236071309),
 'Setúbal': (38.5241783, -8.8932341),
 'Viseu': (40.6574713, -7.9138664),
 'Faro': (37.0162727, -7.9351771),
 'Vila Real': (41.5229299, -7.5466312481173325),
 'Coimbra': (40.2111931, -8.4294632),
 'Leiria': (39.7437902, -8.8071119),
 'Aveiro': (40.640496, -8.6537841),
 'Lisboa': (38.7077507, -9.1365919),
 'Braga': (41.5510583, -8.4280045),
 'Porto': (41.1494512, -8.6107884),
 'Viana do Castelo': (41.694867, -8.831088)}

### 👥 Population per district (2023)

In [90]:
url = r"https://pt.wikipedia.org/wiki/Lista_de_munic%C3%ADpios_de_Portugal_por_popula%C3%A7%C3%A3o"

# Scrape wiki's table with population per district
wiki_table = pd.read_html(url)
population_table = wiki_table[0]
print(population_table.head())

   N.º  Município        Município.1  População (2023) Variação (2022-2023)  \
0    1        NaN             Lisboa           567.131               1,72 %   
1    2        NaN             Sintra           395.528               1,24 %   
2    3        NaN  Vila Nova de Gaia           311.223               0,81 %   
3    4        NaN              Porto           248.769               2,37 %   
4    5        NaN            Cascais           219.636               1,11 %   

  Distrito                   Sub-região         Região  
0   Lisboa                Grande Lisboa  Grande Lisboa  
1   Lisboa                Grande Lisboa  Grande Lisboa  
2    Porto  Área Metropolitana do Porto          Norte  
3    Porto  Área Metropolitana do Porto          Norte  
4   Lisboa                Grande Lisboa  Grande Lisboa  


In [None]:
# Filter the population results for each previously scraped district
population_table = population_table[population_table['Município.1'].isin(districts)].reset_index(drop=True)

In [None]:
# Dropping unecessary columns to reduce noise
population_table.drop(columns=['N.º', 'Município', 'Variação (2022-2023)', 'Distrito', 'Sub-região', 'Região'], inplace=True)

# Renaming the columns that I'll be using
population_table.rename(columns={'Município.1': 'district', 'População (2023)': 'population'}, inplace=True)

In [None]:
# Creating a list with dicts so there is a difference between latitude and longitude
locations = []

for district, coords in district_coords.items():
    loc = {"name": district, "latitude": coords[0], "longitude": coords[1]}
    locations.append(loc)

In [None]:
# Converting a Pandas Series into a dataframe with three cols
# There is a col for name (that I'm renaming below), a col for latitude and a col for longitude
locations_df = pd.DataFrame(locations)
locations_df.rename(columns={"name": "district"}, inplace=True)


In [None]:
# Merging the population table with the locations_df so each district has their coordinates + population
population_table = population_table.merge(locations_df, on="district", how="left")

In [113]:
population_table

Unnamed: 0,district,population,latitude,longitude
0,Lisboa,567.131,38.707751,-9.136592
1,Porto,248.769,41.149451,-8.610788
2,Braga,201.583,41.551058,-8.428005
3,Coimbra,144.822,40.211193,-8.429463
4,Leiria,133.795,39.74379,-8.807112
5,Setúbal,123.548,38.524178,-8.893234
6,Viseu,101.977,40.657471,-7.913866
7,Viana do Castelo,86.78,41.694867,-8.831088
8,Aveiro,86.037,40.640496,-8.653784
9,Faro,69.468,37.016273,-7.935177


In [114]:
population_table.to_csv('population_data.csv', index=False)

### 🌿 Air Quality 

#### Parishes

In [61]:
locations = []

for parish, coords in parish_coords.items():
    loc = {"name": parish, "latitude": coords[0], "longitude": coords[1]}
    locations.append(loc)

In [26]:
# Setup the Open-Meteo API client with cache and retry on error
cache_session = requests_cache.CachedSession('.cache', expire_after = 86400)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)

# Make sure all required weather variables are listed here
# The order of variables in hourly or daily is important to assign them correctly below
url = "https://air-quality-api.open-meteo.com/v1/air-quality"

all_locations = []

for loc in locations:
	params = {
		"latitude": loc["latitude"],
		"longitude": loc["longitude"],
		"hourly": ["dust", "uv_index", "uv_index_clear_sky", "alder_pollen", "birch_pollen", "grass_pollen", "mugwort_pollen", "olive_pollen", "ragweed_pollen", "ozone", "pm2_5", "pm10", "carbon_monoxide", "carbon_dioxide", "nitrogen_dioxide", "sulphur_dioxide", "european_aqi"],
		"timezone": "GMT",
		"start_date": "2023-01-01",
		"end_date": "2024-12-31"
	}
	responses = openmeteo.weather_api(url, params=params)


	# Process first location. Add a for-loop for multiple locations or weather models
	response = responses[0]
	print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
	print(f"Elevation {response.Elevation()} m asl")
	print(f"Timezone {response.Timezone()}{response.TimezoneAbbreviation()}")
	print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")


	# Process hourly data. The order of variables needs to be the same as requested.
	hourly = response.Hourly()
	hourly_dust = hourly.Variables(0).ValuesAsNumpy()
	hourly_uv_index = hourly.Variables(1).ValuesAsNumpy()
	hourly_uv_index_clear_sky = hourly.Variables(2).ValuesAsNumpy()
	hourly_alder_pollen = hourly.Variables(3).ValuesAsNumpy()
	hourly_birch_pollen = hourly.Variables(4).ValuesAsNumpy()
	hourly_grass_pollen = hourly.Variables(5).ValuesAsNumpy()
	hourly_mugwort_pollen = hourly.Variables(6).ValuesAsNumpy()
	hourly_olive_pollen = hourly.Variables(7).ValuesAsNumpy()
	hourly_ragweed_pollen = hourly.Variables(8).ValuesAsNumpy()
	hourly_ozone = hourly.Variables(9).ValuesAsNumpy()
	hourly_pm2_5 = hourly.Variables(10).ValuesAsNumpy()
	hourly_pm10 = hourly.Variables(11).ValuesAsNumpy()
	hourly_carbon_monoxide = hourly.Variables(12).ValuesAsNumpy()
	hourly_carbon_dioxide = hourly.Variables(13).ValuesAsNumpy()
	hourly_nitrogen_dioxide = hourly.Variables(14).ValuesAsNumpy()
	hourly_sulphur_dioxide = hourly.Variables(15).ValuesAsNumpy()
	hourly_european_aqi = hourly.Variables(16).ValuesAsNumpy()

	hourly_data = {"date": pd.date_range(
		start = pd.to_datetime(hourly.Time(), unit = "s", utc = True),
		end = pd.to_datetime(hourly.TimeEnd(), unit = "s", utc = True),
		freq = pd.Timedelta(seconds = hourly.Interval()),
		inclusive = "left"
		), 
		"location": [loc["name"]] * len(hourly_dust),
		"latitude": [loc["latitude"]] * len(hourly_dust),
		"longitude": [loc["longitude"]] * len(hourly_dust)
	}

	hourly_data["dust"] = hourly_dust
	hourly_data["uv_index"] = hourly_uv_index
	hourly_data["uv_index_clear_sky"] = hourly_uv_index_clear_sky
	hourly_data["alder_pollen"] = hourly_alder_pollen
	hourly_data["birch_pollen"] = hourly_birch_pollen
	hourly_data["grass_pollen"] = hourly_grass_pollen
	hourly_data["mugwort_pollen"] = hourly_mugwort_pollen
	hourly_data["olive_pollen"] = hourly_olive_pollen
	hourly_data["ragweed_pollen"] = hourly_ragweed_pollen
	hourly_data["ozone"] = hourly_ozone
	hourly_data["pm2_5"] = hourly_pm2_5
	hourly_data["pm10"] = hourly_pm10
	hourly_data["carbon_monoxide"] = hourly_carbon_monoxide
	hourly_data["carbon_dioxide"] = hourly_carbon_dioxide
	hourly_data["nitrogen_dioxide"] = hourly_nitrogen_dioxide
	hourly_data["sulphur_dioxide"] = hourly_sulphur_dioxide
	hourly_data["european_aqi"] = hourly_european_aqi

	hourly_dataframe = pd.DataFrame(data=hourly_data)
	all_locations.append(hourly_dataframe)
	
lis_air_df = pd.concat(all_locations, ignore_index=True)
lis_air_df.head()


Coordinates 38.70000076293945°N -9.199999809265137°E
Elevation 62.0 m asl
Timezone NoneNone
Timezone difference to GMT+0 0 s
Coordinates 38.70000076293945°N -9.199999809265137°E
Elevation 32.0 m asl
Timezone NoneNone
Timezone difference to GMT+0 0 s
Coordinates 38.70000076293945°N -9.09999942779541°E
Elevation 82.0 m asl
Timezone NoneNone
Timezone difference to GMT+0 0 s
Coordinates 38.70000076293945°N -9.09999942779541°E
Elevation 85.0 m asl
Timezone NoneNone
Timezone difference to GMT+0 0 s
Coordinates 38.70000076293945°N -9.09999942779541°E
Elevation 75.0 m asl
Timezone NoneNone
Timezone difference to GMT+0 0 s
Coordinates 38.70000076293945°N -9.09999942779541°E
Elevation 88.0 m asl
Timezone NoneNone
Timezone difference to GMT+0 0 s
Coordinates 38.70000076293945°N -9.09999942779541°E
Elevation 15.0 m asl
Timezone NoneNone
Timezone difference to GMT+0 0 s
Coordinates 38.70000076293945°N -9.199999809265137°E
Elevation 85.0 m asl
Timezone NoneNone
Timezone difference to GMT+0 0 s
Coord

Unnamed: 0,date,location,latitude,longitude,dust,uv_index,uv_index_clear_sky,alder_pollen,birch_pollen,grass_pollen,mugwort_pollen,olive_pollen,ragweed_pollen,ozone,pm2_5,pm10,carbon_monoxide,carbon_dioxide,nitrogen_dioxide,sulphur_dioxide,european_aqi
0,2023-01-01 00:00:00+00:00,Ajuda,38.704676,-9.199604,13.0,0.0,0.0,0.0,,,,0.0,,61.0,14.3,31.700001,157.0,,10.3,3.5,26.766666
1,2023-01-01 01:00:00+00:00,Ajuda,38.704676,-9.199604,13.0,0.0,0.0,0.0,,,,0.0,,63.0,17.4,33.599998,145.0,,10.3,3.5,27.141668
2,2023-01-01 02:00:00+00:00,Ajuda,38.704676,-9.199604,12.0,0.0,0.0,0.0,,,,0.0,,62.0,14.8,33.0,139.0,,9.9,3.5,27.700003
3,2023-01-01 03:00:00+00:00,Ajuda,38.704676,-9.199604,12.0,0.0,0.0,0.0,,,,0.0,,61.0,15.4,32.900002,152.0,,9.6,3.4,28.025
4,2023-01-01 04:00:00+00:00,Ajuda,38.704676,-9.199604,11.0,0.0,0.0,0.0,,,,0.0,,60.0,14.9,32.5,151.0,,8.3,3.3,28.391665


In [27]:
# Saving the data so I can work with it later
lis_air_df.to_csv('lis_air.csv', index=False)

#### Districts

In [49]:
locations = []

for district, coords in district_coords.items():
    loc = {"name": district, "latitude": coords[0], "longitude": coords[1]}
    locations.append(loc)

In [33]:
# Setup the Open-Meteo API client with cache and retry on error
cache_session = requests_cache.CachedSession('.cache', expire_after = 86400)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)

# Make sure all required weather variables are listed here
# The order of variables in hourly or daily is important to assign them correctly below
url = "https://air-quality-api.open-meteo.com/v1/air-quality"

all_locations = []

for loc in locations:
	params = {
		"latitude": loc["latitude"],
		"longitude": loc["longitude"],
		"hourly": ["dust", "uv_index", "uv_index_clear_sky", "alder_pollen", "birch_pollen", "grass_pollen", "mugwort_pollen", "olive_pollen", "ragweed_pollen", "ozone", "pm2_5", "pm10", "carbon_monoxide", "carbon_dioxide", "nitrogen_dioxide", "sulphur_dioxide", "european_aqi"],
		"timezone": "GMT",
		"start_date": "2023-01-01",
		"end_date": "2024-12-31"
	}
	responses = openmeteo.weather_api(url, params=params)


	# Process first location. Add a for-loop for multiple locations or weather models
	response = responses[0]
	print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
	print(f"Elevation {response.Elevation()} m asl")
	print(f"Timezone {response.Timezone()}{response.TimezoneAbbreviation()}")
	print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")


	# Process hourly data. The order of variables needs to be the same as requested.
	hourly = response.Hourly()
	hourly_dust = hourly.Variables(0).ValuesAsNumpy()
	hourly_uv_index = hourly.Variables(1).ValuesAsNumpy()
	hourly_uv_index_clear_sky = hourly.Variables(2).ValuesAsNumpy()
	hourly_alder_pollen = hourly.Variables(3).ValuesAsNumpy()
	hourly_birch_pollen = hourly.Variables(4).ValuesAsNumpy()
	hourly_grass_pollen = hourly.Variables(5).ValuesAsNumpy()
	hourly_mugwort_pollen = hourly.Variables(6).ValuesAsNumpy()
	hourly_olive_pollen = hourly.Variables(7).ValuesAsNumpy()
	hourly_ragweed_pollen = hourly.Variables(8).ValuesAsNumpy()
	hourly_ozone = hourly.Variables(9).ValuesAsNumpy()
	hourly_pm2_5 = hourly.Variables(10).ValuesAsNumpy()
	hourly_pm10 = hourly.Variables(11).ValuesAsNumpy()
	hourly_carbon_monoxide = hourly.Variables(12).ValuesAsNumpy()
	hourly_carbon_dioxide = hourly.Variables(13).ValuesAsNumpy()
	hourly_nitrogen_dioxide = hourly.Variables(14).ValuesAsNumpy()
	hourly_sulphur_dioxide = hourly.Variables(15).ValuesAsNumpy()
	hourly_european_aqi = hourly.Variables(16).ValuesAsNumpy()

	hourly_data = {"date": pd.date_range(
		start = pd.to_datetime(hourly.Time(), unit = "s", utc = True),
		end = pd.to_datetime(hourly.TimeEnd(), unit = "s", utc = True),
		freq = pd.Timedelta(seconds = hourly.Interval()),
		inclusive = "left"
		), 
		"location": [loc["name"]] * len(hourly_dust),
		"latitude": [loc["latitude"]] * len(hourly_dust),
		"longitude": [loc["longitude"]] * len(hourly_dust)
	}

	hourly_data["dust"] = hourly_dust
	hourly_data["uv_index"] = hourly_uv_index
	hourly_data["uv_index_clear_sky"] = hourly_uv_index_clear_sky
	hourly_data["alder_pollen"] = hourly_alder_pollen
	hourly_data["birch_pollen"] = hourly_birch_pollen
	hourly_data["grass_pollen"] = hourly_grass_pollen
	hourly_data["mugwort_pollen"] = hourly_mugwort_pollen
	hourly_data["olive_pollen"] = hourly_olive_pollen
	hourly_data["ragweed_pollen"] = hourly_ragweed_pollen
	hourly_data["ozone"] = hourly_ozone
	hourly_data["pm2_5"] = hourly_pm2_5
	hourly_data["pm10"] = hourly_pm10
	hourly_data["carbon_monoxide"] = hourly_carbon_monoxide
	hourly_data["carbon_dioxide"] = hourly_carbon_dioxide
	hourly_data["nitrogen_dioxide"] = hourly_nitrogen_dioxide
	hourly_data["sulphur_dioxide"] = hourly_sulphur_dioxide
	hourly_data["european_aqi"] = hourly_european_aqi

	hourly_dataframe = pd.DataFrame(data=hourly_data)
	all_locations.append(hourly_dataframe)
	
district_air_df = pd.concat(all_locations, ignore_index=True)
district_air_df.head()


Coordinates 38.0°N -7.899999618530273°E
Elevation 286.0 m asl
Timezone NoneNone
Timezone difference to GMT+0 0 s
Coordinates 38.599998474121094°N -7.899999618530273°E
Elevation 297.0 m asl
Timezone NoneNone
Timezone difference to GMT+0 0 s
Coordinates 39.20000076293945°N -8.69999885559082°E
Elevation 104.0 m asl
Timezone NoneNone
Timezone difference to GMT+0 0 s
Coordinates 40.0°N -7.399999618530273°E
Elevation 386.0 m asl
Timezone NoneNone
Timezone difference to GMT+0 0 s
Coordinates 41.5°N -6.799999237060547°E
Elevation 710.0 m asl
Timezone NoneNone
Timezone difference to GMT+0 0 s
Coordinates 39.20000076293945°N -7.69999885559082°E
Elevation 205.0 m asl
Timezone NoneNone
Timezone difference to GMT+0 0 s
Coordinates 40.70000076293945°N -7.19999885559082°E
Elevation 554.0 m asl
Timezone NoneNone
Timezone difference to GMT+0 0 s
Coordinates 38.5°N -8.899999618530273°E
Elevation 7.0 m asl
Timezone NoneNone
Timezone difference to GMT+0 0 s
Coordinates 40.70000076293945°N -7.8999996185302

Unnamed: 0,date,location,latitude,longitude,dust,uv_index,uv_index_clear_sky,alder_pollen,birch_pollen,grass_pollen,mugwort_pollen,olive_pollen,ragweed_pollen,ozone,pm2_5,pm10,carbon_monoxide,carbon_dioxide,nitrogen_dioxide,sulphur_dioxide,european_aqi
0,2023-01-01 00:00:00+00:00,Beja,38.015448,-7.865037,9.0,0.0,0.0,0.0,,,,0.0,,49.0,8.7,21.1,127.0,,4.3,4.0,20.449999
1,2023-01-01 01:00:00+00:00,Beja,38.015448,-7.865037,8.0,0.0,0.0,0.0,,,,0.0,,46.0,8.9,21.6,127.0,,4.1,4.0,20.637497
2,2023-01-01 02:00:00+00:00,Beja,38.015448,-7.865037,9.0,0.0,0.0,0.0,,,,0.0,,44.0,9.0,21.1,127.0,,4.6,3.7,20.795832
3,2023-01-01 03:00:00+00:00,Beja,38.015448,-7.865037,9.0,0.0,0.0,0.0,,,,0.0,,43.0,9.2,21.299999,128.0,,4.7,3.1,21.008335
4,2023-01-01 04:00:00+00:00,Beja,38.015448,-7.865037,8.0,0.0,0.0,0.0,,,,0.0,,42.0,8.5,20.6,134.0,,5.7,3.3,21.195833


In [34]:
# Saving the data so I can work with it later
district_air_df.to_csv('district_air.csv', index=False)

#### Municipalities

In [129]:
locations = []

for mun, coords in mun_coords.items():
    loc = {"name": mun, "latitude": coords[0], "longitude": coords[1]}
    locations.append(loc)

In [132]:
# Setup the Open-Meteo API client with cache and retry on error
cache_session = requests_cache.CachedSession('.cache', expire_after = 86400)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)

# Make sure all required weather variables are listed here
# The order of variables in hourly or daily is important to assign them correctly below
url = "https://air-quality-api.open-meteo.com/v1/air-quality"

all_locations = []

for loc in locations:
	params = {
		"latitude": loc["latitude"],
		"longitude": loc["longitude"],
		"hourly": ["dust", "uv_index", "uv_index_clear_sky", "alder_pollen", "birch_pollen", "grass_pollen", "mugwort_pollen", "olive_pollen", "ragweed_pollen", "ozone", "pm2_5", "pm10", "carbon_monoxide", "carbon_dioxide", "nitrogen_dioxide", "sulphur_dioxide", "european_aqi"],
		"timezone": "GMT",
		"start_date": "2023-01-01",
		"end_date": "2024-12-31"
	}
	responses = openmeteo.weather_api(url, params=params)


	# Process first location. Add a for-loop for multiple locations or weather models
	response = responses[0]
	print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
	print(f"Elevation {response.Elevation()} m asl")
	print(f"Timezone {response.Timezone()}{response.TimezoneAbbreviation()}")
	print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")


	# Process hourly data. The order of variables needs to be the same as requested.
	hourly = response.Hourly()
	hourly_dust = hourly.Variables(0).ValuesAsNumpy()
	hourly_uv_index = hourly.Variables(1).ValuesAsNumpy()
	hourly_uv_index_clear_sky = hourly.Variables(2).ValuesAsNumpy()
	hourly_alder_pollen = hourly.Variables(3).ValuesAsNumpy()
	hourly_birch_pollen = hourly.Variables(4).ValuesAsNumpy()
	hourly_grass_pollen = hourly.Variables(5).ValuesAsNumpy()
	hourly_mugwort_pollen = hourly.Variables(6).ValuesAsNumpy()
	hourly_olive_pollen = hourly.Variables(7).ValuesAsNumpy()
	hourly_ragweed_pollen = hourly.Variables(8).ValuesAsNumpy()
	hourly_ozone = hourly.Variables(9).ValuesAsNumpy()
	hourly_pm2_5 = hourly.Variables(10).ValuesAsNumpy()
	hourly_pm10 = hourly.Variables(11).ValuesAsNumpy()
	hourly_carbon_monoxide = hourly.Variables(12).ValuesAsNumpy()
	hourly_carbon_dioxide = hourly.Variables(13).ValuesAsNumpy()
	hourly_nitrogen_dioxide = hourly.Variables(14).ValuesAsNumpy()
	hourly_sulphur_dioxide = hourly.Variables(15).ValuesAsNumpy()
	hourly_european_aqi = hourly.Variables(16).ValuesAsNumpy()

	hourly_data = {"date": pd.date_range(
		start = pd.to_datetime(hourly.Time(), unit = "s", utc = True),
		end = pd.to_datetime(hourly.TimeEnd(), unit = "s", utc = True),
		freq = pd.Timedelta(seconds = hourly.Interval()),
		inclusive = "left"
		), 
		"location": [loc["name"]] * len(hourly_dust),
		"latitude": [loc["latitude"]] * len(hourly_dust),
		"longitude": [loc["longitude"]] * len(hourly_dust)
	}

	hourly_data["dust"] = hourly_dust
	hourly_data["uv_index"] = hourly_uv_index
	hourly_data["uv_index_clear_sky"] = hourly_uv_index_clear_sky
	hourly_data["alder_pollen"] = hourly_alder_pollen
	hourly_data["birch_pollen"] = hourly_birch_pollen
	hourly_data["grass_pollen"] = hourly_grass_pollen
	hourly_data["mugwort_pollen"] = hourly_mugwort_pollen
	hourly_data["olive_pollen"] = hourly_olive_pollen
	hourly_data["ragweed_pollen"] = hourly_ragweed_pollen
	hourly_data["ozone"] = hourly_ozone
	hourly_data["pm2_5"] = hourly_pm2_5
	hourly_data["pm10"] = hourly_pm10
	hourly_data["carbon_monoxide"] = hourly_carbon_monoxide
	hourly_data["carbon_dioxide"] = hourly_carbon_dioxide
	hourly_data["nitrogen_dioxide"] = hourly_nitrogen_dioxide
	hourly_data["sulphur_dioxide"] = hourly_sulphur_dioxide
	hourly_data["european_aqi"] = hourly_european_aqi

	hourly_dataframe = pd.DataFrame(data=hourly_data)
	all_locations.append(hourly_dataframe)
	
mun_air_df = pd.concat(all_locations, ignore_index=True)
mun_air_df.head()


Coordinates 39.099998474121094°N -9.0°E
Elevation 67.0 m asl
Timezone NoneNone
Timezone difference to GMT+0 0 s
Coordinates 38.79999923706055°N -9.199999809265137°E
Elevation 135.0 m asl
Timezone NoneNone
Timezone difference to GMT+0 0 s
Coordinates 39.0°N -9.09999942779541°E
Elevation 100.0 m asl
Timezone NoneNone
Timezone difference to GMT+0 0 s
Coordinates 39.099998474121094°N -8.899999618530273°E
Elevation 16.0 m asl
Timezone NoneNone
Timezone difference to GMT+0 0 s
Coordinates 39.20000076293945°N -9.09999942779541°E
Elevation 104.0 m asl
Timezone NoneNone
Timezone difference to GMT+0 0 s
Coordinates 38.70000076293945°N -9.399999618530273°E
Elevation 12.0 m asl
Timezone NoneNone
Timezone difference to GMT+0 0 s
Coordinates 38.70000076293945°N -9.09999942779541°E
Elevation 7.0 m asl
Timezone NoneNone
Timezone difference to GMT+0 0 s
Coordinates 38.79999923706055°N -9.199999809265137°E
Elevation 14.0 m asl
Timezone NoneNone
Timezone difference to GMT+0 0 s
Coordinates 39.20000076293

Unnamed: 0,date,location,latitude,longitude,dust,uv_index,uv_index_clear_sky,alder_pollen,birch_pollen,grass_pollen,mugwort_pollen,olive_pollen,ragweed_pollen,ozone,pm2_5,pm10,carbon_monoxide,carbon_dioxide,nitrogen_dioxide,sulphur_dioxide,european_aqi
0,2023-01-01 00:00:00+00:00,Alenquer,39.056633,-9.007606,16.0,0.0,0.0,0.0,,,,0.0,,62.0,13.5,28.200001,163.0,,6.8,2.4,24.799999
1,2023-01-01 01:00:00+00:00,Alenquer,39.056633,-9.007606,15.0,0.0,0.0,0.0,,,,0.0,,62.0,13.3,26.6,145.0,,7.3,2.6,24.799999
2,2023-01-01 02:00:00+00:00,Alenquer,39.056633,-9.007606,14.0,0.0,0.0,0.0,,,,0.0,,58.0,13.1,26.0,136.0,,7.2,2.8,23.841665
3,2023-01-01 03:00:00+00:00,Alenquer,39.056633,-9.007606,14.0,0.0,0.0,0.0,,,,0.0,,59.0,13.1,25.1,133.0,,7.0,2.6,24.033333
4,2023-01-01 04:00:00+00:00,Alenquer,39.056633,-9.007606,14.0,0.0,0.0,0.0,,,,0.0,,56.0,12.7,25.200001,133.0,,6.6,2.6,24.224998


In [133]:
# Saving the data so I can work with it later
mun_air_df.to_csv('mun_air_df.csv', index=False)

### ☀️ Weather

#### Hourly Parishes Weather

In [68]:
locations = []

for parish, coords in parish_coords.items():
    loc = {"name": parish, "latitude": coords[0], "longitude": coords[1]}
    locations.append(loc)

In [69]:
# Make sure all required weather variables are listed here
# The order of variables in hourly or daily is important to assign them correctly below
url = "https://archive-api.open-meteo.com/v1/archive"

all_locations_2 = []

for loc in locations:
	params = {
		"latitude": loc["latitude"],
		"longitude": loc["longitude"],
		"start_date": "2023-01-01",
		"end_date": "2024-12-31",
		"daily": ["sunset", "sunrise", "temperature_2m_max", "temperature_2m_min", "daylight_duration", "wind_speed_10m_max", "wind_direction_10m_dominant"],
		"hourly": ["temperature_2m", "rain", "precipitation", "relative_humidity_2m", "wind_speed_10m", "wind_speed_100m", "wind_direction_10m", "wind_direction_100m", "soil_temperature_0_to_7cm", "soil_moisture_0_to_7cm", "cloud_cover", "pressure_msl", "apparent_temperature", "cloud_cover_mid", "cloud_cover_low"],
		"timezone": "GMT"
	}
	responses = openmeteo.weather_api(url, params=params)

	# Process first location. Add a for-loop for multiple locations or weather models
	response = responses[0]
	print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
	print(f"Elevation {response.Elevation()} m asl")
	print(f"Timezone {response.Timezone()}{response.TimezoneAbbreviation()}")
	print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

	# Process hourly data. The order of variables needs to be the same as requested.
	hourly = response.Hourly()
	hourly_temperature_2m = hourly.Variables(0).ValuesAsNumpy()
	hourly_apparent_temperature = hourly.Variables(11).ValuesAsNumpy()
	hourly_rain = hourly.Variables(1).ValuesAsNumpy()
	hourly_precipitation = hourly.Variables(2).ValuesAsNumpy()
	hourly_relative_humidity_2m = hourly.Variables(3).ValuesAsNumpy()
	hourly_wind_speed_10m = hourly.Variables(4).ValuesAsNumpy()
	hourly_wind_speed_100m = hourly.Variables(5).ValuesAsNumpy()
	hourly_wind_direction_10m = hourly.Variables(6).ValuesAsNumpy()
	hourly_wind_direction_100m = hourly.Variables(7).ValuesAsNumpy()
	hourly_soil_temperature_0_to_7cm = hourly.Variables(8).ValuesAsNumpy()
	hourly_soil_moisture_0_to_7cm = hourly.Variables(9).ValuesAsNumpy()
	hourly_cloud_cover = hourly.Variables(10).ValuesAsNumpy()
	hourly_pressure_msl = hourly.Variables(11).ValuesAsNumpy()
	hourly_apparent_temperature = hourly.Variables(12).ValuesAsNumpy()
	hourly_cloud_cover_mid = hourly.Variables(13).ValuesAsNumpy()
	hourly_cloud_cover_low = hourly.Variables(14).ValuesAsNumpy()

	hourly_data = {"date": pd.date_range(
		start = pd.to_datetime(hourly.Time(), unit = "s", utc = True),
		end = pd.to_datetime(hourly.TimeEnd(), unit = "s", utc = True),
		freq = pd.Timedelta(seconds = hourly.Interval()),
		inclusive = "left"
		), "location": [loc["name"]] * len(hourly_temperature_2m),
		"latitude": [loc["latitude"]] * len(hourly_temperature_2m),
		"longitude": [loc["longitude"]] * len(hourly_temperature_2m)
	}

	hourly_data["temperature_2m"] = hourly_temperature_2m
	hourly_data["apparent_temperature"] = hourly_apparent_temperature
	hourly_data["rain"] = hourly_rain
	hourly_data["precipitation"] = hourly_precipitation
	hourly_data["relative_humidity_2m"] = hourly_relative_humidity_2m
	hourly_data["wind_speed_10m"] = hourly_wind_speed_10m
	hourly_data["wind_speed_100m"] = hourly_wind_speed_100m
	hourly_data["wind_direction_10m"] = hourly_wind_direction_10m
	hourly_data["wind_direction_100m"] = hourly_wind_direction_100m
	hourly_data["soil_temperature_0_to_7cm"] = hourly_soil_temperature_0_to_7cm
	hourly_data["soil_moisture_0_to_7cm"] = hourly_soil_moisture_0_to_7cm
	hourly_data["cloud_cover"] = hourly_cloud_cover
	hourly_data["pressure_msl"] = hourly_pressure_msl
	hourly_data["apparent_temperature"] = hourly_apparent_temperature
	hourly_data["cloud_cover_mid"] = hourly_cloud_cover_mid
	hourly_data["cloud_cover_low"] = hourly_cloud_cover_low

	hourly_dataframe = pd.DataFrame(data=hourly_data)
	all_locations_2.append(hourly_dataframe)

lis_weather_df = pd.concat(all_locations_2, ignore_index=True)
lis_weather_df.head()

Coordinates 38.69947052001953°N -9.1961669921875°E
Elevation 62.0 m asl
Timezone NoneNone
Timezone difference to GMT+0 0 s
Coordinates 38.69947052001953°N -9.1961669921875°E
Elevation 32.0 m asl
Timezone NoneNone
Timezone difference to GMT+0 0 s
Coordinates 38.769771575927734°N -9.208740234375°E
Elevation 82.0 m asl
Timezone NoneNone
Timezone difference to GMT+0 0 s
Coordinates 38.769771575927734°N -9.208740234375°E
Elevation 85.0 m asl
Timezone NoneNone
Timezone difference to GMT+0 0 s
Coordinates 38.69947052001953°N -9.1961669921875°E
Elevation 75.0 m asl
Timezone NoneNone
Timezone difference to GMT+0 0 s
Coordinates 38.769771575927734°N -9.208740234375°E
Elevation 88.0 m asl
Timezone NoneNone
Timezone difference to GMT+0 0 s
Coordinates 38.69947052001953°N -8.950958251953125°E
Elevation 15.0 m asl
Timezone NoneNone
Timezone difference to GMT+0 0 s
Coordinates 38.69947052001953°N -9.1961669921875°E
Elevation 85.0 m asl
Timezone NoneNone
Timezone difference to GMT+0 0 s
Coordinates 38

Unnamed: 0,date,location,latitude,longitude,temperature_2m,apparent_temperature,rain,precipitation,relative_humidity_2m,wind_speed_10m,wind_speed_100m,wind_direction_10m,wind_direction_100m,soil_temperature_0_to_7cm,soil_moisture_0_to_7cm,cloud_cover,pressure_msl,cloud_cover_mid,cloud_cover_low
0,2023-01-01 00:00:00+00:00,Ajuda,38.704676,-9.199604,16.4505,14.71204,0.0,0.0,84.888634,20.598795,36.17955,185.013031,185.710495,16.050501,0.384,55.0,1020.799988,35.0,1.0
1,2023-01-01 01:00:00+00:00,Ajuda,38.704676,-9.199604,16.3505,14.613682,0.0,0.0,86.265656,20.957443,36.17955,184.927017,185.710495,15.7505,0.384,19.0,1020.0,11.0,0.0
2,2023-01-01 02:00:00+00:00,Ajuda,38.704676,-9.199604,16.2505,14.519455,0.0,0.0,85.976433,20.548401,36.044971,183.012726,182.86235,15.5005,0.383,14.0,1019.299988,14.0,0.0
3,2023-01-01 03:00:00+00:00,Ajuda,38.704676,-9.199604,16.4505,14.292339,0.0,0.0,84.065964,23.110207,39.373531,184.467072,184.720047,15.4005,0.383,53.0,1019.0,40.0,0.0
4,2023-01-01 04:00:00+00:00,Ajuda,38.704676,-9.199604,16.7005,14.149893,0.0,0.0,83.009964,25.922499,43.2015,180.7957,180.477448,15.4005,0.383,100.0,1018.599976,100.0,0.0


#### Hourly Municipalities Weather

In [138]:
locations = []

for mun, coords in mun_coords.items():
    loc = {"name": mun, "latitude": coords[0], "longitude": coords[1]}
    locations.append(loc)

In [139]:
# Make sure all required weather variables are listed here
# The order of variables in hourly or daily is important to assign them correctly below
url = "https://archive-api.open-meteo.com/v1/archive"

all_locations_2 = []

for loc in locations:
	params = {
		"latitude": loc["latitude"],
		"longitude": loc["longitude"],
		"start_date": "2023-01-01",
		"end_date": "2024-12-31",
		"daily": ["sunset", "sunrise", "temperature_2m_max", "temperature_2m_min", "daylight_duration", "wind_speed_10m_max", "wind_direction_10m_dominant"],
		"hourly": ["temperature_2m", "rain", "precipitation", "relative_humidity_2m", "wind_speed_10m", "wind_speed_100m", "wind_direction_10m", "wind_direction_100m", "soil_temperature_0_to_7cm", "soil_moisture_0_to_7cm", "cloud_cover", "pressure_msl", "apparent_temperature", "cloud_cover_mid", "cloud_cover_low"],
		"timezone": "GMT"
	}
	responses = openmeteo.weather_api(url, params=params)

	# Process first location. Add a for-loop for multiple locations or weather models
	response = responses[0]
	print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
	print(f"Elevation {response.Elevation()} m asl")
	print(f"Timezone {response.Timezone()}{response.TimezoneAbbreviation()}")
	print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

	# Process hourly data. The order of variables needs to be the same as requested.
	hourly = response.Hourly()
	hourly_temperature_2m = hourly.Variables(0).ValuesAsNumpy()
	hourly_apparent_temperature = hourly.Variables(11).ValuesAsNumpy()
	hourly_rain = hourly.Variables(1).ValuesAsNumpy()
	hourly_precipitation = hourly.Variables(2).ValuesAsNumpy()
	hourly_relative_humidity_2m = hourly.Variables(3).ValuesAsNumpy()
	hourly_wind_speed_10m = hourly.Variables(4).ValuesAsNumpy()
	hourly_wind_speed_100m = hourly.Variables(5).ValuesAsNumpy()
	hourly_wind_direction_10m = hourly.Variables(6).ValuesAsNumpy()
	hourly_wind_direction_100m = hourly.Variables(7).ValuesAsNumpy()
	hourly_soil_temperature_0_to_7cm = hourly.Variables(8).ValuesAsNumpy()
	hourly_soil_moisture_0_to_7cm = hourly.Variables(9).ValuesAsNumpy()
	hourly_cloud_cover = hourly.Variables(10).ValuesAsNumpy()
	hourly_pressure_msl = hourly.Variables(11).ValuesAsNumpy()
	hourly_apparent_temperature = hourly.Variables(12).ValuesAsNumpy()
	hourly_cloud_cover_mid = hourly.Variables(13).ValuesAsNumpy()
	hourly_cloud_cover_low = hourly.Variables(14).ValuesAsNumpy()

	hourly_data = {"date": pd.date_range(
		start = pd.to_datetime(hourly.Time(), unit = "s", utc = True),
		end = pd.to_datetime(hourly.TimeEnd(), unit = "s", utc = True),
		freq = pd.Timedelta(seconds = hourly.Interval()),
		inclusive = "left"
		), "location": [loc["name"]] * len(hourly_temperature_2m),
		"latitude": [loc["latitude"]] * len(hourly_temperature_2m),
		"longitude": [loc["longitude"]] * len(hourly_temperature_2m)
	}

	hourly_data["temperature_2m"] = hourly_temperature_2m
	hourly_data["apparent_temperature"] = hourly_apparent_temperature
	hourly_data["rain"] = hourly_rain
	hourly_data["precipitation"] = hourly_precipitation
	hourly_data["relative_humidity_2m"] = hourly_relative_humidity_2m
	hourly_data["wind_speed_10m"] = hourly_wind_speed_10m
	hourly_data["wind_speed_100m"] = hourly_wind_speed_100m
	hourly_data["wind_direction_10m"] = hourly_wind_direction_10m
	hourly_data["wind_direction_100m"] = hourly_wind_direction_100m
	hourly_data["soil_temperature_0_to_7cm"] = hourly_soil_temperature_0_to_7cm
	hourly_data["soil_moisture_0_to_7cm"] = hourly_soil_moisture_0_to_7cm
	hourly_data["cloud_cover"] = hourly_cloud_cover
	hourly_data["pressure_msl"] = hourly_pressure_msl
	hourly_data["apparent_temperature"] = hourly_apparent_temperature
	hourly_data["cloud_cover_mid"] = hourly_cloud_cover_mid
	hourly_data["cloud_cover_low"] = hourly_cloud_cover_low

	hourly_dataframe = pd.DataFrame(data=hourly_data)
	all_locations_2.append(hourly_dataframe)

mun_weather_df = pd.concat(all_locations_2, ignore_index=True)
mun_weather_df.head()

Coordinates 39.05096435546875°N -9.0123291015625°E
Elevation 67.0 m asl
Timezone NoneNone
Timezone difference to GMT+0 0 s
Coordinates 38.769771575927734°N -9.208740234375°E
Elevation 135.0 m asl
Timezone NoneNone
Timezone difference to GMT+0 0 s
Coordinates 38.98066711425781°N -9.123291015625°E
Elevation 100.0 m asl
Timezone NoneNone
Timezone difference to GMT+0 0 s
Coordinates 39.05096435546875°N -8.888885498046875°E
Elevation 16.0 m asl
Timezone NoneNone
Timezone difference to GMT+0 0 s
Coordinates 39.26185989379883°N -9.049591064453125°E
Elevation 104.0 m asl
Timezone NoneNone
Timezone difference to GMT+0 0 s
Coordinates 38.69947052001953°N -9.31878662109375°E
Elevation 12.0 m asl
Timezone NoneNone
Timezone difference to GMT+0 0 s
Coordinates 38.69947052001953°N -9.1961669921875°E
Elevation 7.0 m asl
Timezone NoneNone
Timezone difference to GMT+0 0 s
Coordinates 38.84006881713867°N -9.098358154296875°E
Elevation 14.0 m asl
Timezone NoneNone
Timezone difference to GMT+0 0 s
Coordina

Unnamed: 0,date,location,latitude,longitude,temperature_2m,apparent_temperature,rain,precipitation,relative_humidity_2m,wind_speed_10m,wind_speed_100m,wind_direction_10m,wind_direction_100m,soil_temperature_0_to_7cm,soil_moisture_0_to_7cm,cloud_cover,pressure_msl,cloud_cover_mid,cloud_cover_low
0,2023-01-01 00:00:00+00:00,Alenquer,39.056633,-9.007606,15.649,14.174212,0.0,0.0,86.759445,17.698677,32.431984,175.333221,182.544754,15.349,0.363,21.0,1020.700012,22.0,2.0
1,2023-01-01 01:00:00+00:00,Alenquer,39.056633,-9.007606,15.549,13.952209,0.0,0.0,87.886978,18.775301,33.847656,175.601379,181.218842,14.999,0.363,4.0,1019.900024,3.0,0.0
2,2023-01-01 02:00:00+00:00,Alenquer,39.056633,-9.007606,15.349,13.828251,0.0,0.0,88.443573,18.014393,33.857227,177.709442,181.827927,14.749,0.363,3.0,1019.299988,3.0,0.0
3,2023-01-01 03:00:00+00:00,Alenquer,39.056633,-9.007606,15.399,13.773319,0.0,0.0,88.160347,18.733839,34.57687,177.797455,181.789871,14.549,0.363,48.0,1019.0,14.0,0.0
4,2023-01-01 04:00:00+00:00,Alenquer,39.056633,-9.007606,15.698999,13.741028,0.0,0.0,86.201118,20.892412,37.827415,178.025116,182.181595,14.499,0.363,97.0,1018.5,86.0,0.0


In [140]:
# Saving the data so I can work with it later
mun_weather_df.to_csv('hourly_mun_weather.csv', index=False)

#### Hourly Districts Weather

In [76]:
locations = []

for district, coords in district_coords.items():
    loc = {"name": district, "latitude": coords[0], "longitude": coords[1]}
    locations.append(loc)

In [77]:
# Make sure all required weather variables are listed here
# The order of variables in hourly or daily is important to assign them correctly below
url = "https://archive-api.open-meteo.com/v1/archive"

all_locations_2 = []

for loc in locations:
	params = {
		"latitude": loc["latitude"],
		"longitude": loc["longitude"],
		"start_date": "2023-01-01",
		"end_date": "2024-12-31",
		"daily": ["sunset", "sunrise", "temperature_2m_max", "temperature_2m_min", "daylight_duration", "wind_speed_10m_max", "wind_direction_10m_dominant"],
		"hourly": ["temperature_2m", "rain", "precipitation", "relative_humidity_2m", "wind_speed_10m", "wind_speed_100m", "wind_direction_10m", "wind_direction_100m", "soil_temperature_0_to_7cm", "soil_moisture_0_to_7cm", "cloud_cover", "pressure_msl", "apparent_temperature", "cloud_cover_mid", "cloud_cover_low"],
		"timezone": "GMT"
	}
	responses = openmeteo.weather_api(url, params=params)

	# Process first location. Add a for-loop for multiple locations or weather models
	response = responses[0]
	print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
	print(f"Elevation {response.Elevation()} m asl")
	print(f"Timezone {response.Timezone()}{response.TimezoneAbbreviation()}")
	print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

	# Process hourly data. The order of variables needs to be the same as requested.
	hourly = response.Hourly()
	hourly_temperature_2m = hourly.Variables(0).ValuesAsNumpy()
	hourly_apparent_temperature = hourly.Variables(11).ValuesAsNumpy()
	hourly_rain = hourly.Variables(1).ValuesAsNumpy()
	hourly_precipitation = hourly.Variables(2).ValuesAsNumpy()
	hourly_relative_humidity_2m = hourly.Variables(3).ValuesAsNumpy()
	hourly_wind_speed_10m = hourly.Variables(4).ValuesAsNumpy()
	hourly_wind_speed_100m = hourly.Variables(5).ValuesAsNumpy()
	hourly_wind_direction_10m = hourly.Variables(6).ValuesAsNumpy()
	hourly_wind_direction_100m = hourly.Variables(7).ValuesAsNumpy()
	hourly_soil_temperature_0_to_7cm = hourly.Variables(8).ValuesAsNumpy()
	hourly_soil_moisture_0_to_7cm = hourly.Variables(9).ValuesAsNumpy()
	hourly_cloud_cover = hourly.Variables(10).ValuesAsNumpy()
	hourly_pressure_msl = hourly.Variables(11).ValuesAsNumpy()
	hourly_apparent_temperature = hourly.Variables(12).ValuesAsNumpy()
	hourly_cloud_cover_mid = hourly.Variables(13).ValuesAsNumpy()
	hourly_cloud_cover_low = hourly.Variables(14).ValuesAsNumpy()

	hourly_data = {"date": pd.date_range(
		start = pd.to_datetime(hourly.Time(), unit = "s", utc = True),
		end = pd.to_datetime(hourly.TimeEnd(), unit = "s", utc = True),
		freq = pd.Timedelta(seconds = hourly.Interval()),
		inclusive = "left"
		), "location": [loc["name"]] * len(hourly_temperature_2m),
		"latitude": [loc["latitude"]] * len(hourly_temperature_2m),
		"longitude": [loc["longitude"]] * len(hourly_temperature_2m)
	}

	hourly_data["temperature_2m"] = hourly_temperature_2m
	hourly_data["apparent_temperature"] = hourly_apparent_temperature
	hourly_data["rain"] = hourly_rain
	hourly_data["precipitation"] = hourly_precipitation
	hourly_data["relative_humidity_2m"] = hourly_relative_humidity_2m
	hourly_data["wind_speed_10m"] = hourly_wind_speed_10m
	hourly_data["wind_speed_100m"] = hourly_wind_speed_100m
	hourly_data["wind_direction_10m"] = hourly_wind_direction_10m
	hourly_data["wind_direction_100m"] = hourly_wind_direction_100m
	hourly_data["soil_temperature_0_to_7cm"] = hourly_soil_temperature_0_to_7cm
	hourly_data["soil_moisture_0_to_7cm"] = hourly_soil_moisture_0_to_7cm
	hourly_data["cloud_cover"] = hourly_cloud_cover
	hourly_data["pressure_msl"] = hourly_pressure_msl
	hourly_data["apparent_temperature"] = hourly_apparent_temperature
	hourly_data["cloud_cover_mid"] = hourly_cloud_cover_mid
	hourly_data["cloud_cover_low"] = hourly_cloud_cover_low

	hourly_dataframe = pd.DataFrame(data=hourly_data)
	all_locations_2.append(hourly_dataframe)

district_weather_df = pd.concat(all_locations_2, ignore_index=True)
district_weather_df.head()

Coordinates 37.996482849121094°N -7.8629150390625°E
Elevation 286.0 m asl
Timezone NoneNone
Timezone difference to GMT+0 0 s
Coordinates 38.55887222290039°N -7.9483642578125°E
Elevation 297.0 m asl
Timezone NoneNone
Timezone difference to GMT+0 0 s
Coordinates 39.26185989379883°N -8.677703857421875°E
Elevation 104.0 m asl
Timezone NoneNone
Timezone difference to GMT+0 0 s
Coordinates 39.964847564697266°N -7.41619873046875°E
Elevation 386.0 m asl
Timezone NoneNone
Timezone difference to GMT+0 0 s
Coordinates 41.51142120361328°N -6.8731689453125°E
Elevation 710.0 m asl
Timezone NoneNone
Timezone difference to GMT+0 0 s
Coordinates 39.19156265258789°N -7.675384521484375°E
Elevation 205.0 m asl
Timezone NoneNone
Timezone difference to GMT+0 0 s
Coordinates 40.738136291503906°N -7.14892578125°E
Elevation 554.0 m asl
Timezone NoneNone
Timezone difference to GMT+0 0 s
Coordinates 38.55887222290039°N -8.9266357421875°E
Elevation 7.0 m asl
Timezone NoneNone
Timezone difference to GMT+0 0 s
Coor

Unnamed: 0,date,location,latitude,longitude,temperature_2m,apparent_temperature,rain,precipitation,relative_humidity_2m,wind_speed_10m,wind_speed_100m,wind_direction_10m,wind_direction_100m,soil_temperature_0_to_7cm,soil_moisture_0_to_7cm,cloud_cover,pressure_msl,cloud_cover_mid,cloud_cover_low
0,2023-01-01 00:00:00+00:00,Beja,38.015448,-7.865037,12.063,10.118144,0.0,0.0,95.169319,16.119801,30.962091,156.297348,156.713913,13.613,0.327,40.0,1024.0,95.0,0.0
1,2023-01-01 01:00:00+00:00,Beja,38.015448,-7.865037,12.513,10.513935,0.0,0.0,94.559746,17.238699,31.559086,151.294144,154.311905,13.363,0.327,78.0,1023.299988,69.0,0.0
2,2023-01-01 02:00:00+00:00,Beja,38.015448,-7.865037,12.263,10.227627,0.0,0.0,96.442444,17.595861,32.696644,149.23735,151.742065,13.113,0.327,63.0,1022.700012,21.0,0.0
3,2023-01-01 03:00:00+00:00,Beja,38.015448,-7.865037,12.013,9.782242,0.0,0.0,96.435585,18.398781,33.818928,149.420853,152.071121,12.863,0.327,60.0,1022.400024,44.0,0.0
4,2023-01-01 04:00:00+00:00,Beja,38.015448,-7.865037,12.013,9.583671,0.0,0.0,95.167465,19.33639,35.261623,151.049103,152.650208,12.713,0.327,99.0,1022.0,99.0,2.0


In [78]:
# Saving the data so I can work with it later
district_weather_df.to_csv('hourly_district_weather.csv', index=False)

## 🔀 Merge and Save the Data

In [116]:
# Merging the Air Quality DF with the Weather DF - Lisbon

df_parishes = pd.merge(lis_air_df, lis_weather_df, on=['date','location'], how='outer')

In [117]:
# Merging the Air Quality DF with the Weather DF - Portugal's Districts

df_districts = pd.merge(district_air_df, district_weather_df, on=['date','location'], how='inner')

In [141]:
# Merging the Air Quality DF with the Weather DF - Lisbons's Municipalities

df_mun = pd.merge(mun_air_df, mun_weather_df, on=['date','location'], how='inner')

In [None]:
df_parishes.to_csv('df_parishes.csv', index=False)
df_districts.to_csv('df_districts.csv', index=False)
df_mun.to_csv('df_mun.csv', index=False)