# Weather Conditions Extraction -- Portugal (Meteostat)

This notebook collects **daily weather conditions** for each grid tile centroid in Portugal using the [Meteostat](https://dev.meteostat.net/) API.

It retrieves temperature, precipitation, and related variables from the **nearest weather stations** for the same date range used in the SMAP soil moisture dataset (`2020–2025`).  
The results are saved locally for integration with the per-tile environmental dataset.


## 1. Imports and paths

In [1]:
import os
import warnings
from datetime import datetime

import pandas as pd
from meteostat import Point, Daily, Stations

## 2. Load per-tile SMAP export

In [2]:
# Location of the per-tile SMAP export, relative to the working directory.
data_path = os.path.join('data', 'csv', 'moisture_portugal.csv')

# Read the whole export; the bare name on the last line renders the frame.
sm_df = pd.read_csv(filepath_or_buffer=data_path)
sm_df

Unnamed: 0,system:index,centroid_alt,centroid_lat,centroid_lon,date,soil_moisture_am,soil_moisture_pm,tile_id,.geo
0,1_20200101_0,0,39.343777,-31.223818,2020-01-01,0.000000,0.000000,0,"{""type"":""Polygon"",""coordinates"":[[[-31.2688178..."
1,1_20200101_1,584,39.433777,-31.223818,2020-01-01,0.000000,0.000000,1,"{""type"":""Polygon"",""coordinates"":[[[-31.2688178..."
2,1_20200101_2,0,39.523777,-31.223818,2020-01-01,0.000000,0.000000,2,"{""type"":""Polygon"",""coordinates"":[[[-31.2688178..."
3,1_20200101_3,0,39.343777,-31.133818,2020-01-01,0.000000,0.000000,3,"{""type"":""Polygon"",""coordinates"":[[[-31.1788177..."
4,1_20200101_4,0,39.433777,-31.133818,2020-01-01,0.000000,0.000000,4,"{""type"":""Polygon"",""coordinates"":[[[-31.1788178..."
...,...,...,...,...,...,...,...,...,...
2611890,2_20250330_1400,714,41.503777,-6.293818,2025-03-30,0.289945,0.277564,1400,"{""type"":""Polygon"",""coordinates"":[[[-6.33881780..."
2611891,2_20250330_1401,761,41.593777,-6.293818,2025-03-30,0.311957,0.294475,1401,"{""type"":""Polygon"",""coordinates"":[[[-6.33881780..."
2611892,2_20250330_1402,854,41.683777,-6.293818,2025-03-30,0.308779,0.305330,1402,"{""type"":""Polygon"",""coordinates"":[[[-6.33881780..."
2611893,2_20250330_1403,726,41.503777,-6.203818,2025-03-30,0.309153,0.263115,1403,"{""type"":""Polygon"",""coordinates"":[[[-6.24881780..."


In [3]:
# Keep the first row seen for each tile_id, restricted to the columns that
# describe the tile (the retained `date` is simply that first row's date).
roi_columns = ['tile_id', 'centroid_lat', 'centroid_lon', 'centroid_alt', 'date']
roi_df = sm_df.loc[:, roi_columns].drop_duplicates(subset='tile_id')

In [4]:
# Parse the `date` column so that min()/max() below yield values exposing
# .year/.month/.day. roi_df was built before this cell and is not affected.
sm_df['date'] = sm_df['date'].pipe(pd.to_datetime)

In [5]:
# Display the tile lookup table: one row per distinct tile_id, as produced by
# drop_duplicates(subset='tile_id') above.
roi_df

Unnamed: 0,tile_id,centroid_lat,centroid_lon,centroid_alt,date
0,0,39.343777,-31.223818,0,2020-01-01
1,1,39.433777,-31.223818,584,2020-01-01
2,2,39.523777,-31.223818,0,2020-01-01
3,3,39.343777,-31.133818,0,2020-01-01
4,4,39.433777,-31.133818,0,2020-01-01
...,...,...,...,...,...
1400,1400,41.503777,-6.293818,714,2020-01-01
1401,1401,41.593777,-6.293818,761,2020-01-01
1402,1402,41.683777,-6.293818,854,2020-01-01
1403,1403,41.503777,-6.203818,726,2020-01-01


## 4. Determine overall date range for weather retrieval

In [6]:
# Overall period covered by the SMAP export. Requires sm_df['date'] to have
# been parsed already, since .year/.month/.day are read from its min and max.
earliest = sm_df['date'].min()
latest = sm_df['date'].max()

# Rebuild both bounds as plain datetime objects at midnight.
date_start = datetime(earliest.year, earliest.month, earliest.day)
date_end = datetime(latest.year, latest.month, latest.day)
date_start, date_end

(datetime.datetime(2020, 1, 1, 0, 0), datetime.datetime(2025, 3, 30, 0, 0))

## 5. Fetch daily weather from nearest Meteostat station(s)

In [10]:
# Fetch daily weather from nearest Meteostat station(s)
def get_weather_data(lat, lon, start, end, max_stations=5):
    """Return daily weather from the first nearby station that has any data.

    Parameters
    ----------
    lat, lon
        Coordinates of the point of interest, forwarded to
        ``Stations().nearby``.
    start, end
        Bounds of the requested period, forwarded to ``Daily``.
    max_stations : int, default 5
        Number of candidate stations requested from Meteostat. Candidates are
        tried in the order Meteostat returns them.

    Returns
    -------
    DataFrame or None
        The result of ``Daily(station_id, start, end).fetch()`` for the first
        candidate whose result is not empty, or ``None`` when every candidate
        (or the candidate list itself) is empty. Callers must handle ``None``.
    """
    candidates = Stations().nearby(lat, lon).fetch(max_stations)

    for station_id in candidates.index:
        daily = Daily(station_id, start, end).fetch()
        if not daily.empty:
            return daily

    # No candidate station returned any rows for the requested period.
    return None

# Collect daily weather for every tile in roi_df into one long table.
# FutureWarnings are silenced only inside this block to keep the cell output clean.
with warnings.catch_warnings():
    warnings.simplefilter("ignore", FutureWarning)

    tile_frames = []  # one DataFrame per tile that returned data
    number_of_tiles = len(roi_df)

    # The tile id is read from the `tile_id` column: roi_df's index is the row
    # label inherited from sm_df and is not guaranteed to equal the tile id.
    # itertuples also keeps each column's own dtype.
    for position, tile in enumerate(roi_df.itertuples(index=False), start=1):
        print(f'Tile {position}/{number_of_tiles}', end = '\r')

        data_fetch = get_weather_data(tile.centroid_lat, tile.centroid_lon, date_start, date_end)

        # get_weather_data returns None when no nearby station has data;
        # skip such tiles instead of failing on `None.empty`.
        if data_fetch is None or data_fetch.empty:
            continue

        tile_frames.append(data_fetch.assign(tile_id=tile.tile_id))

    # Concatenate once at the end instead of growing a DataFrame inside the
    # loop; pd.concat rejects an empty list, hence the fallback.
    weather_df = pd.concat(tile_frames, axis = 0) if tile_frames else pd.DataFrame()

Tile 1405/1405

In [8]:
# Share of ROI tiles that received weather data (1.0 means every tile is covered).
weather_df['tile_id'].nunique() / roi_df['tile_id'].nunique()

1.0

In [11]:
# Save the consolidated weather table as a CSV file.
output_folder = 'data/csv'
output_path = os.path.join(output_folder, 'weather_conditions.csv')

# Create the target folder if needed, then write the table (index included).
os.makedirs(output_folder, exist_ok=True)
weather_df.to_csv(output_path)