In [1]:
import csv
import os
import zipfile
from datetime import datetime

import cartopy.crs as ccrs
import gdown
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rasterio
import xarray as xr
from tqdm import tqdm

In [2]:
def download_weather_data(data_url, file_name, output_dir="../../drive_downloaded_files"):
    """
    Downloads a weather data file (e.g., temperature, precipitation, humidity) from Google Drive.

    The download is skipped when a file named `file_name` already exists in
    `output_dir`; in that case only a message is printed.

    Parameters:
        data_url (str): The URL to download the weather data file from.
        file_name (str): The file name including the variable and year (e.g., "2m_temperature_2023.netcdf").
        output_dir (str): The directory where the downloaded file will be saved. Defaults to "../../drive_downloaded_files".

    Returns:
        None
    """
    output_path = os.path.join(output_dir, file_name)

    # exist_ok=True creates the folder only when needed and avoids the
    # check-then-create race of a separate os.path.exists() test.
    os.makedirs(output_dir, exist_ok=True)

    # Nothing to do when the file is already on disk.
    if os.path.exists(output_path):
        print(f"The file {output_path} already exists.")
        return

    print(f"Downloading the file from {data_url}...")
    gdown.download(data_url, output_path, quiet=False)


def haversine_distance(lat1, lon1, lat2, lon2):
    """
    Calculates shortest distance between points on Earth's surface.
    Accounts for Earth's curvature, more accurate than Euclidean distance.

    NOTE: a function with this same name is defined again later in this
    notebook; the later definition is the one in effect at runtime.

    Parameters:
        lat1, lon1: Latitude and longitude of the first point, in degrees.
        lat2, lon2: Latitude and longitude of the second point, in degrees.
            Scalars or NumPy arrays are accepted (NumPy broadcasting applies).

    Returns:
        Great-circle distance in kilometres, using a spherical Earth of
        radius 6371 km.
    """
    R = 6371  # Earth's radius in km
    lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])

    dlat = lat2 - lat1
    dlon = lon2 - lon1

    a = np.sin(dlat/2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2)**2
    # Floating-point rounding can leave `a` marginally outside [0, 1] for
    # (near-)antipodal points, which would make arcsin return NaN.
    a = np.clip(a, 0.0, 1.0)
    c = 2 * np.arcsin(np.sqrt(a))

    return R * c

def find_nearest_point(lat, lon, ds):
    """
    Finds the nearest grid point in an ERA5-Land dataset given input coordinates.

    Latitude and longitude are matched independently against the dataset's
    `latitude` and `longitude` coordinate arrays (assumed to be 1-D).
    Longitude differences are measured around the circle, so a query close to
    the 0°/360° seam resolves to the correct column, and grids stored as
    [0, 360) or [-180, 180) are both handled.

    NOTE: a function with this same name is defined again later in this
    notebook; the later definition is the one in effect at runtime.

    Parameters:
        lat (float): Query latitude in degrees.
        lon (float): Query longitude in degrees, in any convention; it is
            wrapped to [0, 360) before use.
        ds: Dataset exposing `latitude` and `longitude` coordinates.

    Returns:
        tuple: (lat_idx, lon_idx, lat_nearest, lon_nearest, distance) where the
        indices point into the coordinate arrays, the nearest coordinates are
        the dataset's own values as floats, and distance is the haversine
        distance in km between the query and that grid point.
    """
    # Normalize longitude to [0, 360) range used by ERA5-Land
    # Example: -10° → 350°, 370° → 10°
    lon = lon % 360

    lats = ds.latitude.values
    lons = ds.longitude.values

    lat_idx = np.abs(lats - lat).argmin()

    # Signed shortest angular difference in [-180, 180): a plain |lons - lon|
    # would treat 359.9° and 0° as almost 360° apart.
    lon_gap = np.abs((lons - lon + 180.0) % 360.0 - 180.0)
    lon_idx = lon_gap.argmin()

    lat_nearest = float(lats[lat_idx])
    lon_nearest = float(lons[lon_idx])

    distance = haversine_distance(lat, lon, lat_nearest, lon_nearest)

    return lat_idx, lon_idx, lat_nearest, lon_nearest, distance

def haversine_distance(lat1, lon1, lat2, lon2):
    """
    Calculates shortest distance between points on Earth's surface.
    Accounts for Earth's curvature, more accurate than Euclidean distance.

    NOTE: this redefines a function of the same name declared earlier in this
    notebook; being the later definition, it is the one in effect at runtime.
    One of the two copies should eventually be removed.

    Parameters:
        lat1, lon1: Latitude and longitude of the first point, in degrees.
        lat2, lon2: Latitude and longitude of the second point, in degrees.
            Scalars or NumPy arrays are accepted (NumPy broadcasting applies).

    Returns:
        Great-circle distance in kilometres, using a spherical Earth of
        radius 6371 km.
    """
    R = 6371  # Earth's radius in km
    lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])

    dlat = lat2 - lat1
    dlon = lon2 - lon1

    a = np.sin(dlat/2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2)**2
    # Floating-point rounding can leave `a` marginally outside [0, 1] for
    # (near-)antipodal points, which would make arcsin return NaN.
    a = np.clip(a, 0.0, 1.0)
    c = 2 * np.arcsin(np.sqrt(a))

    return R * c

def find_nearest_point(lat, lon, ds):
    """
    Finds the nearest grid point in an ERA5-Land dataset given input coordinates.

    Latitude and longitude are matched independently against the dataset's
    `latitude` and `longitude` coordinate arrays (assumed to be 1-D).
    Longitude differences are measured around the circle, so a query close to
    the 0°/360° seam resolves to the correct column, and grids stored as
    [0, 360) or [-180, 180) are both handled.

    NOTE: this redefines a function of the same name declared earlier in this
    notebook; being the later definition, it is the one in effect at runtime.
    One of the two copies should eventually be removed.

    Parameters:
        lat (float): Query latitude in degrees.
        lon (float): Query longitude in degrees, in any convention; it is
            wrapped to [0, 360) before use.
        ds: Dataset exposing `latitude` and `longitude` coordinates.

    Returns:
        tuple: (lat_idx, lon_idx, lat_nearest, lon_nearest, distance) where the
        indices point into the coordinate arrays, the nearest coordinates are
        the dataset's own values as floats, and distance is the haversine
        distance in km between the query and that grid point.
    """
    # Normalize longitude to [0, 360) range used by ERA5-Land
    # Example: -10° → 350°, 370° → 10°
    lon = lon % 360

    lats = ds.latitude.values
    lons = ds.longitude.values

    lat_idx = np.abs(lats - lat).argmin()

    # Signed shortest angular difference in [-180, 180): a plain |lons - lon|
    # would treat 359.9° and 0° as almost 360° apart.
    lon_gap = np.abs((lons - lon + 180.0) % 360.0 - 180.0)
    lon_idx = lon_gap.argmin()

    lat_nearest = float(lats[lat_idx])
    lon_nearest = float(lons[lon_idx])

    distance = haversine_distance(lat, lon, lat_nearest, lon_nearest)

    return lat_idx, lon_idx, lat_nearest, lon_nearest, distance

def get_variable_series(ds, lat, lon, var_name=None):
    """
    Returns the data of one variable at the grid point closest to (lat, lon).

    Args:
        ds: xarray dataset with `latitude` and `longitude` dimensions
        lat: latitude of the location of interest
        lon: longitude of the location of interest
        var_name: name of the variable to extract (e.g., 't2m', 'd2m', 'tp').
            When None, the dataset's first data variable is used.

    Returns:
        The selected variable indexed at the nearest latitude/longitude
        position; any remaining dimensions are kept.
    """
    row, col, _lat_near, _lon_near, _dist = find_nearest_point(lat, lon, ds)

    # Default to the first data variable when the caller did not pick one.
    chosen_var = list(ds.data_vars)[0] if var_name is None else var_name

    variable = ds[chosen_var]
    return variable.isel(latitude=row, longitude=col)

def get_variable_average(ds, lat, lon, var_name=None):
    """
    Returns the mean of a variable at the grid point closest to (lat, lon).

    The arguments are forwarded unchanged to get_variable_series; the mean of
    the resulting series is returned as a plain Python float.
    """
    point_series = get_variable_series(ds, lat, lon, var_name)
    mean_value = point_series.mean()
    return float(mean_value)

## Countries 

In [None]:
countries_zip_url = "https://drive.google.com/uc?id=1UQzdO7suT0BnwKBeNybMG97vM9GIDogA"
countries_zip_file_path = "../../allCountries.zip"
countries_txt_filename = "allCountries.txt"

# Column layout of the GeoNames dump:
# https://download.geonames.org/export/dump/
geonames_columns = [
    'geonameid',
    'name',
    'asciiname',
    'alternatenames',
    'latitude',
    'longitude',
    'feature class',
    'feature code',
    'iso alpha 2',
    'cc2',
    'admin1 code',
    'admin2 code',
    'admin3 code',
    'admin4 code',
    'population',
    'elevation',
    'dem',
    'timezone',
    'modification date',
]

# Names and code-like columns are read as text so values such as "01" keep
# their leading zeros and a column never ends up with mixed int/str values.
geonames_text_columns = [
    'name',
    'asciiname',
    'alternatenames',
    'feature class',
    'feature code',
    'iso alpha 2',
    'cc2',
    'admin1 code',
    'admin2 code',
    'admin3 code',
    'admin4 code',
    'timezone',
    'modification date',
]

# Download the ZIP file if it doesn't exist; otherwise, proceed to read the TXT file.
if not os.path.exists(countries_zip_file_path):
    gdown.download(countries_zip_url, countries_zip_file_path, quiet=False)

with zipfile.ZipFile(countries_zip_file_path) as z:
    with z.open(countries_txt_filename) as txt_file:
        countries_df = pd.read_csv(
            txt_file,
            sep="\t",
            header=None,
            names=geonames_columns,
            dtype={column: str for column in geonames_text_columns},
            # The dump is plain tab-delimited text: a quote character inside a
            # name is data, not a field delimiter, so quoting must be disabled
            # or rows containing quotes get merged.
            quoting=csv.QUOTE_NONE,
            # Only empty fields are missing values; with pandas' default NA
            # strings the ISO code "NA" (Namibia) would be read as NaN.
            keep_default_na=False,
            na_values=[""],
        )

print(f"\nshape: {countries_df.shape}")
countries_df.head()

## EUI

In [None]:
# Read the EUI table as CSV directly from its Google Drive download link.
eui_url = "https://drive.google.com/uc?id=12qGq_DLefI1RihIF_RKQUyJtm480-xRC"
eui_df = pd.read_csv(eui_url)

# Quick look at what was loaded: dimensions first, then the first rows.
print(f"shape: {eui_df.shape}")
eui_df.head()

In [None]:
# Attach the GeoNames attributes to the EUI rows through the GeoNames identifier.
merged_df = countries_df.merge(
    eui_df,
    how="inner",
    left_on="geonameid",
    right_on="Geonames ID",
)

# The merged table must have exactly as many rows as the EUI table.
assert len(merged_df) == len(eui_df)
print(f"shape: {merged_df.shape}")
merged_df[["geonameid","name","latitude","longitude"]]

# 2m temperature (K)

This parameter is the temperature of air at 2m above the surface of land, sea or inland waters. 2m temperature is calculated by interpolating between the lowest model level and the Earth's surface, taking account of the atmospheric conditions. This parameter has units of kelvin (K). Temperature measured in kelvin can be converted to degrees Celsius (°C) by subtracting 273.15.

In [None]:
# Google Drive download link and local file name for the 2023 2m temperature file.
drive_url_2m_temperature_2023 = "https://drive.google.com/uc?export=download&id=15ZCGXk4YdgF46RxIDz8z1zgqPqBCx0C8"
file_name_2m_temperature_2023 = "2m_temperature_2023.netcdf"
# Saved into download_weather_data's default output_dir; skipped when the file is already there.
download_weather_data(drive_url_2m_temperature_2023, file_name_2m_temperature_2023)

In [None]:
# Open the downloaded 2m temperature file and display the dataset summary.
path_2m_temperature_2023 = os.path.join("../../drive_downloaded_files/", file_name_2m_temperature_2023)
ds_2m_temperature_2023 = xr.open_dataset(path_2m_temperature_2023)
ds_2m_temperature_2023

In [None]:
# Santiago coordinates
lat = -33.3927
lon = -70.7858

# Find nearest point
lat_idx, lon_idx, lat_near, lon_near, distance = find_nearest_point(lat, lon, ds_2m_temperature_2023)

print("Nearest point:")
print(f"Latitude: {lat_near:.4f}°")
print(f"Longitude: {lon_near:.4f}°")
print(f"Distance: {distance:.2f} km")

# Average 't2m' value at the grid point nearest to these coordinates
temp_avg = get_variable_average(ds_2m_temperature_2023, lat, lon, 't2m')
print(f"Average temperature: {temp_avg} K")

# 2m dewpoint temperature (K)
This parameter is the temperature to which the air, at 2 metres above the surface of the Earth, would have to be cooled for saturation to occur. It is a measure of the humidity of the air. Combined with temperature, it can be used to calculate the relative humidity. 2m dew point temperature is calculated by interpolating between the lowest model level and the Earth's surface, taking account of the atmospheric conditions. This parameter has units of kelvin (K). Temperature measured in kelvin can be converted to degrees Celsius (°C) by subtracting 273.15.

In [None]:
# Google Drive download link and local file name for the 2023 2m dewpoint temperature file.
drive_url_2m_dewpoint_temperature_2023 = "https://drive.google.com/uc?export=download&id=1OK8q9zxVaVvVAHORRGWaHxTszYEgrhyR"
file_name_2m_dewpoint_temperature_2023 = "2m_dewpoint_temperature_2023.netcdf"
# Saved into download_weather_data's default output_dir; skipped when the file is already there.
download_weather_data(drive_url_2m_dewpoint_temperature_2023, file_name_2m_dewpoint_temperature_2023)

In [None]:
# Open the downloaded 2m dewpoint temperature file and display the dataset summary.
path_2m_dewpoint_temperature_2023 = os.path.join("../../drive_downloaded_files/", file_name_2m_dewpoint_temperature_2023)
ds_2m_dewpoint_temperature_2023 = xr.open_dataset(path_2m_dewpoint_temperature_2023)
ds_2m_dewpoint_temperature_2023

In [None]:
# Mean 'd2m' value at the grid point nearest to (lat, lon).
# NOTE: lat and lon are not defined in this cell; they come from an earlier cell.
dewpoint_avg = get_variable_average(ds_2m_dewpoint_temperature_2023, lat, lon, 'd2m')
print(f"Average dewpoint: {dewpoint_avg} K")

In [None]:
# Visually check the data for a specific datetime (dt).
specific_time = '2023-01-01T12:00'
dewpoint_one_time = ds_2m_dewpoint_temperature_2023['d2m'].sel(valid_time=specific_time)

# The field is in kelvin; subtract 273.15 to plot it in degrees Celsius.
dewpoint_one_time_celsius = dewpoint_one_time - 273.15

# Map canvas: one Robinson-projection axes with coastlines and a graticule.
fig = plt.figure(figsize=(15, 10))
ax = fig.add_subplot(1, 1, 1, projection=ccrs.Robinson())
ax.coastlines()
ax.gridlines()

# Draw the field; the data coordinates are plain longitude/latitude.
im = dewpoint_one_time_celsius.plot(
    ax=ax,
    transform=ccrs.PlateCarree(),
    cmap='RdYlBu_r',  # reversed Red-Yellow-Blue colormap
    vmin=-30,         # lower colour limit in Celsius
    vmax=30,          # upper colour limit in Celsius
    cbar_kwargs={'label': 'Dewpoint Temperature (°C)'}
)

ax.set_title(f'Global 2m Dewpoint Temperature\n{specific_time}')
plt.show()

# Total precipitation (m)
This parameter is the accumulated liquid and frozen water, comprising rain and snow, that falls to the Earth's surface. It is the sum of large-scale precipitation and convective precipitation. Large-scale precipitation is generated by the cloud scheme in the ECMWF Integrated Forecasting System (IFS). The cloud scheme represents the formation and dissipation of clouds and large-scale precipitation due to changes in atmospheric quantities (such as pressure, temperature and moisture) predicted directly by the IFS at spatial scales of the grid box or larger. Convective precipitation is generated by the convection scheme in the IFS, which represents convection at spatial scales smaller than the grid box. This parameter does not include fog, dew or the precipitation that evaporates in the atmosphere before it lands at the surface of the Earth. This parameter is accumulated over a particular time period which depends on the data extracted. For the reanalysis, the accumulation period is over the 1 hour ending at the validity date and time. For the ensemble members, ensemble mean and ensemble spread, the accumulation period is over the 3 hours ending at the validity date and time. The units of this parameter are depth in metres of water equivalent. It is the depth the water would have if it were spread evenly over the grid box. Care should be taken when comparing model parameters with observations, because observations are often local to a particular point in space and time, rather than representing averages over a model grid box.

In [None]:
# Google Drive download link and local file name for the 2023 total precipitation file.
drive_url_total_precipitation_2023 = "https://drive.google.com/uc?export=download&id=1HtqZBwzgGxRtP47MNVGzMlhI68mX3vaR"
file_name_total_precipitation_2023 = "total_precipitation_2023.netcdf"
# Saved into download_weather_data's default output_dir; skipped when the file is already there.
download_weather_data(drive_url_total_precipitation_2023, file_name_total_precipitation_2023)

In [None]:
# Open the downloaded total precipitation file and display the dataset summary.
path_total_precipitation_2023 = os.path.join("../../drive_downloaded_files/", file_name_total_precipitation_2023)
ds_total_precipitation_2023 = xr.open_dataset(path_total_precipitation_2023)
ds_total_precipitation_2023

In [None]:
# Mean 'tp' value at the grid point nearest to (lat, lon).
# NOTE: lat and lon are not defined in this cell; they come from an earlier cell.
precip_avg = get_variable_average(ds_total_precipitation_2023, lat, lon, 'tp')
print(f"Average precipitation: {precip_avg} m")

In [None]:
# Visually check the data for a specific datetime (dt).
specific_time = '2023-01-01T12:00'
precipitation_one_time = ds_total_precipitation_2023['tp'].sel(valid_time=specific_time)

# Convert from m to mm (multiply by 1000) so the plotted values match the
# colorbar label.
precipitation_one_time_mm = precipitation_one_time * 1000

# Upper colour limit: 99th percentile for better visualization.
# nanpercentile ignores missing cells, which would otherwise turn the limit into NaN.
precipitation_vmax_mm = float(np.nanpercentile(precipitation_one_time_mm.values, 99))

# Create the map
plt.figure(figsize=(15, 10))
ax = plt.axes(projection=ccrs.Robinson())
ax.coastlines()
ax.gridlines()

# Plot the data
im = precipitation_one_time_mm.plot(
    transform=ccrs.PlateCarree(),
    cmap='Blues',
    vmin=0,
    vmax=precipitation_vmax_mm,
    cbar_kwargs={'label': 'Precipitation (mm)'}
)

plt.title(f'Global Total Precipitation\n{specific_time}')
plt.show()