In [2]:
import os
import zipfile
import xarray as xr
import numpy as np
import torch
from torch.utils.data import DataLoader, random_split

import sys; sys.path.append('/home/jovyan/work/deep4downscaling')
import deep4downscaling.viz
import deep4downscaling.trans
import deep4downscaling.deep.loss
import deep4downscaling.deep.utils
import deep4downscaling.deep.models
import deep4downscaling.deep.train
import deep4downscaling.deep.pred
import deep4downscaling.metrics
import deep4downscaling.metrics_ccs

In [3]:
# Select the domain to downscale. The value is interpolated into the data,
# figure and model paths built below. The download cell currently only
# handles 'ALPS' and raises for 'NZ'.
domain = 'ALPS' # (ALPS, NZ)

In [6]:
# Directory layout for the selected domain: input data, figures and trained models
DATA_PATH = f'./data/CORDEXBench/{domain}/{domain}_domain'
FIGURES_PATH = f'./figures/CORDEXBench/{domain}'
MODELS_PATH = f'./models/CORDEXBench/{domain}'

# Create each directory if it is missing (exist_ok makes re-running the cell safe)
for directory in (DATA_PATH, FIGURES_PATH, MODELS_PATH):
    os.makedirs(directory, exist_ok=True)

### CORDEXBench Data Download

In [7]:
# Download data
if domain == 'ALPS':
    !wget -P ./data/CORDEXBench/ALPS https://zenodo.org/records/15797226/files/ALPS_domain.zip?download=1
    !mv ./data/CORDEXBench/ALPS/ALPS_domain.zip* ./data/CORDEXBench/ALPS/ALPS_domain.zip 
    
    with zipfile.ZipFile('./data/CORDEXBench/ALPS/ALPS_domain.zip', 'r') as zip_ref:
        zip_ref.extractall('./data/CORDEXBench/ALPS')
    
    !rm ./data/CORDEXBench/ALPS/ALPS_domain.zip 
    
elif domain == 'NZ':
    raise ValueError('TODO')

--2025-07-04 09:47:18--  https://zenodo.org/records/15797226/files/ALPS_domain.zip?download=1
Resolving zenodo.org (zenodo.org)... 188.185.43.25, 188.185.45.92, 188.185.48.194, ...
Connecting to zenodo.org (zenodo.org)|188.185.43.25|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 3620287187 (3.4G) [application/octet-stream]
Saving to: ‘./data/CORDEXBench/ALPS/ALPS_domain.zip?download=1’


2025-07-04 09:49:41 (24.2 MB/s) - ‘./data/CORDEXBench/ALPS/ALPS_domain.zip?download=1’ saved [3620287187/3620287187]



### Training Experiment

In [9]:
# Set the experiment used for training the model
training_experiment = 'ESD_pseudo_reality' # (ESD_pseudo_reality, Emulator_hist_fut)
gcm_name = 'CNRM-CM5'

# Set the period covered by the training files of the chosen experiment
# (the string is part of the file names loaded below)
if training_experiment == 'ESD_pseudo_reality':
    period_training = '1961-1980'
elif training_experiment == 'Emulator_hist_fut':
    period_training = '1961-1980_2080-2099'
else:
    # Fail early with the offending value instead of a misleading "date" message
    raise ValueError(f"Unknown training_experiment '{training_experiment}'; "
                     "expected 'ESD_pseudo_reality' or 'Emulator_hist_fut'")

# Set the target variable
target_var = 'tasmax' # (tasmax, pr)

In [10]:
# Load predictors: the file is selected by training experiment, GCM name and
# training period, and lives under the train/ folder of DATA_PATH
predictor_filename = f'{DATA_PATH}/train/{training_experiment}/predictors/{gcm_name}_{period_training}.nc'
predictor = xr.open_dataset(predictor_filename)
# Print the dataset summary (dimensions, coordinates, variables)
print(predictor)

<xarray.Dataset> Size: 112MB
Dimensions:  (time: 7305, lat: 16, lon: 16)
Coordinates:
  * time     (time) datetime64[ns] 58kB 1961-01-01T12:00:00 ... 1980-12-31T12...
  * lon      (lon) float64 128B -4.0 -2.0 0.0 2.0 4.0 ... 20.0 22.0 24.0 26.0
  * lat      (lat) float64 128B 32.0 34.0 36.0 38.0 40.0 ... 56.0 58.0 60.0 62.0
Data variables: (12/15)
    u_850    (time, lat, lon) float32 7MB ...
    u_700    (time, lat, lon) float32 7MB ...
    u_500    (time, lat, lon) float32 7MB ...
    v_850    (time, lat, lon) float32 7MB ...
    v_700    (time, lat, lon) float32 7MB ...
    v_500    (time, lat, lon) float32 7MB ...
    ...       ...
    t_850    (time, lat, lon) float32 7MB ...
    t_700    (time, lat, lon) float32 7MB ...
    t_500    (time, lat, lon) float32 7MB ...
    z_850    (time, lat, lon) float32 7MB ...
    z_700    (time, lat, lon) float32 7MB ...
    z_500    (time, lat, lon) float32 7MB ...
Attributes: (12/28)
    CDI:                            Climate Data Interface v

In [11]:
# Visualize the predictors: the time mean of every predictor variable is passed
# to the plotting helper, with a PDF under FIGURES_PATH as output path
deep4downscaling.viz.multiple_map_plot(data=predictor.mean('time'),
                                       output_path=f'./{FIGURES_PATH}/predictor_climatology_{training_experiment}.pdf')

In [12]:
# Load predictand (the file name suggests it holds both pr and tasmax)
predictand_filename = f'{DATA_PATH}/train/{training_experiment}/target/pr_tasmax_{gcm_name}_{period_training}.nc'
predictand = xr.open_dataset(predictand_filename)
# Keep only the target variable; the double brackets keep the result a Dataset
predictand = predictand[[target_var]] # Univariate
print(predictand)

<xarray.Dataset> Size: 479MB
Dimensions:  (time: 7305, y: 128, x: 128)
Coordinates:
  * time     (time) datetime64[ns] 58kB 1961-01-01T12:00:00 ... 1980-12-31T12...
    lon      (y, x) float64 131kB ...
    lat      (y, x) float64 131kB ...
  * x        (x) float64 1kB 2.062e+03 2.075e+03 ... 3.638e+03 3.65e+03
  * y        (y) float64 1kB 1.412e+03 1.425e+03 1.438e+03 ... 2.988e+03 3e+03
Data variables:
    tasmax   (time, y, x) float32 479MB ...
Attributes: (12/26)
    description:                    Created by xios
    title:                          Created by xios
    Conventions:                    CF-1.6
    creation_date:                  2018-10-24T11:26:47Z
    name:                           EUR-11_CNRM-CERFACS-CNRM-CM5_historical_r...
    institute_id:                   CNRM
    ...                             ...
    references:                     http://www.umr-cnrm.fr/spip.php?article12...
    comment:                        CORDEX Europe EUR-11 CNRM-ALADIN 6.3 L91 ...


In [13]:
# Plot the predictand for a single time step (index 100 along time) as a quick
# sanity check of the target field
deep4downscaling.viz.simple_map_plot(data=predictand.isel(time=100),
                                     colorbar='hot_r', var_to_plot=target_var,
                                     output_path=f'./{FIGURES_PATH}/predictand_day_{training_experiment}.pdf')

In [14]:
# Remove days with nans in the predictor
# (note: this rebinds `predictor`, and the next call rebinds `predictand` too)
predictor = deep4downscaling.trans.remove_days_with_nans(predictor)

# Align both datasets in time, so predictor and predictand can be paired
# sample by sample along the 'time' dimension
predictor, predictand = deep4downscaling.trans.align_datasets(predictor, predictand, 'time')

There are no observations containing null values


In [15]:
# Select training and test years; the test years are the last ones of the record
if training_experiment == 'ESD_pseudo_reality':
    years_train = [*range(1961, 1975)]                        # 1961-1974
    years_test = [*range(1975, 1981)]                         # 1975-1980
elif training_experiment == 'Emulator_hist_fut':
    years_train = [*range(1961, 1981), *range(2080, 2090)]    # 1961-1980 + 2080-2089
    years_test = [*range(2090, 2100)]                         # 2090-2099

In [16]:
def _years_mask(dataset, years):
    """Return a boolean array over ``dataset['time']``, True where the year is in ``years``."""
    return np.isin(dataset['time'].dt.year, years)

# Subset into training and test sets
x_train = predictor.sel(time=_years_mask(predictor, years_train))
y_train = predictand.sel(time=_years_mask(predictand, years_train))

x_test = predictor.sel(time=_years_mask(predictor, years_test))
y_test = predictand.sel(time=_years_mask(predictand, years_test))

In [17]:
# Standardize the training predictors, using the training set itself as the
# reference (the same reference, x_train, is reused for test and evaluation data)
x_train_stand = deep4downscaling.trans.standardize(data_ref=x_train, data=x_train)

In [18]:
# Flatten the target data: stack the spatial dimensions x and y into a single
# 'gridpoint' dimension
y_train_stack = y_train.stack(gridpoint=('x', 'y'))

In [19]:
# Select the loss function: deep4downscaling's MseLoss, with ignore_nans disabled
loss_function = deep4downscaling.deep.loss.MseLoss(ignore_nans=False)

In [20]:
# Transform the standardized predictors and the stacked target to numpy arrays;
# their shapes are later passed to the model constructor
x_train_stand_arr = deep4downscaling.trans.xarray_to_numpy(x_train_stand)
y_train_arr = deep4downscaling.trans.xarray_to_numpy(y_train_stack)

In [21]:
# Create Dataset
train_dataset = deep4downscaling.deep.utils.StandardDataset(x=x_train_stand_arr,
                                                            y=y_train_arr)

# Split into training (90%) and validation (10%) sets.
# A seeded generator makes the split identical on every run, so validation
# losses are comparable across kernel restarts
split_seed = 42
train_dataset, valid_dataset = random_split(train_dataset,
                                            [0.9, 0.1],
                                            generator=torch.Generator().manual_seed(split_seed))

# Create DataLoaders
batch_size = 64

train_dataloader = DataLoader(train_dataset, batch_size=batch_size,
                              shuffle=True)
# Validation only evaluates the model, so shuffling brings nothing here
valid_dataloader = DataLoader(valid_dataset, batch_size=batch_size,
                              shuffle=False)

In [22]:
# Set the model. model_name is also the stem of the checkpoint file that is
# loaded back after training (f'{MODELS_PATH}/{model_name}.pt').
# NOTE(review): the class name DeepESDpr suggests the precipitation variant,
# while target_var is 'tasmax' here - confirm this is the intended architecture
# or select the model class from target_var.
model_name = f'DeepESD_{training_experiment}_{target_var}'
model = deep4downscaling.deep.models.DeepESDpr(x_shape=x_train_stand_arr.shape,
                                               y_shape=y_train_arr.shape,
                                               filters_last_conv=1,
                                               stochastic=False)

In [23]:
# Set some hyperparameters
# num_epochs is presumably only an upper bound: the training loop also receives
# patience_early_stopping - TODO confirm the stopping rule in deep4downscaling
num_epochs = 10000
patience_early_stopping = 20

# Adam optimizer over all model parameters
learning_rate = 0.0001
optimizer = torch.optim.Adam(model.parameters(),
                             lr=learning_rate)

In [24]:
# Set the training device: the GPU when torch can see one, the CPU otherwise
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [26]:
# Train the model. The loop receives the model, its name and MODELS_PATH, the
# device, the loss/optimizer and both dataloaders, and returns two values bound
# to train_loss and val_loss. The following cell loads the weights back from
# f'{MODELS_PATH}/{model_name}.pt'.
train_loss, val_loss = deep4downscaling.deep.train.standard_training_loop(
                            model=model, model_name=model_name, model_path=MODELS_PATH,
                            device=device, num_epochs=num_epochs,
                            loss_function=loss_function, optimizer=optimizer,
                            train_data=train_dataloader, valid_data=valid_dataloader,
                            patience_early_stopping=patience_early_stopping)

In [27]:
# Load the model weights into the DeepESD architecture.
# map_location remaps the stored tensors to the current device, so a checkpoint
# written on a GPU can also be restored on a CPU-only machine
model.load_state_dict(torch.load(f'{MODELS_PATH}/{model_name}.pt',
                                 map_location=device))

# Standardize the test data with the training set as reference
x_test_stand = deep4downscaling.trans.standardize(data_ref=x_train, data=x_test)

In [28]:
# Generate the mask used to un-flatten the prediction: a dataset of ones shaped
# like the first time step of the training target
y_mask = xr.ones_like(y_train.isel(time=0))

In [29]:
# Compute predictions on the standardized test predictors. spatial_dims matches
# the ('x', 'y') order used when stacking the training target
pred_test = deep4downscaling.deep.pred.compute_preds_standard(
                                x_data=x_test_stand, model=model,
                                device=device, var_target=target_var,
                                mask=y_mask, batch_size=16,
                                spatial_dims=('x', 'y'))

In [30]:
# Visualize the predictions: map of the time mean over the test period
deep4downscaling.viz.simple_map_plot(data=pred_test.mean('time'),
                                     colorbar='hot_r', var_to_plot=target_var,
                                     output_path=f'./{FIGURES_PATH}/prediction_test_mean_{training_experiment}.pdf')

In [31]:
# Perform some evaluation: bias of the mean and of the 2nd / 98th percentiles.
# Every metric compares the same target and prediction for the same variable
metric_inputs = dict(target=y_test, pred=pred_test, var_target=target_var)

bias_mean = deep4downscaling.metrics.bias_mean(**metric_inputs)
bias_p02 = deep4downscaling.metrics.bias_quantile(quantile=0.02, **metric_inputs)
bias_p98 = deep4downscaling.metrics.bias_quantile(quantile=0.98, **metric_inputs)

In [32]:
# Plot the results: one map per bias metric, all on the same colour scale
for bias_label, bias_map in (('bias_mean', bias_mean),
                             ('bias_p02', bias_p02),
                             ('bias_p98', bias_p98)):
    deep4downscaling.viz.simple_map_plot(data=bias_map, var_to_plot=target_var,
                                         colorbar='RdBu_r', vlimits=(-2, 2),
                                         output_path=f'./{FIGURES_PATH}/{bias_label}_test_{training_experiment}.pdf')

### Evaluation Experiment

In [33]:
# Set the experiment used for evaluating the trained model
evaluation_experiment = 'mid_century' # (historical, mid_century, end_century)
mode = 'imperfect' # (imperfect, perfect)
# NOTE(review): this rebinds the gcm_name set for the training experiment, so
# re-running the training data-loading cells after this one would build their
# file names from this GCM instead.
gcm_name = 'MPI-ESM-LR' # (CNRM-CM5, MPI-ESM-LR)

# Set the period covered by the evaluation files of the chosen experiment
if evaluation_experiment == 'historical':
    period_evaluation = '1981-2000'
elif evaluation_experiment == 'mid_century':
    period_evaluation = '2041-2060'
elif evaluation_experiment == 'end_century':
    period_evaluation = '2080-2099'
else:
    # Fail early with the offending value instead of a misleading "date" message
    raise ValueError(f"Unknown evaluation_experiment '{evaluation_experiment}'; "
                     "expected 'historical', 'mid_century' or 'end_century'")

In [34]:
# Load the evaluation predictors, selected by evaluation experiment, mode
# (perfect / imperfect), GCM name and period
# NOTE(review): in the printed summaries the variable order of this file
# (..., z_*, q_*) differs from the training file (..., t_*, z_*); anything that
# turns the dataset into an array by position must account for that.
predictor_evaluation_filename = f'{DATA_PATH}/test/{evaluation_experiment}/predictors/{mode}/{gcm_name}_{period_evaluation}.nc'
predictor_evaluation = xr.open_dataset(predictor_evaluation_filename)
print(predictor_evaluation)

<xarray.Dataset> Size: 112MB
Dimensions:  (time: 7305, lat: 16, lon: 16)
Coordinates:
  * time     (time) datetime64[ns] 58kB 2041-01-01T12:00:00 ... 2060-12-31T12...
  * lat      (lat) float64 128B 32.0 34.0 36.0 38.0 40.0 ... 56.0 58.0 60.0 62.0
  * lon      (lon) float64 128B -4.0 -2.0 0.0 2.0 4.0 ... 20.0 22.0 24.0 26.0
Data variables: (12/15)
    u_850    (time, lat, lon) float32 7MB ...
    u_700    (time, lat, lon) float32 7MB ...
    u_500    (time, lat, lon) float32 7MB ...
    v_850    (time, lat, lon) float32 7MB ...
    v_700    (time, lat, lon) float32 7MB ...
    v_500    (time, lat, lon) float32 7MB ...
    ...       ...
    z_850    (time, lat, lon) float32 7MB ...
    z_700    (time, lat, lon) float32 7MB ...
    z_500    (time, lat, lon) float32 7MB ...
    q_850    (time, lat, lon) float32 7MB ...
    q_700    (time, lat, lon) float32 7MB ...
    q_500    (time, lat, lon) float32 7MB ...
Attributes:
    regrid_method:  conservative


In [35]:
# Standardize the evaluation predictors with the training set as reference.
# The evaluation file lists its variables in a different order than the
# training file, and the model receives them as positional channels, so the
# variables are first put in the training order. Indexing with the list also
# fails loudly if a training variable is missing from the evaluation file.
predictor_vars = list(x_train.data_vars)
predictor_evaluation_stand = deep4downscaling.trans.standardize(
    data_ref=x_train, data=predictor_evaluation[predictor_vars])
# Re-apply the order on the result in case standardize() does not preserve it
predictor_evaluation_stand = predictor_evaluation_stand[predictor_vars]

In [36]:
# Compute predictions for the evaluation experiment, reusing the trained model
# and the spatial mask built from the training target
pred_evaluation = deep4downscaling.deep.pred.compute_preds_standard(
                                    x_data=predictor_evaluation_stand, model=model,
                                    device=device, var_target=target_var,
                                    mask=y_mask, batch_size=16,
                                    spatial_dims=('x', 'y'))

In [37]:
# Plot the climatology (time mean) of the evaluation predictions
# NOTE(review): vlimits=(270, 310) looks tuned for tasmax in Kelvin - confirm,
# and adjust if target_var is switched to 'pr'
deep4downscaling.viz.simple_map_plot(data=pred_evaluation.mean('time'), var_to_plot=target_var,
                                     colorbar='hot_r', vlimits=(270, 310),
                                     output_path=f'./{FIGURES_PATH}/climatology_predictand_{evaluation_experiment}_{gcm_name}_{mode}.pdf')