In [1]:
import sys
import os

# Make the parent of the current working directory importable and switch the
# working directory to it (later cells use paths relative to it, e.g. ./results).
# The guard makes the cell idempotent: re-running it no longer climbs one more
# directory level or appends a duplicate sys.path entry.
if "NOTEBOOK_ROOT" not in globals():
    NOTEBOOK_ROOT = os.path.abspath(os.path.join(os.getcwd(), '..'))
    os.chdir(NOTEBOOK_ROOT)
if NOTEBOOK_ROOT not in sys.path:
    sys.path.append(NOTEBOOK_ROOT)

In [None]:
from prob_unet_utils import compute_annual_block_maxima, gev_return_level, gev_parametric_bootstrap, get_empirical_return_periods
import torch
import random
import numpy as np
import climex_utils as cu
from scipy.stats import genextreme
import matplotlib.pyplot as plt
import train_prob_unet_model as tm  

In [None]:
# Run configuration taken from the training script; later cells read
# args.datadir, args.variables, args.coords, args.lowres_scale and args.plotdir from it.
args = tm.get_args()
def set_seed(seed):
    """Seed the Python, NumPy and PyTorch (CPU + CUDA) RNGs and pin cuDNN to deterministic mode.

    Also exports the seed as the PYTHONHASHSEED environment variable.
    NOTE(review): setting PYTHONHASHSEED after the interpreter has started
    presumably only affects child processes - confirm if hash order matters.
    """
    # Every generator takes the same integer seed, so seed them in one pass.
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seed_generator in seeders:
        seed_generator(seed)

    # Disable cuDNN autotuning and request deterministic kernels.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    os.environ['PYTHONHASHSEED'] = str(seed)
# -- 1) Set seed for reproducibility: fix all RNGs once, before any sampling is done
set_seed(42) 

def compute_return_levels_for_random_pixel(
    model,
    dataset,
    device,
    years,
    num_samples=10,
    chosen_pixel=None,
    variable='pr'
):
    """
    Sample the model at one pixel for every day, then fit a GEV to the annual maxima.

    For each day in `dataset`, `num_samples` forward passes are run and the
    value of `variable` at the chosen pixel is converted to physical units.
    The annual block maxima of that series are fitted with
    `scipy.stats.genextreme`; return levels and 95% parametric-bootstrap bounds
    are printed. The block maxima (.npy) and a return-level plot (.pdf) are
    written to `args.plotdir` (notebook-level `args`).

    Args:
        model: network called as `model(inputs, t=timestamps, training=False)`.
        dataset: indexable dataset whose items are dicts holding 'inputs',
            'lrinterp' and 'timestamps' tensors, and which provides
            `residual_to_hr(output, lrinterp)`.
        device: torch device the per-day inputs are moved to.
        years: sequence of years covered by `dataset`; forwarded to
            `compute_annual_block_maxima`.
        num_samples: number of forward passes (ensemble draws) per day.
        chosen_pixel: (y, x) pixel index; when None, a random pixel with both
            indices in [0, 127] is drawn.
        variable: 'pr', 'tasmax' or 'tasmin'.

    Returns:
        (daily_data, block_maxima): `daily_data` is a float32 array of shape
        [total_days, num_samples]; `block_maxima` is the result of
        `compute_annual_block_maxima`.

    Raises:
        ValueError: if `variable` is not one of the supported names.
    """
    # Physical unit and y-axis label per supported variable. Validating here
    # rejects a bad name before any (expensive) forward pass is run.
    units_by_variable = {'pr': 'mm/day', 'tasmax': '°C', 'tasmin': '°C'}
    ylabel_by_variable = {
        'pr': 'Precipitation (mm/day)',
        'tasmax': 'Tasmax (°C)',
        'tasmin': 'Tasmin (°C)',
    }
    if variable not in units_by_variable:
        raise ValueError("Unsupported variable. Use 'pr', 'tasmax' or 'tasmin'.")

    if chosen_pixel is None:
        # pick random pixel within the domain
        # NOTE(review): the 0..127 bounds assume a 128x128 grid - confirm against the dataset.
        chosen_pixel = (random.randint(0, 127), random.randint(0, 127))
    pix_y, pix_x = chosen_pixel

    print(f"\n[INFO] Computing return levels at pixel = ({pix_y}, {pix_x}).")
    print(f"\n[INFO] number of samples = {num_samples}")
    print(f"\n[INFO] variable = {variable}")

    model.eval()

    total_days = len(dataset)
    # daily_data[d, r] = value at the pixel on day d for ensemble draw r
    daily_data = np.zeros((total_days, num_samples), dtype=np.float32)

    # Inference only: no gradients are needed anywhere in the sampling loop.
    with torch.no_grad():
        for day_idx in range(total_days):
            sample_dict = dataset[day_idx]   # loads a single day from the dataset
            inputs = sample_dict['inputs'].unsqueeze(0).to(device)   # shape [1, C, H, W]
            lrinterp = sample_dict['lrinterp'].unsqueeze(0).to(device)
            timestamps = sample_dict['timestamps'].unsqueeze(0).to(device)
            # residual_to_hr is applied on CPU tensors; convert once per day, not per draw
            lrinterp_cpu = lrinterp.cpu()

            for r in range(num_samples):
                output = model(inputs, t=timestamps, training=False)

                # convert from residual to actual, shape [1, nvars, H, W]
                hr_pred = dataset.residual_to_hr(output.cpu(), lrinterp_cpu)

                if variable == 'pr':
                    # Precipitation: softplus, then unit conversion (kg/m²/s to mm/day)
                    field = cu.kgm2sTommday(cu.softplus(hr_pred[:, 0]))
                elif variable == 'tasmax':
                    # tasmax = channel 1 + softplus(channel 2, c=0), then Kelvin to Celsius
                    field = cu.KToC(hr_pred[:, 1] + cu.softplus(hr_pred[:, 2], c=0))
                else:
                    # tasmin: channel 1 as is, then Kelvin to Celsius
                    field = cu.KToC(hr_pred[:, 1])

                daily_data[day_idx, r] = field[0, pix_y, pix_x].item()

    # Annual blocks are cut assuming a fixed number of days per year.
    days_per_year = 365
    n_years = len(years)
    print(f"Total days = {total_days}, years = {n_years}, days_per_year = {days_per_year}")
    expected_days = n_years * days_per_year
    if total_days != expected_days:
        print(f"[WARN] total_days ({total_days}) != years * days_per_year ({expected_days}); "
              "annual blocks may be misaligned.")

    # NOTE(review): for 'tasmin' this still takes annual *maxima* of the daily
    # minimum temperature - confirm that is the intended extreme.
    block_maxima = compute_annual_block_maxima(daily_data, years, days_per_year=days_per_year)
    # block_maxima => shape (#years * num_samples,)

    # Fit GEV
    shape_hat, loc_hat, scale_hat = genextreme.fit(block_maxima)
    print(f"GEV fit => shape={shape_hat:.3f}, loc={loc_hat:.3f}, scale={scale_hat:.3f}")

    # Make sure the output directory exists before writing any artefact.
    os.makedirs(args.plotdir, exist_ok=True)
    np.save(f"{args.plotdir}/pixel_{pix_y}_{pix_x}_block_maxima.npy", block_maxima)

    # Return periods (years) at which return levels are reported
    return_periods = [1.1, 2, 5, 10, 20, 50, 100, 200, 300, 500, 700, 1000]
    rl_values = [gev_return_level(shape_hat, loc_hat, scale_hat, T) for T in return_periods]

    # Parametric bootstrap for confidence intervals
    rl_boot = gev_parametric_bootstrap(shape_hat, loc_hat, scale_hat,
                                       sample_size=len(block_maxima),
                                       return_periods=return_periods,
                                       n_bootstrap=1000)

    rl_ci_lower = {}
    rl_ci_upper = {}
    for T in return_periods:
        vals_T = np.array(rl_boot[T])
        rl_ci_lower[T] = np.percentile(vals_T, 2.5)
        rl_ci_upper[T] = np.percentile(vals_T, 97.5)

    # Report in the unit of the selected variable (previously always "mm/day").
    print(f"\nReturn Levels ({units_by_variable[variable]}) with 95% CI at the chosen pixel:")
    for T, rl in zip(return_periods, rl_values):
        print(f"  {T:g}-year RL = {rl:.2f}  [95% CI: {rl_ci_lower[T]:.2f}, {rl_ci_upper[T]:.2f}]")

    plt.figure(figsize=(8, 5))
    Ts = np.array(return_periods)
    rl_means = np.array(rl_values)
    rl_low = np.array([rl_ci_lower[T] for T in return_periods])
    rl_high = np.array([rl_ci_upper[T] for T in return_periods])

    # Fitted GEV return levels as a solid orange line
    plt.plot(Ts, rl_means, color='orange', linewidth=2, label='Fitted Return Level')

    # Confidence bounds as black dashed lines (only one gets a legend entry)
    plt.plot(Ts, rl_low, color='black', linestyle='--', linewidth=1, label='95% Confidence Interval')
    plt.plot(Ts, rl_high, color='black', linestyle='--', linewidth=1)

    # Empirical return periods of the sorted block maxima
    sorted_maxima, empirical_T = get_empirical_return_periods(block_maxima)
    plt.scatter(empirical_T, sorted_maxima, marker='o', color='blue', s=15, label='Model Predictions')

    plt.xscale('log')
    plt.ylabel(ylabel_by_variable[variable])
    plt.xlabel('Return Period (years)')
    plt.grid(True, alpha=0.3)
    plt.legend()
    plt.tight_layout()

    # Save the figure and release it
    plt.savefig(f"{args.plotdir}/pixel_{pix_y}_{pix_x}_return_levels.pdf", dpi=200)
    plt.close()

    return daily_data, block_maxima

In [5]:
# Test period: 1998 through 2027 (the range end is exclusive), i.e. 30 years.
args.years_test = range(1998, 2028)

# Build the test dataset from the parsed arguments; items are served in the
# "lrinterp_to_residuals" layout with transfo enabled.
dataset_test = cu.climex2torch(
    type="lrinterp_to_residuals",
    transfo=True,
    datadir=args.datadir,
    variables=args.variables,
    years=args.years_test,
    coords=args.coords,
    lowres_scale=args.lowres_scale,
)

print(f"Test dataset length (days): {len(dataset_test)}")

Opening and lazy loading netCDF files
Loading dataset into memory
Converting xarray Dataset to Pytorch tensor

##########################################
############ PROCESSING DONE #############
##########################################

Test dataset length (days): 10950


In [6]:
from prob_unet import ProbabilisticUNet
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Instantiate the network with the architecture the checkpoint is expected to match;
# input and output channel counts both equal the number of variables.
probunet_model = ProbabilisticUNet(
    input_channels=len(args.variables),
    num_classes=len(args.variables),
    latent_dim=16,
    model_channels=32,
    channel_mult=[1, 2, 4, 8],
    num_filters=[32, 64, 128, 256],
    beta_0=0.0,
    beta_1=0.0,
    beta_2=0.0,
).to(device)

In [7]:
# Load the trained weights into the freshly built model and switch it to eval mode.
# NOTE(review): torch.load unpickles the file - only load checkpoints from a trusted source.
# NOTE(review): the checkpoint path is hard-coded to one specific training run.
checkpoint_path = "./results/plots/03/28/202514:19:12/probunet_model_lat_dim_16.pth"
probunet_model.load_state_dict(
    torch.load(checkpoint_path, map_location=device)
)
probunet_model.eval()

print("Model loaded successfully!")

Model loaded successfully!


In [8]:
# Precipitation return levels at pixel (56, 40), 10 ensemble draws per day.
daily_data, block_maxima = compute_return_levels_for_random_pixel(
    model=probunet_model,
    dataset=dataset_test,
    device=device,
    years=args.years_test,
    num_samples=10,
    chosen_pixel=(56, 40),
    variable= "pr"
)


[INFO] Computing return levels at pixel = (56, 40).

[INFO] number of samples = 10

[INFO] variable = pr
Computing statistics for standardization
Total days = 10950, years = 30, days_per_year = 365
GEV fit => shape=-0.366, loc=106.781, scale=50.022

Return Levels (mm/day) with 95% CI at the chosen pixel:
  1.1-year RL = 69.34  [95% CI: 37.98, 73.79]
  2-year RL = 126.40  [95% CI: 42.73, 134.36]
  5-year RL = 206.72  [95% CI: 94.94, 229.42]
  10-year RL = 281.45  [95% CI: 243.04, 1066.77]
  20-year RL = 375.22  [95% CI: 317.77, 12221.23]
  50-year RL = 539.67  [95% CI: 432.79, 256006.86]
  100-year RL = 705.33  [95% CI: 537.71, 2356517.72]
  200-year RL = 918.26  [95% CI: 659.75, 24900588.36]
  300-year RL = 1070.10  [95% CI: 745.35, 100438610.69]
  500-year RL = 1296.29  [95% CI: 857.29, 581316768.47]
  700-year RL = 1470.04  [95% CI: 942.96, 1846888518.91]
  1000-year RL = 1679.09  [95% CI: 1041.77, 6287560116.75]


In [None]:
# Same analysis for daily maximum temperature at the same pixel.
daily_data_tasmax, block_maxima_tasmax = compute_return_levels_for_random_pixel(
    model=probunet_model,
    dataset=dataset_test,
    device=device,
    years=args.years_test,
    num_samples=10,
    chosen_pixel=(56, 40),
    variable= "tasmax"
)


[INFO] Computing return levels at pixel = (56, 40).

[INFO] number of samples = 10

[INFO] variable = tasmax
Computing statistics for standardization
Total days = 10950, years = 30, days_per_year = 365
GEV fit => shape=0.156, loc=33.022, scale=1.973

Return Levels (mm/day) with 95% CI at the chosen pixel:
    2-year RL = 33.72  [95% CI: 33.47, 33.98]
    5-year RL = 35.66  [95% CI: 35.34, 35.96]
   10-year RL = 36.77  [95% CI: 36.38, 37.12]
   20-year RL = 37.71  [95% CI: 37.21, 38.16]
   50-year RL = 38.78  [95% CI: 38.10, 39.40]
  100-year RL = 39.49  [95% CI: 38.67, 40.27]
  200-year RL = 40.13  [95% CI: 39.14, 41.09]
  300-year RL = 40.47  [95% CI: 39.38, 41.55]
  500-year RL = 40.86  [95% CI: 39.65, 42.11]
  700-year RL = 41.11  [95% CI: 39.82, 42.46]
  1000-year RL = 41.35  [95% CI: 39.97, 42.81]


In [None]:
# Same analysis for daily minimum temperature at the same pixel.
daily_data_tasmin, block_maxima_tasmin = compute_return_levels_for_random_pixel(
    model=probunet_model,
    dataset=dataset_test,
    device=device,
    years=args.years_test,
    num_samples=10,
    chosen_pixel=(56, 40),
    variable= "tasmin"
)


[INFO] Computing return levels at pixel = (56, 40).

[INFO] number of samples = 10

[INFO] variable = tasmin


KeyboardInterrupt: 

# Observed vs. Model Return Levels Analysis

The plot comparing observed vs. model precipitation return levels reveals several important insights:

## Why are there fewer blue crosses (observed empirical) than red crosses (model empirical)?

This difference is due to sample size:
- **Observed data**: We have 30 years (1998–2027; the test period is `range(1998, 2028)`, whose end is exclusive) with 1 observation per year = 30 empirical points
- **Model data**: We have 30 years with 30 samples per year = 900 empirical points (30×30)

This 30x difference explains the density difference in empirical markers.

## Interpretation of the Results:

1. **Model Underestimation**: The model GEV curve (red) falls significantly below the observed GEV curve (blue), indicating that our model systematically underestimates precipitation extremes.

2. **Statistical Significance**: The model curve falls outside the 95% confidence interval (blue shaded area) of observed GEV for most return periods, confirming this bias is statistically significant.

3. **Distribution Behavior**: 
   - **Observed data**: The empirical blue crosses align well with the fitted GEV curve
   - **Model data**: The empirical red crosses show a plateau around 75 mm/day, while the fitted model GEV continues rising - suggesting the model has an artificial ceiling for extreme precipitation

4. **Return Period Sensitivity**: The underestimation grows more severe for longer return periods, indicating our model particularly struggles with very extreme events.

5. **Physical Limitations**: The plateau in model empirical points suggests our probabilistic U-Net may have inherent limitations in generating sufficiently extreme precipitation events.

This analysis confirms our professor's approach was valuable - comparing against observed data reveals systematic biases that weren't apparent when only analyzing the model's internal consistency.

In [1]:
import sys
import os

# Make the parent of the current working directory importable and switch the
# working directory to it (later cells use paths relative to it, e.g. ./results).
# The guard makes the cell idempotent: re-running it no longer climbs one more
# directory level or appends a duplicate sys.path entry.
if "NOTEBOOK_ROOT" not in globals():
    NOTEBOOK_ROOT = os.path.abspath(os.path.join(os.getcwd(), '..'))
    os.chdir(NOTEBOOK_ROOT)
if NOTEBOOK_ROOT not in sys.path:
    sys.path.append(NOTEBOOK_ROOT)

In [2]:
import os, random, numpy as np, torch, matplotlib.pyplot as plt
from scipy.stats import genextreme
import climex_utils as cu
from scipy.interpolate import interp1d 
from prob_unet_utils import (
    compute_annual_block_maxima, gev_return_level, gev_parametric_bootstrap,
    get_empirical_return_periods
)
from prob_unet import ProbabilisticUNet
import train_prob_unet_model as tm

In [3]:
# Run configuration taken from the training script; the helpers and cells below
# read args.variables, args.datadir, args.coords, args.lowres_scale and args.plotdir.
args = tm.get_args()
def set_seed(seed):
    """Seed the Python, NumPy and PyTorch (CPU + CUDA) RNGs and pin cuDNN to deterministic mode.

    Also exports the seed as the PYTHONHASHSEED environment variable.
    NOTE(review): setting PYTHONHASHSEED after the interpreter has started
    presumably only affects child processes - confirm if hash order matters.
    """
    # Every generator takes the same integer seed, so seed them in one pass.
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seed_generator in seeders:
        seed_generator(seed)

    # Disable cuDNN autotuning and request deterministic kernels.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    os.environ['PYTHONHASHSEED'] = str(seed)
    
# -- 1) Set seed for reproducibility: fix all RNGs once, before any sampling is done
set_seed(42) 

# --- config: checkpoints to compare ---
# model_paths, labels, colors and markers are parallel lists: the i-th entries
# describe the same model (they are zipped together when sampling and plotting).
model_paths = [
    "./results/plots/08/01/202508:49:05/probunet_model_lat_dim_16.pth",  # λ=0
    "./results/plots/08/01/202508:23:04/probunet_model_lat_dim_16.pth",  # λ=1
    "./results/plots/07/22/202512:51:39/probunet_model_lat_dim_16.pth",  # λ=0.158
    "./results/plots/08/17/202515:50:03/probunet_model_lat_dim_16.pth",  # afCRPS
]
# Legend labels, in the same order as model_paths
labels = [r'$\lambda=0$', r'$\lambda=1$', r'$\lambda=0.158$', 'afCRPS']

# Scatter colour and marker per model, in the same order as model_paths
colors  = ['#1f77b4', "#F09A4E", '#2ca02c', '#d62728']
markers = ['o', 's', '^', 'D']

# ---------------------------------------------------------------------
# helpers
# ---------------------------------------------------------------------
def load_probunet_model(path: str, device: torch.device) -> ProbabilisticUNet:
    """Build a ProbabilisticUNet, load the state dict stored at `path`, and return it in eval mode.

    The channel counts come from the notebook-level `args.variables`; all other
    hyper-parameters are fixed here. The model is created on, and the checkpoint
    is mapped to, `device`.
    """
    n_channels = len(args.variables)
    architecture = dict(
        input_channels=n_channels,
        num_classes=n_channels,
        latent_dim=16,
        num_filters=[32, 64, 128, 256],
        model_channels=32,
        channel_mult=[1, 2, 4, 8],
        beta_0=0.0,
        beta_1=0.0,
        beta_2=0.0,
    )
    network = ProbabilisticUNet(**architecture).to(device)

    state_dict = torch.load(path, map_location=device)
    network.load_state_dict(state_dict)
    network.eval()
    return network

def extract_pixel_gt(series_item, variable: str, y: int, x: int) -> float:
    """Return the ground-truth value of `variable` at pixel (y, x) for one dataset item.

    `series_item` is `dataset[day_idx]`; its 'hr' entry is indexed by channel
    (0 for 'pr', 1 for 'tasmin', 1 and 2 combined for 'tasmax') and converted
    to mm/day or °C via the `cu` helpers.

    Raises:
        ValueError: if `variable` is not 'pr', 'tasmax' or 'tasmin'.
    """
    hr_channels = series_item['hr']  # [3, H, W] in transfo domain (dataset built with transfo=True)

    # Guard clause: reject unknown variables before touching the data.
    if variable not in ('pr', 'tasmax', 'tasmin'):
        raise ValueError("variable must be 'pr', 'tasmax', or 'tasmin'")

    if variable == 'pr':
        field = cu.kgm2sTommday(cu.softplus(hr_channels[0]))
    elif variable == 'tasmax':
        field = cu.KToC(cu.softplus(hr_channels[2], c=0) + hr_channels[1])
    else:
        field = cu.KToC(hr_channels[1])

    return float(field[y, x].item())

def extract_pixel_from_model(model, series_item, device, variable: str, y: int, x: int, num_samples: int, dataset=None):
    """Return `num_samples` model values at pixel (y, x) for one day, in physical units.

    Args:
        model: network called as `model(inputs, t=timestamps, training=False)`.
        series_item: one dataset item with 'inputs', 'lrinterp' and 'timestamps' tensors.
        device: torch device the model inputs are moved to.
        variable: 'pr' (mm/day), 'tasmax' (°C) or 'tasmin' (°C).
        y, x: pixel indices.
        num_samples: number of forward passes (ensemble draws).
        dataset: object providing `residual_to_hr(output, lrinterp)`. Defaults
            to the notebook-level `dataset_test`, which keeps existing calls working.

    Returns:
        float32 NumPy array of shape [num_samples].

    Raises:
        ValueError: if `variable` is not supported. Previously an unknown
            variable silently produced an all-zero array.
    """
    if variable not in ('pr', 'tasmax', 'tasmin'):
        raise ValueError("variable must be 'pr', 'tasmax', or 'tasmin'")
    if dataset is None:
        dataset = dataset_test  # fall back to the notebook-level dataset

    inputs     = series_item['inputs'].unsqueeze(0).to(device)    # [1,C,H,W] (transfo)
    timestamps = series_item['timestamps'].unsqueeze(0).to(device)
    # residual_to_hr is applied on CPU tensors, so keep lrinterp on the CPU and
    # prepare it once instead of moving it to the device and back on every draw.
    lrinterp_cpu = series_item['lrinterp'].unsqueeze(0).cpu()     # [1,C,H,W] (transfo)

    vals = np.zeros((num_samples,), dtype=np.float32)
    with torch.no_grad():
        for r in range(num_samples):
            out_residual = model(inputs, t=timestamps, training=False)        # [1,C,H,W] residual/transfo
            out_hr_trans = dataset.residual_to_hr(out_residual.cpu(), lrinterp_cpu)  # [1,C,H,W], transfo
            if variable == 'pr':
                field = cu.kgm2sTommday(cu.softplus(out_hr_trans[:, 0]))      # mm/day
            elif variable == 'tasmax':
                field = cu.KToC(out_hr_trans[:, 1] + cu.softplus(out_hr_trans[:, 2], c=0))
            else:  # 'tasmin'
                field = cu.KToC(out_hr_trans[:, 1])
            vals[r] = field[0, y, x].item()
    return vals  # shape [num_samples]

def fit_gev_from_blockmax(block_maxima, return_periods, n_bootstrap=1000):
    """Fit a GEV to `block_maxima` and evaluate return levels with 95% bootstrap bounds.

    Returns:
        A 4-tuple `(params, rl_curve, rl_low, rl_high)` where `params` is the
        `(shape, loc, scale)` triple from `genextreme.fit`, `rl_curve` is a list
        of return levels aligned with `return_periods`, and `rl_low` / `rl_high`
        are dicts keyed by return period holding the 2.5th / 97.5th percentile
        of the parametric-bootstrap return levels.
    """
    c_hat, mu_hat, sigma_hat = genextreme.fit(block_maxima)

    fitted_levels = []
    for period in return_periods:
        fitted_levels.append(gev_return_level(c_hat, mu_hat, sigma_hat, period))

    bootstrap_levels = gev_parametric_bootstrap(
        c_hat, mu_hat, sigma_hat,
        sample_size=len(block_maxima),
        return_periods=return_periods,
        n_bootstrap=n_bootstrap,
    )

    # 95% interval: 2.5th and 97.5th percentile of the bootstrap replicates
    lower_bounds, upper_bounds = {}, {}
    for period in return_periods:
        lower_bounds[period] = np.percentile(bootstrap_levels[period], 2.5)
        upper_bounds[period] = np.percentile(bootstrap_levels[period], 97.5)

    return (c_hat, mu_hat, sigma_hat), fitted_levels, lower_bounds, upper_bounds

def make_return_level_plot(variable, pixel, gt_params, return_periods, rl_curve, rl_low, rl_high, model_empirical, save_dir):
    """Plot the ground-truth GEV return-level curve with its CI plus each model's empirical points.

    `model_empirical` is a sequence of `(empirical_T, values)` pairs; it is
    zipped with the notebook-level `labels`, `colors` and `markers` lists, so
    its order must match theirs. The figure is saved as a PDF in `save_dir`
    (created if missing) and then closed. `gt_params` is accepted but not used
    by the plot. An unknown `variable` raises KeyError from the label lookup.
    """
    axis_label_by_variable = {
        'pr': 'Precipitation (mm/day)',
        'tasmax': 'Tasmax (°C)',
        'tasmin': 'Tasmin (°C)',
    }
    y_axis_label = axis_label_by_variable[variable]

    periods = np.array(return_periods)
    fitted = np.array(rl_curve)
    lower = np.array([rl_low[period] for period in return_periods])
    upper = np.array([rl_high[period] for period in return_periods])

    plt.figure(figsize=(7.2, 4.6))

    # Ground-truth fit (solid) and its confidence bounds (dashed; one legend entry)
    plt.plot(periods, fitted, color='grey', linewidth=2.2, label='GEV fit (GT)')
    ci_style = dict(color='black', linestyle='--', linewidth=1.1)
    plt.plot(periods, lower, label='95% CI (GT)', **ci_style)
    plt.plot(periods, upper, **ci_style)

    # One scatter series per model, styled by the parallel label/colour/marker lists
    for legend_label, colour, marker, (emp_T, emp_vals) in zip(labels, colors, markers, model_empirical):
        plt.scatter(emp_T, emp_vals, s=18, color=colour, marker=marker, label=legend_label, alpha=0.9)

    plt.xscale('log')
    plt.xlabel('Return period (years)')
    plt.ylabel(y_axis_label)
    plt.grid(True, alpha=0.3)
    plt.legend(loc='upper left', frameon=True, ncol=2)
    plt.tight_layout()

    os.makedirs(save_dir, exist_ok=True)
    out_path = os.path.join(save_dir, f"return_levels_{variable}_y{pixel[0]}_x{pixel[1]}.pdf")
    plt.savefig(out_path, dpi=300, bbox_inches='tight')
    plt.close()
    print(f"Saved: {out_path}")

In [4]:
# Test period: 1998 through 2027 (the range end is exclusive), i.e. 30 years.
args.years_test = range(1998, 2028)

# Build the test dataset from the parsed arguments; items are served in the
# "lrinterp_to_residuals" layout with transfo enabled.
dataset_test = cu.climex2torch(
    type="lrinterp_to_residuals",
    transfo=True,
    datadir=args.datadir,
    variables=args.variables,
    years=args.years_test,
    coords=args.coords,
    lowres_scale=args.lowres_scale,
)

print(f"Test dataset length (days): {len(dataset_test)}")

Opening and lazy loading netCDF files
Loading dataset into memory
Converting xarray Dataset to Pytorch tensor

##########################################
############ PROCESSING DONE #############
##########################################

Test dataset length (days): 10950


In [5]:
# ---------------------------------------------------------------------
# main: GT GEV + four model empirical curves, per variable
# ---------------------------------------------------------------------
# For each variable: fit a GEV to the ground-truth annual maxima at one pixel,
# then overlay the empirical return levels sampled from every checkpoint in
# model_paths and save one figure per variable.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
chosen_pixel = (55, 110)     # (y, x) pixel to analyse; change if you like
num_samples  = 5          # ensemble draws per model & day
days_per_year = 365
years = list(args.years_test)
save_dir = os.path.join(args.plotdir, "return_levels_multi_model")

# dataset_test must already exist (built in the dataset cell above)
print("Test dataset length (days):", len(dataset_test))

# The series must cover a whole number of 365-day years
N_expected = len(years) * days_per_year
if len(dataset_test) < N_expected:
    raise RuntimeError(f"Dataset shorter than expected: {len(dataset_test)} < {N_expected}")
elif len(dataset_test) > N_expected:
    print(f"[WARN] Trimming dataset from {len(dataset_test)} to {N_expected} days to match {len(years)} years.")
    # NOTE(review): idx_keep is never read afterwards; the trimming is effectively
    # done by the loops below, which only iterate over range(N_expected).
    idx_keep = N_expected  # keep first N_expected days

# Loop over variables
for variable in ['pr', 'tasmax', 'tasmin']: 
    print(f"\n=== Variable: {variable} ===")
    y, x = chosen_pixel

    # ---- Ground truth daily series at (y,x) ----
    # Single column so it has the same [days, samples] layout as the model series
    gt_daily = np.zeros((N_expected, 1), dtype=np.float32)
    for d in range(N_expected):
        item = dataset_test[d]
        gt_daily[d, 0] = extract_pixel_gt(item, variable, y, x)

    # Block maxima from GT and GEV fit
    block_gt = compute_annual_block_maxima(gt_daily, years, days_per_year=days_per_year)
    return_periods = [1.1, 2, 5, 10, 20, 50, 100, 300]
    (shape_hat, loc_hat, scale_hat), rl_curve, rl_low, rl_high = fit_gev_from_blockmax(
        block_gt, return_periods, n_bootstrap=1000
    )
    print(f"GEV (GT) params: shape={shape_hat:.3f}, loc={loc_hat:.3f}, scale={scale_hat:.3f}")

    # ---- Models: empirical RL points (no re-fit) ----
    # NOTE(review): every checkpoint is rebuilt and reloaded once per variable.
    # Hoisting the loads would change the order in which the seeded RNG is
    # consumed, so the sampled values would no longer match earlier runs.
    model_empirical = []  # list of tuples (empirical_T, sorted_maxima) in plotting order
    for path, lab in zip(model_paths, labels):
        print(f"  -> Sampling model {lab}")
        model = load_probunet_model(path, device)
        # daily_model[d, r] = value at the pixel on day d for ensemble draw r
        daily_model = np.zeros((N_expected, num_samples), dtype=np.float32)
        for d in range(N_expected):
            item = dataset_test[d]
            daily_model[d, :] = extract_pixel_from_model(
                model, item, device, variable, y, x, num_samples=num_samples
            )
        block_m = compute_annual_block_maxima(daily_model, years, days_per_year=days_per_year)
        sorted_maxima, empirical_T = get_empirical_return_periods(block_m)
        model_empirical.append((empirical_T, sorted_maxima))

    # ---- Plot: one GT curve/CI + four model empirical sets ----
    make_return_level_plot(
        variable=variable,
        pixel=chosen_pixel,
        gt_params=(shape_hat, loc_hat, scale_hat),
        return_periods=return_periods,
        rl_curve=rl_curve,
        rl_low=rl_low,
        rl_high=rl_high,
        model_empirical=model_empirical,
        save_dir=save_dir
    )

Test dataset length (days): 10950

=== Variable: pr ===
Computing statistics for standardization
Bootstrap: kept 1000/1000 valid samples
GEV (GT) params: shape=-0.019, loc=59.091, scale=15.112
  -> Sampling model $\lambda=0$
  -> Sampling model $\lambda=1$
  -> Sampling model $\lambda=0.158$
  -> Sampling model afCRPS
Saved: ./results/plots/08/21/202521:59:44/return_levels_multi_model/return_levels_pr_y55_x110.pdf

=== Variable: tasmax ===
Bootstrap: kept 1000/1000 valid samples
GEV (GT) params: shape=0.211, loc=23.952, scale=1.109
  -> Sampling model $\lambda=0$
  -> Sampling model $\lambda=1$
  -> Sampling model $\lambda=0.158$
  -> Sampling model afCRPS
Saved: ./results/plots/08/21/202521:59:44/return_levels_multi_model/return_levels_tasmax_y55_x110.pdf

=== Variable: tasmin ===
Bootstrap: kept 1000/1000 valid samples
GEV (GT) params: shape=0.140, loc=20.303, scale=0.786
  -> Sampling model $\lambda=0$
  -> Sampling model $\lambda=1$
  -> Sampling model $\lambda=0.158$
  -> Sampling