# Carga de datos

In [1]:
library(grid)
library(dplyr)
library(gridExtra)
library(visualizeR)
library(downscaleR)
library(transformeR)
library(RColorBrewer)
library(easyVerification)

# Palette generator shared by the maps below: the 9-class RdYlBu Brewer
# palette, reversed and interpolated by colorRampPalette().
color <- colorRampPalette(rev(brewer.pal(n = 9, name = "RdYlBu")))


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union



Attaching package: ‘gridExtra’


The following object is masked from ‘package:dplyr’:

    combine


Loading required package: transformeR




    _______   ____  ___________________  __  ________ 
   / ___/ /  / /  |/  / __  /_  __/ __/ / / / / __  / 
  / /  / /  / / /|_/ / /_/ / / / / __/ / /_/ / /_/_/  
 / /__/ /__/ / /  / / __  / / / / /__ /___  / / \ \ 
 \___/____/_/_/  /_/_/ /_/ /_/  \___/    /_/\/   \_\ 
 
      github.com/SantanderMetGroup/climate4R



transformeR version 2.2.2 (2023-10-26) is loaded


Get the latest stable version (2.2.3) using <devtools::install_github('SantanderMetGroup/transformeR')>

Please see 'citation("transformeR")' to cite this package.

visualizeR version 1.6.4 (2023-10-26) is loaded

Please see 'citation("visualizeR")' to cite this package.

downscaleR version 3.3.4 (2023-06-22) is loaded

Please use 'citation("downscaleR")' to cite this package.

Loading required package: SpecsVerification


Attaching package: ‘easyVerification’


The following object is masked from ‘package:SpecsVerification’:

    EnsCorr




El primer paso es preparar los datos. El predictando es la temperatura media (tas) de ERA5-Land a 0.1°. Los predictores son la temperatura media (tas) y la presión en superficie (sp) de ERA5 (resolución original de 0.25°), previamente interpolados a la resolución de nuestro modelo del ECMWF, en este caso 1°.

Además, nos quedamos con los meses de abril a agosto y dividimos los datos en train (1993-2016) y test (2021-2022).

In [2]:
# Year ranges used for the train/test split of every grid loaded in this cell.
train_years <- 1993:2016
test_years <- 2021:2022

# Predictand (Y): ERA5-Land tas (high resolution, 0.1°), restricted to
# months 4 to 8.
y_obs <- subsetGrid(
    readRDS("../../data/analogs/downscaling/tas_cgdds_ERA5-Land.rds"),
    season = 4:8
)
yT_obs <- subsetGrid(y_obs, years = train_years)  # training
yt_obs <- subsetGrid(y_obs, years = test_years)   # test

# Predictors (X): ERA5 (original resolution 0.25°, interpolated to the 1°
# SEAS5 resolution), same months and same train/test split.
# Surface pressure (sp)
x_sp <- subsetGrid(
    readRDS("../../data/analogs/downscaling/sp_ERA5.rds"),
    season = 4:8
)
xT_sp <- subsetGrid(x_sp, years = train_years)  # training
xt_sp <- subsetGrid(x_sp, years = test_years)   # test

# Mean temperature (tas)
x_tas <- subsetGrid(
    readRDS("../../data/analogs/downscaling/tas_ERA5.rds"),
    season = 4:8
)
xT_tas <- subsetGrid(x_tas, years = train_years)  # training
xt_tas <- subsetGrid(x_tas, years = test_years)   # test

# Stack both predictor variables (tas first, then sp) into one multi-grid
# per period.
xT <- makeMultiGrid(xT_tas, xT_sp)
xt <- makeMultiGrid(xt_tas, xt_sp)

# Climatología de la temperatura de ERA5-Land

In [3]:
# Reference maps of the ERA5-Land predictand over the training period: mean,
# 5th percentile and 95th percentile. Messages and warnings raised while
# building the climatologies and plots are deliberately silenced.

# Mean
mean_ref <- suppressWarnings(suppressMessages(
    spatialPlot(climatology(yT_obs),
                main = "Mean (train)",
                backdrop.theme = "countries",
                col.regions = color)
))

# 5th percentile, computed per grid cell by passing a custom clim.fun.
p5_fun <- function(x, ...) {
    quantile(x, probs = 0.05, na.rm = TRUE)
}
p5 <- suppressWarnings(suppressMessages(
    climatology(yT_obs, clim.fun = list(FUN = p5_fun, na.rm = TRUE))
))

# NOTE(review): p5 already comes from climatology(); it is wrapped in
# climatology() again here exactly as in the original cell.
p5_ref <- suppressWarnings(suppressMessages(
    spatialPlot(climatology(p5),
                main = "P05 (train)",
                backdrop.theme = "countries",
                col.regions = color)
))

# 95th percentile, same approach as above.
p95_fun <- function(x, ...) {
    quantile(x, probs = 0.95, na.rm = TRUE)
}
p95 <- suppressWarnings(suppressMessages(
    climatology(yT_obs, clim.fun = list(FUN = p95_fun, na.rm = TRUE))
))

p95_ref <- suppressWarnings(suppressMessages(
    spatialPlot(climatology(p95),
                main = "P95 (train)",
                backdrop.theme = "countries",
                col.regions = color)
))

In [4]:
# Export the three reference maps (mean, P05, P95) in a single row under a
# common bold title. The title row takes 0.1 height units, the maps 1.
png(filename = "metricas_ERA5-Land.png", width = 2000, height = 1000, res = 150)

titulo_fila1 <- textGrob(
    label = "tas (ºC) ERA5-Land (0.1º)",
    gp = gpar(fontsize = 18, fontface = "bold")
)

grid.arrange(
    grobs = list(
        titulo_fila1,
        arrangeGrob(mean_ref, p5_ref, p95_ref, ncol = 3)
    ),
    ncol = 1,
    heights = c(0.1, 1)
)

# Close the device so the PNG file is written out.
dev.off()

In [5]:
# Domain-wide averages: for each array, average over the first dimension for
# every (dim 2, dim 3) cell, then average those cell values, ignoring NAs at
# both steps.
# assumes $Data is indexed [time, lat, lon] -- TODO confirm

# Grid mean of the 5th percentile
mean_p5 <- mean(
    apply(p5$Data, MARGIN = c(2, 3), FUN = mean, na.rm = TRUE),
    na.rm = TRUE
)

# Grid mean of the 95th percentile
mean_p95 <- mean(
    apply(p95$Data, MARGIN = c(2, 3), FUN = mean, na.rm = TRUE),
    na.rm = TRUE
)

# Grid mean of the training observations
mean_yT <- mean(
    apply(yT_obs$Data, MARGIN = c(2, 3), FUN = mean, na.rm = TRUE),
    na.rm = TRUE
)

# Print the three values
cat(
    "Media del grid (ºC):\n",
    "p5: ", mean_p5, "\n",
    "p95: ", mean_p95, "\n",
    "yT_obs: ", mean_yT, "\n"
)

Media del grid (ºC):
 p5:  9.844189 
 p95:  26.95517 
 yT_obs:  19.34074 


# Entrenamiento del modelo

In [6]:
# Prepare the training data (temporal alignment of predictors and predictand)
# and run a PCA on the combined variables, retaining 95% of the variance
# (v.exp = 0.95).
data <- prepareData(
    x = xT,
    y = yT_obs,
    spatial.predictors = list(v.exp = 0.95, which.combine = getVarNames(xT))
)

[2025-12-30 14:45:11.719507] Performing PC analysis on 2 variables plus a combination ...

[2025-12-30 14:45:14.096355] Done.



In [7]:
# Train the analog downscaling model on the prepared training data, using a
# single analog per prediction (n.analogs = 1).
model <- downscaleTrain(obj = data, method = "analogs", n.analogs = 1)

# Model cross-validation

In [8]:
# Leave-one-year-out cross-validation of the same analog configuration
# (1 analog, PCA on the combined variables keeping 95% of the variance).
analog.cv <- downscaleCV(
    x = xT,
    y = yT_obs,
    method = "analogs",
    n.analogs = 1,
    sampling.strategy = "leave-one-year-out",
    prepareData.args = list(
        spatial.predictors = list(
            which.combine = getVarNames(xT),
            v.exp = 0.95
        )
    )
)

fold: 1 --> calculating...

[2025-12-30 14:47:01.293893] Performing PC analysis on 2 variables plus a combination ...

[2025-12-30 14:47:03.642204] Done.

fold: 2 --> calculating...

[2025-12-30 14:47:17.45861] Performing PC analysis on 2 variables plus a combination ...

[2025-12-30 14:47:19.813094] Done.

fold: 3 --> calculating...

[2025-12-30 14:47:33.377382] Performing PC analysis on 2 variables plus a combination ...

[2025-12-30 14:47:35.751133] Done.

fold: 4 --> calculating...

[2025-12-30 14:47:48.636781] Performing PC analysis on 2 variables plus a combination ...

[2025-12-30 14:47:50.997131] Done.

fold: 5 --> calculating...

[2025-12-30 14:48:03.998024] Performing PC analysis on 2 variables plus a combination ...

[2025-12-30 14:48:06.336886] Done.

fold: 6 --> calculating...

[2025-12-30 14:48:19.471467] Performing PC analysis on 2 variables plus a combination ...

[2025-12-30 14:48:21.833846] Done.

fold: 7 --> calculating...

[2025-12-30 14:48:35.062634] Performing PC 

# Validation

### Función auxiliar

In [9]:
# Per-cell Pearson correlation (and its p-value) between a prediction and the
# observations on a spatial grid, plus the stippling layer that marks the
# cells whose p-value is below `threshold`.
#
# This version expects data WITHOUT a member dimension: for every (i, j) cell
# it correlates model_data$Data[, i, j] with obs_data$Data[, i, j].
#
# Args:
#   model_data: grid whose $Data is a 3-D array indexed [time, lat, lon]
#   obs_data:   grid whose $Data has exactly the same dimensions as model_data$Data
#   ref_grid:   reference grid providing the metadata (xyCoords, Variable, Dates)
#               copied into the output grids
#   threshold:  p-value threshold used for the stippling (default 0.05)
#   min_n:      minimum number of complete (non-NA) pairs required to run the
#               test in a cell (default 10); cells with fewer pairs stay NA
#
# Returns:
#   List with:
#     - cor:       "grid" object holding the correlation map [lat x lon]
#     - pval:      plain matrix of p-values [lat x lon]
#     - pval_grid: "grid" object holding the p-value map and metadata
#     - pts:       value returned by map.stippling(), meant to be passed to
#                  spatialPlot(sp.layout = list(pts))

calc_cor_pval_grid <- function(model_data, obs_data, ref_grid, threshold = 0.05, min_n = 10) {

    # Fail early with a clear message instead of an obscure subscript error
    # (or silently mismatched cells) when the two arrays do not line up.
    pred_dims <- dim(model_data$Data)
    obs_dims <- dim(obs_data$Data)
    if (length(pred_dims) != 3L) {
        stop("model_data$Data must be a 3-D array [time, lat, lon]", call. = FALSE)
    }
    if (!identical(as.integer(pred_dims), as.integer(obs_dims))) {
        stop("model_data$Data and obs_data$Data must have the same dimensions", call. = FALSE)
    }

    # Spatial dimensions (latitude and longitude)
    lat_n <- pred_dims[2]
    lon_n <- pred_dims[3]

    # Result matrices; cells that are never tested keep NA
    cor_array <- matrix(NA_real_, nrow = lat_n, ncol = lon_n)
    pval_array <- matrix(NA_real_, nrow = lat_n, ncol = lon_n)

    # seq_len() keeps the loops empty (instead of iterating over 1:0) when a
    # spatial dimension has length zero.
    for (i in seq_len(lat_n)) {
        for (j in seq_len(lon_n)) {

            # Time series of prediction and observation for the current cell
            pred_series <- model_data$Data[, i, j]
            obs_series <- obs_data$Data[, i, j]

            # Keep only time steps where both series are available
            valid_idx <- complete.cases(pred_series, obs_series)

            # Only test cells with enough complete pairs
            if (sum(valid_idx) >= min_n) {
                test <- cor.test(pred_series[valid_idx], obs_series[valid_idx], method = "pearson")
                cor_array[i, j] <- test$estimate  # correlation coefficient
                pval_array[i, j] <- test$p.value  # p-value of the test
            }
        }
    }

    # Wrap a [lat x lon] matrix as a "grid" object carrying ref_grid's metadata
    # and the given variable name / description.
    as_map_grid <- function(values, var_name, description) {
        out <- list()
        out$Data <- values
        attr(out$Data, "dimensions") <- c("lat", "lon")
        out$xyCoords <- ref_grid$xyCoords
        out$Variable <- ref_grid$Variable
        out$Dates <- ref_grid$Dates
        class(out) <- "grid"

        out$Variable$varName <- var_name
        attr(out$Variable, "description") <- description
        attr(out$Variable, "units") <- ""
        attr(out$Variable, "longname") <- var_name
        out
    }

    pval_grid <- as_map_grid(pval_array, "p-values", "Mapa de p-valores")
    cor_grid <- as_map_grid(cor_array, "correlation", "Mapa de correlaciones")

    # Stippling layer: condition "LT" selects cells with p-value < threshold.
    # Messages and warnings from the plotting helpers are silenced.
    pts <- suppressWarnings(suppressMessages(
        map.stippling(climatology(pval_grid),
                      threshold = threshold,
                      condition = "LT",
                      pch = 19, col = "black", cex = 0.05)
    ))

    list(cor = cor_grid, pval = pval_array, pval_grid = pval_grid, pts = pts)
}

### Bias and corr 

In [10]:
# Bias between prediction and observations over the training period, computed
# as (prediction climatology - observed climatology), for two cases:
#   1. training partition without cross-validation
#   2. training partition with leave-one-year-out cross-validation
# NOTE(review): `diff` / `diff_cv` hold the prediction climatologies, not
# differences, and `diff` shares its name with base::diff().
# NOTE(review): the colour levels only span 0 to 0.3; confirm that negative
# biases are not expected on these maps.
ref <- suppressWarnings(suppressMessages(climatology(yT_obs)))

# Case 1: no cross-validation
diff <- suppressWarnings(suppressMessages(climatology(model$pred)))
bias <- gridArithmetics(diff, ref, operator = "-")
b <- spatialPlot(
    bias,
    main = "Bias Train (Prediction - Obs)",
    backdrop.theme = "countries",
    col.regions = color,
    at = seq(from = 0, to = 0.3, by = 0.01)
)

# Case 2: leave-one-year-out cross-validation
diff_cv <- suppressWarnings(suppressMessages(climatology(analog.cv)))
bias_cv <- gridArithmetics(diff_cv, ref, operator = "-")
b_cv <- spatialPlot(
    bias_cv,
    main = "Bias Train CV (LOO) (Prediction - Obs)",
    backdrop.theme = "countries",
    col.regions = color,
    at = seq(from = 0, to = 0.3, by = 0.01)
)

In [11]:
# Correlation between prediction and observations over the training period,
# for two cases:
#   1. training partition without cross-validation
#   2. training partition with leave-one-year-out cross-validation
# In both cases the prediction grid also serves as the metadata reference, and
# the stippling layer returned by calc_cor_pval_grid() is drawn on the map.

# Case 1: no cross-validation
test_cor <- calc_cor_pval_grid(model$pred, yT_obs, ref_grid = model$pred)

corr <- suppressWarnings(suppressMessages(
    spatialPlot(climatology(test_cor$cor),
                main = "Corr Train",
                backdrop.theme = "countries",
                col.regions = color,
                at = seq(from = -1, to = 1, by = 0.1),
                sp.layout = list(test_cor$pts))
))

# Case 2: leave-one-year-out cross-validation
test_cor_cv <- calc_cor_pval_grid(analog.cv, yT_obs, ref_grid = analog.cv)

corr_cv <- suppressWarnings(suppressMessages(
    spatialPlot(climatology(test_cor_cv$cor),
                main = "Corr CV (LOO)",
                backdrop.theme = "countries",
                col.regions = color,
                at = seq(from = -1, to = 1, by = 0.1),
                sp.layout = list(test_cor_cv$pts))
))

In [12]:
# Export a 2 x 2 comparison figure: bias maps on the first row and correlation
# maps on the second, each row preceded by its own bold title. In every row
# the left map is the non-CV case and the right map the leave-one-year-out CV.
png(filename = "metricas_train_comparison.png", width = 2000, height = 1000, res = 150)

titulo_fila1 <- textGrob(
    label = "Bias (Pred - Obs)",
    gp = gpar(fontsize = 18, fontface = "bold")
)

titulo_fila2 <- textGrob(
    label = "Corr (Pred - Obs)",
    gp = gpar(fontsize = 18, fontface = "bold")
)

grid.arrange(
    grobs = list(
        titulo_fila1,
        arrangeGrob(b, b_cv, ncol = 2),
        titulo_fila2,
        arrangeGrob(corr, corr_cv, ncol = 2)
    ),
    ncol = 1,
    heights = c(0.1, 1, 0.1, 1)
)

# Close the device so the PNG file is written out.
dev.off()

# Predecimos sobre los datos de SEAS5 a 1º

In [13]:
# Load SEAS5 tas at 1°, restricted to months 4 to 8.
x_tas_seas5 <- subsetGrid(
    readRDS("../../data/analogs/downscaling/tas_cgdds_seas5_downscaling.rds"),
    season = 4:8
)

# Rename the variable so it matches the ERA5 predictor name (set both as an
# attribute and as a list element of $Variable).
attr(x_tas_seas5$Variable, "varName") <- "t2m"
x_tas_seas5$Variable$varName <- "t2m"

# Temporal subsets
xT_tas_seas5 <- subsetGrid(x_tas_seas5, years = 1993:2016)  # training
xt_tas_seas5 <- subsetGrid(x_tas_seas5, years = 2021:2022)  # test

# Load the surface pressure, restricted to months 4 to 8.
# NOTE(review): the file name and the original comment refer to CMCC while
# the variables are named *_seas5 -- confirm the data source.
x_sp_seas5 <- subsetGrid(
    readRDS("../../data/analogs/downscaling/sp_cmcc_downscaling.rds"),
    season = 4:8
)

# Rename the variable so it matches the ERA5 predictor name.
attr(x_sp_seas5$Variable, "varName") <- "sp"
x_sp_seas5$Variable$varName <- "sp"

# Temporal subsets
xT_sp_seas5 <- subsetGrid(x_sp_seas5, years = 1993:2016)  # training
xt_sp_seas5 <- subsetGrid(x_sp_seas5, years = 2021:2022)  # test

In [14]:
# Harmonize the model predictors by month and grid box. With type = "center"
# the fields are centred, not fully standardized.
# Presumably scaleGrid() removes the monthly climatology of `base` (the model
# training period) and adds that of `ref` (the ERA5 training period) -- see
# the transformeR::scaleGrid documentation to confirm.
xt_tas_harmonized <- scaleGrid(
    grid = xt_tas_seas5,
    base = xT_tas_seas5,
    ref = xT_tas,
    type = "center",
    spatial.frame = "gridbox",
    time.frame = "monthly",
    by.member = FALSE
)

xt_sp_harmonized <- scaleGrid(
    grid = xt_sp_seas5,
    base = xT_sp_seas5,
    ref = xT_sp,
    type = "center",
    spatial.frame = "gridbox",
    time.frame = "monthly",
    by.member = FALSE
)

[2025-12-30 14:53:39.473202] - Scaling by months ...

[2025-12-30 14:53:58.947137] - Done

[2025-12-30 14:53:58.949773] - Scaling by months ...

[2025-12-30 14:54:19.708548] - Done



In [15]:
# Stack the harmonized test predictors (tas first, then sp) into one
# multi-grid, in the same order used for the training predictors.
xt_seas5 <- makeMultiGrid(xt_tas_harmonized, xt_sp_harmonized)

# Prepare the new test predictors against the training data structure
# returned by prepareData().
newdata <- prepareNewData(newdata = xt_seas5, data.struc = data)

# Predict on the test period with the trained analog model.
pred <- downscalePredict(newdata, model)

# Métricas predictando

Comparamos la temperatura del modelo, tras el downscaling a 0.1°, con la temperatura original de ERA5-Land, ambas en el periodo de test (2021-2022).

In [16]:
# Bias of the downscaled prediction against the test observations, computed
# with the "EnsMe" verification function (this is not an RMSE). Dimension 1 of
# pred$Data is passed as the ensemble dimension and dimension 2 as time.
# Note: this reassigns `bias`, previously used for the training bias grid.
bias <- suppressWarnings(suppressMessages(
    veriApply(verifun = "EnsMe",
              fcst = pred$Data,
              obs = yt_obs$Data,
              tdim = 2, ensdim = 1)
))

# Rebuild a grid from the verification matrix, using yt_obs as the template.
bias_grid <- easyVeri2grid(easyVeri.mat = bias, obs.grid = yt_obs, verifun = "EnsMe")

bias_pred <- suppressWarnings(suppressMessages(
    spatialPlot(climatology(bias_grid),
                main = "Bias (prediction)",
                backdrop.theme = "countries",
                col.regions = color)
))

In [17]:
# Per-cell Pearson correlation (and its p-value) between the ensemble mean of
# a prediction and the observations on a spatial grid, plus the stippling
# layer that marks the cells whose p-value is below `threshold`.
#
# Note: this redefines the calc_cor_pval_grid() declared earlier in the
# notebook. Here the member dimension is averaged out first and `cor` is
# returned as a plain matrix.
#
# Args:
#   model_data: grid whose $Data is a 4-D array [member, time, lat, lon]; a
#               3-D array [time, lat, lon] (no member dimension) is also
#               accepted and used as is
#   obs_data:   grid whose $Data is a 3-D array [time, lat, lon] matching the
#               time and spatial dimensions of the prediction
#   ref_grid:   reference grid providing the metadata (xyCoords, Variable, Dates)
#               copied into the p-value grid
#   threshold:  p-value threshold used for the stippling (default 0.05)
#   min_n:      minimum number of complete (non-NA) pairs required to run the
#               test in a cell (default 10); cells with fewer pairs stay NA
#
# Returns:
#   List with:
#     - cor:       matrix of correlations [lat x lon]
#     - pval:      matrix of p-values [lat x lon]
#     - pval_grid: "grid" object holding the p-value map and metadata
#     - pts:       value returned by map.stippling(), meant to be passed to
#                  spatialPlot(sp.layout = list(pts))

calc_cor_pval_grid <- function(model_data, obs_data, ref_grid, threshold = 0.05, min_n = 10) {

    model_dims <- dim(model_data$Data)

    # Ensemble mean per [time, lat, lon]. colMeans(dims = 1) averages over
    # the first (member) dimension and ignores NA members.
    if (length(model_dims) == 4L) {
        ens_mean <- colMeans(model_data$Data, na.rm = TRUE, dims = 1)
    } else if (length(model_dims) == 3L) {
        # No member dimension: the data already is the series to evaluate
        ens_mean <- model_data$Data
    } else {
        stop("model_data$Data must be a 4-D [member, time, lat, lon] or 3-D [time, lat, lon] array",
             call. = FALSE)
    }

    # Fail early with a clear message instead of an obscure subscript error
    # (or silently mismatched cells) when prediction and observations differ.
    if (!identical(as.integer(dim(ens_mean)), as.integer(dim(obs_data$Data)))) {
        stop("obs_data$Data must match the [time, lat, lon] dimensions of the prediction",
             call. = FALSE)
    }

    # Spatial dimensions (latitude and longitude)
    lat_n <- dim(ens_mean)[2]
    lon_n <- dim(ens_mean)[3]

    # Result matrices; cells that are never tested keep NA
    cor_array <- matrix(NA_real_, nrow = lat_n, ncol = lon_n)
    pval_array <- matrix(NA_real_, nrow = lat_n, ncol = lon_n)

    # seq_len() keeps the loops empty (instead of iterating over 1:0) when a
    # spatial dimension has length zero.
    for (i in seq_len(lat_n)) {
        for (j in seq_len(lon_n)) {

            # Time series of ensemble mean and observation for the current cell
            pred_series <- ens_mean[, i, j]
            obs_series <- obs_data$Data[, i, j]

            # Keep only time steps where both series are available
            valid_idx <- complete.cases(pred_series, obs_series)

            # Only test cells with enough complete pairs
            if (sum(valid_idx) >= min_n) {
                test <- cor.test(pred_series[valid_idx], obs_series[valid_idx], method = "pearson")
                cor_array[i, j] <- test$estimate  # correlation coefficient
                pval_array[i, j] <- test$p.value  # p-value of the test
            }
        }
    }

    # Build a "grid" object for the p-values, with ref_grid's metadata
    pval_grid <- list()
    pval_grid$Data <- pval_array
    attr(pval_grid$Data, "dimensions") <- c("lat", "lon")
    pval_grid$xyCoords <- ref_grid$xyCoords
    pval_grid$Variable <- ref_grid$Variable
    pval_grid$Dates <- ref_grid$Dates
    class(pval_grid) <- "grid"

    pval_grid$Variable$varName <- "p-values"
    attr(pval_grid$Variable, "description") <- "Mapa de p-valores"
    attr(pval_grid$Variable, "units") <- ""
    attr(pval_grid$Variable, "longname") <- "p-values"

    # Stippling layer: condition "LT" selects cells with p-value < threshold.
    # Messages and warnings from the plotting helpers are silenced.
    pts <- suppressWarnings(suppressMessages(
        map.stippling(climatology(pval_grid),
                      threshold = threshold,
                      condition = "LT",
                      pch = 19, col = "black", cex = 0.05)
    ))

    list(cor = cor_array, pval = pval_array, pval_grid = pval_grid, pts = pts)
}

In [18]:
# Significance of the ensemble-mean correlation on the test period; only the
# stippling layer (test_cor$pts) is used below.
test_cor <- calc_cor_pval_grid(pred, yt_obs, ref_grid = pred)

# Correlation between prediction and test observations, computed with the
# "EnsCorr" verification function (this is not an RMSE). Dimension 1 of
# pred$Data is passed as the ensemble dimension and dimension 2 as time.
# Note: this reassigns `corr`, previously used for the training map.
corr <- suppressWarnings(suppressMessages(
    veriApply(verifun = "EnsCorr",
              fcst = pred$Data,
              obs = yt_obs$Data,
              tdim = 2, ensdim = 1)
))

# Rebuild a grid from the verification matrix, using yt_obs as the template.
corr_grid <- easyVeri2grid(easyVeri.mat = corr, obs.grid = yt_obs, verifun = "EnsCorr")

corr_pred <- suppressWarnings(suppressMessages(
    spatialPlot(climatology(corr_grid),
                main = "Corr (prediction)",
                backdrop.theme = "countries",
                col.regions = color,
                at = seq(from = -1, to = 1, by = 0.1),
                sp.layout = list(test_cor$pts))
))

In [19]:
# Map of the test-period climatology of the downscaled prediction, requested
# with by.member = FALSE. The palette is built inline with the same
# expression used for `color` at the top of the notebook.
mean_pred <- suppressWarnings(suppressMessages(
    spatialPlot(
        climatology(pred, by.member = FALSE),
        main = "Mean (prediction)",
        backdrop.theme = "countries",
        col.regions = colorRampPalette(rev(brewer.pal(n = 9, name = "RdYlBu")))
    )
))

In [20]:
# Export the three prediction maps (mean, bias, correlation) in a single row
# under a common bold title. The title row takes 0.1 height units, the maps 1.
png(filename = "metricas_prediction.png", width = 2000, height = 1000, res = 150)

titulo_fila1 <- textGrob(
    label = "tas (ºC) SEAS5 downscaled (0.1º)",
    gp = gpar(fontsize = 18, fontface = "bold")
)

grid.arrange(
    grobs = list(
        titulo_fila1,
        arrangeGrob(mean_pred, bias_pred, corr_pred, ncol = 3)
    ),
    ncol = 1,
    heights = c(0.1, 1)
)

# Close the device so the PNG file is written out.
dev.off()