In [None]:
import gc
import glob
import os
import warnings

import numpy as np
import xarray as xr

## Parameters

In [None]:
# Root folder of the input data
root = '/lustre/gmeteo/WORK/DATA/C3S-CDS/C3S-CICA-Atlas/v2/'
# Domain to process: "CORDEX-CORE" or "CORDEX-EUR-11"
domain = 'CORDEX-CORE'
# Scenario sub-folder
scenario = "rcp85"
# Input file name; its first '_'-separated token is used as the variable name
name = 't_CORDEX-CORE_rcp85_mon_200601-210012_v02.nc'

## Open the dataset

In [None]:
# Full path of the input file: <root><domain>/<scenario>/<name>
root_nc = f"{root}{domain}/{scenario}/{name}"
# The variable name is the first '_'-separated token of the file name
var = name.partition('_')[0]

# Open with dask chunks of one member and 120 steps along the time dimension
chunk_spec = {"member": 1, "time": 120}
ds = xr.open_dataset(root_nc, engine="netcdf4", chunks=chunk_spec)

In [None]:
root_output = "/lustre/gmeteo/WORK/DATA/C3S-CDS/C3S-CICA-Atlas/v2.urban/"

# Split the input file name once. The URB/RUR tag is appended to the first two
# tokens (variable and domain) when building the output file names.
name_tokens = name.split('_')
name_prefix = "_".join(name_tokens[0:2])
name_suffix = "_".join(name_tokens[2:])

# Output folder and file name for the urban product
root_urb = root_output + domain + 'URB/' + scenario + '/'
name_urb = name_prefix + 'URB_' + name_suffix

# Output folder and file name for the rural product
root_rur = root_output + domain + 'RUR/' + scenario + '/'
name_rur = name_prefix + 'RUR_' + name_suffix

# Create both output folders, including any missing parents, under root_output.
# Nothing is created relative to the current working directory, and
# exist_ok=True makes the cell safe to re-run.
for out_dir in (root_urb, root_rur):
    os.makedirs(out_dir, exist_ok=True)

## Filter the members

In [None]:
# Groups of member ids to keep, keyed by "<GCM>_<RCM family>".
# NOTE(review): "MIROC_MIROC5_r1i1p1_ORNL-RegCM4-7_v0" uses a hyphen where the
# other ORNL entries use an underscore, and "..._ICTP_RegCM4-6-v1" uses a hyphen
# before the version - confirm both against the dataset's member_id values.
Dict = {
    "MOHC_HadGEM2-ES_REMO": {"models": ["MOHC_HadGEM2-ES_r1i1p1_GERICS_REMO2015_v1"]},
    "MOHC_HadGEM2-ES_RegCM": {"models": ["MOHC_HadGEM2-ES_r1i1p1_ICTP_RegCM4-7_v0", "MOHC_HadGEM2-ES_r1i1p1_ICTP_RegCM4-4_v0", "MOHC_HadGEM2-ES_r1i1p1_ICTP_RegCM4-6_v1", "MOHC_HadGEM2-ES_r1i1p1_ISU_RegCM4_v4-4-rc8"]},
    "MPI-M_MPI-ESM-LR_REMO": {"models": ["MPI-M_MPI-ESM-LR_r1i1p1_GERICS_REMO2015_v1", "MPI-M_MPI-ESM-LR_r3i1p1_GERICS_REMO2015_v1"]},
    "MPI-M_MPI-ESM-MR_RegCM": {"models": ["MPI-M_MPI-ESM-MR_r1i1p1_ICTP_RegCM4-7_v0", "MPI-M_MPI-ESM-MR_r1i1p1_ICTP_RegCM4-4_v0", "MPI-M_MPI-ESM-MR_r1i1p1_ORNL_RegCM4-7_v0"]},
    "NCC_NorESM1-M_REMO": {"models": ["NCC_NorESM1-M_r1i1p1_GERICS_REMO2015_v1"]},
    "NCC_NorESM1-M_RegCM": {"models": ["NCC_NorESM1-M_r1i1p1_ICTP_RegCM4-7_v0", "NCC_NorESM1-M_r1i1p1_ICTP_RegCM4-4_v0", "NCC_NorESM1-M_r1i1p1_ICTP_RegCM4-6_v1", "NCC_NorESM1-M_r1i1p1_ORNL_RegCM4-7_v0"]},
    "NOAA-GFDL_GFDL-ESM2M_RegCM": {"models": ["NOAA-GFDL_GFDL-ESM2M_r1i1p1_ICTP_RegCM4-7_v0", "NOAA-GFDL_GFDL-ESM2M_r1i1p1_ISU_RegCM4_v4-4-rc8"]},
    "MIROC_MIROC5_RegCM": {"models": ["MIROC_MIROC5_r1i1p1_ORNL-RegCM4-7_v0"]},
    "MPI-M_MPI-ESM-LR_RegCM": {"models": ["MPI-M_MPI-ESM-LR_r1i1p1_ICTP_RegCM4-6_v0", "MPI-M_MPI-ESM-LR_r1i1p1_NCAR_RegCM4_v4-4-rc8", "MPI-M_MPI-ESM-LR_r1i1p1_ICTP_RegCM4-6-v1"]}
}

# Flatten the dictionary into one list of accepted ids: every model once,
# followed by its group key. The group keys are kept so the accepted set is
# unchanged from the earlier nested loop, which appended them too.
model_list = []
for key, entry in Dict.items():
    model_list.extend(entry["models"])
    model_list.append(key)

# Positions (along 'member') of the dataset members whose id is accepted
member_ids = ds.coords['member_id'].values
accepted_ids = set(model_list)
indices_filtrados = [i for i, member in enumerate(member_ids) if member in accepted_ids]
filtered_member_ids = [member_ids[i] for i in indices_filtrados]

# Fail here with a clear message instead of later on an empty dataset
if not indices_filtrados:
    raise ValueError("None of the dataset's member_id values is listed in Dict")

# Keep only the selected members and load them into memory
ds = ds.isel(member=indices_filtrados).compute()

## Create the masks for RegCM and REMO 

### Create a NaN dataset for the masks

In [None]:
# Use the first member / first time step as the template for the masks
template = ds.isel(member=0, time=0)

# NaN-filled array with the same shape as the template's `var` field
urmask_data = np.ones_like(template[var].values) * np.nan

# Add it as 'urmask' on the dimensions of `var`, then drop `var` itself
merged_ds = template.assign(urmask=(template[var].dims, urmask_data)).drop_vars(var)

### Merging the masks of each city together

In [None]:
def merge_nc_files(domain, folder_list, base_dir, merged_ds):
    """Merge one mask file per folder into ``merged_ds`` and return the result.

    For each folder in ``folder_list`` (relative to ``base_dir``), the .nc files
    matching the domain's file-name pattern are collected and the first one in
    sorted order is opened. If it has data variables it is merged into the
    running dataset with ``xr.merge``. Folders without a matching file and
    files without data variables are skipped. A file whose merge fails is
    skipped with a warning.

    Parameters
    ----------
    domain : str
        "CORDEX-CORE" (files matching ``*22*.nc``) or "CORDEX-EUR-11"
        (files matching ``*11*.nc``).
    folder_list : iterable of str
        Folder names, relative to ``base_dir``.
    base_dir : str
        Directory that contains the folders.
    merged_ds : xarray.Dataset
        Dataset the files are merged into. It is not modified in place.

    Returns
    -------
    xarray.Dataset
        ``merged_ds`` merged with every file that could be merged.

    Raises
    ------
    ValueError
        If ``domain`` is not one of the supported domains.
    """
    patterns = {"CORDEX-CORE": "*22*.nc", "CORDEX-EUR-11": "*11*.nc"}
    if domain not in patterns:
        raise ValueError(
            f"Unsupported domain {domain!r}; expected one of {sorted(patterns)}"
        )
    pattern = patterns[domain]

    for folder in folder_list:
        # Sort so that the file picked does not depend on filesystem order
        nc_files = sorted(glob.glob(os.path.join(base_dir, folder, pattern)))
        if not nc_files:
            continue

        path = nc_files[0]
        # The context manager closes the file handle once the data is merged
        with xr.open_dataset(path) as city_ds:
            if not city_ds.data_vars:
                continue
            try:
                # Load before merging so the result does not reference the
                # file after it has been closed
                merged_ds = xr.merge([merged_ds, city_ds.load()])
            except Exception as err:
                # Best effort: report the file that was skipped and carry on
                warnings.warn(
                    f"Skipping {path}: could not be merged ({err})",
                    RuntimeWarning,
                    stacklevel=2,
                )
    return merged_ds

In [None]:
# Base directory where the result folders are located
base_dir = "./results"

# List the directory once, in sorted order, so the merge order is reproducible
result_dirs = sorted(os.listdir(base_dir))

# Entries whose name contains 'REMO' or 'RegCM'
remo_dirs = [d for d in result_dirs if "REMO" in d]
regcm_dirs = [d for d in result_dirs if "RegCM" in d]

# Merge the masks of each model family into the NaN template and load them
ds_remo = merge_nc_files(domain, remo_dirs, base_dir, merged_ds).compute()
ds_regcm = merge_nc_files(domain, regcm_dirs, base_dir, merged_ds).compute()

### Filtering members for REMO and RegCM

In [None]:
# Pull `member_id` out as a plain array and view it as strings
member_ids = ds['member_id'].values
member_names = member_ids.astype(str)

# Boolean selectors: True where the member id contains "RegCM" / "REMO"
# (np.char.find returns -1 when the substring is absent)
regcm_mask = np.char.find(member_names, "RegCM") != -1
remo_mask = np.char.find(member_names, "REMO") != -1

## Applying the Rural mask

### Filtering the mask to retain only rural values (where urmask equals 0)

In [None]:
# Rural mask: 1 where urmask equals 0, NaN everywhere else
keep_value, drop_value = 1, float("nan")
mask_regcm = xr.where(ds_regcm['urmask'] == 0, keep_value, drop_value)
mask_remo = xr.where(ds_remo['urmask'] == 0, keep_value, drop_value)

### Apply the mask

In [None]:
# Chunking used for both model subsets
chunk_spec = {"member": 1, "time": 120}

# RegCM members: select, re-chunk, then multiply `var` by the RegCM mask
ds_regcm_filtered = ds[[var]].isel(member=regcm_mask).chunk(chunk_spec)
ds_regcm_filtered[var] = ds_regcm_filtered[var] * mask_regcm

# REMO members: same steps with the REMO mask
ds_remo_filtered = ds[[var]].isel(member=remo_mask).chunk(chunk_spec)
ds_remo_filtered[var] = ds_remo_filtered[var] * mask_remo

### Merge models and save the dataset

In [None]:
# Stack the two model subsets along 'member' (RegCM members first, then REMO)
model_subsets = [ds_regcm_filtered, ds_remo_filtered]
ds_merged = xr.concat(model_subsets, dim="member")

In [None]:
# Write the rural product with zlib compression (level 1) on `var`
rural_path = root_rur + name_rur
rural_encoding = {var: {"zlib": True, "complevel": 1}}
ds_merged.to_netcdf(rural_path, encoding=rural_encoding)

## Applying the Urban mask

### Filtering the mask to retain only urban values (where urmask equals 1)

In [None]:
# Drop the rural intermediates before building the urban product
del ds_regcm_filtered, ds_remo_filtered, ds_merged
# Run a garbage collection pass to free memory
gc.collect()

In [None]:
# Urban mask: keep urmask where it is strictly positive, NaN everywhere else
urmask_regcm = ds_regcm['urmask']
urmask_remo = ds_remo['urmask']
mask_regcm = urmask_regcm.where(urmask_regcm > 0)
mask_remo = urmask_remo.where(urmask_remo > 0)

### Apply the mask

In [None]:
# Chunking used for both model subsets
chunk_spec = {"member": 1, "time": 120}

# RegCM members: select, re-chunk, then multiply `var` by the RegCM mask
ds_regcm_filtered = ds[[var]].isel(member=regcm_mask).chunk(chunk_spec)
ds_regcm_filtered[var] = ds_regcm_filtered[var] * mask_regcm

# REMO members: same steps with the REMO mask
ds_remo_filtered = ds[[var]].isel(member=remo_mask).chunk(chunk_spec)
ds_remo_filtered[var] = ds_remo_filtered[var] * mask_remo

### Merge models and save the dataset

In [None]:
# Stack the two model subsets along 'member' (RegCM members first, then REMO)
model_subsets = [ds_regcm_filtered, ds_remo_filtered]
ds_merged = xr.concat(model_subsets, dim="member")

In [None]:
# Write the urban product with zlib compression (level 1) on `var`
urban_path = root_urb + name_urb
urban_encoding = {var: {"zlib": True, "complevel": 1}}
ds_merged.to_netcdf(urban_path, encoding=urban_encoding)