In [None]:
import glob
import os
import warnings

import numpy as np
import xarray as xr

## Parameters

In [None]:
# Root of the source atlas, the domain sub-folder inside it, and the file to mask.
# The first "_"-separated token of the file name is used below as the variable name.
root = "/lustre/gmeteo/WORK/DATA/C3S-CDS/C3S-CICA-Atlas/v2/"
domain = "CERRA"
name = "sfcwind_CERRA_mon_198501-202112_v02.nc"

## Open the dataset

In [None]:
# Full path of the input file: <root><domain>/<name>
root_nc = f"{root}{domain}/{name}"
# The variable name is the text before the first "_" of the file name
var = name.partition('_')[0]

# Open the file chunked: one member and 120 time steps per chunk
ds = xr.open_dataset(
    root_nc,
    engine="netcdf4",
    chunks={"member": 1, "time": 120},
)

In [None]:
# Root folder under which the urban/rural masked products are written
root_output = "/lustre/gmeteo/WORK/DATA/C3S-CDS/C3S-CICA-Atlas/v2.urban/"

# Split the file name after its second "_"-separated token so the URB/RUR tag
# can be appended there: "<a>_<b>_<rest>" -> "<a>_<b>URB_<rest>"
name_head = "_".join(name.split('_')[0:2])
name_tail = "_".join(name.split('_')[2:])

# Output folder and file name for the urban product
root_urb = root_output + domain + 'URB/'
name_urb = name_head + 'URB_' + name_tail

# Output folder and file name for the rural product
root_rur = root_output + domain + 'RUR/'
name_rur = name_head + 'RUR_' + name_tail

# Create only the folders the products are actually written to. The previous
# version also created unused "<domain>URB"/"<domain>RUR" folders relative to
# the current working directory. exist_ok=True makes re-running the cell safe.
for out_dir in (root_urb, root_rur):
    os.makedirs(out_dir, exist_ok=True)

## Create the mask

### Create a NaN dataset for the masks

In [None]:
# Use the first time step of the input as the template for the mask
first_step = ds.isel(time=0)
template = first_step[var]

# NaN-filled array with the same shape as the template variable
urmask_data = np.ones_like(template.values) * np.nan

# Keep everything except the data variable, then attach the empty 'urmask'
merged_ds = first_step.drop_vars(var)
merged_ds['urmask'] = (template.dims, urmask_data)

### Merging the masks of each city together

In [None]:
def merge_nc_files(folder_list, base_dir, merged_ds):
    """Merge the first ``urmask*.nc`` file of each folder into ``merged_ds``.

    Parameters
    ----------
    folder_list : iterable of str
        Names of the entries of ``base_dir`` to scan.
    base_dir : str
        Directory that contains the entries listed in ``folder_list``.
    merged_ds : xarray.Dataset
        Dataset the masks are merged into.

    Returns
    -------
    xarray.Dataset
        ``merged_ds`` combined with every mask that could be merged. It is
        returned unchanged when no folder contributes a mask.

    Notes
    -----
    Folders without a matching file, or whose file has no data variables, are
    skipped. A mask that cannot be merged (for example because its values
    conflict with what is already merged) is skipped with a warning instead of
    being dropped silently.
    """
    for folder in folder_list:
        pattern = os.path.join(base_dir, folder, "urmask*.nc")
        # glob order is arbitrary; sort so "the first file" is reproducible
        nc_files = sorted(glob.glob(pattern))
        if not nc_files:
            continue

        mask_path = nc_files[0]
        # Read the mask into memory and close the file immediately so no
        # file handle is left open for every folder visited
        with xr.open_dataset(mask_path) as mask_file:
            mask_ds = mask_file.load()

        if not mask_ds.data_vars:
            continue

        try:
            # Spell out the long-standing xr.merge defaults so the behaviour
            # does not depend on the installed xarray version
            merged_ds = xr.merge(
                [merged_ds, mask_ds], compat="no_conflicts", join="outer"
            )
        except Exception as err:
            # Best effort: keep going, but tell the user which mask was skipped
            warnings.warn(f"Skipping {mask_path}: could not merge ({err})")
    return merged_ds

In [None]:
# Folder holding the per-folder mask results (presumably one sub-folder per city)
base_dir = "./results_CERRA"

# Only sub-directories can contain mask files. Sort them so the merge order is
# reproducible: masks that fail to merge are skipped, so the order can matter.
dirs = sorted(
    d for d in os.listdir(base_dir)
    if os.path.isdir(os.path.join(base_dir, d))
)

# Merge every mask into the NaN template and load the result into memory
ds_CERRA = merge_nc_files(dirs, base_dir, merged_ds).compute()

## Applying the Urban mask

### Filtering the mask to retain only urban values (where urmask equals 1)

In [None]:
# Keep only cells where urmask is strictly positive; everything else becomes NaN
urmask = ds_CERRA['urmask']
mask_CERRA = urmask.where(urmask > 0)

### Apply the mask

In [None]:
# Rebuild the input path and the variable name (text before the first "_")
root_nc = f"{root}{domain}/{name}"
var = name.partition('_')[0]

# Open the input file chunked by member and by blocks of 120 time steps
ds = xr.open_dataset(
    root_nc,
    engine="netcdf4",
    chunks=dict(member=1, time=120),
)

In [None]:
# Build a new dataset instead of aliasing `ds`, so the opened dataset is not
# modified in place and this cell can be re-run safely.
# keep_attrs=True asks xarray to carry the variable's attributes through the
# multiplication so they are still present in the written file.
with xr.set_options(keep_attrs=True):
    ds_merged = ds.assign({var: ds[var] * mask_CERRA})

### Save the urban-masked dataset

In [None]:
# Write the urban-masked dataset, zlib-compressing the data variable at level 1
ds_merged.to_netcdf(
    root_urb + name_urb,
    encoding={var: dict(zlib=True, complevel=1)},
)

## Applying the Rural mask

In [None]:
import gc
del ds_merged  # Drop the reference to the urban-masked dataset
gc.collect()  # Free memory

### Filtering the mask to retain only rural values (where urmask equals 0)

In [None]:
# Rural cells are those where urmask is exactly 0: flag them with 1 and set
# every other cell to NaN
is_rural = ds_CERRA['urmask'] == 0
mask_CERRA = xr.where(is_rural, 1, float("nan"))

### Apply the mask

In [None]:
# Re-open the input file (same chunking as before) for the rural pass
ds = xr.open_dataset(
    root_nc,
    engine="netcdf4",
    chunks=dict(member=1, time=120),
)

In [None]:
# Build a new dataset instead of aliasing `ds`, so the opened dataset is not
# modified in place and this cell can be re-run safely.
# keep_attrs=True asks xarray to carry the variable's attributes through the
# multiplication so they are still present in the written file.
with xr.set_options(keep_attrs=True):
    ds_merged = ds.assign({var: ds[var] * mask_CERRA})

### Save the rural-masked dataset

In [None]:
# Write the rural-masked dataset, zlib-compressing the data variable at level 1
ds_merged.to_netcdf(
    root_rur + name_rur,
    encoding={var: dict(zlib=True, complevel=1)},
)