# Load packages

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import pandas as pd
import numpy as np
import xarray as xr
import os
import sys
import tqdm

In [None]:
#from lib.utils import latest_version, fix_360_longitudes
from lib.interpolater import Interpolator

In [None]:
def traverseDir(root, end):
    """Lazily yield the full path of every file below ``root`` ending in ``end``.

    Parameters
    ----------
    root : str
        Directory that is walked recursively with ``os.walk``.
    end : str or tuple of str
        Suffix (or suffixes) handed to ``str.endswith`` to select file names.

    Yields
    ------
    str
        ``os.path.join(directory, name)`` for each matching file, in the
        order produced by ``os.walk``.
    """
    for folder, _subdirs, names in os.walk(root):
        yield from (
            os.path.join(folder, name) for name in names if name.endswith(end)
        )

In [None]:
def select_domain(ds, lats_ref, lons_ref):
    """Trim the reference grid axes to the extent covered by ``ds``.

    Parameters
    ----------
    ds : object exposing ``lon.data`` and ``lat.data`` arrays
        Source dataset whose coordinate range defines the domain.
    lats_ref, lons_ref : numpy.ndarray
        Latitude / longitude values of the reference (destination) grid.

    Returns
    -------
    tuple of numpy.ndarray
        ``(lats_ref_cut, lons_ref_cut)``: the reference values lying strictly
        between the minimum and maximum of the dataset coordinates.

    Notes
    -----
    Longitudes above 180 are shifted by -360 directly in the array returned
    by ``ds.lon.data``, so the caller's array is modified in place.
    NOTE(review): confirm whether later steps rely on this side effect.
    """
    src_lon = ds.lon.data
    src_lat = ds.lat.data

    # Bring a 0..360 longitude convention onto -180..180 before comparing.
    wrapped = src_lon > 180
    if wrapped.any():
        src_lon[wrapped] = src_lon[wrapped] - 360

    lon_lo, lon_hi = np.min(src_lon), np.max(src_lon)
    lat_lo, lat_hi = np.min(src_lat), np.max(src_lat)

    # Strict inequalities: reference points exactly on the edge are dropped.
    keep_lon = (lons_ref > lon_lo) & (lons_ref < lon_hi)
    keep_lat = (lats_ref > lat_lo) & (lats_ref < lat_hi)

    return lats_ref[keep_lat], lons_ref[keep_lon]

In [None]:
# Load the two reference grids whose lat/lon axes serve as interpolation
# targets: `ds_ref_025` (CORDEX-CORE file) and `ds_ref_012` (CORDEX-EUR-11 file).
# Both datasets are left open because later cells read their coordinates.
import xarray as xr

# Path of the CORDEX-CORE reference file inside the Atlas directory
ruta_nc = "/lustre/gmeteo/WORK/DATA/C3S-CDS/C3S-CICA-Atlas/v2/CORDEX-CORE/historical/t_CORDEX-CORE_historical_mon_197001-200512_v02.nc"

# Open the NetCDF file
ds_ref_025 = xr.open_dataset(ruta_nc, engine="netcdf4")

# Show the dataset summary
print(ds_ref_025)
# Directory of the CORDEX-EUR-11 reference file (note: `root` is reassigned by later cells)
root = '/lustre/gmeteo/WORK/DATA/C3S-CDS/C3S-CICA-Atlas/v2/CORDEX-EUR-11/historical/'
ds_ref_012 = xr.open_dataset(f"{root}t_CORDEX-EUR-11_historical_mon_197001-200512_v02.nc")

## Files to interpolate

### tasmin

In [None]:
root_esgf_local = '/lustre/gmeteo/DATA/ESGF/REPLICA/DATA/cordex/output/'

In [None]:
files_CORDEX = np.sort(list(traverseDir(root_esgf_local, '.nc')))

In [None]:
# Store the sorted list of discovered files as a one-column table
# ('files'), indexed 0..N-1, and dump it to CSV in the working directory.
da = pd.DataFrame(index = np.arange(len(files_CORDEX)))
da['files'] = files_CORDEX

da.to_csv('files_CORDEX_lustre.csv')

In [None]:
root = "/lustre/gmeteo/WORK/DATA/CORDEX-FPS-URB-RCC/tasmin/"

In [None]:
files = np.sort(list(traverseDir(root, '.nc')))

In [None]:
files

In [None]:
864/12

# Conservative and Nearest interpolation

### tasmin

In [None]:
# Settings dictionary handed to `Interpolator`. The method and the target
# lat/lon axes start as None and are filled in per file by the tasmin loop;
# only the variable name is fixed here.
int_attr = {'interpolation_method' : None, 
            'lats' : None,
            'lons' : None,
            'var_name' : 'tasmin'
}

In [None]:
# Regrid every tasmin file onto the matching reference grid and write two
# versions: conservative ('/tasmin_C/') and nearest-neighbour ('/tasmin_N/').
# The nearest-neighbour result is masked with the NaN pattern of the
# conservative one so both outputs share the same valid cells.
for file in tqdm.tqdm(files):

    print(file)

    filepath_root = '/'.join(file.split('/')[:-1])
    filename_root = file.split('/')[-1]
    filename_dest = filename_root

    # Conservative destination. The directory is derived from the full file
    # path so the "already done" check and the write target always agree
    # (replacing on the directory alone misses files located directly in a
    # '.../tasmin' folder, because that path has no trailing slash).
    filepath_dest = os.path.dirname(file.replace('/tasmin/', '/tasmin_C/'))

    # Skip finished files before opening anything.
    if os.path.isfile(filepath_dest + '/' + filename_dest):
        continue

    # Pick the reference grid from the resolution tag of the domain field
    # (second '_'-separated token of the file name, e.g. 'XXX-11').
    resolution = filename_root.split('_')[1].split('-')[1]
    if resolution == '11':
        lats_ref = np.sort(ds_ref_012.lat.data)
        lons_ref = np.sort(ds_ref_012.lon.data)
    elif resolution == '22':
        lats_ref = np.sort(ds_ref_025.lat.data)
        lons_ref = np.sort(ds_ref_025.lon.data)
    else:
        # Never fall through with the (already cut) grid of a previous file.
        print(f"Skipping {file}: unrecognised resolution tag '{resolution}'")
        continue

    # The context manager closes the file on every exit path, errors included.
    with xr.open_dataset(file) as ds:
        # cut destination grid and update attr
        lats_ref, lons_ref = select_domain(ds, lats_ref, lons_ref)
        int_attr['lats'] = lats_ref
        int_attr['lons'] = lons_ref

        # Conservative
        int_attr['interpolation_method'] = 'conservative_normed'
        INTER = Interpolator(int_attr)
        ds_inter = INTER(ds)
        os.makedirs(filepath_dest, exist_ok=True)
        # Remember where the conservative result has no data.
        mask = np.isnan(ds_inter['tasmin'])
        ds_inter.to_netcdf(filepath_dest + '/' + filename_dest)

        # NN
        int_attr['interpolation_method'] = 'nearest_s2d'
        filepath_dest = os.path.dirname(file.replace('/tasmin/', '/tasmin_N/'))
        INTER = Interpolator(int_attr)
        ds_inter = INTER(ds)
        os.makedirs(filepath_dest, exist_ok=True)
        ds_inter['tasmin'] = ds_inter['tasmin'].where(~mask)
        ds_inter.to_netcdf(filepath_dest + '/' + filename_dest)

### tasmax

In [None]:
root = "/lustre/gmeteo/WORK/DATA/CORDEX-FPS-URB-RCC/tasmin/data-esgf-local/tasmax/"

In [None]:
files = np.sort(list(traverseDir(root, '.nc')))

In [None]:
# Interpolate data
int_attr = {'interpolation_method' : None, 
            'lats' : None,
            'lons' : None,
            'var_name' : 'tasmax'
}

In [None]:
# Regrid every tasmax file onto the matching reference grid and write two
# versions into fixed output folders: conservative ('tasmax_C') and
# nearest-neighbour ('tasmax_N'). The nearest-neighbour result is masked with
# the NaN pattern of the conservative one.
for file in tqdm.tqdm(files):

    print(file)

    filepath_root = '/'.join(file.split('/')[:-1])
    filename_root = file.split('/')[-1]
    filename_dest = filename_root

    # Conservative destination; skip finished files before opening anything.
    filepath_dest = '/lustre/gmeteo/WORK/DATA/CORDEX-FPS-URB-RCC/tasmax_C/'
    if os.path.isfile(filepath_dest + filename_dest):
        continue

    # Pick the reference grid from the resolution tag of the domain field
    # (second '_'-separated token of the file name, e.g. 'XXX-11').
    resolution = filename_root.split('_')[1].split('-')[1]
    if resolution == '11':
        lats_ref = np.sort(ds_ref_012.lat.data)
        lons_ref = np.sort(ds_ref_012.lon.data)
    elif resolution == '22':
        lats_ref = np.sort(ds_ref_025.lat.data)
        lons_ref = np.sort(ds_ref_025.lon.data)
    else:
        # Never fall through with the (already cut) grid of a previous file.
        print(f"Skipping {file}: unrecognised resolution tag '{resolution}'")
        continue

    # The context manager closes the file on every exit path, errors included.
    with xr.open_dataset(file) as ds:
        # cut destination grid and update attr
        lats_ref, lons_ref = select_domain(ds, lats_ref, lons_ref)
        int_attr['lats'] = lats_ref
        int_attr['lons'] = lons_ref

        # Conservative
        int_attr['interpolation_method'] = 'conservative_normed'
        INTER = Interpolator(int_attr)
        ds_inter = INTER(ds)
        os.makedirs(filepath_dest, exist_ok=True)
        # Remember where the conservative result has no data.
        mask = np.isnan(ds_inter['tasmax'])
        ds_inter.to_netcdf(filepath_dest + filename_dest)

        # NN
        int_attr['interpolation_method'] = 'nearest_s2d'
        filepath_dest = '/lustre/gmeteo/WORK/DATA/CORDEX-FPS-URB-RCC/tasmax_N/'
        INTER = Interpolator(int_attr)
        ds_inter = INTER(ds)
        os.makedirs(filepath_dest, exist_ok=True)
        ds_inter['tasmax'] = ds_inter['tasmax'].where(~mask)
        ds_inter.to_netcdf(filepath_dest + filename_dest)

### Fixed variables

In [None]:
# Interpolate data
int_attr = {'interpolation_method' : None, 
            'lats' : None,
            'lons' : None,
            'var_name' : None
}

In [None]:
vars_str = {'land-sea-mask': 'sftlf',
             'orography': 'orog',
             'urbanfraction': 'sftimf',
}

In [None]:
vars_str = {'land-sea-mask': 'sftlf'}

In [None]:
import os
import numpy as np
import xarray as xr

# Map from the folder name of each fixed field to its NetCDF variable name.
vars_str = {
    'land-sea-mask': 'sftlf',
    'orography': 'orog',
    'urbanfraction': 'sftimf',
}

# All results go to one local folder (relative to the working directory).
filepath_dest = "interpolation_results"

# Conservatively regrid every fixed-field file of every model.
for model in ["RegCM", "REMO"]:
    for var_str, var_name in vars_str.items():
        root = f"/lustre/gmeteo/WORK/DATA/CORDEX-FPS-URB-RCC/nextcloud/CORDEX-CORE-WG/{model}/{var_str}/"
        print(root)

        files = np.sort(list(traverseDir(root, '.nc')))
        print(files)

        # For the urban fraction, ignore anything stored under an 'orig' folder.
        if var_str == 'urbanfraction':
            files = [file for file in files if '/orig/' not in file]

        int_attr = {'var_name': var_name}

        for file in files:
            filename = file.split('/')[-1]

            # Choose the reference grid from the resolution tag in the name.
            if '-11' in filename:
                lats_ref = np.sort(ds_ref_012.lat.data)
                lons_ref = np.sort(ds_ref_012.lon.data)
            elif '-22' in filename:
                lats_ref = np.sort(ds_ref_025.lat.data)
                lons_ref = np.sort(ds_ref_025.lon.data)
            else:
                # Never reuse the (already cut) grid of a previous file.
                print(f"Skipping {file}: no '-11' or '-22' resolution tag in the file name")
                continue

            # The context manager closes the file even if interpolation fails.
            with xr.open_dataset(file) as ds:
                # Select domain and update attributes
                lats_ref, lons_ref = select_domain(ds, lats_ref, lons_ref)
                int_attr.update({'lats': lats_ref, 'lons': lons_ref, 'interpolation_method': 'conservative_normed'})

                # Interpolation
                INTER = Interpolator(int_attr)
                ds_inter = INTER(ds)

                # Save results (the folder is created on first use)
                os.makedirs(filepath_dest, exist_ok=True)
                ds_inter.to_netcdf(f"{filepath_dest}/{filename}")


In [None]:
lat = 48.864716 
lon = 2.349014

In [None]:
root = "/lustre/gmeteo/WORK/DATA/CORDEX-FPS-URB-RCC/nextcloud/CORDEX-CORE-WG/new/RegCM/sftimf/"

In [None]:
ds = xr.open_dataset(f"{root}sftimf_EUR-11c_ICTP_RegCM4-6_v1_fx.nc")

In [None]:
ds = xr.open_dataset(f"./interpolation_results/sftimf_EUR-11_GERICS_REMO2015_v1_fx.nc")

In [None]:
ds = ds.sel(lon = slice(lon-0.5, lon+0.5 ), lat = slice(lat-0.5, lat+0.5))

In [None]:
ds['sftimf'].plot()

In [None]:
filepath_dest + '/' +  filename_dest

In [None]:
# Regrid each file in `files` twice, conservative and nearest-neighbour,
# writing next to the source in sibling 'land-sea-mask_C' / 'land-sea-mask_N'
# folders. The variable name is taken from `int_attr['var_name']`, which must
# have been set by an earlier cell.
for file in tqdm.tqdm(files):

    print(file)

    filepath_root = '/'.join(file.split('/')[:-1])
    filename_root = file.split('/')[-1]
    filename_dest = filename_root

    # Pick the reference grid from the resolution tag of the domain field.
    # A prefix match is used so that suffixed tags such as 'EUR-11c' are
    # recognised as well as plain 'EUR-11'.
    resolution = filename_root.split('_')[1].split('-')[1]
    if resolution.startswith('11'):
        lats_ref = np.sort(ds_ref_012.lat.data)
        lons_ref = np.sort(ds_ref_012.lon.data)
    elif resolution.startswith('22'):
        lats_ref = np.sort(ds_ref_025.lat.data)
        lons_ref = np.sort(ds_ref_025.lon.data)
    else:
        # Never fall through with the (already cut) grid of a previous file.
        print(f"Skipping {file}: unrecognised resolution tag '{resolution}'")
        continue

    # The context manager closes the file on every exit path, errors included.
    with xr.open_dataset(file) as ds:
        # cut destination grid and update attr
        lats_ref, lons_ref = select_domain(ds, lats_ref, lons_ref)
        int_attr['lats'] = lats_ref
        int_attr['lons'] = lons_ref

        # Conservative
        int_attr['interpolation_method'] = 'conservative_normed'
        filepath_dest = filepath_root.replace('land-sea-mask', 'land-sea-mask_C')

        INTER = Interpolator(int_attr)
        ds_inter = INTER(ds)
        # save the results
        os.makedirs(filepath_dest, exist_ok=True)
        # Remember where the conservative result has no data.
        mask = np.isnan(ds_inter[int_attr['var_name']])
        ds_inter.to_netcdf(filepath_dest + '/' + filename_dest)

        # NN
        int_attr['interpolation_method'] = 'nearest_s2d'
        filepath_dest = filepath_root.replace('land-sea-mask', 'land-sea-mask_N')

        INTER = Interpolator(int_attr)
        ds_inter = INTER(ds)
        # save the results
        os.makedirs(filepath_dest, exist_ok=True)
        ds_inter[int_attr['var_name']] = ds_inter[int_attr['var_name']].where(~mask)
        ds_inter.to_netcdf(filepath_dest + '/' + filename_dest)

In [None]:
# Nested dictionary describing a dataset skeleton: dimension sizes taken from
# the currently loaded `ds`, plus CF-style attributes for the lon/lat
# coordinates. The coordinate values ("data") are left as None placeholders.
# NOTE(review): the layout looks like the one consumed by
# `xr.Dataset.from_dict` - confirm against the code that uses this dict.
xr_dict = {
    "dims": {
        "bnds": 2,
        "lon": len(ds.lon),
        "lat": len(ds.lat),
        #"time": len(ds.time),
    },
    "coords": {
        "lon": {
            "dims": ("lon",),
            "attrs": {
                "units": "degrees_east",
                "standard_name": "longitude",
                "long_name": "longitude",
                "axis": "X",
                "bounds": "lon_bnds",
            },
            "data": None,
        },
        "lat": {
            "dims": ("lat",),
            "attrs": {
                "units": "degrees_north",
                "standard_name": "latitude",
                "long_name": "latitude",
                "axis": "Y",
                "bounds": "lat_bnds",
            },
            "data": None,
        },
        #"time": {"dims": ("time",), "attrs": ds.time.attrs, "data": ds.time.data},
    },
}

In [None]:
time = True

In [None]:
# Optionally extend the skeleton with a time axis copied from `ds`.
if time:
    # Size the dimension from the dataset so it matches the coordinate data.
    xr_dict['dims'].update({"time": len(ds.time)})
    # The coordinate description must sit under its own "time" key, like
    # "lon" and "lat"; merging it flat would add stray "dims"/"attrs"/"data"
    # entries to the coords mapping instead of a time coordinate.
    xr_dict['coords'].update(
        {"time": {"dims": ("time",), "attrs": ds.time.attrs, "data": ds.time.data}}
    )

In [None]:
xr_dict

In [None]:
def add_keys_nested_dict(d, keys):
    """Walk ``d`` along ``keys``, creating empty dicts for missing levels.

    After the walk, the innermost mapping reached gets ``keys[-1]`` set to
    ``1`` unless that key is already present there. ``d`` is modified in
    place and nothing is returned.

    NOTE(review): because the walk already descends into ``keys[-1]``, the
    final key ends up nested inside itself (``['a', 'b']`` on an empty dict
    gives ``{'a': {'b': {'b': 1}}}``) - confirm this is the intended shape.

    Raises ``IndexError`` when ``keys`` is empty.
    """
    node = d
    for name in keys:
        if name in node:
            node = node[name]
            continue
        node[name] = {}
        node = node[name]
    leaf = keys[-1]
    node.setdefault(leaf, 1)
 
 
# Small demonstration of add_keys_nested_dict on a two-level dictionary.
test_dict = {'GFG': {'rate': 4, 'since': 2012}}

# Show the dictionary before the update
print("The original dictionary is: " + str(test_dict))

# Walk/create the path 'GFG' -> 'rank' (test_dict is modified in place)
add_keys_nested_dict(test_dict, ['GFG', 'rank'])

# Show the dictionary after the update
print("Dictionary after nested key update: " + str(test_dict))

## Check results

In [None]:
ds_rot = xr.open_dataset(file)
ds_interp = xr.open_dataset(filepath_dest + '/' +  filename_dest)
ds_rot['tasmin'].isel(time = 0).plot()

In [None]:
ds_interp['tasmin'].isel(time = 0).plot()

MASKING

In [None]:
import warnings
warnings.filterwarnings("ignore")

In [None]:
root = '/lustre/gmeteo/WORK/chantreuxa/BA_IPCC/final_products/climate_index/CMIP6/mrsos/gr100/IPSL/IPSL-CM6A-LR/'

In [None]:
files = np.sort(list(traverseDir(root, '.nc')))

In [None]:
sces = np.sort(np.unique([f.split('/')[-3] for f in files]))

In [None]:
# Build the time mean of every scenario, average those means across
# scenarios and derive a 0/1 mask from the result. With skipna=False a cell
# that is NaN at any retained time step of any scenario stays NaN.
if len(sces) == 0:
    raise ValueError("No scenarios found: the list of input files is empty")

for n_sce, sce in enumerate(sces):
    print(sce)
    # Select files by the same path component that was used to build `sces`,
    # so a scenario name appearing elsewhere in a path cannot leak in.
    files_sce = [file for file in files if file.split('/')[-3] == sce]
    # The context manager closes all files of the scenario when done.
    with xr.open_mfdataset(files_sce,
                           concat_dim='time',
                           combine='nested',
                           chunks={'time': 1000}) as ds_all:
        # delete dates with all nan
        ds_mean_time = ds_all.mean(dim = ('lon', 'lat'), skipna=True)
        no_null = ~pd.isnull(ds_mean_time['mrsos'].values)
        ds = ds_all.isel(time = no_null)
        if n_sce == 0:
            # Load the result now: the files are closed on leaving the block.
            ds_mean = ds.mean(dim = ('time'), skipna=False).compute()
        else:
            ds_aux = ds.mean(dim = ('time'), skipna=False)
            ds_mean['mrsos'][:] = ds_mean['mrsos'].values + ds_aux['mrsos'].values

# Turn the accumulated sum into the across-scenario average.
ds_mean['mrsos'][:] = ds_mean['mrsos'].values/len(sces)

# 1 where the averaged field is valid, 0 where it is NaN.
ds_mean["mask"]=(['lat', 'lon'],  xr.where(~pd.isnull(ds_mean['mrsos']), 1, 0))

In [None]:
# Document how the mask was built, using the standard "comment" attribute
# name so that tools reading the file can find the description.
ds_mean["mask"].attrs["comment"] = "Mask calculated using all files and scenarios for this simulation. A value of 1 indicates cells with no NaNs in any file (across time and scenarios), while a value of 0 indicates the opposite"

In [None]:
ds_mean.to_netcdf('mrsos_gr100_mon_CMIP6_IPSL_IPSL-CM6A-LR_r1i1p1f1_mon.nc')

In [None]:
files[0]

In [None]:
file = '/lustre/gmeteo/WORK/PROYECTOS/2020_C3S_34d/CORDEX_PROVIDERS/CORDEX_NAM_UCAR/DATA/canrcm4/canesm2/nam-22/hist/day/tas_NAM-22_CCCma-CanESM2_historical_r1i1p1_CCCma-CanRCM4_r2_day_20010101-20051231.nc'

In [None]:
ds = xr.open_dataset(file)

In [None]:
ds['tas'].isel(time = 0).plot()