## Browse DKRZ catalog on Levante HPC system

Search for LUMIP simulations and available models

In [3]:
# import intake
import os
import glob
import netCDF4 as nc

import matplotlib as mpl
import matplotlib.colors as colors
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import cartopy as cart
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import cartopy.mpl.ticker as cticker
from cartopy.util import add_cyclic_point

import numpy as np
import xarray as xr
import cftime
import pandas as pd

# Regridding
import xesmf as xe


In [None]:
# # Multiprocessing
# import multiprocessing
# from dask.distributed import Client, progress, wait # Library to orchestrate distributed resources
# from dask_jobqueue import SLURMCluster

# ncpu = multiprocessing.cpu_count()
# processes = False
# nworker = 1
# threads = ncpu // nworker
# print(
#     f"Number of CPUs: {ncpu}, number of threads: {threads}, number of workers: {nworker}, processes: {processes}",
# )
# client = Client(
#     processes=processes,
#     threads_per_worker=threads,
#     n_workers=nworker,
#     memory_limit="16GB",
# )
# client

In [None]:
# Local data root. The same assignment was written twice; one is enough.
# Later cells rebind ``data_path`` to the PFT and climate input directories.
data_path = "G:/My Drive/MPIM/data/"

# Functions

In [4]:
def xr_fix_time(xr_in, date_start, date_end):
    """Replace the time coordinate of ``xr_in`` with month-end timestamps.

    If the time coordinate uses the ``noleap`` or ``360_day`` calendar, the
    object is first converted to the ``gregorian`` calendar
    (``align_on='date'``). The ``time`` coordinate is then overwritten with
    ``datetime64[ns]`` month-end stamps produced by ``pd.date_range``.

    Parameters
    ----------
    xr_in
        Object exposing ``xr_in['time'].dt.calendar``, ``convert_calendar``
        and ``assign_coords`` (the xarray datasets built in this notebook).
    date_start, date_end
        Bounds handed to ``pd.date_range``. Only month ends that fall inside
        the closed interval are generated, e.g. ``"2015-01"``..``"2101-01"``
        yields 2015-01-31 .. 2100-12-31 (1032 stamps).

    Returns
    -------
    The result of ``xr_in.assign_coords(time=...)``.

    Notes
    -----
    The generated axis is assigned as-is; it is presumably expected to have
    the same length as the existing time dimension -- verify against the
    input files.
    """
    if xr_in['time'].dt.calendar in ('noleap', '360_day'):
        xr_in = xr_in.convert_calendar(calendar='gregorian', align_on='date')

    # Use the MonthEnd offset object rather than the 'M' string alias: the
    # alias is deprecated in recent pandas releases, while the offset object
    # produces the same month-end stamps on old and new versions alike.
    month_ends = pd.date_range(
        start=date_start, end=date_end, freq=pd.offsets.MonthEnd()
    )
    time = month_ends.to_numpy(dtype='datetime64[ns]')

    return xr_in.assign_coords(time=time)

In [6]:
def xr_clean(xr_in, dims):
    """Return a copy of ``xr_in`` with the auxiliary names in ``dims`` removed.

    Each name ``d`` in ``dims`` is handled as follows:

    * if ``d`` is a coordinate, it is dropped;
    * if ``d`` is a dimension, its first element is selected
      (``isel({d: 0})``), which removes the dimension;
    * if ``d`` is a data variable, it is dropped -- but only when the object
      has non-empty ``attrs``. That guard is kept from the original code;
      NOTE(review): its purpose is not evident from this file.

    Parameters
    ----------
    xr_in
        Object to clean; it is copied first, so the input is left untouched.
    dims
        Iterable of coordinate / dimension / variable names to remove.
        Names that are not present are silently skipped.

    Returns
    -------
    The cleaned copy.
    """
    data = xr_in.copy()
    for d in dims:
        # ``drop_vars`` replaces the deprecated ``drop`` and matches the call
        # already used for data variables below.
        if d in data.coords:
            data = data.drop_vars(d)
        # Runs after the coordinate has been dropped, so this covers both
        # bare dimensions and dimensions that had an index coordinate.
        if d in data.dims:
            data = data.isel({d: 0})
        # ``data_vars`` is only looked up when attrs are non-empty, exactly
        # as in the original control flow.
        if data.attrs and d in data.data_vars:
            data = data.drop_vars(d)
    return data

In [7]:
def lon180(ds):
    """Return ``ds`` with ``lon`` wrapped to [-180, 180) and sorted ascending.

    Every longitude is mapped through ``(lon + 180) % 360 - 180`` and the
    result is sorted along ``lon``.

    The wrapped coordinate is attached with ``assign_coords``, which returns
    a new object. The previous item assignment on ``ds.coords`` rewrote the
    caller's object as a side effect; callers must use the return value.

    Parameters
    ----------
    ds
        Object with a ``lon`` coordinate (the xarray datasets used in this
        notebook).

    Returns
    -------
    New object with wrapped, sorted longitudes.
    """
    wrapped = (ds.coords['lon'] + 180) % 360 - 180
    shifted = ds.assign_coords(lon=wrapped)
    return shifted.sortby(shifted.lon)

In [5]:
import xesmf as xe

def xr_regrid(data, method, lon_bnds, lat_bnds, xres, yres):
    """Regrid ``data`` onto a regular lon/lat grid with xESMF.

    See https://xesmf.readthedocs.io/en/stable/index.html

    Parameters
    ----------
    data
        Input data handed to ``xe.Regridder`` and then regridded.
    method
        Regridding method name: "bilinear", "conservative",
        "conservative_normed", "patch", "nearest_s2d" or "nearest_d2s".
    lon_bnds, lat_bnds
        Indexable pairs; element 0 is the lower and element 1 the upper
        bound of the target grid.
    xres, yres
        Longitude and latitude spacing passed to ``xe.util.cf_grid_2d``.

    Returns
    -------
    ``data`` regridded onto the target grid, attributes kept.
    """
    lon_lo, lon_hi = lon_bnds[0], lon_bnds[1]
    lat_lo, lat_hi = lat_bnds[0], lat_bnds[1]

    # Destination grid built from the requested bounds and resolution.
    target_grid = xe.util.cf_grid_2d(lon_lo, lon_hi, xres, lat_lo, lat_hi, yres)

    regridder = xe.Regridder(data, target_grid, method)
    return regridder(data, keep_attrs=True)

# PFT data

In [8]:
# Models processed in this notebook; the lists below are index-aligned with it.
models = ["ACCESS-ESM1-5", "CMCC-ESM2", "IPSL-CM6A-LR"]

# PFT variables available for every model.
_pft_common = [
    "treeFrac", "treeFracBdlDcd", "treeFracBdlEvg", "treeFracNdlDcd", "treeFracNdlEvg",
    "grassFrac", "grassFracC3", "grassFracC4",
    "cropFrac", "cropFracC3",
]

# One independent variable list per model; IPSL-CM6A-LR also has cropFracC4.
variables = [
    list(_pft_common),
    list(_pft_common),
    _pft_common + ["cropFracC4"],
]

# One list of member labels per model.
members = [["r1i1p1f1"] for _ in models]

In [9]:
data_path = '/home/m/m301093/data/new_models/pft'


def _open_pft_scenario(root, scenario):
    """Open the PFT files of every model for ``scenario``, one merged dataset per model.

    For each model in ``models`` and each of its names in ``variables``, the
    files matching ``<root>/<model>/<var>_*_<model>_<scenario>_*`` are
    globbed and the first match in sorted order is opened lazily (bounds
    variables dropped, chunked along time). The per-variable datasets of a
    model are merged with ``compat="override"``.

    Raises
    ------
    FileNotFoundError
        If no file matches a pattern. Previously this surfaced as a bare
        ``IndexError`` that did not say which file was missing.
    """
    merged_per_model = []
    for i, mm in enumerate(models):
        per_variable = []
        for vv in variables[i]:
            pattern = os.path.join(root, mm, vv + '_*_' + mm + "_" + scenario + '_*')
            # glob returns matches in arbitrary order; sorting makes the
            # choice of the first file reproducible.
            matches = sorted(glob.glob(pattern))
            if not matches:
                raise FileNotFoundError("No file matches pattern: " + pattern)
            # NOTE(review): only the first match is opened; if a variable is
            # split over several files the remaining ones are ignored.
            per_variable.append(
                xr.open_dataset(
                    matches[0],
                    drop_variables=["time_bnds", "lon_bnds", "lat_bnds"],
                    engine='netcdf4',
                    chunks={"time": 240},
                )
            )
        merged_per_model.append(xr.merge(per_variable, compat="override"))
    return merged_per_model


scenario = 'ssp126'
xr_aff_esm_pft = _open_pft_scenario(data_path, scenario)

scenario = 'ssp370'
xr_ctl_esm_pft = _open_pft_scenario(data_path, scenario)


Preprocessing

In [10]:
import dask

# dask option controlling whether slicing splits large chunks; disabled here.
dask.config.set({"array.slicing.split_large_chunks": False})

# Names handed to xr_clean for every dataset.
_aux_names = ['dcpp_init_year', 'member_id', 'lon_bnds', 'lat_bnds', 'time_bnds', 'bnds']

# The same pipeline is applied to both scenario lists, model by model:
# latitude subset -> drop auxiliary names -> wrap longitudes -> rebuild time axis.
for _scenario_list in (xr_aff_esm_pft, xr_ctl_esm_pft):
    for i, _ in enumerate(models):
        ds = _scenario_list[i].sel(lat=slice(-60, 90))
        ds = xr_clean(ds, _aux_names)
        ds = lon180(ds)
        _scenario_list[i] = xr_fix_time(ds, "2015-01", "2101-01")

Save data

In [13]:
# Write one NetCDF file per model and scenario.
# The two file names were swapped: the "aff" list was written under the
# ssp370 name and the "ctl" list (loaded from ssp370) under ssp126Lu. The
# reload cell below reads "ssp126Lu" back into xr_aff_esm_pft and "ssp370"
# into xr_ctl_esm_pft, and the climate save cell uses the same mapping.
# NOTE(review): out_path is not assigned in any visible cell -- define it
# before running.
for i, mm in enumerate(models):
    xr_aff_esm_pft[i].to_netcdf(out_path + "/xr_ssp126Lu_" + mm + "_pft.nc")
    xr_ctl_esm_pft[i].to_netcdf(out_path + "/xr_ssp370_" + mm + "_pft.nc")


Check saved data

In [17]:
import glob
import netCDF4

def _open_saved_pft(scenario):
    """Open the saved ``xr_<scenario>_<model>_pft.nc`` file of every model.

    Each saved file already holds all variables of its model, so it is opened
    once. The previous loop reopened the same file once per variable and
    merged the identical copies. The file name contains no wildcard, so it
    is opened directly rather than globbed; a missing file is reported by
    ``xr.open_dataset`` itself.
    """
    return [
        xr.open_dataset(
            os.path.join(out_path, 'xr_' + scenario + "_" + mm + '_pft.nc'),
            drop_variables=["time_bnds", "lon_bnds", "lat_bnds"],
            engine='netcdf4',
            chunks={"time": 240},
        )
        for mm in models
    ]


scenario = 'ssp126Lu'
xr_aff_esm_pft = _open_saved_pft(scenario)

scenario = 'ssp370'
xr_ctl_esm_pft = _open_saved_pft(scenario)


## Climate data

In [None]:
# Models processed in this section; the lists below are index-aligned with it.
models = ["ACCESS-ESM1-5", "CMCC-ESM2", "IPSL-CM6A-LR"]

# Climate variables; the same set is requested for every model.
_clim_common = ["tas", "ts", "pr", "rlds", "rsds", "rsus", "hfls", "hfss", "huss", "ps"]

# One independent variable list per model.
variables = [list(_clim_common) for _ in models]

# One list of member labels per model.
members = [["r1i1p1f1"] for _ in models]

In [None]:
import glob
import netCDF4

data_path = '/home/m/m301093/data/new_models/climate'


def _open_climate_scenario(root, scenario):
    """Open the climate files of every model for ``scenario``, one merged dataset per model.

    For each model in ``models`` and each of its names in ``variables``, the
    files matching ``<root>/<model>/<var>_*_<model>_<scenario>_*`` are
    globbed and the first match in sorted order is opened lazily (bounds
    variables dropped, chunked along time). The per-variable datasets of a
    model are merged with ``compat="override"``.

    Raises
    ------
    FileNotFoundError
        If no file matches a pattern. Previously this surfaced as a bare
        ``IndexError`` that did not say which file was missing.
    """
    merged_per_model = []
    for i, mm in enumerate(models):
        per_variable = []
        for vv in variables[i]:
            pattern = os.path.join(root, mm, vv + '_*_' + mm + "_" + scenario + '_*')
            # glob returns matches in arbitrary order; sorting makes the
            # choice of the first file reproducible.
            matches = sorted(glob.glob(pattern))
            if not matches:
                raise FileNotFoundError("No file matches pattern: " + pattern)
            # NOTE(review): only the first match is opened; if a variable is
            # split over several files the remaining ones are ignored.
            per_variable.append(
                xr.open_dataset(
                    matches[0],
                    drop_variables=["time_bnds", "lon_bnds", "lat_bnds"],
                    engine='netcdf4',
                    chunks={"time": 240},
                )
            )
        merged_per_model.append(xr.merge(per_variable, compat="override"))
    return merged_per_model


scenario = 'ssp370-ssp126Lu'
xr_aff_esm_clim = _open_climate_scenario(data_path, scenario)

scenario = 'ssp370'
xr_ctl_esm_clim = _open_climate_scenario(data_path, scenario)

scenario = 'historical'
xr_hist_esm_clim = _open_climate_scenario(data_path, scenario)

In [None]:
# Names handed to xr_clean for every dataset.
_aux_names = ['dcpp_init_year', 'member_id', 'lon_bnds', 'lat_bnds', 'time_bnds', 'bnds']

# The same pipeline is applied to all three scenario lists, model by model:
# latitude subset -> drop auxiliary names -> wrap longitudes -> rebuild time axis.
# NOTE(review): the historical datasets receive the same "2015-01".."2101-01"
# time axis as the two scenario runs -- confirm this matches the period
# covered by the historical files.
for _scenario_list in (xr_aff_esm_clim, xr_ctl_esm_clim, xr_hist_esm_clim):
    for i, _ in enumerate(models):
        ds = _scenario_list[i].sel(lat=slice(-60, 90))
        ds = xr_clean(ds, _aux_names)
        ds = lon180(ds)
        _scenario_list[i] = xr_fix_time(ds, "2015-01", "2101-01")

In [None]:
# Write one NetCDF file per model and scenario.
# The lists built by the climate cells carry the ``_clim`` suffix; the
# unsuffixed names used here before (xr_ctl_esm, xr_aff_esm, xr_hist_esm)
# are not defined in any visible cell.
# NOTE(review): out_path is not assigned in any visible cell -- define it
# before running.
for i, mm in enumerate(models):
    xr_ctl_esm_clim[i].to_netcdf(out_path + "/xr_ssp370_" + mm + ".nc")
    xr_aff_esm_clim[i].to_netcdf(out_path + "/xr_ssp126Lu_" + mm + ".nc")
    xr_hist_esm_clim[i].to_netcdf(out_path + "/xr_hist_" + mm + ".nc")