In [8]:
import xarray as xr
import matplotlib
import matplotlib.pyplot as plt
import glob
import os
import cftime
import shutil
import numpy as np

#### Function Definition

In [2]:
'''
This function opens the file and put them in a list (the file pattern given) 
of a list (every query of that file pattern).

file_pattern must be a list.
exclude_files (optional): include the name of the file you would like to exclude

example: 

file_pattern = ['tas_Amon_CanESM5_piControl_r1i1p1f1_gn_*.nc', 
                'rlut_Amon_CanESM5_piControl_r1i1p1f1_gn_*.nc']
would open all of these files and output a list like this:
output (ddict) = ['tas_xxx.nc, rlut_xxx.nc']
'''
def Open_Files(file_pattern, exclude_files="None", exclude_vars="None"):
    ddict = []
    for order in range(len(file_pattern)):
        ds = []
        all_files = glob.glob(file_pattern[order])
        if exclude_files != "None":
            file_paths = [f for f in all_files if exclude_files not in f]
        else:
            file_paths = all_files
        if len(file_paths) == 1:
            ds = xr.open_dataset(file_paths[0], use_cftime=True, drop_variables=exclude_vars)
        elif len(file_paths) == 0:
            raise ValueError("Could not locate the file. Check the directory path.")
        else:
            ds = xr.open_mfdataset(file_paths, combine='by_coords', use_cftime=True, drop_variables=exclude_vars)
        ddict.append(ds)
    return ddict

In [3]:
# Make a file name based on variable, model, experiment, year range, variant, and grid. 
# Use * to find all of them
def NewFile(var, model, exp, number="*", variant="r1i1p1f1", grid="*"):
    newname = []
    newname.append(f"{var}_Amon_{model}_{exp}_{variant}_{grid}_{number}.nc")
    #naming convention: Variable_realm_model_experiment_variant_grid_startyyyymm-endyyyymm.nc
    #example: hfls_Amon_CanESM5_piControl_r1i1p1f1_gn_520101-620012.nc
    return newname[0]

#make a new file name, but with the correct directory leading to the file
def LocateFile(path, file_name, model, exp, variant):
    newpath = (f"{path}/{model}/{exp}/{variant}/{file_name}")
    return newpath

''' 
Read the data from the file

The format of the file must be lists like this
[abe, 123]
[bce, 394, hello, 34857d]
...

The output will be like this:
[[abe, 123], [bce, 394, hello, 34857d], ...]
'''
def ReadFile(filename):
  nested_list = []
  with open(filename, 'r') as f:
    for line in f:
      cleanline = line.strip('[]\n')
      sublist = [item.strip() for item in cleanline.split(',')]
      nested_list.append(sublist)
  return nested_list

In [4]:
'''
This is under assumption that ds list is arranged in the way that 
variables is ["tas", "rsdt", "rsut", "rlut", "rlutcs", "rsutcs"]

the input of the dataset must be:
[tas_xxx.nc, rsdt_xxx.nc, ...]
'''
# Calculate All-sky Fluxes
def Cal_Toa(ds):
    # load the datasets
    Derived = xr.merge([ds[1], ds[2], ds[3]])
    skeleton_ds = Derived.drop_vars(list(Derived.data_vars))  
    new_ds = skeleton_ds.copy(deep=True)
    new_ds['toa'] = Derived.rsdt - Derived.rsut - Derived.rlut
    return new_ds

# Calculate Cloud Radiative effect
def Cal_Cre(ds):
    # load the datasets
    Derived = xr.merge([ds[1], ds[2], ds[3], ds[4], ds[5]])
    skeleton_ds = Derived.drop_vars(list(Derived.data_vars))  
    new_ds = skeleton_ds.copy(deep=True) 
    new_ds['cre'] = Derived.rsdt - Derived.rsut - Derived.rlut + Derived.rsutcs + Derived.rlutcs
    return new_ds

In [5]:
def FormatTime(ds):
    try:
        ds["time"] = ds.indexes["time"].to_datetimeindex(unsafe=True)
    except AttributeError:
        pass
    except ValueError:
        pass
    return ds

#### Pre-processing

In [6]:
# Do not change variables and dir_path
variables = ["tas", "rsdt", "rsut", "rlut", "rlutcs", "rsutcs"]
dir_path = '/data/cristi/a/cristi/esm_data/cmip6'

output_folder = '/data/cristi/a/kchoo3/FluxesProcessing/pre-processed'
experiments = ["piControl", "amip-piForcing", "abrupt-4xCO2", "amip"]

In [None]:
# # # # # # # Copy TAS over # # # # # # #
output_folder = '/data/cristi/a/kchoo3/FluxesProcessing/pre-processed/raw'

for ex in range(len(experiments)):
    dsname = []
    dsname = ReadFile(f"{experiments[ex]}.txt")
    for mod in range(len(dsname)): #loop each model individually
        # finding the original location
        file_path = None
        source_path = []
        file_path = NewFile(var="tas", model=dsname[mod][0], exp=experiments[ex], variant=dsname[mod][1])
        source_path.append(LocateFile(dir_path, file_path, model=dsname[mod][0], exp=experiments[ex], variant=dsname[mod][1]))
        file_path = None
        file_path = NewFile(var="tas", model=dsname[mod][0], exp=experiments[ex], number=dsname[mod][2], variant=dsname[mod][1], grid=dsname[mod][3])
        output_path = os.path.join(output_folder, file_path)
        if not os.path.exists(output_path):
            print(output_path)
            ds = []
            # Concat the file together
            if "CESM2" in dsname[mod] and experiments[ex] == "amip":
                ds = Open_Files(source_path, exclude_files="195001-201412", exclude_vars=['lat_bnds', 'lon_bnds'])
            elif "dupe" in dsname[mod]:
                ds = Open_Files(source_path, exclude_files=dsname[mod][2], exclude_vars=['lat_bnds', 'lon_bnds'])
            else:
                ds = Open_Files(source_path, exclude_files="None", exclude_vars=['lat_bnds', 'lon_bnds'])
            # <-!---- Copying file ----!->
            ds[0].to_netcdf(output_path)
            print("done")

/data/cristi/a/kchoo3/FluxesProcessing/pre-processed/raw/tas_Amon_IPSL-CM6A-LR_abrupt-4xCO2_r1i1p1f1_gr_185001-274912.nc


In [16]:
# # # # # Copying TAS over if Concat takes too long # # # # #
output_folder = '/data/cristi/a/kchoo3/FluxesProcessing/pre-processed/raw/'

for ex in range(len(experiments)):
    dsname = []
    dsname = ReadFile(f"{experiments[ex]}.txt")
    for mod in range(len(dsname)): #loop each model individually
        # finding the original location
        file_path = None
        source_path = None
        file_path = NewFile(var="tas", model=dsname[mod][0], exp=experiments[ex], variant=dsname[mod][1])
        print(file_path)
        dir_path = (f'/data/cristi/a/cristi/esm_data/cmip6/{dsname[mod][0]}/{experiments[ex]}/{dsname[mod][1]}/')
        source_path = glob.glob(dir_path + file_path)
        if "dupe" in dsname[mod]:
            exclude_files = dsname[mod][2]
            file_path = [f for f in file_path if exclude_files not in f]
        file_path = None
        file_path = NewFile(var="tas", model=dsname[mod][0], exp=experiments[ex], number=dsname[mod][2], variant=dsname[mod][1], grid=dsname[mod][3])
        output_path = os.path.join(output_folder, file_path)
        if not os.path.exists(output_path):
            print(output_folder)
            print(source_path)
            ds = []
            for file in source_path:
                shutil.copy2(file, output_folder)

tas_Amon_CESM2_piControl_r1i1p1f1_*_*.nc
tas_Amon_MRI-ESM2-0_piControl_r1i1p1f1_*_*.nc
tas_Amon_CNRM-CM6-1_piControl_r1i1p1f2_*_*.nc
tas_Amon_MIROC6_piControl_r1i1p1f1_*_*.nc
tas_Amon_IPSL-CM6A-LR_piControl_r1i1p1f1_*_*.nc
tas_Amon_HadGEM3-GC31-LL_piControl_r1i1p1f1_*_*.nc
tas_Amon_CanESM5_piControl_r1i1p1f1_*_*.nc
tas_Amon_TaiESM1_piControl_r1i1p1f1_*_*.nc
/data/cristi/a/kchoo3/FluxesProcessing/pre-processed/raw/
['/data/cristi/a/cristi/esm_data/cmip6/TaiESM1/piControl/r1i1p1f1/tas_Amon_TaiESM1_piControl_r1i1p1f1_gn_020101-030012.nc', '/data/cristi/a/cristi/esm_data/cmip6/TaiESM1/piControl/r1i1p1f1/tas_Amon_TaiESM1_piControl_r1i1p1f1_gn_030101-040012.nc', '/data/cristi/a/cristi/esm_data/cmip6/TaiESM1/piControl/r1i1p1f1/tas_Amon_TaiESM1_piControl_r1i1p1f1_gn_040101-050012.nc', '/data/cristi/a/cristi/esm_data/cmip6/TaiESM1/piControl/r1i1p1f1/tas_Amon_TaiESM1_piControl_r1i1p1f1_gn_050101-060012.nc', '/data/cristi/a/cristi/esm_data/cmip6/TaiESM1/piControl/r1i1p1f1/tas_Amon_TaiESM1_piContr

In [10]:
# # # # # # # Monthly Data for Cre and Toa # # # # # # #
output_folder = '/data/cristi/a/kchoo3/FluxesProcessing/pre-processed/raw'

for ex in range(len(experiments)):
    dsname = []
    '''
    Read the files. You may find them in this same directory the jupyter notebook file is in
    they are formated like this:
    [model1, variant_1, number_1, grid_1, (optional)dupe]
    [model2, variant_2, number_2, grid_2, (optional)dupe]
    if there are duplication data (ie, concat and separated data), add "dupe" at the end
    ...
    '''
    dsname = ReadFile(f"{experiments[ex]}.txt")
    for mod in range(len(dsname)): #loop each model individually
        file_pattern = []
        ''' 
        Load each variables up and put them in stream of list. They look like this:
        [dir/tas_xx_xx_*.nc, dir/rsdt_xx_xx_*.nc, dir/rsut_xx_xx_*.nc, ...]
        '''
        for n in range(len(variables)):
            file_path = NewFile(var=variables[n], model=dsname[mod][0], exp=experiments[ex], variant=dsname[mod][1])
            complete_path = LocateFile(dir_path, file_path, model=dsname[mod][0], exp=experiments[ex], variant=dsname[mod][1])
            file_pattern.append(complete_path)
        # open files
        ds = []
        '''
        For this processing, the output when using Open_Files function will be like this:
        ddict = ['tas_xxx.nc, rsdt_xxx.nc, rsut_xxx.nc, rlut_xxx.nc, rlutcs_xxx.nc, rsutcs_xxx.nc']
        
        if there are duplicates (ie, 100-200, 200-300, 300-400, 100-400), 
        then the concat one "100-400" will be excluded. dsname[mod][2] calls that number
        '''
        if "CESM2" in dsname[mod] and experiments[ex] == "amip":
            ds = Open_Files(file_pattern, exclude_files="195001-201412")
        elif "dupe" in dsname[mod]:
            ds = Open_Files(file_pattern, exclude_files=dsname[mod][2])
        else:
            ds = Open_Files(file_pattern, exclude_files="None")
        # <-!---- processing ----!->
        # <><><><><><> Toa <><><><><><>
        final_ds = []
        #the new file name
        file_path = None
        file_path = NewFile(var="net-toa", model=dsname[mod][0], exp=experiments[ex], number=dsname[mod][2], variant=dsname[mod][1], grid=dsname[mod][3])
        output_path = os.path.join(output_folder, file_path)
        if not os.path.exists(output_path):
            Derived = Cal_Toa(ds)
            final_ds = Derived
            final_ds.to_netcdf(output_path)

        # <><><><><><> Cre <><><><><><>
        final_ds = []
        #the new file name
        file_path = None
        file_path = NewFile(var="net-cre", model=dsname[mod][0], exp=experiments[ex], number=dsname[mod][2], variant=dsname[mod][1], grid=dsname[mod][3])
        output_path = os.path.join(output_folder, file_path)
        if not os.path.exists(output_path):
            Derived = Cal_Cre(ds)
            final_ds = Derived
            final_ds.to_netcdf(output_path)

  var = coder.decode(var, name=name)
  var = coder.decode(var, name=name)
  var = coder.decode(var, name=name)
  var = coder.decode(var, name=name)
  var = coder.decode(var, name=name)
  var = coder.decode(var, name=name)
  var = coder.decode(var, name=name)
  var = coder.decode(var, name=name)
  var = coder.decode(var, name=name)
  var = coder.decode(var, name=name)
  var = coder.decode(var, name=name)
  var = coder.decode(var, name=name)
  var = coder.decode(var, name=name)
  var = coder.decode(var, name=name)
  var = coder.decode(var, name=name)
  var = coder.decode(var, name=name)
  var = coder.decode(var, name=name)
  var = coder.decode(var, name=name)
  var = coder.decode(var, name=name)
  var = coder.decode(var, name=name)
  var = coder.decode(var, name=name)
  var = coder.decode(var, name=name)
  var = coder.decode(var, name=name)
  var = coder.decode(var, name=name)
  var = coder.decode(var, name=name)
  var = coder.decode(var, name=name)
  var = coder.decode(var, name=name)
 

In [19]:
# # # # # # # Calculate annual resampling # # # # # # #
variables = ["net-toa", "net-cre", "tas"]
new_variables = ["toa-annual", "cre-annual", "tas-annual"]
dir_path = '/data/cristi/a/kchoo3/FluxesProcessing/pre-processed/raw'

output_folder = '/data/cristi/a/kchoo3/FluxesProcessing/pre-processed/annual'
experiments = ["piControl", "amip-piForcing", "abrupt-4xCO2", "amip"]
for ex in range(len(experiments)):
    dsname = []
    dsname = ReadFile(f"{experiments[ex]}.txt")
    for mod in range(len(dsname)): #loop each model individually
        file_pattern = []
        for n in range(len(variables)):
            file_pattern = []
            file_path = NewFile(var=variables[n], model=dsname[mod][0], exp=experiments[ex], variant=dsname[mod][1])
            complete_path = os.path.join(dir_path, file_path)
            file_pattern.append(complete_path)
            print(file_pattern)
            # open files
            ds = []
            ds = Open_Files(file_pattern, exclude_files="None", exclude_vars=['lat_bnds', 'lon_bnds'])
            file_path = None
            new_number = dsname[mod][2][:4] + dsname[mod][2][6:11] # Rename the date range from YYYYMM to YYYY
            file_path = NewFile(var=new_variables[n], model=dsname[mod][0], exp=experiments[ex], number=new_number, variant=dsname[mod][1], grid=dsname[mod][3])
            output_path = os.path.join(output_folder, file_path)
            print(output_path)
            if not os.path.exists(output_path):
                final_ds = ds[0].resample(time='YS').mean()
                final_ds.to_netcdf(output_path)

['/data/cristi/a/kchoo3/FluxesProcessing/pre-processed/raw/net-toa_Amon_CESM2_piControl_r1i1p1f1_*_*.nc']
/data/cristi/a/kchoo3/FluxesProcessing/pre-processed/annual/toa-annual_Amon_CESM2_piControl_r1i1p1f1_gn_0001-1200.nc
['/data/cristi/a/kchoo3/FluxesProcessing/pre-processed/raw/net-cre_Amon_CESM2_piControl_r1i1p1f1_*_*.nc']
/data/cristi/a/kchoo3/FluxesProcessing/pre-processed/annual/cre-annual_Amon_CESM2_piControl_r1i1p1f1_gn_0001-1200.nc
['/data/cristi/a/kchoo3/FluxesProcessing/pre-processed/raw/tas_Amon_CESM2_piControl_r1i1p1f1_*_*.nc']


  var = coder.decode(var, name=name)
  var = coder.decode(var, name=name)
  var = coder.decode(var, name=name)
  var = coder.decode(var, name=name)
  var = coder.decode(var, name=name)
  var = coder.decode(var, name=name)
  var = coder.decode(var, name=name)
  var = coder.decode(var, name=name)
  var = coder.decode(var, name=name)
  var = coder.decode(var, name=name)
  var = coder.decode(var, name=name)
  var = coder.decode(var, name=name)


/data/cristi/a/kchoo3/FluxesProcessing/pre-processed/annual/tas-annual_Amon_CESM2_piControl_r1i1p1f1_gn_0001-1200.nc
['/data/cristi/a/kchoo3/FluxesProcessing/pre-processed/raw/net-toa_Amon_MRI-ESM2-0_piControl_r1i1p1f1_*_*.nc']
/data/cristi/a/kchoo3/FluxesProcessing/pre-processed/annual/toa-annual_Amon_MRI-ESM2-0_piControl_r1i1p1f1_gn_1850-2550.nc
['/data/cristi/a/kchoo3/FluxesProcessing/pre-processed/raw/net-cre_Amon_MRI-ESM2-0_piControl_r1i1p1f1_*_*.nc']
/data/cristi/a/kchoo3/FluxesProcessing/pre-processed/annual/cre-annual_Amon_MRI-ESM2-0_piControl_r1i1p1f1_gn_1850-2550.nc
['/data/cristi/a/kchoo3/FluxesProcessing/pre-processed/raw/tas_Amon_MRI-ESM2-0_piControl_r1i1p1f1_*_*.nc']
/data/cristi/a/kchoo3/FluxesProcessing/pre-processed/annual/tas-annual_Amon_MRI-ESM2-0_piControl_r1i1p1f1_gn_1850-2550.nc
['/data/cristi/a/kchoo3/FluxesProcessing/pre-processed/raw/net-toa_Amon_CNRM-CM6-1_piControl_r1i1p1f2_*_*.nc']
/data/cristi/a/kchoo3/FluxesProcessing/pre-processed/annual/toa-annual_Amon_C

  var = coder.decode(var, name=name)
  var = coder.decode(var, name=name)
  var = coder.decode(var, name=name)


['/data/cristi/a/kchoo3/FluxesProcessing/pre-processed/raw/net-toa_Amon_MRI-ESM2-0_amip-piForcing_r1i1p1f1_*_*.nc']
/data/cristi/a/kchoo3/FluxesProcessing/pre-processed/annual/toa-annual_Amon_MRI-ESM2-0_amip-piForcing_r1i1p1f1_gn_1870-2014.nc
['/data/cristi/a/kchoo3/FluxesProcessing/pre-processed/raw/net-cre_Amon_MRI-ESM2-0_amip-piForcing_r1i1p1f1_*_*.nc']
/data/cristi/a/kchoo3/FluxesProcessing/pre-processed/annual/cre-annual_Amon_MRI-ESM2-0_amip-piForcing_r1i1p1f1_gn_1870-2014.nc
['/data/cristi/a/kchoo3/FluxesProcessing/pre-processed/raw/tas_Amon_MRI-ESM2-0_amip-piForcing_r1i1p1f1_*_*.nc']
/data/cristi/a/kchoo3/FluxesProcessing/pre-processed/annual/tas-annual_Amon_MRI-ESM2-0_amip-piForcing_r1i1p1f1_gn_1870-2014.nc
['/data/cristi/a/kchoo3/FluxesProcessing/pre-processed/raw/net-toa_Amon_CNRM-CM6-1_amip-piForcing_r1i1p1f2_*_*.nc']
/data/cristi/a/kchoo3/FluxesProcessing/pre-processed/annual/toa-annual_Amon_CNRM-CM6-1_amip-piForcing_r1i1p1f2_gr_1870-2014.nc
['/data/cristi/a/kchoo3/FluxesPr

In [11]:
# # # # # # # Calculate Anomaly  # # # # # # #
variables = ["toa-annual", "cre-annual", "tas-annual"]
new_variables = ["toa-anom", "cre-anom", "tas-anom"]
dir_path = '/data/cristi/a/kchoo3/FluxesProcessing/pre-processed/annual'

output_folder = '/data/cristi/a/kchoo3/FluxesProcessing/pre-processed/anom'
experiments = ["piControl", "abrupt-4xCO2"]

piControl_ddict = [[],[],[]]
for ex in range(len(experiments)):
    dsname = []
    dsname = ReadFile(f"{experiments[ex]}.txt")
    for mod in range(len(dsname)): #loop each model individually
        for n in range(len(variables)):
            file_pattern = []
            file_path = NewFile(var=variables[n], model=dsname[mod][0], exp=experiments[ex], variant=dsname[mod][1])
            complete_path = os.path.join(dir_path, file_path)
            file_pattern.append(complete_path)
            # open files
            ds = []
            ds = Open_Files(file_pattern, exclude_files="None")
            file_path = None
            new_number = dsname[mod][2][:4] + dsname[mod][2][6:11]
            file_path = NewFile(var=new_variables[n], model=dsname[mod][0], exp=experiments[ex], number=new_number, variant=dsname[mod][1], grid=dsname[mod][3])
            output_path = os.path.join(output_folder, file_path)
            if experiments[ex] == "piControl":
                final_ds = ds[0].mean('time')
                piControl_ddict[n].append(final_ds)
            else:
                if not os.path.exists(output_path):
                    final_ds = ds[0] - piControl_ddict[n][mod]
                    final_ds.to_netcdf(output_path)

In [10]:
# # # # # # # OLD Calculate Anomaly  # # # # # # #
variables = ["toa-annual", "cre-annual", "tas-annual"]
new_variables = ["toa-anom", "cre-anom", "tas-anom"]
dir_path = '/data/cristi/a/kchoo3/FluxesProcessing/pre-processed/annual'

output_folder = '/data/cristi/a/kchoo3/FluxesProcessing/pre-processed/anom'
experiments = ["piControl", "abrupt-4xCO2"]

piControl_ddict = [[],[],[]]
for ex in range(len(experiments)): # co2 and picontrol
    dsname = []
    dsname = ReadFile(f"{experiments[ex]}.txt")
    for mod in range(len(dsname)): # loop each model (8) individually
        for n in range(len(variables)): # tas, toa, and cre
            file_pattern = []
            file_path = NewFile(var=variables[n], model=dsname[mod][0], exp=experiments[ex], variant=dsname[mod][1])
            complete_path = os.path.join(dir_path, file_path)
            file_pattern.append(complete_path)
            # open files
            ds = []
            ds = Open_Files(file_pattern, exclude_files="None")
            file_path = None
            new_number = dsname[mod][2][:4] + dsname[mod][2][6:11]
            file_path = NewFile(var=new_variables[n], model=dsname[mod][0], exp=experiments[ex], number=new_number, variant=dsname[mod][1], grid=dsname[mod][3])
            output_path = os.path.join(output_folder, file_path)
            
            new_lat = np.linspace(ds[0].lat.min(), ds[0].lat.max(), 150)
            new_lon = np.linspace(ds[0].lon.min(), ds[0].lon.max(), 300)
            
            ds[0] = ds[0].interp(lat=new_lat, lon=new_lon)
            if experiments[ex] == "piControl":
                final_ds = ds[0].mean('time')
                piControl_ddict[n].append(final_ds)
            else:
                if not os.path.exists(output_path):
                    final_ds = ds[0] - piControl_ddict[n][mod]
                    final_ds.to_netcdf(output_path)

In [7]:
# IGNORE OLD WORKFLOW
# # # # # # # Code Combined # # # # # # #
piControl_ddict = [[],[],[]]
for ex in range(len(experiments)):
    dsname = []
    '''
    Read the files. You may find them in this same directory the jupyter notebook file is in
    they are formated like this:
    [model1, variant_1, number_1, grid_1, (optional)dupe]
    [model2, variant_2, number_2, grid_2, (optional)dupe]
    if there are duplication data (ie, concat and separated data), add "dupe" at the end
    ...
    '''
    dsname = ReadFile(f"{experiments[ex]}.txt")
    for mod in range(len(dsname)): #loop each model individually
        file_pattern = []
        ''' 
        Load each variables up and put them in stream of list. They look like this:
        [dir/tas_xx_xx_*.nc, dir/rsdt_xx_xx_*.nc, dir/rsut_xx_xx_*.nc, ...]
        '''
        for n in range(len(variables)):
            file_path = NewFile(var=variables[n], model=dsname[mod][0], exp=experiments[ex], variant=dsname[mod][1])
            complete_path = LocateFile(dir_path, file_path, model=dsname[mod][0], exp=experiments[ex], variant=dsname[mod][1])
            file_pattern.append(complete_path)
        # open files
        ds = []
        '''
        For this processing, the output when using Open_Files function will be like this:
        ddict = ['tas_xxx.nc, rsdt_xxx.nc, rsut_xxx.nc, rlut_xxx.nc, rlutcs_xxx.nc, rsutcs_xxx.nc']
        
        if there are duplicates (ie, 100-200, 200-300, 300-400, 100-400), 
        then the concat one "100-400" will be excluded. dsname[mod][2] calls that number
        '''
        if "CESM2" in dsname[mod] and experiments[ex] == "amip":
            ds = Open_Files(file_pattern, exclude_files="195001-201412")
        elif "dupe" in dsname[mod]:
            ds = Open_Files(file_pattern, exclude_files=dsname[mod][2])
        else:
            ds = Open_Files(file_pattern, exclude_files="None")
        # <-!---- processing ----!->
        # <><><><><><> Toa <><><><><><>
        final_ds = []
        #the new file name
        file_path = None
        file_path = NewFile(var="toa-anom", model=dsname[mod][0], exp=experiments[ex], number=dsname[mod][2], variant=dsname[mod][1], grid=dsname[mod][3])
        output_path = os.path.join(output_folder, file_path)
        if experiments[ex] == "piControl":
            # make the dataset dictionary of piControl of each model. Used to calculate anomaly
            if not os.path.exists(output_path):
                final_ds = Cal_Toa(ds)
                piControl_ddict[0].append(final_ds)
                final_ds = final_ds.mean('time')
                final_ds.to_netcdf(output_path)
            else:
                temp_ds = xr.open_dataset(output_path, use_cftime=True, decode_times=False)
                final_ds = FormatTime(final_ds)
                temp_ds = temp_ds.mean('time')
                piControl_ddict[0].append(temp_ds)
        else:
            if not os.path.exists(output_path):
                Derived = Cal_Toa(ds)
                final_ds = Derived - piControl_ddict[0][mod]
                final_ds.to_netcdf(output_path)

        # <><><><><><> Cre <><><><><><>
        final_ds = []
        #the new file name
        file_path = None
        file_path = NewFile(var="cre-anom", model=dsname[mod][0], exp=experiments[ex], number=dsname[mod][2], variant=dsname[mod][1], grid=dsname[mod][3])
        output_path = os.path.join(output_folder, file_path)
        if experiments[ex] == "piControl":
            # make the dataset dictionary of piControl of each model. Used to calculate anomaly
            if not os.path.exists(output_path):
                final_ds = Cal_Cre(ds)
                piControl_ddict[1].append(final_ds)
                final_ds = final_ds.mean('time')
                final_ds.to_netcdf(output_path)
            else:
                temp_ds = xr.open_dataset(output_path, use_cftime=True, decode_times=False)
                final_ds = FormatTime(final_ds)
                temp_ds = temp_ds.mean('time')
                piControl_ddict[1].append(temp_ds)
        else:
            if not os.path.exists(output_path):
                Derived = Cal_Cre(ds)
                final_ds = Derived - piControl_ddict[1][mod]
                final_ds.to_netcdf(output_path)

        # <><><><><><> TAS <><><><><><><>
        final_ds = []
        #the new file name
        file_path = None
        file_path = NewFile(var="tas-anom", model=dsname[mod][0], exp=experiments[ex], number=dsname[mod][2], variant=dsname[mod][1], grid=dsname[mod][3])
        output_path = os.path.join(output_folder, file_path)
        if experiments[ex] == "piControl":
            # make the dataset dictionary of piControl of each model. Used to calculate anomaly
            if not os.path.exists(output_path):
                final_ds = ds[0]
                piControl_ddict[2].append(final_ds)
                final_ds = final_ds.mean('time')
                final_ds.to_netcdf(output_path)
            else:
                temp_ds = xr.open_dataset(output_path, use_cftime=True, decode_times=False)
                final_ds = FormatTime(final_ds)
                temp_ds = temp_ds.mean('time')
                piControl_ddict[2].append(temp_ds)
        else:
            if not os.path.exists(output_path):
                final_ds = ds[0] - piControl_ddict[2][mod]
                final_ds.to_netcdf(output_path)

  var = coder.decode(var, name=name)
  var = coder.decode(var, name=name)
  var = coder.decode(var, name=name)
  var = coder.decode(var, name=name)
  var = coder.decode(var, name=name)
  var = coder.decode(var, name=name)


RuntimeError: NetCDF: HDF error

#### Manually calculate files in case for whatever reason it doesn't work when doing it all at once

In [None]:
output_folder = '/data/cristi/a/kchoo3/FluxesProcessing/pre-processed/raw'
dir_path = '/data/cristi/a/cristi/esm_data/cmip6'

# finding the original location
file_path = None
source_path = []
file_path = NewFile(var="tas", model="CanESM5", exp="piControl", variant="r1i1p1f1")
source_path.append(LocateFile(dir_path, file_path, model="CanESM5", exp="piControl", variant="r1i1p1f1"))
file_path = None
# New file name
file_path = 'tas_Amon_CanESM5_piControl_r1i1p1f1_gn_520101-620012.nc'
output_path = os.path.join(output_folder, file_path)
if not os.path.exists(output_path):
    print(output_path)
    ds = []
    # Concat the file together
    ds = Open_Files(source_path, exclude_files="None", exclude_vars=['lat_bnds', 'lon_bnds'])
    # <-!---- Copying file ----!->
    ds[0].to_netcdf(output_path)
    print("done")
else:
    print("File already existed. Manually remove them")

In [14]:
ds = xr.open_dataset("/data/cristi/a/kchoo3/FluxesProcessing/pre-processed/annual/cre-annual_Amon_CESM2_amip-piForcing_r1i1p1f1_gn_1870-2015.nc", use_cftime=True)
ds

In [11]:
ds = xr.open_dataset("/data/cristi/a/kchoo3/FluxesProcessing/pre-processed/raw/net-cre_Amon_CESM2_amip-piForcing_r1i1p1f1_gn_187001-201512.nc", use_cftime=True)
ds

In [15]:
ds.cre.sel(lat=5, lon=5, method="nearest").isel(time=4).values

array(389.87463, dtype=float32)

In [49]:
file_pattern = []
file_pattern.append("/data/cristi/a/kchoo3/FluxesProcessing/pre-processed/raw/tas_Amon_IPSL-CM6A-LR_abrupt-4xCO2_r1i1p1f1_gr_*.nc")
ds = Open_Files(file_pattern, exclude_vars=['lat_bnds', 'lon_bnds'])
ds

[<xarray.Dataset> Size: 890MB
 Dimensions:      (lat: 143, lon: 144, time: 10800, axis_nbounds: 2)
 Coordinates:
   * lat          (lat) float32 572B -90.0 -88.73 -87.46 ... 87.46 88.73 90.0
   * lon          (lon) float32 576B 0.0 2.5 5.0 7.5 ... 350.0 352.5 355.0 357.5
     height       float64 8B ...
   * time         (time) object 86kB 1850-01-16 12:00:00 ... 2749-12-16 12:00:00
 Dimensions without coordinates: axis_nbounds
 Data variables:
     time_bounds  (time, axis_nbounds) object 173kB ...
     tas          (time, lat, lon) float32 890MB ...
 Attributes: (12/51)
     name:                   /ccc/work/cont003/gencmip6/p86maf/IGCM_OUT/IPSLCM...
     Conventions:            CF-1.7 CMIP-6.2
     creation_date:          2018-05-03T13:17:13Z
     description:            DECK: abrupt-4xCO2
     title:                  IPSL-CM6A-LR model output prepared for CMIP6 / CM...
     activity_id:            CMIP
     ...                     ...
     EXPID:                  abrupt-4xCO2
     

In [50]:
ds = ds[0]

In [51]:
ds = ds.resample(time='YS')

In [77]:
ds = ds.drop_vars(["lat_bnds", "lon_bnds", "time_bnds"])

In [52]:
ds = ds.mean()

In [53]:
ds

In [16]:
ds.to_netcdf("/data/cristi/a/kchoo3/FluxesProcessing/pre-processed/raw/tas_Amon_CanESM5_piControl_r1i1p1f1_gn_5201-6200.nc")