# Create CMIP6 Grid-Area Files

This notebook copies all available grid-area files (`areacello` / `areacella`) of all CMIP6 models to a new output path.\
Additionally, it calculates the grid area with CDO for every distinct sea-ice concentration grid of each model.

## Import modules

In [1]:
import numpy as np
import glob          #  Unix style pathname pattern expansion
import os            #  Operating system
import sys           #  system specific 
import pickle          
import subprocess
import intake
import re
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm # For progress bar
from cdo import Cdo   # Climate Data operator 
cdo = Cdo()           # object to use CDO in python

sys.path.insert(0, '/home/u/u301557/UHH-CMIP6/scripts/functions')
import STOR_CMIP6

print("DONE")

DONE


## Define Variables

In [2]:
# CMIP6 experiments that are searched for input files (order is the search order).
scenarios = [
    'historical',
    'ssp119',
    'ssp126',
    'ssp370',
    'ssp245',
    'ssp585',
    'piControl',
    '1pctCO2',
]

# Dictionary with the subfolder name for each scenario (looked up as subfolders[scenario]).
subfolders = STOR_CMIP6.subfolders

# Root of the CMIP6 model data pool.
basepath = '/pool/data/CMIP6/data/'
# Directory the grid-area files are written to; change to your own outpath.
outpath = '/work/uo1227/u301557/ArcticCarbon/data/gridarea/'

print("SETUP:", f" - {outpath}", sep="\n")

SETUP:
 - /work/uo1227/u301557/ArcticCarbon/data/gridarea/


# Iterative loop to get areacello and areacella

## Functions

### Create Model List

In [3]:
def Model_Search_by_Project(basepath,projects,debug=False):
    """
    Creates a dictionary of model centers with the corresponding models depending on the CMIP6 project(s)

    The directory layout ``<basepath>/<project>/<modelcenter>/<model>`` is
    scanned for every requested project and the results are merged, so each
    model is listed once per modelling center.

    Parameters
    ----------
    basepath : str
        Inpath where CMIP6 data is stored.
    projects : list
        Name of CMIP6 projects as strings to check for participating modelling centers.
        A single project name given as a plain string is accepted as well.
        An empty list yields an empty dictionary.
    debug : boolean
        Controls print statements. The default is False.

    Returns
    -------
    allmodels : dict
        All participating models sorted by modelling centers.
        allmodels = {modelcenter : list of models}

    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(projects, str):
        projects = [projects]

    allmodels = {}

    for index, project in enumerate(projects):
        for center_path in glob.glob(os.path.join(basepath, project, "*")):
            modelcenter = os.path.basename(center_path)

            # Test against everything collected so far (not only the first
            # project): a center that first appears in a later project must
            # not be reset when it shows up again in a subsequent project.
            if modelcenter not in allmodels:
                if debug and index > 0:
                    print(modelcenter)
                allmodels[modelcenter] = []

            known_models = allmodels[modelcenter]
            for model_path in glob.glob(os.path.join(basepath, project, modelcenter, "*")):
                model = os.path.basename(model_path)
                if model not in known_models:
                    if debug and index > 0:
                        print(model)
                    known_models.append(model)

        # Debug: number of models found in the first project alone.
        if debug and index == 0:
            print(sum(len(models) for models in allmodels.values()))

    if debug and len(projects) > 1:
        print(allmodels)

    number = sum(len(models) for models in allmodels.values())

    print("Found", number, "models from", len(allmodels), "differnt modelcenters in", projects)

    return allmodels

### Copy grid-area file

In [7]:
def copy_areacellx(areafiles, model, grid):
    """
    Copy areacello/a files to outpath and set missing values to NaN.
    Includes a special treatment for FGOALS-f3-L and FGOALS-g3.

    The target name is ``<outpath><model>_<cmipvar[grid]>_<grid_type>.nc``,
    where ``grid_type`` is the sixth underscore-separated field of the input
    file name. Existing target files are never overwritten. Failures of the
    CDO call are printed and do not abort the run.

    Relies on the notebook-level names ``outpath``, ``cmipvar`` and ``cdo``.
    The global ``modelcenter`` is only used (if defined) to label error messages.

    Parameters
    ----------
    areafiles : list
        All available areacello/a files.
    model : string
        Model name for file naming.
    grid : string
        Either "ocean" or "atmos" (depending on the areafiles).

    Returns
    -------
    None.

    """

    # Extra CDO operators that are prepended to the input file for the FGOALS models.
    if model in ["FGOALS-f3-L","FGOALS-g3"]:
        special_treatment = "-setctomiss,1.e+35 -invertlat "
    else:
        special_treatment = ""

    grid_list = []   # distinct grid types seen, in order of first appearance

    for file in areafiles:
        grid_type = file.split("/")[-1].split("_")[5].split(".")[0]
        if grid_type not in grid_list:
            grid_list.append(grid_type)
        areafile = outpath+model+'_'+cmipvar[grid]+'_'+grid_type+'.nc'

        if os.path.isfile(areafile):
            continue   # already copied (earlier file or earlier run)

        try:
            # Operator chain: rename longitude/latitude to lon/lat, flag zeros as
            # missing, use NaN as missing value and tag the variable with a comment.
            cdo.copy(input ="-chname,longitude,lon -chname,latitude,lat -setmissval,nan -setctomiss,0 -setattribute,"+cmipvar[grid]+"@comment='provided gridfile ("+grid+" grid)' "+special_treatment+ file, output = areafile)
            if os.path.isfile(areafile):
                print("... copy",cmipvar[grid], "as",areafile)
        except Exception as e:
            print(e)
            # `modelcenter` is not a parameter; it only exists while the driver
            # loop is running. Look it up defensively so that reporting a CDO
            # failure can never raise a NameError and hide the real error.
            center = globals().get("modelcenter", "")
            print("!!! failed to copy: ",cmipvar[grid], center, model)

    # Final report: every grid type that still has no output file.
    for grid_type in grid_list:
        areafile = outpath+model+'_'+cmipvar[grid]+'_'+grid_type+'.nc'
        if not os.path.isfile(areafile):
            print("!!! failed to get", areafile)

### Calculate grid-area with cdo

In [6]:
def calc_areacellx_cdo(sicfiles, model, grid):

    """
    Create grid-area file using CDO's "gridarea".

    For every input file the variable named by the notebook-level ``var`` is
    selected, its grid area is computed and the result is stored as variable
    ``grid_var`` in ``<outpath><model>_<grid_var>_<grid_type>_calc.nc``.
    ``grid_type`` is the sixth underscore-separated field of the input file
    name. Existing output files are never overwritten, so only the first file
    of each grid type triggers a calculation. Failures of the CDO call are
    printed and do not abort the run.

    Relies on the notebook-level names ``outpath``, ``grid_var``, ``var`` and
    ``cdo``. The global ``modelcenter`` is only used (if defined) to label
    error messages.

    Parameters
    ----------
    sicfiles : list
        All available files to take the grid from (originally sea-ice
        concentration files; each file must contain the variable ``var``).
    model : string
        Model name for file naming.
    grid : string
        Either "ocean" or "atmos" (depending on the sicfiles).
        Currently not used inside the function.

    Returns
    -------
    None.

    """

    grid_list = []   # distinct grid types seen, in order of first appearance

    for file in sicfiles:
        grid_type = file.split("/")[-1].split("_")[5].split(".")[0]
        if grid_type not in grid_list:
            grid_list.append(grid_type)
        areafile_calc = outpath+model+'_'+grid_var+'_'+grid_type+'_calc.nc'

        if os.path.isfile(areafile_calc):
            continue   # already calculated (earlier file or earlier run)

        try:
            # Operator chain: select `var`, compute its grid area (CDO names the
            # result "cell_area"), tag it with a comment, flag zeros as missing,
            # use NaN as missing value, rename longitude/latitude to lon/lat and
            # finally rename "cell_area" to `grid_var`.
            cdo.chname("cell_area", grid_var, input =f"-chname,longitude,lon -chname,latitude,lat -setmissval,nan -setctomiss,0 -setattribute,cell_area@comment='gridfile calculated (via cdo gridarea)' -gridarea -selname,{var} "+file, output = areafile_calc)
            if os.path.isfile(areafile_calc):
                print("... calculate",grid_var, "with cdo as",areafile_calc)
        except Exception as e:
            print(e)
            # `modelcenter` is not a parameter; it only exists while the driver
            # loop is running. Look it up defensively so that reporting a CDO
            # failure can never raise a NameError and hide the real error.
            center = globals().get("modelcenter", "")
            print("!!! failed to calculate with cdo: ",grid_var, center, model)

    # Final report: every grid type that still has no output file.
    for grid_type in grid_list:
        areafile_calc = outpath+model+'_'+grid_var+'_'+grid_type+'_calc.nc'
        if not os.path.isfile(areafile_calc):
            print("!!! cdo failed to get", areafile_calc)

## Loop

In [8]:
# Lookup tables: CMIP6 variable names per grid domain.
grids       = ["ocean", "atmos"]
cmipvar     = {"ocean":"areacello", "atmos":"areacella"}
siconc_grid = {"ocean":"siconc", "atmos":"siconca"}

# Read as globals by calc_areacellx_cdo:
var = "nbp"                # variable whose files provide the grid for "cdo gridarea"
grid_var = "areacella"     # name of the grid-area variable in the calculated files

allmodels   = Model_Search_by_Project(basepath,["CMIP","ScenarioMIP"])

# Only the "atmos" grid is processed here (grids[1:]).
for grid in grids[1:]:
    print()
    print("##############################                  " + grid +"                  ##############################")
    # NOTE: the loop variable must stay named `modelcenter`; the helper functions
    # read this global when they report a failed CDO call.
    for modelcenter in tqdm(list(allmodels), leave=True):
        for model in allmodels[modelcenter]:
            print(modelcenter, model)
            availability_area = False
            availability_sic  = False

            for scenario in scenarios:
                # Copying the provided areacell files is currently disabled:
                #areafiles     = glob.glob(basepath+subfolders[scenario]+'/'+modelcenter+'/'+model+'/'+scenario+'/*/*/'+cmipvar[grid]+'/*/*/*.nc')
                sicfiles      = glob.glob(basepath+subfolders[scenario]+'/'+modelcenter+'/'+model+'/'+scenario+f'/*/*/{var}/*/*/*.nc')

                #if len(areafiles)>0: #----------------------------------------------------- copy areacell file
                #    copy_areacellx(areafiles, model, grid)
                #    availability_area = True
                if len(sicfiles)>0:  #----------------------------------------------------- calc areacell from files of `var`
                    calc_areacellx_cdo(sicfiles, model, grid)
                    availability_sic  = True

            #if not availability_area:
            #    print(". No areacell files")
            if not availability_sic:
                # Name the variable that was actually searched for (`var`);
                # the former hard-coded "siconc" was wrong for any other variable.
                print(f". No {var} files")
print("DONE")

Found 73 models from 33 differnt modelcenters in ['CMIP', 'ScenarioMIP']

##############################                  atmos                  ##############################


  0%|          | 0/33 [00:00<?, ?it/s]

MIROC MIROC-ES2H
. No siconc files
MIROC MIROC6
. No siconc files
MIROC MIROC-ES2L
... calculate areacella with cdo as /work/uo1227/u301557/ArcticCarbon/data/gridarea/MIROC-ES2L_areacella_gn_calc.nc


  3%|▎         | 1/33 [00:02<01:09,  2.17s/it]

CMCC CMCC-ESM2
... calculate areacella with cdo as /work/uo1227/u301557/ArcticCarbon/data/gridarea/CMCC-ESM2_areacella_gn_calc.nc
CMCC CMCC-CM2-SR5


  6%|▌         | 2/33 [00:09<02:42,  5.25s/it]

... calculate areacella with cdo as /work/uo1227/u301557/ArcticCarbon/data/gridarea/CMCC-CM2-SR5_areacella_gn_calc.nc
CMCC CMCC-CM2-HR4
. No siconc files
KIOST KIOST-ESM
. No siconc files
CCCR-IITM IITM-ESM
. No siconc files
AS-RCEC TaiESM1


 15%|█▌        | 5/33 [00:14<01:12,  2.57s/it]

... calculate areacella with cdo as /work/uo1227/u301557/ArcticCarbon/data/gridarea/TaiESM1_areacella_gn_calc.nc
NCAR CESM2-FV2
NCAR CESM2-WACCM
NCAR CESM2-WACCM-FV2
NCAR CESM2


 21%|██        | 7/33 [00:14<00:40,  1.55s/it]

HAMMOZ-Consortium MPI-ESM-1-2-HAM
UA MCM-UA-1-0
. No siconc files
MRI MRI-ESM2-0


 27%|██▋       | 9/33 [00:15<00:22,  1.08it/s]

MPI-M ICON-ESM-LR
. No siconc files
MPI-M MPI-ESM1-2-LR


 30%|███       | 10/33 [00:19<00:39,  1.72s/it]

MPI-M MPI-ESM1-2-HR
. No siconc files
CSIRO ACCESS-ESM1-5


 33%|███▎      | 11/33 [00:19<00:30,  1.40s/it]

CNRM-CERFACS CNRM-CM6-1
. No siconc files
CNRM-CERFACS CNRM-CM6-1-HR
. No siconc files
CNRM-CERFACS CNRM-ESM2-1


 36%|███▋      | 12/33 [00:20<00:26,  1.27s/it]

NUIST NESM3
. No siconc files
SNU SAM0-UNICON
AWI AWI-CM-1-1-MR


 45%|████▌     | 15/33 [00:20<00:11,  1.54it/s]

. No siconc files
AWI AWI-ESM-1-REcoM
. No siconc files
AWI AWI-ESM-1-1-LR
. No siconc files
MOHC HadGEM3-GC31-LL
. No siconc files
MOHC HadGEM3-GC31-MM
. No siconc files
MOHC UKESM1-0-LL
... calculate areacella with cdo as /work/uo1227/u301557/ArcticCarbon/data/gridarea/UKESM1-0-LL_areacella_gn_calc.nc


 48%|████▊     | 16/33 [00:22<00:14,  1.21it/s]

EC-Earth-Consortium EC-Earth3-AerChem
. No siconc files
EC-Earth-Consortium EC-Earth3-Veg
EC-Earth-Consortium EC-Earth3-LR
. No siconc files
EC-Earth-Consortium EC-Earth3


 52%|█████▏    | 17/33 [00:23<00:13,  1.15it/s]

. No siconc files
EC-Earth-Consortium EC-Earth3-Veg-LR
EC-Earth-Consortium EC-Earth3-CC
EC-Earth-Consortium EC-Earth3P-VHR
. No siconc files
CCCma CanESM5


 55%|█████▍    | 18/33 [00:26<00:19,  1.28s/it]

CCCma CanESM5-CanOE
BCC BCC-ESM1
. No siconc files
BCC BCC-CSM2-MR


 58%|█████▊    | 19/33 [00:26<00:13,  1.02it/s]

. No siconc files
NOAA-GFDL GFDL-AM4
. No siconc files
NOAA-GFDL GFDL-ESM4


 61%|██████    | 20/33 [00:27<00:13,  1.07s/it]

... calculate areacella with cdo as /work/uo1227/u301557/ArcticCarbon/data/gridarea/GFDL-ESM4_areacella_gr1_calc.nc
NOAA-GFDL GFDL-CM4
. No siconc files
CAMS CAMS-CSM1-0
. No siconc files
INM INM-CM5-0
... calculate areacella with cdo as /work/uo1227/u301557/ArcticCarbon/data/gridarea/INM-CM5-0_areacella_gr1_calc.nc
INM INM-CM4-8


 67%|██████▋   | 22/33 [00:29<00:10,  1.06it/s]

... calculate areacella with cdo as /work/uo1227/u301557/ArcticCarbon/data/gridarea/INM-CM4-8_areacella_gr1_calc.nc
NIMS-KMA UKESM1-0-LL
NIMS-KMA KACE-1-0-G
. No siconc files
IPSL IPSL-CM5A2-INCA
... calculate areacella with cdo as /work/uo1227/u301557/ArcticCarbon/data/gridarea/IPSL-CM5A2-INCA_areacella_gr_calc.nc
IPSL IPSL-CM6A-LR-INCA
. No siconc files
IPSL IPSL-CM6A-LR
... calculate areacella with cdo as /work/uo1227/u301557/ArcticCarbon/data/gridarea/IPSL-CM6A-LR_areacella_gr_calc.nc


 73%|███████▎  | 24/33 [00:32<00:11,  1.31s/it]

FIO-QLNM FIO-ESM-2-0
. No siconc files
CAS CAS-ESM2-0
. No siconc files
CAS FGOALS-g3
. No siconc files
CAS FGOALS-f3-L


 79%|███████▉  | 26/33 [00:33<00:06,  1.13it/s]

. No siconc files
THU CIESM
. No siconc files
NCC NorESM1-F
. No siconc files
NCC NorCPM1
... calculate areacella with cdo as /work/uo1227/u301557/ArcticCarbon/data/gridarea/NorCPM1_areacella_gn_calc.nc
NCC NorESM2-LM
... calculate areacella with cdo as /work/uo1227/u301557/ArcticCarbon/data/gridarea/NorESM2-LM_areacella_gn_calc.nc
NCC NorESM2-MM


 85%|████████▍ | 28/33 [00:35<00:05,  1.01s/it]

... calculate areacella with cdo as /work/uo1227/u301557/ArcticCarbon/data/gridarea/NorESM2-MM_areacella_gn_calc.nc
NASA-GISS GISS-E2-1-H
NASA-GISS GISS-E2-1-G-CC
NASA-GISS GISS-E2-2-G
. No siconc files
NASA-GISS GISS-E2-1-G


 88%|████████▊ | 29/33 [00:36<00:03,  1.06it/s]

CSIRO-ARCCSS ACCESS-CM2
. No siconc files
E3SM-Project E3SM-1-1-ECA
... calculate areacella with cdo as /work/uo1227/u301557/ArcticCarbon/data/gridarea/E3SM-1-1-ECA_areacella_gr_calc.nc
E3SM-Project E3SM-1-0
. No siconc files
E3SM-Project E3SM-1-1


 94%|█████████▍| 31/33 [00:37<00:01,  1.29it/s]

... calculate areacella with cdo as /work/uo1227/u301557/ArcticCarbon/data/gridarea/E3SM-1-1_areacella_gr_calc.nc
DWD MPI-ESM1-2-HR
. No siconc files
DKRZ MPI-ESM1-2-LR


100%|██████████| 33/33 [00:37<00:00,  1.14s/it]

DKRZ MPI-ESM1-2-HR
. No siconc files
DONE





# Control

## Check if gridareas are missing compared to last time

In [7]:
# File names (without directory) in the freshly written output path ...
new = [path.rsplit("/", 1)[-1] for path in glob.glob(outpath + "*")]
# ... and in the reference directory of the previous run.
old = [path.rsplit("/", 1)[-1] for path in glob.glob('/work/uo1227/DATA/modelling/CMIP6/gridareas/' + "*")]

# Difference in the number of files (new minus old).
print(len(new) - len(old))

# Files that existed before but are missing now.
for o in old:
    if o in new:
        continue
    print(o)

# Files that were not present before.
for n in new:
    if n in old:
        continue
    print(n)

2
CAS-ESM2-0_areacello_gn.nc
ICON-ESM-LR_areacella_gn.nc


## Create modellist

In [9]:
# Summary containers, filled from the file names found in `outpath`.
grid_diffs = []    # distinct grid labels, in order of first appearance
modellist  = []    # distinct model names

grid_dicto = {}    # model -> grid label of an areacello file
grid_dicta = {}    # model -> grid label of an areacella file

files = glob.glob(outpath+"*")
for file in files:
    # File names follow <model>_<domain>_<grid>[_calc].nc
    name   = file.rsplit("/", 1)[-1]
    fields = name.split("_")
    model  = fields[0]
    domain = fields[1]
    grid   = fields[2].split(".")[0]

    if grid not in grid_diffs:
        grid_diffs.append(grid)

    if domain == "areacello":
        grid_dicto[model] = grid
    elif domain == "areacella":
        grid_dicta[model] = grid

    if model not in modellist:
        modellist.append(model)


modellist.sort()
print(modellist)
print(len(modellist))
print("Available models:", len(modellist))
print(f"   {len(grid_dicto)} available on ocean grid")
print(f"   {len(grid_dicta)} available on atm grid")
print("Available different grids:", grid_diffs)


['ACCESS-CM2', 'ACCESS-ESM1-5', 'AWI-CM-1-1-MR', 'AWI-ESM-1-1-LR', 'BCC-CSM2-MR', 'BCC-ESM1', 'CAMS-CSM1-0', 'CAS-ESM2-0', 'CESM2', 'CESM2-FV2', 'CESM2-WACCM', 'CESM2-WACCM-FV2', 'CIESM', 'CMCC-CM2-HR4', 'CMCC-CM2-SR5', 'CMCC-ESM2', 'CNRM-CM6-1', 'CNRM-CM6-1-HR', 'CNRM-ESM2-1', 'CanESM5', 'CanESM5-CanOE', 'E3SM-1-0', 'E3SM-1-1', 'E3SM-1-1-ECA', 'EC-Earth3', 'EC-Earth3-AerChem', 'EC-Earth3-CC', 'EC-Earth3-LR', 'EC-Earth3-Veg', 'EC-Earth3-Veg-LR', 'FGOALS-f3-L', 'FGOALS-g3', 'FIO-ESM-2-0', 'GFDL-CM4', 'GFDL-ESM4', 'GISS-E2-1-G', 'GISS-E2-1-G-CC', 'GISS-E2-1-H', 'GISS-E2-2-G', 'HadGEM3-GC31-LL', 'HadGEM3-GC31-MM', 'ICON-ESM-LR', 'INM-CM4-8', 'INM-CM5-0', 'IPSL-CM5A2-INCA', 'IPSL-CM6A-LR', 'IPSL-CM6A-LR-INCA', 'KIOST-ESM', 'MCM-UA-1-0', 'MIROC-ES2L', 'MIROC6', 'MPI-ESM-1-2-HAM', 'MPI-ESM1-2-HR', 'MPI-ESM1-2-LR', 'MRI-ESM2-0', 'NESM3', 'NorCPM1', 'NorESM1-F', 'NorESM2-LM', 'NorESM2-MM', 'SAM0-UNICON', 'TaiESM1', 'UKESM1-0-LL']
63
Available models: 63
   61 available on ocean grid
   53 avai