# Create CMIP6 Grid-Area Files

This notebook extracts all available grid-area files from all CMIP6 models and stores them in a new output path.\
Additionally, it calculates the grid-area for all different sea-ice concentration (SIC) grids for each model.

Although it was originally written for SIC, it has been slightly modified to work with any variable from the CMIP6 catalog. Just change `var` and `grid_var` accordingly.

## Import modules

In [1]:
import numpy as np
import glob          #  Unix style pathname pattern expansion
import os            #  Operating system
import sys           #  system specific 
import pickle          
import subprocess
import intake
import re
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm # For progress bar
from cdo import Cdo   # Climate Data operator 
# Shared CDO handle; the grid-area functions in this notebook call cdo.copy / cdo.chname on it.
cdo = Cdo()

print("DONE")

DONE


## Define Variables

In [2]:
# --- Paths ---------------------------------------------------------------
# Root directory of the CMIP6 archive that is searched for model output.
basepath = "/pool/data/CMIP6/data/"
# Directory that receives the grid-area files (change to your own outpath).
outpath = "/work/uo1227/u301557/ArcticCarbon/data/gridarea/"

# --- Variables -----------------------------------------------------------
# Variable whose files are used to compute the cell area with CDO.
var = "nbp"
# "areacella" for land and atmospheric variables, "areacello" for ocean variables.
grid_var = "areacella"

print("SETUP:")
print(f" - {outpath}")

# Experiments that are scanned for grid-area and variable files.
scenarios = [
    "historical",
    "ssp119",
    "ssp126",
    "ssp370",
    "ssp245",
    "ssp585",
    "piControl",
    "1pctCO2",
]

# Archive sub-folder (directly below `basepath`) in which each experiment is stored.
subfolders = {
    **dict.fromkeys(("historical", "piControl", "1pctCO2"), "CMIP"),
    **dict.fromkeys(
        ("ssp245", "ssp126", "ssp585", "ssp119", "ssp370", "ssp460"),
        "ScenarioMIP",
    ),
}

SETUP:
 - /work/uo1227/u301557/ArcticCarbon/data/gridarea/


# Iterative loop to get areacello and areacella

## Functions

### Create Model List

In [41]:
def Model_Search_by_Project(basepath: str, projects: list, debug=False) -> dict:
    """
    Creates a dictionary of model centers with the corresponding models depending on the CMIP6 project(s)

    The directory layout ``<basepath>/<project>/<modelcenter>/<model>`` is scanned.
    Only directories are considered, so stray files in the archive are not
    reported as model centers or models. Model centers and the models of each
    center are returned in sorted order, which makes the result reproducible
    (``glob`` itself returns entries in arbitrary order).

    Parameters
    ----------
    basepath : str
        Inpath where CMIP6 data is stored (with or without trailing slash).
    projects : list
        Name of CMIP6 projects as strings to check for participating modelling centers.
    debug : boolean
        If True, print every model name the first time it is found.
        The default is False.

    Returns
    -------
    allmodels : dict
        All participating models sorted by modelling centers.
        allmodels = {modelcenter : sorted list of models}
        A model center without any model directory maps to an empty list.

    """

    found = {}  # modelcenter -> set of model names collected over all projects

    for project in projects:
        # os.path.join copes with a basepath given with or without trailing slash
        center_dirs = glob.glob(os.path.join(basepath, project, "*"))
        for center_dir in sorted(center_dirs):
            if not os.path.isdir(center_dir):
                continue  # ignore stray files next to the model-center folders
            models = found.setdefault(os.path.basename(center_dir), set())

            for model_dir in sorted(glob.glob(os.path.join(center_dir, "*"))):
                if not os.path.isdir(model_dir):
                    continue
                model = os.path.basename(model_dir)
                if model not in models:
                    if debug:
                        print(model)  # Debugging output for new models
                    models.add(model)

    # Freeze the result in a deterministic order
    allmodels = {center: sorted(found[center]) for center in sorted(found)}

    # Summary statistics
    number = sum(len(models) for models in allmodels.values())
    print("Found", number, "models from", len(allmodels), "differnt modelcenters in", projects)

    return allmodels

### Copy grid-area file

In [7]:
def copy_areacellx(areafiles, model, grid_var):
    """
    Copy areacello/a files to outpath and set missing values to NaN.
    Includes a special treatment for FGOALS-f3-L and FGOALS-g3.

    One output file ``<outpath><model>_<grid_var>_<grid_type>.nc`` is written
    per grid type; existing output files are left untouched. The grid type is
    taken from the sixth "_"-separated field of the input file name. Input
    files whose name has fewer fields are reported and skipped.

    The function reads the module-level names ``outpath`` (target directory,
    including trailing slash) and ``cdo`` (CDO handle).

    Parameters
    ----------
    areafiles : list
        All available areacello/a files.
    model : string
        Model name for file naming.
    grid_var : string
        Either "areacello" or "areacella" (depending on the areafiles).

    Returns
    -------
    None.

    """

    # FGOALS-f3-L and FGOALS-g3 get two extra operators.
    # NOTE(review): presumably these files use 1e35 as fill value and store
    # latitude in reversed order -- confirm against the data.
    if model in ("FGOALS-f3-L", "FGOALS-g3"):
        special_treatment = "-setctomiss,1.e+35 -invertlat "
    else:
        special_treatment = ""

    # CDO operator chain (the input file name is appended per file):
    # - renames coordinate variables (longitude -> lon, latitude -> lat)
    # - turns zeros into missing values and uses NaN as missing value
    # - tags the variable as a provided grid file
    operators = (
        "-chname,longitude,lon -chname,latitude,lat -setmissval,nan -setctomiss,0 "
        f"-setattribute,{grid_var}@comment='provided gridfile' {special_treatment}"
    )

    expected = []  # output files that should exist once the loop is done

    for file in areafiles:
        name_parts = os.path.basename(file).split("_")
        if len(name_parts) < 6:
            # Without this guard one unexpected file name would abort the whole run.
            print("!!! unexpected file name, skipped:", file)
            continue
        grid_type = name_parts[5].split(".")[0]

        areafile = f"{outpath}{model}_{grid_var}_{grid_type}.nc"
        if areafile not in expected:
            expected.append(areafile)

        if os.path.isfile(areafile):
            continue  # already copied (earlier run or another file on the same grid)

        try:
            cdo.copy(input=operators + file, output=areafile)
        except Exception as e:
            # Report and continue with the next file. Only names that are
            # available inside this function are used in the message.
            print(e)
            print("!!! failed to copy: ", grid_var, model, file)
            continue

        # Confirm success by checking if the output file was created
        if os.path.isfile(areafile):
            print("... copy", grid_var, "as", areafile)

    # Verify that every grid type seen in the input ended up with an output file
    for areafile in expected:
        if not os.path.isfile(areafile):
            print("!!! failed to get", areafile)

### Calculate grid-area with cdo

In [6]:
def calc_areacellx_cdo(sicfiles, model, grid_var):

    """
    Create grid-area file using CDO's "gridarea".

    One output file ``<outpath><model>_<grid_var>_<grid_type>_calc.nc`` is
    written per grid type; existing output files are left untouched. The grid
    type is taken from the sixth "_"-separated field of the input file name.
    Input files whose name has fewer fields are reported and skipped.

    The function reads the module-level names ``outpath`` (target directory,
    including trailing slash), ``var`` (variable selected from the input file
    before the area is computed) and ``cdo`` (CDO handle).

    Parameters
    ----------
    sicfiles : list
        All available files of the variable ``var`` (originally sea-ice
        concentration files, hence the name).
    model : string
        Model name for file naming.
    grid_var : string
        Either "areacello" or "areacella" (depending on the sicfiles).

    Returns
    -------
    None.

    """

    # CDO operator chain (the input file name is appended per file):
    # - selects `var` and calculates the grid area from its grid
    # - tags the result as calculated grid file
    # - turns zeros into missing values and uses NaN as missing value
    # - renames coordinate variables (longitude -> lon, latitude -> lat)
    operators = (
        "-chname,longitude,lon -chname,latitude,lat -setmissval,nan -setctomiss,0 "
        "-setattribute,cell_area@comment='gridfile calculated (via cdo gridarea)' "
        f"-gridarea -selname,{var} "
    )

    expected = []  # output files that should exist once the loop is done

    for file in sicfiles:
        name_parts = os.path.basename(file).split("_")
        if len(name_parts) < 6:
            # Without this guard one unexpected file name would abort the whole run.
            print("!!! unexpected file name, skipped:", file)
            continue
        grid_type = name_parts[5].split(".")[0]

        areafile_calc = outpath + model + '_' + grid_var + '_' + grid_type + '_calc.nc'
        if areafile_calc not in expected:
            expected.append(areafile_calc)

        if os.path.isfile(areafile_calc):
            continue  # already calculated (earlier run or another file on the same grid)

        try:
            # The outermost operator renames CDO's "cell_area" to grid_var
            cdo.chname("cell_area", grid_var, input=operators + file, output=areafile_calc)
        except Exception as e:
            # Report and continue with the next file. Only names that are
            # available inside this function are used in the message.
            print(e)
            print("!!! failed to calculate with cdo: ", grid_var, model, file)
            continue

        if os.path.isfile(areafile_calc):
            print("... calculate", grid_var, "with cdo as", areafile_calc)

    # Verify that every grid type seen in the input ended up with an output file
    for areafile_calc in expected:
        if not os.path.isfile(areafile_calc):
            print("!!! cdo failed to get", areafile_calc)

## Loop

In [42]:
# Collect every model that took part in CMIP or ScenarioMIP
allmodels = Model_Search_by_Project(basepath, ["CMIP", "ScenarioMIP"])

# `modelcenter` and `model` stay module-level names; other cells may refer to them.
for modelcenter in tqdm(list(allmodels), leave=True):
    for model in allmodels[modelcenter]:
        print(modelcenter, model)
        availability_area = False
        availability_sic  = False

        for scenario in scenarios:
            # <basepath>/<subfolder>/<modelcenter>/<model>/<scenario>/*/*/<variable>/*/*/*.nc
            # os.path.join keeps the patterns valid with or without a trailing slash on basepath
            scenario_dir = os.path.join(basepath, subfolders[scenario], modelcenter, model, scenario)
            areafiles = glob.glob(os.path.join(scenario_dir, "*", "*", grid_var, "*", "*", "*.nc"))
            sicfiles  = glob.glob(os.path.join(scenario_dir, "*", "*", var, "*", "*", "*.nc"))

            if len(areafiles) > 0:  # copy the grid-area file provided by the model
                copy_areacellx(areafiles, model, grid_var)
                availability_area = True
            if len(sicfiles) > 0:   # calculate the grid area from the files of `var`
                calc_areacellx_cdo(sicfiles, model, grid_var)
                availability_sic = True

        # Name the variables that were actually searched for (the notebook is
        # no longer restricted to sea-ice concentration).
        if not availability_area:
            print(f". No {grid_var} files")
        if not availability_sic:
            print(f". No {var} files")
print("DONE")

Found 74 models from 33 differnt modelcenters in ['CMIP', 'ScenarioMIP']
DONE


# Control

## Check if gridareas are missing compared to last time

In [7]:
# File names in the current output directory and in the reference directory
new = [os.path.basename(path) for path in glob.glob(outpath + "*")]
old = [os.path.basename(path) for path in glob.glob('/work/uo1227/DATA/modelling/CMIP6/gridareas/' + "*")]

# Difference in the number of files (positive: more files than in the reference)
print(len(new) - len(old))

new_lookup = set(new)
old_lookup = set(old)

# Files that exist in the reference directory but not in the new one ...
for missing in old:
    if missing in new_lookup:
        continue
    print(missing)

# ... followed by files that exist only in the new directory
for added in new:
    if added in old_lookup:
        continue
    print(added)

2
CAS-ESM2-0_areacello_gn.nc
ICON-ESM-LR_areacella_gn.nc


## Create modellist

In [9]:
grid_diffs, modellist = [], []   # distinct grid labels / distinct model names
grid_dicto, grid_dicta = {}, {}  # model -> grid label for areacello / areacella files

files = glob.glob(outpath + "*")
for file in files:
    # File names are read as <model>_<domain>_<grid>[...].nc
    name = file.split("/")[-1]
    fields = name.split("_")
    model = fields[0]
    domain = fields[1]
    grid = fields[2].split(".")[0]

    if model not in modellist:
        modellist.append(model)
    if grid not in grid_diffs:
        grid_diffs.append(grid)

    # Remember the grid per model, separately for ocean and atmosphere area files
    target = {"areacello": grid_dicto, "areacella": grid_dicta}.get(domain)
    if target is not None:
        target[model] = grid

modellist.sort()
print(modellist)
print(len(modellist))
print("Available models:", len(modellist))
print(f"   {len(grid_dicto)} available on ocean grid")
print(f"   {len(grid_dicta)} available on atm grid")
print("Available different grids:", grid_diffs)


['ACCESS-CM2', 'ACCESS-ESM1-5', 'AWI-CM-1-1-MR', 'AWI-ESM-1-1-LR', 'BCC-CSM2-MR', 'BCC-ESM1', 'CAMS-CSM1-0', 'CAS-ESM2-0', 'CESM2', 'CESM2-FV2', 'CESM2-WACCM', 'CESM2-WACCM-FV2', 'CIESM', 'CMCC-CM2-HR4', 'CMCC-CM2-SR5', 'CMCC-ESM2', 'CNRM-CM6-1', 'CNRM-CM6-1-HR', 'CNRM-ESM2-1', 'CanESM5', 'CanESM5-CanOE', 'E3SM-1-0', 'E3SM-1-1', 'E3SM-1-1-ECA', 'EC-Earth3', 'EC-Earth3-AerChem', 'EC-Earth3-CC', 'EC-Earth3-LR', 'EC-Earth3-Veg', 'EC-Earth3-Veg-LR', 'FGOALS-f3-L', 'FGOALS-g3', 'FIO-ESM-2-0', 'GFDL-CM4', 'GFDL-ESM4', 'GISS-E2-1-G', 'GISS-E2-1-G-CC', 'GISS-E2-1-H', 'GISS-E2-2-G', 'HadGEM3-GC31-LL', 'HadGEM3-GC31-MM', 'ICON-ESM-LR', 'INM-CM4-8', 'INM-CM5-0', 'IPSL-CM5A2-INCA', 'IPSL-CM6A-LR', 'IPSL-CM6A-LR-INCA', 'KIOST-ESM', 'MCM-UA-1-0', 'MIROC-ES2L', 'MIROC6', 'MPI-ESM-1-2-HAM', 'MPI-ESM1-2-HR', 'MPI-ESM1-2-LR', 'MRI-ESM2-0', 'NESM3', 'NorCPM1', 'NorESM1-F', 'NorESM2-LM', 'NorESM2-MM', 'SAM0-UNICON', 'TaiESM1', 'UKESM1-0-LL']
63
Available models: 63
   61 available on ocean grid
   53 avai