# Combine base variables 

This notebook provides code to calculate a variable as the sum or difference of two other variables in case the model does not provide that variable itself.\
E.g. gpp can also be calculated as the sum of npp and ra.

It requires the preprocessed data produced by the `AC-loop.ipynb` notebook.

In [8]:
from cdo import Cdo
from tqdm import tqdm 
import logging
import glob
import os

# Load own functions
import functions.custom_logger_functions as lgfct

# Shared Cdo instance; simple_cdo_operation calls cdo.copy() on it to run the CDO operator chains
cdo = Cdo()

In [2]:
# --- Logging ---------------------------------------------------------------
# Level name handed to the project's terminal-logger builder
logging_level = "warning"
logger = lgfct.build_terminal_logger(logging_level, logger_name="processing")

# --- Data selection --------------------------------------------------------
# Dimension tag of the data to work on: "1D" or "2D"
dimensions = "1D"

# Base directory of the data for the selected dimension (test outpath)
outpath = f"/work/uo1227/u301557/ArcticCarbon/data/{dimensions}/"

# Scenario identifiers as they appear in the file names
scenarios = [
    "historical",
    "ssp126",
    "ssp245",
    "ssp370",
    "ssp585",
]

In [3]:
def modellist_from_files(files, split_chr="_", split_num=-4):
    """
    Collects the distinct model names encoded in a list of file names.

    Every file name is split at `split_chr`; the segment at position `split_num` is taken as the model name.

    Parameters:
    - files (list of str): File names (or paths) to evaluate.
    - split_chr (str, optional): Separator between the file name segments. Default is "_".
    - split_num (int, optional): Position of the model segment after splitting. Default is -4 (fourth-last segment).

    Returns:
    - set: The unique model names found in `files` (empty if `files` is empty).

    Example:
        >>> files = ["ra_masked_TaiESM1_r1i1p1f1_ssp585_2D.nc", "ra_masked_UKESM1-0-LL_r1i1p1f2_historical_2D.nc", "ra_masked_UKESM1-0-LL_r1i1p1f2_ssp126_2D.nc"]
        >>> sorted(modellist_from_files(files))
        ['TaiESM1', 'UKESM1-0-LL']

    """

    # A set comprehension removes duplicates while extracting the segment
    return {name.split(split_chr)[split_num] for name in files}

In [4]:
def file_existence(filepath: str, logger: logging.Logger):
    """
    Reports through the logger whether a file is present on disk.

    A debug message is emitted if the file exists, an error message if it does not.
    Only the last "/"-separated part of the path appears in the message.

    Parameters:
    - filepath (str): Path of the file to look for.
    - logger (logging.Logger): Logger that receives the message.

    Returns:
        None

    """

    # Keep only the part after the last "/" for the log message
    name = filepath.rsplit("/", 1)[-1]

    if not os.path.isfile(filepath):
        logger.error(f"File '{name}' does not exist.")
        return

    logger.debug(f"File '{name}' exists.")

In [5]:
def simple_cdo_operation(var1, var2, result, operation, outpath, regions=("masked",), scenarios=("historical",)):
    """
    Combines two variables into a new variable with CDO, one file pair at a time.

    For every region and scenario, the models that have files for both input variables are determined.
    For each `var1` file of such a model, the `var2` file of the same member is looked up, the CDO
    operation `var1 <operation> var2` is applied, the variable is renamed from `var1` to `result` and the
    output is written to `<outpath><result>/`. Existing output files are never overwritten. If no
    `var2` file exists for the member of a `var1` file, that member is skipped with a warning.

    File names are built as `<var>_<region>_<model>_<member>_<scenario>_<dimensions>.nc`; model and member
    are read from the fourth-last and third-last "_"-separated segment of the `var1` file path.

    The function uses the module-level names `dimensions`, `logger` and `cdo`.

    Parameters:
    - var1 (str): The name of the first variable.
    - var2 (str): The name of the second variable.
    - result (str): The name of the result variable to be generated.
    - operation (str): The operation to perform, either "add" or "sub".
    - outpath (str): The base directory (with trailing "/") where input files are located and output files will be saved.
    - regions (iterable of str, optional): Region identifiers to process. Default is ("masked",).
    - scenarios (iterable of str, optional): Scenario identifiers to process. Default is ("historical",).

    Returns:
        None. An unsupported `operation` is logged as an error and nothing is processed.

    """
    if operation not in ("add", "sub"):
        logger.error(f"Operation {operation} not defined. Has to be either add or sub")
        return

    for region in regions:
        for scenario in scenarios:

            # Find models that have files for both variables
            files1 = sorted(glob.glob(outpath + f"{var1}/{var1}*{region}*{scenario}_{dimensions}.nc"))
            files2 = sorted(glob.glob(outpath + f"{var2}/{var2}*{region}*{scenario}_{dimensions}.nc"))
            # Sorted so that processing order and log output are reproducible
            modellist = sorted(modellist_from_files(files1) & modellist_from_files(files2))
            logger.info(modellist)

            # Loop through those models
            for model in modellist:
                logger.info(model)

                # Select the files of this model by comparing the model segment exactly.
                # A wildcard pattern like "*{model}_*" would also pick up models whose
                # name merely ends with the same characters.
                model_files1 = [path for path in files1 if path.split("_")[-4] == model]

                for file1 in model_files1:
                    member = file1.split("_")[-3]
                    logger.debug(file1)

                    outputfile = outpath + f"{result}/{result}_{region}_{model}_{member}_{scenario}_{dimensions}.nc"
                    file2 = outpath + f"{var2}/{var2}_{region}_{model}_{member}_{scenario}_{dimensions}.nc"

                    # Never overwrite an existing result
                    if os.path.isfile(outputfile):
                        logger.debug("--- File already exists")
                        continue

                    # var2 has to be available for the same member as var1
                    if not os.path.isfile(file2):
                        logger.warning(f"Members don't match for: {var1},{var2} {model} {member} {scenario}")
                        # List the var2 files that do exist for this model to ease debugging
                        model_files2 = [path for path in files2 if path.split("_")[-4] == model]
                        logger.debug("   " + str(model_files2))
                        continue

                    # Make sure the target directory of a new result variable exists
                    os.makedirs(os.path.dirname(outputfile), exist_ok=True)

                    # Apply the operation first, then rename the resulting variable
                    cdo.copy(input=f"-chname,{var1},{result} -{operation}  {file1} {file2}", output=outputfile)
                    file_existence(outputfile, logger)

In [None]:
# All calls share the scenario list defined in the configuration cell (`scenarios`),
# so the set of scenarios only has to be maintained in one place.
# Output files that already exist are skipped by simple_cdo_operation.

# calculate GPP (gpp = npp + ra)
simple_cdo_operation(var1="npp", var2="ra", result="gpp", operation="add",
                     outpath=outpath, regions=["masked"], scenarios=scenarios)

# calculate NEP (nep = npp - rh)
simple_cdo_operation(var1="npp", var2="rh", result="nep", operation="sub",
                     outpath=outpath, regions=["masked"], scenarios=scenarios)

# calculate NPP (npp = gpp - ra)
simple_cdo_operation(var1="gpp", var2="ra", result="npp", operation="sub",
                     outpath=outpath, regions=["masked"], scenarios=scenarios)

# calculate terrestrial carbon (terra_carbon = cSoil + cVeg)
simple_cdo_operation(var1="cSoil", var2="cVeg", result="terra_carbon", operation="add",
                     outpath=outpath, regions=["masked"], scenarios=scenarios)

Members don't match for: npp,ra GISS-E2-1-H r1i1p1f1 historical
Members don't match for: npp,ra CESM2-WACCM-FV2 r1i1p1f1 historical
Members don't match for: npp,ra CanESM5-CanOE r2i1p2f1 ssp126
Members don't match for: npp,ra CanESM5 r2i1p1f1 ssp126
Members don't match for: npp,ra CanESM5-CanOE r3i1p2f1 ssp245
Members don't match for: npp,ra CanESM5 r2i1p1f1 ssp245
Members don't match for: npp,ra GISS-E2-1-G r1i1p3f1 ssp370
Members don't match for: npp,ra IPSL-CM6A-LR r2i1p1f1 ssp585
Members don't match for: npp,rh CESM2-WACCM-FV2 r3i1p1f1 historical
Members don't match for: npp,rh GISS-E2-1-G r1i1p1f1 historical
Members don't match for: npp,rh GISS-E2-1-G r1i1p1f2 historical
Members don't match for: npp,rh CanESM5-CanOE r2i1p2f1 ssp126
Members don't match for: npp,rh CanESM5 r1i1p2f1 ssp126
Members don't match for: npp,rh CanESM5-CanOE r3i1p2f1 ssp245
Members don't match for: npp,rh CanESM5 r2i1p1f1 ssp245
Members don't match for: npp,rh IPSL-CM6A-LR r2i1p1f1 ssp585
