In [1]:
import logging

# Send ERROR-and-above records to error_log.txt, truncating the file each time
# this configuration is applied (filemode='w').
# basicConfig() silently does nothing when the root logger already has
# handlers -- e.g. when this cell is re-run in a live kernel, or another
# library configured logging first.  force=True (Python 3.8+) removes any
# existing root handlers so this configuration always takes effect.
logging.basicConfig(filename='error_log.txt', level=logging.ERROR, filemode='w',
                    format='%(asctime)s - %(levelname)s - %(message)s',
                    force=True)

In [2]:
from pcmdi_metrics.mean_climate.lib_unified import (
    get_unique_bases, process_dataset, calculate_and_save_metrics, extract_info_from_model_catalogue, 
    get_ref_catalogue, get_model_catalogue, 
    multi_level_dict, print_dict, write_to_json
)
import os

from pcmdi_metrics.utils import create_target_grid

In [3]:
# Variables to evaluate (optional).
# If given, prioritized over model_catalogue.json.  If not given, all variables
# common to ref_catalogue.json and model_catalogue.json are used.
variables = [
    "pr",
    "ua-200",
    "ua-850",
    "va-200",
    "rsdt",
    "rsut",
    "rsutcs",
    "rlut",
    "rlutcs",
    "rstcre",
    "rltcre",
    "rt",
    "rst",
]

# Reduced selection kept for quick experiments:
# variables = ["ua-200", "ua-850", "va-200"]

# Model data path template (optional).
# If given, prioritized over model_catalogue.json.
model_data_path_template = "/home/data/%(model)/%(var)/%(model)_%(run)_%(var)_blabla.nc"

# Models to evaluate (optional).
# If given, prioritized over model_catalogue.json.  If not given, all models in
# model_catalogue.json are used.
models = ["model-a", "model-b"]

# Runs per model (optional).
# If given, prioritized over model_catalogue.json.  If not given, all runs in
# model_catalogue.json are used.
runs_dict = {
    "model-a": ["r1", "r2"],
    "model-b": ["r1", "r2"],
    "model-c": ["r1", "r2"],
}

In [4]:
# Every output location below sits under this one directory.
_output_root = "/p/user_pub/climate_work/lee1043/temporary/mean_climate_workflow_refactorization/output"

# Interim output directories, keyed by data type ("ref" / "model").
# "%(var)" is kept as a literal placeholder in each path.
# NOTE(review): "path_ac" / "path_ac_interp" presumably hold annual-cycle
# climatologies before / after interpolation -- confirm against process_dataset.
interim_output_path_dict = {
    data_type: {
        "path_ac": f"{_output_root}/clims_{data_type}/%(var)",
        "path_ac_interp": f"{_output_root}/clims_{data_type}_interp/%(var)",
    }
    for data_type in ("ref", "model")
}

# Directory that receives the output_<var>.json files.
output_path = f"{_output_root}/json"

# Regions to evaluate; an empty list falls back to the default region list.
regions = ["NHEX", "SHEX"]

# Resolution string handed to create_target_grid().
target_grid = "2.5x2.5"

# Catalogue files for the reference and model data.
ref_catalogue_file_path = '/p/user_pub/PCMDIobs/catalogue/obs4MIPs_PCMDI_monthly_byVar_catalogue_v20240716.json'
model_catalogue_file_path = 'model_catalogue.json'

# optional, if ref_catalogue file does not include entire directory path
ref_data_head = "/p/user_pub/PCMDIobs"

In [5]:
# Variable names handed to process_dataset() as `rad_diagnostic_variables`.
rad_diagnostic_variables = [
    "rt",
    "rst",
    "rstcre",
    "rltcre",
]

# Region list used as the fallback when `regions` is empty.
default_regions = [
    "global",
    "NHEX",
    "SHEX",
    "TROPICS",
]

In [6]:
# Keep the user's regions when any were given; otherwise use the defaults.
regions = regions or default_regions

In [7]:
# Reference catalogue.  `ref_data_head` is supplied for catalogue files whose
# entries do not include the entire directory path.
refs_dict = get_ref_catalogue(
    ref_catalogue_file_path,
    ref_data_head,
)

# Model catalogue, built from the catalogue file together with the optional
# user selections (variables, models, runs) and the data path template.
models_dict = get_model_catalogue(
    model_catalogue_file_path,
    variables,
    models,
    runs_dict,
    model_data_path_template,
)

In [8]:
# If any of the three selections was left unset (None), hand all of them to
# extract_info_from_model_catalogue() together with both catalogues and take
# the returned values.
if variables is None or models is None or runs_dict is None:
    variables, models, runs_dict = extract_info_from_model_catalogue(
        variables,
        models,
        runs_dict,
        refs_dict,
        models_dict,
    )

In [9]:
# Build the grid object from the resolution string in `target_grid`.
common_grid = create_target_grid(
    target_grid_resolution=target_grid,
)

# NOTE(review): a placeholder here read "implement grid creation here";
# confirm whether anything beyond create_target_grid() is still planned.

In [10]:
# Shared, mutable containers read and filled by the processing functions.
encountered_variables = set()
ac_ref_dict, ac_model_run_dict, metrics_dict = (multi_level_dict() for _ in range(3))

# get_unique_bases() returns a mapping; its keys are kept as a list in
# `variables_unique`, and main() looks up `levels` for a variable in it.
variables_dict = get_unique_bases(variables)
variables_unique = [base for base in variables_dict.keys()]

print(f"variables_unique: {variables_unique}")
print(f"variables_dict: {variables_dict}")





def process_references(var, refs, rad_diagnostic_variables, levels, common_grid):
    """Run ``process_dataset`` for every reference dataset of one variable.

    Each reference is handled independently: if processing one reference
    raises, the error is written to the log (including the traceback),
    reported on stdout, and the loop continues with the next reference.

    Besides its arguments, this function reads the module-level ``refs_dict``,
    ``ac_ref_dict``, ``encountered_variables`` and
    ``interim_output_path_dict["ref"]`` and forwards them to
    ``process_dataset``.

    Parameters
    ----------
    var : str
        Variable name (e.g. ``"pr"``).
    refs : iterable
        Names of the reference datasets to process.
    rad_diagnostic_variables, levels, common_grid
        Forwarded unchanged to ``process_dataset``.

    Returns
    -------
    None
    """
    for ref in refs:
        print(f"=== var, ref: {var}, {ref}")
        try:
            process_dataset(
                var, ref, refs_dict, ac_ref_dict, rad_diagnostic_variables,
                encountered_variables, levels, common_grid,
                interim_output_path_dict["ref"], data_type="ref",
            )
        except Exception as e:
            # Deliberately broad: one failing reference must not stop the others.
            # exc_info=True stores the traceback in the log file, so the failure
            # can be diagnosed later; the message text itself is unchanged.
            logging.error("Error for %s %s: %s", var, ref, e, exc_info=True)
            print(f"Error logged for {var} {ref}")
            print(f'Error from process_references for {var} {ref}:', e)
            

def process_models(var, models, runs_dict, rad_diagnostic_variables, levels, common_grid, refs):
    """Process every (model, run) pair of one variable, then write the metrics.

    For each run listed in ``runs_dict[model]`` this calls ``process_dataset``
    and then, for each entry of ``levels``, ``calculate_and_save_metrics`` on
    ``ac_model_run_dict[var][model][run][level]``.  A failure for one
    (model, run) pair is logged with its traceback and printed, and the loop
    continues with the next pair.

    After all models are processed, one JSON file per level is written to
    ``output_path``: ``output_<var>.json`` when the level is ``None``,
    otherwise ``output_<var>-<level>.json``.

    Besides its arguments, this function reads the module-level
    ``models_dict``, ``ac_model_run_dict``, ``ac_ref_dict``,
    ``encountered_variables``, ``interim_output_path_dict["model"]``,
    ``regions``, ``refs_dict``, ``metrics_dict`` and ``output_path``.

    Parameters
    ----------
    var : str
        Variable name.
    models : iterable
        Model names; each must be a key of ``runs_dict``.
    runs_dict : mapping
        Model name -> iterable of run names.
    rad_diagnostic_variables, common_grid
        Forwarded unchanged to ``process_dataset``.
    levels : iterable
        Levels for ``var``; a ``None`` entry selects the un-suffixed JSON name.
    refs
        Reference names forwarded to ``calculate_and_save_metrics``.

    Returns
    -------
    None
    """
    for model in models:
        for run in runs_dict[model]:
            try:
                process_dataset(
                    var, (model, run), models_dict, ac_model_run_dict,
                    rad_diagnostic_variables, encountered_variables, levels,
                    common_grid, interim_output_path_dict["model"], data_type="model",
                )
                for level in levels:
                    ac_model_run_level_interp = ac_model_run_dict[var][model][run][level]
                    calculate_and_save_metrics(
                        var, model, run, level, regions, refs, ac_ref_dict,
                        ac_model_run_level_interp, output_path, refs_dict, metrics_dict,
                    )
            except Exception as e:
                # Deliberately broad: one failing run must not stop the others.
                # Logged (with traceback) in the same way process_references
                # records its failures, so model errors reach the log file too.
                logging.error("Error for %s %s %s: %s", var, model, run, e, exc_info=True)
                print(f'Error from process_models for {var} {model} {run}:', e)

    for level in levels:
        # A None level means the variable has no level suffix.
        var_key = var if level is None else f"{var}-{level}"
        write_to_json(metrics_dict[var_key], os.path.join(output_path, f"output_{var_key}.json"))


def main(variables_to_process=("pr",)):
    """Process the reference datasets for each requested variable.

    For every variable this records it in ``encountered_variables``, looks up
    its levels in ``variables_dict`` and its reference names in ``refs_dict``,
    and calls ``process_references``.  The ``process_models`` step is
    currently disabled (commented out below).

    An error for one variable (for example a variable missing from
    ``variables_dict`` or ``refs_dict``) is logged with its traceback and
    printed, and the loop continues with the next variable.

    Parameters
    ----------
    variables_to_process : iterable of str, optional
        Variables to process.  Defaults to ``("pr",)``, the single-variable
        selection that used to be hard-coded inside this function; pass the
        module-level ``variables_unique`` to process every configured variable.

    Returns
    -------
    None
    """
    for var in variables_to_process:
        try:
            print("var:", var)
            encountered_variables.add(var)
            levels = variables_dict[var]
            refs = refs_dict[var].keys()

            process_references(var, refs, rad_diagnostic_variables, levels, common_grid)
            # process_models(var, models, runs_dict, rad_diagnostic_variables, levels, common_grid, refs)
        except Exception as e:
            # Deliberately broad: a failure for one variable must not stop the rest.
            logging.error("Error from main for %s: %s", var, e, exc_info=True)
            print(f'Error from main for {var}:', e)


# Run the workflow only when this code executes as the top-level program
# (__name__ is "__main__"); nothing runs if it is imported as a module.
if __name__ == "__main__":
    main()


variables_unique: ['pr', 'ua', 'va', 'rsdt', 'rsut', 'rsutcs', 'rlut', 'rlutcs', 'rstcre', 'rltcre', 'rt', 'rst']
variables_dict: {'pr': [None], 'ua': [200, 850], 'va': [200], 'rsdt': [None], 'rsut': [None], 'rsutcs': [None], 'rlut': [None], 'rlutcs': [None], 'rstcre': [None], 'rltcre': [None], 'rt': [None], 'rst': [None]}
var: pr
=== var, ref: pr, GPCP-2-3
Processing data for: GPCP-2-3
Processing ref dataset - varname: pr, data: GPCP-2-3, path: /p/user_pub/PCMDIobs/obs4MIPs/NOAA-NCEI/GPCP-2-3/mon/pr/gn/v20210727/pr_mon_GPCP-2-3_PCMDI_gn_197901-201907.nc
get_annual_cycle, var: pr
data_path: /p/user_pub/PCMDIobs/obs4MIPs/NOAA-NCEI/GPCP-2-3/mon/pr/gn/v20210727/pr_mon_GPCP-2-3_PCMDI_gn_197901-201907.nc
out_path: /p/user_pub/climate_work/lee1043/temporary/mean_climate_workflow_refactorization/output/clims_ref/pr
ver: v20240922
infilename: pr_mon_GPCP-2-3_PCMDI_gn_197901-201907.nc
type(d): <class 'xarray.core.dataset.Dataset'>
atts: {'Conventions': 'CF-1.7 ODS-2.1', 'activity_id': 'obs4MIPs

In [11]:
# Dump the whole reference catalogue (refs_dict) for inspection.
print_dict(refs_dict)

{
    "hfls": {
        "ERA-INT": {
            "MD5sum": "1ae4587143f05ee81432b3d9960aab63",
            "filename": "hfls_mon_ERA-INT_PCMDI_gn_197901-201903.nc",
            "path": "/p/user_pub/PCMDIobs/obs4MIPs/ECMWF/ERA-INT/mon/hfls/gn/v20210727",
            "period": "197901-201903",
            "shape": "(483, 241, 480)",
            "template": "/p/user_pub/PCMDIobs/obs4MIPs/ECMWF/ERA-INT/mon/hfls/gn/v20210727/hfls_mon_ERA-INT_PCMDI_gn_197901-201903.nc"
        },
        "TropFlux-1-0": {
            "MD5sum": "2f05191d6727068e1500d8d4ed90098a",
            "filename": "hfls_mon_TropFlux-1-0_PCMDI_gn_197901-201707.nc",
            "path": "/p/user_pub/PCMDIobs/obs4MIPs/ESSO/TropFlux-1-0/mon/hfls/gn/v20210727",
            "period": "197901-201707",
            "shape": "(463, 60, 350)",
            "template": "/p/user_pub/PCMDIobs/obs4MIPs/ESSO/TropFlux-1-0/mon/hfls/gn/v20210727/hfls_mon_TropFlux-1-0_PCMDI_gn_197901-201707.nc"
        }
    },
    "hfns": {
        "TropFlu

In [12]:
# Dump the whole model catalogue (models_dict) for inspection.
print_dict(models_dict)

{
    "pr": {
        "model-a": {
            "r1": {
                "filename": "model-a_r1_pr_blabla.nc",
                "path": "/home/data/model-a/pr",
                "template": "/home/data/model-a/pr/model-a_r1_pr_blabla.nc",
                "varname": "pr"
            },
            "r2": {
                "filename": "model-a_r2_pr_blabla.nc",
                "path": "/home/data/model-a/pr",
                "template": "/home/data/model-a/pr/model-a_r2_pr_blabla.nc",
                "varname": "pr"
            }
        },
        "model-b": {
            "r1": {
                "filename": "model-b_r1_pr_blabla.nc",
                "path": "/home/data/model-b/pr",
                "template": "/home/data/model-b/pr/model-b_r1_pr_blabla.nc",
                "varname": "pr"
            },
            "r2": {
                "filename": "model-b_r2_pr_blabla.nc",
                "path": "/home/data/model-b/pr",
                "template": "/home/data/model-b/pr/model-b_r2_pr_b