In [1]:
%matplotlib inline
import intake
import xarray as xr
import os 
import pandas as pd
import numpy as np
import zarr 
import rhg_compute_tools.kubernetes as rhgk

import matplotlib.pyplot as plt

import re
import yaml

import ast

import warnings 

In [2]:
# Cluster start-up is disabled: the call is wrapped in a string literal, so
# running this cell only echoes the text and never calls rhgk.
'''client, cluster = rhgk.get_standard_cluster()
cluster'''

'client, cluster = rhgk.get_standard_cluster()\ncluster'

In [3]:
# Open the Pangeo CMIP6 catalog (JSON hosted on Google Cloud Storage).
# `col` is read as a module-level global by `_get_cmip6_dataset` below.
col = intake.open_esm_datastore("https://storage.googleapis.com/cmip6/pangeo-cmip6.json")

In [4]:
def _paramfile_to_tuple(model, variable):
    """
    Read the parameter file for a model/variable pair and return its jobs as a tuple.

    The file ``<model>-<variable>.yaml`` in the workflow parameters directory
    is loaded with YAML, and the string stored under its ``'jobs'`` key is
    evaluated as a Python literal after newlines and square brackets have
    been removed from it.

    Parameters
    ----------
    model : str
        model name, used as the first part of the parameter file name
    variable : str
        variable name, used as the second part of the parameter file name

    Returns
    -------
    tuple
        the parsed job entries. A file that holds a single job is returned
        as a one-element tuple so callers can always index by position.

    Notes
    -----
    Every square bracket in the ``'jobs'`` string is removed, not only the
    outermost pair, so list values nested inside a job entry are not
    supported by this parser.
    """
    param_file = '/home/jovyan/downscaling/downscale/workflows/parameters/{}-{}.yaml'.format(model, variable)
    with open(param_file, 'r') as f:
        var_dict = yaml.full_load(f)
    # some string parsing: drop newlines first, then the square brackets, so
    # that what remains is a comma-separated sequence of literals on one line
    jobs_text = var_dict['jobs']
    for pattern in (r"\n", r"[\[\]]"):
        jobs_text = re.sub(pattern, "", jobs_text)
    jobs = ast.literal_eval(jobs_text.strip())
    # With exactly one job (and no trailing comma) the stripped text evaluates
    # to the bare entry instead of a tuple; wrap it so `[tuple_id]` indexing
    # in callers behaves the same for one job as for many.
    if not isinstance(jobs, tuple):
        jobs = (jobs,)
    return jobs

def _get_cmip6_dataset(model, variable, tuple_id, period='ssp'):
    """
    Look up the catalog entries described by a parameter file and open them.

    Parameters
    ----------
    model : str
        model name, forwarded to `_paramfile_to_tuple`
    variable : str
        variable name, forwarded to `_paramfile_to_tuple`
    tuple_id : int
        position of the job entry to use within the parsed parameter tuple
    period : str, optional
        key of the job entry holding the search facets; defaults to 'ssp'

    Returns
    -------
    the result of ``to_dataset_dict`` on the catalog search (callers treat
    it as a dict keyed by dataset name)
    """
    spec = _paramfile_to_tuple(model, variable)[tuple_id][period]
    # Facets copied verbatim from the parameter file into the search.
    facet_names = (
        'activity_id',
        'experiment_id',
        'table_id',
        'variable_id',
        'source_id',
        'member_id',
        'grid_label',
    )
    query = {facet: spec[facet] for facet in facet_names}
    # The version is the one facet that is converted (to int) before searching.
    query['version'] = int(spec['version'])
    matches = col.search(**query)
    return matches.to_dataset_dict(progressbar=False)

def compute_dtr(model, tuple_id=1):
    """
    Compute the diurnal temperature range (tasmax minus tasmin) for a model.

    Parameters
    ----------
    model : str
        model name, forwarded to `_get_cmip6_dataset` for both variables
    tuple_id : int, optional
        position of the job entry to use in each parameter file; default 1

    Returns
    -------
    the 'tasmax' variable minus the 'tasmin' variable
    (presumably lazy when the opened datasets are dask-backed -- TODO confirm)

    Raises
    ------
    ValueError
        if the lookup for either variable does not yield exactly one dataset
    """
    tasmax_by_key = _get_cmip6_dataset(model, 'tasmax', tuple_id)
    if len(tasmax_by_key) != 1:
        raise ValueError("there is likely an issue with {} tasmax".format(model))

    tasmin_by_key = _get_cmip6_dataset(model, 'tasmin', tuple_id)
    if len(tasmin_by_key) != 1:
        raise ValueError("there is likely an issue with {} tasmin".format(model))

    # Each mapping is known to hold exactly one dataset at this point.
    (tasmax_ds,) = tasmax_by_key.values()
    (tasmin_ds,) = tasmin_by_key.values()
    return tasmax_ds['tasmax'] - tasmin_ds['tasmin']

def check_dtr(dtr, model):
    """
    Warn when the diurnal temperature range drops to zero or below.

    The minimum of `dtr` over the 'time' dimension is taken, and a warning is
    issued if any of the resulting values is ``<= 0``. Note that zeros also
    trigger the warning even though its text says "negative".

    Parameters
    ----------
    dtr : array-like with ``min``/``where``/``count`` methods
        DTR values with a 'time' dimension, e.g. the result of `compute_dtr`
    model : str
        model name, used only in the warning text

    Returns
    -------
    None
    """
    lowest_over_time = dtr.min('time')
    # `where` keeps only the offending values; `count` tallies the ones kept.
    offending = lowest_over_time.where(lowest_over_time <= 0)
    if offending.count().values > 0:
        warnings.warn("DTR has negative values for {}".format(model))

Checking models: results of running `check_dtr` on each model's DTR

DTR negative (minimum DTR over time is ≤ 0 somewhere, so `check_dtr` warned):
- GFDL-ESM4
- GFDL-CM4

DTR positive (no warning from `check_dtr`):
- CanESM5
- INM-CM4-8
- INM-CM5-0
- NorESM2-MM
- NorESM2-LM
- MIROC6
- EC-Earth3-Veg-LR
- EC-Earth3-Veg
- EC-Earth3
- KIOST-ESM
- MIROC-ES2L
- MPI-ESM1-2-LR
- MPI-ESM1-2-HR
- NESM3
- MRI-ESM2-0
- FGOALS-g3
- CMCC-ESM2
- BCC-CSM2-MR
- AWI-CM-1-1-MR
- ACCESS-CM2

Parameter files to add or fix (could not check DTR): 
- UKESM1-0-LL
- ACCESS-ESM1-5
- MPI-ESM1-2-HAM

Tasmin parameter files to add (could not check DTR): 
- CAMS-CSM1-0

In [5]:
# Name of the model to check; it selects the tasmax/tasmin parameter files.
model = 'NorESM2-MM'

In [8]:
# _get_cmip6_dataset(model, 'tasmax', 0)

In [6]:
# Use the first job entry (tuple_id=0) instead of compute_dtr's default of 1.
dtr = compute_dtr(model, tuple_id=0)

In [39]:
# Emits a warning if the minimum of DTR over 'time' is <= 0 anywhere.
check_dtr(dtr, model)