In [1]:
%matplotlib inline
import intake
import xarray as xr
import os 
import pandas as pd
import numpy as np
import zarr 
import gcsfs
from xarray.ufuncs import maximum, minimum
import rhg_compute_tools.kubernetes as rhgk

import matplotlib.pyplot as plt
import re
import yaml
import ast
import warnings 

In [2]:
from science_validation_manual import read_gcs_zarr

In [3]:
# GCS filesystem handle; swap_cmip6_tasmax_or_tasmin uses it to build the zarr store mapper.
# NOTE(review): the token file location is a hard-coded absolute path — consider making it configurable.
fs = gcsfs.GCSFileSystem(token='/opt/gcsfuse_tokens/impactlab-data.json')

In [4]:
# Cluster start-up is disabled: the code is wrapped in a string literal, so running this
# cell only echoes the text and never calls rhgk.get_standard_cluster().
'''client, cluster = rhgk.get_standard_cluster()
cluster'''

'client, cluster = rhgk.get_standard_cluster()\ncluster'

In [5]:
# Open the CMIP6 intake-esm datastore from its JSON description; `col` is the notebook-level
# catalog that _get_cmip6_dataset searches.
col = intake.open_esm_datastore("https://storage.googleapis.com/cmip6/pangeo-cmip6.json")

In [33]:
def _paramfile_to_tuple(model, variable, param_dir='/home/jovyan/downscaling/downscale/workflows/parameters'):
    """
    Load the ``jobs`` entry of a model/variable parameter file as a tuple.

    The YAML file ``<param_dir>/<model>-<variable>.yaml`` is read, every newline
    and square bracket is stripped from its ``jobs`` string, and what remains is
    parsed with ``ast.literal_eval``.

    Parameters
    ----------
    model : str
        Model name used as the first part of the file name.
    variable : str
        Variable name used as the second part of the file name.
    param_dir : str, optional
        Directory holding the parameter files. Defaults to the location that
        was previously hard-coded.

    Returns
    -------
    tuple
        The parsed job entries. A file with a single entry yields a 1-tuple so
        that positional indexing (``result[tuple_id]``) always works.
    """
    param_file = os.path.join(param_dir, '{}-{}.yaml'.format(model, variable))
    with open(param_file, 'r') as f:
        var_dict = yaml.full_load(f)
    # `jobs` is stored as one string; drop newlines and *all* square brackets
    # (nested ones included) so the remainder reads as comma-separated literals.
    flattened = re.sub(r"[\[\]\n]", "", var_dict['jobs'])
    jobs = ast.literal_eval(flattened.strip())
    # A lone entry parses as the entry itself rather than a tuple; wrap it so
    # callers can index by position regardless of how many jobs the file lists.
    if not isinstance(jobs, tuple):
        jobs = (jobs,)
    return jobs

def _get_cmip6_dataset(model, variable, tuple_id, period='ssp'):
    """
    Look up one parameter-file entry in the CMIP6 catalog and open the matches.

    The entry is ``_paramfile_to_tuple(model, variable)[tuple_id][period]``; its
    facet values are passed to ``col.search`` and the result of
    ``to_dataset_dict(progressbar=False)`` on that search is returned.
    """
    job_spec = _paramfile_to_tuple(model, variable)[tuple_id][period]

    # Facets forwarded unchanged from the parameter file, in search order.
    facet_names = (
        'activity_id',
        'experiment_id',
        'table_id',
        'variable_id',
        'source_id',
        'member_id',
        'grid_label',
    )
    query = {name: job_spec[name] for name in facet_names}
    # `version` is the one facet that is cast to int before searching.
    query['version'] = int(job_spec['version'])

    matches = col.search(**query)
    return matches.to_dataset_dict(progressbar=False)

def compute_dtr(model, tuple_id=1, period='ssp'):
    """
    Compute the diurnal temperature range (tasmax - tasmin) for one model.

    The tasmax and tasmin datasets are fetched with ``_get_cmip6_dataset`` for the
    given parameter-file entry, and their difference is returned without
    triggering any computation here.

    Parameters
    ----------
    model : str
        Model name, used to locate the parameter files.
    tuple_id : int, optional
        Position of the job entry inside the parameter file. Defaults to 1.
    period : str, optional
        Key of the block to read inside the job entry (for example ``'ssp'`` or
        ``'historical'``). Defaults to ``'ssp'``, the only period that was
        previously reachable.

    Returns
    -------
    The result of ``tasmax - tasmin`` for the single matching dataset of each
    variable.

    Raises
    ------
    ValueError
        If the catalog search for either variable does not return exactly one
        dataset.
    """
    def _only_dataset(variable):
        # Fetch the datasets for `variable` and insist on exactly one match.
        datasets = _get_cmip6_dataset(model, variable, tuple_id, period=period)
        if len(datasets) != 1:
            raise ValueError("there is likely an issue with {} {}".format(model, variable))
        (dataset,) = datasets.values()
        return dataset

    tasmax_ds = _only_dataset('tasmax')
    tasmin_ds = _only_dataset('tasmin')
    return tasmax_ds['tasmax'] - tasmin_ds['tasmin']

def check_dtr(dtr, model):
    """
    Warn when the minimum DTR over time contains negative or zero values.

    Parameters
    ----------
    dtr : array-like with ``min``/``where``/``count`` (e.g. the output of
        ``compute_dtr``)
        Diurnal temperature range with a ``time`` dimension.
    model : str
        Model name, only used in the warning text.

    Returns
    -------
    None
        Findings are reported through ``warnings.warn``: one warning if any
        time-minimum is negative, another if any is exactly zero.
    """
    # Materialise the time-minimum once. Without this, a lazily evaluated `dtr`
    # would be reduced from scratch for each of the two counts below.
    min_dtr = dtr.min('time').compute()
    neg_count = min_dtr.where(min_dtr < 0).count().values
    zero_count = min_dtr.where(min_dtr == 0).count().values
    if neg_count > 0:
        warnings.warn("DTR has {} negative values for {}, {} needs tasmin/tasmax swapping".format(neg_count, model, model))
    if zero_count > 0:
        warnings.warn("DTR has {} zero values for {}".format(zero_count, model))

Results of checking DTR for each model:

DTR negative: 
- GFDL-ESM4
- GFDL-CM4

DTR positive: 
- CanESM5
- INM-CM4-8
- INM-CM5-0
- NorESM2-MM
- NorESM2-LM
- MIROC6
- EC-Earth3-Veg-LR
- EC-Earth3-Veg
- EC-Earth3
- KIOST-ESM
- MIROC-ES2L
- MPI-ESM1-2-LR
- MPI-ESM1-2-HR
- NESM3
- MRI-ESM2-0
- FGOALS-g3
- CMCC-ESM2
- BCC-CSM2-MR
- AWI-CM-1-1-MR
- ACCESS-CM2

Parameter files to add or fix (could not check DTR): 
- UKESM1-0-LL
- ACCESS-ESM1-5

Tasmin parameter files to add (could not check DTR): 
- CAMS-CSM1-0

In [None]:
# Spot-check one model: build its DTR from the catalog and warn on negative/zero minima.
model = 'NorESM2-MM'
# _get_cmip6_dataset(model, variable, tuple_id, period='ssp')
# tuple_id=0 picks the first job entry of the parameter file; compute_dtr reads its 'ssp' block.
dtr = compute_dtr(model, tuple_id=0)
check_dtr(dtr, model)

### For models with negative DTR, swap tasmax and tasmin ### 

GFDL-CM4: historical, ssp245, ssp585

GFDL-ESM4: historical, ssp126, ssp245, ssp370, ssp585

In [107]:
def _compute_max_or_min_temperature(model, tuple_id=1, variable='tasmax', ssp_or_historical='ssp'):
    """
    Build a corrected tasmax or tasmin as the element-wise max/min of both fields.

    The tasmax and tasmin datasets for the requested parameter-file entry are
    fetched with ``_get_cmip6_dataset``; nothing is computed eagerly here.

    Parameters
    ----------
    model : str
        Model source_id, used to locate the parameter files.
    tuple_id : int, optional
        Position of the job entry inside the parameter file. Defaults to 1.
    variable : {'tasmax', 'tasmin'}, optional
        Which corrected field to build. Defaults to ``'tasmax'``.
    ssp_or_historical : str, optional
        Period key forwarded to ``_get_cmip6_dataset``. Defaults to ``'ssp'``.

    Returns
    -------
    tuple
        ``(data, attrs)``: for ``'tasmax'`` the element-wise maximum of the two
        fields plus the attrs of the tasmax dataset; for ``'tasmin'`` the
        element-wise minimum plus the attrs of the tasmin dataset.

    Raises
    ------
    ValueError
        If ``variable`` is not ``'tasmax'`` or ``'tasmin'``, or if the catalog
        search for either variable does not return exactly one dataset.
    """
    # Reject unsupported variables before any catalog look-up; previously this
    # case fell through and silently returned None.
    if variable not in ('tasmax', 'tasmin'):
        raise ValueError("variable must be 'tasmax' or 'tasmin', got {!r}".format(variable))

    tasmax = _get_cmip6_dataset(model, 'tasmax', tuple_id, period=ssp_or_historical)
    k_tasmax = list(tasmax.keys())
    if len(k_tasmax) != 1:
        raise ValueError("there is likely an issue with {} tasmax".format(model))
    tasmin = _get_cmip6_dataset(model, 'tasmin', tuple_id, period=ssp_or_historical)
    k_tasmin = list(tasmin.keys())
    if len(k_tasmin) != 1:
        raise ValueError("there is likely an issue with {} tasmin".format(model))

    tasmax_ds = tasmax[k_tasmax[0]]
    tasmin_ds = tasmin[k_tasmin[0]]

    # compute max or min (`maximum` / `minimum` are the xarray.ufuncs imported at the top of the notebook)
    if variable == 'tasmax':
        return (maximum(tasmax_ds['tasmax'], tasmin_ds['tasmin']), tasmax_ds.attrs)
    return (minimum(tasmax_ds['tasmax'], tasmin_ds['tasmin']), tasmin_ds.attrs)

def swap_cmip6_tasmax_or_tasmin(model, tuple_id, variable, ssp='ssp245', target_run='ssp'):
    """
    Rewrite one GFDL tasmax/tasmin zarr store so that tasmax >= tasmin.

    For select GCMs with negative DTR, the corrected field comes from
    ``_compute_max_or_min_temperature`` (element-wise max for tasmax, min for
    tasmin). It is persisted, re-chunked and written with ``mode="w"`` to the
    store path built from the arguments, replacing whatever is there.

    Parameters
    ----------
    model : {'GFDL-CM4', 'GFDL-ESM4'}
        Model to process; only these two have a known store version.
    tuple_id : int
        Position of the job entry inside the parameter file.
    variable : str
        ``'tasmax'`` or ``'tasmin'``; also the name of the variable written.
    ssp : str, optional
        Scenario path component of the destination store. Defaults to ``'ssp245'``.
    target_run : str, optional
        ``'historical'`` writes under activity ``CMIP``; anything else writes
        under ``ScenarioMIP``. Also forwarded as the period key. Defaults to ``'ssp'``.

    Raises
    ------
    ValueError
        If ``model`` is not one of the supported GFDL models. This is raised
        before any data is loaded or written.
    """
    # Resolve the destination first so an unsupported model fails fast, instead
    # of surfacing as an unbound `version` after the data has been persisted.
    if model == 'GFDL-CM4':
        version = '20180701'
    elif model == 'GFDL-ESM4':
        version = '20180701' if target_run == 'ssp' else '20190726'
    else:
        raise ValueError(
            "no store version is known for model {!r}; expected 'GFDL-CM4' or 'GFDL-ESM4'".format(model)
        )
    activity_id = 'CMIP' if target_run == 'historical' else 'ScenarioMIP'

    temp_var, attribs = _compute_max_or_min_temperature(model, tuple_id=tuple_id, variable=variable, ssp_or_historical=target_run)
    temp_var_computed = temp_var.persist()

    # store_filename = 'gs://impactlab-data/climate/source_data/CMIP6/{}-{}-{}.zarr'.format(model, variable, ssp)
    store_filename = ('gs://raw-305d04da/cmip6/{}/NOAA-GFDL/{}/{}/r1i1p1f1/day/{}/gr1/v{}.zarr'.format(activity_id, model, ssp, variable, version))
    store = fs.get_mapper(store_filename, check=False)

    ds_temp = temp_var_computed.to_dataset(name=variable)
    # carry over the attrs of the source dataset returned alongside the data
    ds_temp.attrs = attribs

    # one member per chunk, 830 time steps per chunk, full lat/lon extent per chunk
    ds_temp.chunk({'member_id': 1, 'time': 830, 'lat': len(ds_temp.lat), 'lon': len(ds_temp.lon)}).to_zarr(store, consolidated=True, mode="w")

    print("zarr store for {} {} saved to {}".format(model, ssp, store_filename))

In [145]:
# historical: _get_cmip6_dataset('GFDL-CM4', 'tasmin', 0, period='historical')
# ssp245: _get_cmip6_dataset('GFDL-CM4', 'tasmin', 1, period='ssp')
# _get_cmip6_dataset('GFDL-ESM4', 'tasmin', 0, period='historical')

In [144]:
# variables: tasmin and tasmax
# models: GFDL-ESM4 (all ssps included in downscaling) and GFDL-CM4 (ssps 245 and 585)
# WARNING: running this cell overwrites the destination zarr stores (they are written with mode="w").
model = 'GFDL-ESM4'
gfdlcm4_scens = ['historical', 'ssp245', 'ssp585']
gfdlesm4_scens = ['historical', 'ssp370', 'ssp245', 'ssp126', 'ssp585']
# Pick the scenario list from `model`, so changing the model cannot leave the other
# model's scenarios in use.
# NOTE(review): assumes the order of each list matches the order of the job entries in
# that model's parameter files (position 0 = historical) — TODO confirm for GFDL-CM4.
scens_by_model = {'GFDL-CM4': gfdlcm4_scens, 'GFDL-ESM4': gfdlesm4_scens}
for variable in ['tasmin', 'tasmax']:
    for tuple_id, scenario in enumerate(scens_by_model[model]):
        # entry 0 is the historical run; every later entry is an ssp run
        target_run = 'historical' if tuple_id == 0 else 'ssp'
        swap_cmip6_tasmax_or_tasmin(model, tuple_id, variable, scenario, target_run=target_run)

"# variables: tasmin and tasmax\n# models: GFDL-ESM4 (all ssps included in downscaling) and GFDL-CM4 (ssps 245 and 585)\nmodel = 'GFDL-ESM4'\ngfdlcm4_scens = ['historical', 'ssp245', 'ssp585']\ngfdlesm4_scens = ['historical', 'ssp370', 'ssp245', 'ssp126', 'ssp585']\nfor variable in ['tasmin', 'tasmax']:\n    for i, tuple_id in enumerate([0, 1, 2, 3, 4]):\n        if tuple_id != 0:\n            target_run = 'ssp'\n        else:\n            target_run = 'historical'\n        swap_cmip6_tasmax_or_tasmin(model, tuple_id, variable, gfdlesm4_scens[i], target_run=target_run)"

In [142]:
# Read a store back with read_gcs_zarr; this path is the one swap_cmip6_tasmax_or_tasmin
# builds for GFDL-ESM4 historical tasmax.
ds = read_gcs_zarr('gs://raw-305d04da/cmip6/CMIP/NOAA-GFDL/GFDL-ESM4/historical/r1i1p1f1/day/tasmax/gr1/v20190726.zarr')

Check the DTR of the original raw `GFDL-CM4` ssp245 data, then the DTR of the pre-processed ("updated") data for the same model and scenario.

In [34]:
# Baseline: DTR built straight from the catalog data for GFDL-CM4, job entry 1.
print("original raw CMIP6 data")
model = 'GFDL-CM4'
# _get_cmip6_dataset(model, variable, tuple_id, period='ssp')
dtr = compute_dtr(model, tuple_id=1)
# emits warnings if any time-minimum DTR is negative or zero
check_dtr(dtr, model)

original raw CMIP6 data




In [35]:
# Same check as for the raw data, applied to the pre-processed DTR.
print("pre-processed CMIP6 data")
model = 'GFDL-CM4'

# NOTE(review): `dtr_gfdlcm4_ssp245` is not assigned anywhere in the visible notebook, so this
# cell relies on earlier kernel state — confirm where it is built before a Restart & Run All.
check_dtr(dtr_gfdlcm4_ssp245, model)

pre-processed CMIP6 data




In [26]:
# Manual repeat of the negative-value part of check_dtr, with its own warning text.
# NOTE(review): `dtr_gfdlcm4_ssp245` is not assigned anywhere in the visible notebook — confirm its origin.
min_dtr = dtr_gfdlcm4_ssp245.min('time')
# number of positions whose minimum DTR over time is below zero
neg_count = min_dtr.where(min_dtr < 0).count().values
if neg_count > 0:
    warnings.warn("DTR has negative values for {} STILL".format('GFDL-CM4 ssp245'))