In [1]:
%matplotlib inline
import intake
import xarray as xr
import os 
import pandas as pd
import numpy as np
import zarr 
import gcsfs
from xarray.ufuncs import maximum, minimum
import rhg_compute_tools.kubernetes as rhgk

import matplotlib.pyplot as plt
import re
import yaml
import ast
import warnings 

In [2]:
from science_validation_manual import read_gcs_zarr

In [3]:
fs = gcsfs.GCSFileSystem(token='/opt/gcsfuse_tokens/impactlab-data.json')

In [4]:
'''client, cluster = rhgk.get_standard_cluster()
cluster'''

'client, cluster = rhgk.get_standard_cluster()\ncluster'

In [5]:
col = intake.open_esm_datastore("https://storage.googleapis.com/cmip6/pangeo-cmip6.json")

In [6]:
def _paramfile_to_tuple(model, variable):
    """
    takes in a model and variable, returns tuple from parameter file. 
    """
    param_file = '/home/jovyan/downscaling/downscale/workflows/parameters/{}-{}.yaml'.format(model, variable)
    with open(param_file, 'r') as f:
        var_dict = yaml.full_load(f)
    # some string parsing 
    line = var_dict['jobs']
    line1 = re.sub(r"\n", "", line)
    line2 = re.sub(r"[\[\]]", "", line1)
    return ast.literal_eval(line2.strip())

def _get_cmip6_dataset(model, variable, tuple_id, period='ssp'):
    d_ssp = _paramfile_to_tuple(model, variable)[tuple_id][period]
    cat = col.search(
              activity_id=d_ssp['activity_id'],
              experiment_id=d_ssp['experiment_id'],
              table_id=d_ssp['table_id'],
              variable_id=d_ssp['variable_id'],
              source_id=d_ssp['source_id'],
              member_id=d_ssp['member_id'],
              grid_label=d_ssp['grid_label'],
              version=int(d_ssp['version']),
          )
    return cat.to_dataset_dict(progressbar=False)

def compute_dtr(model, tuple_id=1):
    """
    takes in tasmax and tasmin Datasets, computes DTR (returns it lazily)
    """
    tasmax = _get_cmip6_dataset(model, 'tasmax', tuple_id)
    k_tasmax = list(tasmax.keys())
    if len(k_tasmax) != 1:
        raise ValueError("there is likely an issue with {} tasmax".format(model))
    tasmin = _get_cmip6_dataset(model, 'tasmin', tuple_id)
    k_tasmin = list(tasmin.keys())
    if len(k_tasmin) != 1:
        raise ValueError("there is likely an issue with {} tasmin".format(model))
    return tasmax[k_tasmax[0]]['tasmax'] - tasmin[k_tasmin[0]]['tasmin'] 

def check_dtr(dtr, model):
    """
    """
    min_dtr = dtr.min('time')
    neg_count = min_dtr.where(min_dtr <= 0).count().values
    if neg_count > 0:
        warnings.warn("DTR has negative values for {}".format(model))

checking models 

DTR negative: 
- GFDL-ESM4
- GFDL-CM4

DTR positive: 
- CanESM5
- INM-CM4-8
- INM-CM5-0
- NorESM2-MM
- NorESM2-LM
- MIROC6
- EC-Earth3-Veg-LR
- EC-Earth3-Veg
- EC-Earth3
- KIOST-ESM
- MIROC-ES2L
- MPI-ESM1-2-LR
- MPI-ESM1-2-HR
- NESM3
- MRI-ESM2-0
- FGOALS-g3
- CMCC-ESM2
- BCC-CSM2-MR
- AWI-CM-1-1-MR
- ACCESS-CM2

Parameter files to add or fix (could not check DTR): 
- UKESM1-0-LL
- ACCESS-ESM1-5

Tasmin parameter files to add (could not check DTR): 
- CAMS-CSM1-0

In [None]:
model = 'NorESM2-MM'

In [None]:
dtr = compute_dtr(model, tuple_id=0)
check_dtr(dtr, model)

### for models with negative DTR, swap tasmax and tasmin ### 

GFDL-CM4: historical, ssp245, ssp585

GFDL-ESM4: historical, ssp126, ssp245, ssp370, ssp585

In [12]:
def _compute_max_or_min_temperature(model, tuple_id=1, variable='tasmax'):
    """
    takes in a model source_id, pulls in the required parameter file info, 
    gets the tasmax and tasmin Datasets from the CMIP6 archive, computes tasmax or tasmin (returns it lazily)
    """
    tasmax = _get_cmip6_dataset(model, 'tasmax', tuple_id)
    k_tasmax = list(tasmax.keys())
    if len(k_tasmax) != 1:
        raise ValueError("there is likely an issue with {} tasmax".format(model))
    tasmin = _get_cmip6_dataset(model, 'tasmin', tuple_id)
    k_tasmin = list(tasmin.keys())
    if len(k_tasmin) != 1:
        raise ValueError("there is likely an issue with {} tasmin".format(model))
        
    # compute max or min 
    if variable == 'tasmax':
        return (maximum(tasmax[k_tasmax[0]]['tasmax'], tasmin[k_tasmin[0]]['tasmin']), tasmax[k_tasmax[0]].attrs)
    elif variable == 'tasmin':
        return (minimum(tasmax[k_tasmax[0]]['tasmax'], tasmin[k_tasmin[0]]['tasmin']), tasmin[k_tasmin[0]].attrs)

def swap_cmip6_tasmax_or_tasmin(model, tuple_id, variable, ssp):
    """
    for select GCMs with negative DTR, this swaps tasmax and tasmin so that tasmax > tasmin 
    """
    temp_var, attribs = _compute_max_or_min_temperature(model, tuple_id=tuple_id, variable=variable)
    temp_var_computed = temp_var.persist()
    
    store_filename = 'gs://impactlab-data/climate/source_data/CMIP6/downscaling/{}-{}-{}.zarr'.format(model, variable, ssp)
    store = fs.get_mapper(store_filename, check=False)
    
    ds_temp = temp_var_computed.to_dataset(name=variable)
    ds_temp.attrs = attribs
    
    ds_temp.chunk({'member_id': 1, 'time': 830, 'lat': len(ds_temp.lat), 'lon': len(ds_temp.lon)}).to_zarr(store, consolidated=True, mode="w")

In [13]:
# historical: _get_cmip6_dataset('GFDL-CM4', 'tasmin', 0, period='historical')
# ssp245: _get_cmip6_dataset('GFDL-CM4', 'tasmin', 1, period='ssp')
# ssp585: _get_cmip6_dataset('GFDL-CM4', 'tasmin', 2, period='ssp')

In [14]:
# _get_cmip6_dataset('GFDL-CM4', 'tasmax', 2, period='ssp')

In [16]:
swap_cmip6_tasmax_or_tasmin('GFDL-CM4', 1, 'tasmin', 'ssp245')

  return (minimum(tasmax[k_tasmax[0]]['tasmax'], tasmin[k_tasmin[0]]['tasmin']), tasmin[k_tasmin[0]].attrs)
  f(self.variable, other_variable)
  f(self_data, other_data) if not reflexive else f(other_data, self_data)


In [None]:
(tasmax, attribs) = compute_max_or_min_temperature('GFDL-CM4', tuple_id=1, variable='tasmax')
tasmax_computed = tasmax.persist()

test_store_filename = 'gs://impactlab-data/climate/source_data/CMIP6/downscaling/gfdl-cm4-tasmax-historical.zarr' 
test_store = fs.get_mapper(test_store_filename, check=False)

tasmax_computed = tasmax_computed.to_dataset(name='tasmax')
tasmax_computed.attrs = attribs

# write new zarr (note the rechunking is necessary to avoid an error in saving the zarr where chunks are different sizes)
tasmax_computed.chunk({'member_id': 1, 'time': 830, 'lat': len(tasmax_computed.lat), 'lon': len(tasmax_computed.lon)}).to_zarr(test_store, consolidated=True, mode="w")

In [17]:
tasmax_gfdlcm4_ssp245 = read_gcs_zarr('gs://impactlab-data/climate/source_data/CMIP6/downscaling/{}-{}-{}.zarr'.format('GFDL-CM4', 'tasmax', 'ssp245'))
tasmin_gfdlcm4_ssp245 = read_gcs_zarr('gs://impactlab-data/climate/source_data/CMIP6/downscaling/{}-{}-{}.zarr'.format('GFDL-CM4', 'tasmin', 'ssp245'))

In [23]:
dtr_gfdlcm4_ssp245 = tasmax_gfdlcm4_ssp245['tasmax'] - tasmin_gfdlcm4_ssp245['tasmin']

In [26]:
min_dtr = dtr_gfdlcm4_ssp245.min('time')
neg_count = min_dtr.where(min_dtr < 0).count().values
if neg_count > 0:
    warnings.warn("DTR has negative values for {} STILL".format('GFDL-CM4 ssp245'))