## humidity (hus)

In [None]:
import intake
import xarray as xr
#import xesmf as xe
import scipy
import numpy as np

import matplotlib.pyplot as plt

import os

import myFuncs
import myPlots


#import dask
#dask.config.set(**{'array.slicing.split_large_chunks': True})

In [None]:
# Candidate CMIP5 models. Leave the model to analyse uncommented; the first
# active entry of the list is the one that gets used.
models = [
        # 'IPSL-CM5A-MR', # 1 # super slow for some reason
         'GFDL-CM3',     # 2
        # 'GISS-E2-H',    # 3
        # 'bcc-csm1-1',   # 4
        # 'CNRM-CM5',     # 5
        # 'CCSM4',        # 6 # cannot concatenate files for rcp
        # 'HadGEM2-AO',   # 7
        # 'BNU-ESM',      # 8
        # 'EC-EARTH',     # 9
        # 'FGOALS-g2',    # 10
        # 'MPI-ESM-MR',   # 11
        # 'CMCC-CM',      # 12
        # 'inmcm4',       # 13
        # 'NorESM1-M',    # 14
        # 'CanESM2',      # 15 # rcp scenario: slicing with .sel does not work, says it 'contains no datetime objects'
        # 'MIROC5',       # 16
        # 'HadGEM2-CC',   # 17
        # 'MRI-CGCM3',    # 18
        # 'CESM1-BGC'     # 19
        ]

model = models[0]


# Experiment switches. If both are True, rcp85 wins (same outcome as before,
# where the rcp85 branch ran last and overwrote the historical settings).
historical = False
rcp85 = True

# Ensemble member per model for the models that do not use the default
# 'r1i1p1'; one table per experiment.
_ENSEMBLE_OVERRIDES = {
    'historical': {'GISS-E2-H': 'r6i1p1', 'CCSM4': 'r5i1p1'},
    'rcp85': {'GISS-E2-H': 'r2i1p1', 'CCSM4': 'r5i1p1'},
}

if rcp85:
    experiment = 'rcp85'
    period = slice('2070-01', '2099-12')
elif historical:
    experiment = 'historical'
    period = slice('1970-01', '1999-12')
else:
    # Without this, `experiment`, `period` and `ensemble` stay undefined and the
    # failure only shows up later as a NameError in the catalogue search.
    raise ValueError("No experiment selected: set `historical` or `rcp85` to True.")

ensemble = _ENSEMBLE_OVERRIDES[experiment].get(model, 'r1i1p1')


# Look up the daily atmospheric specific-humidity ('hus') datasets for the
# chosen model / experiment / ensemble member in the NCI ESGF CMIP5 catalogue.
ds_dict = intake.cat.nci['esgf'].cmip5.search(
    model_id=model,
    experiment=experiment,
    time_frequency='day',
    realm='atmos',
    ensemble=ensemble,
    variable='hus',
).to_dataset_dict()

# Work with the last dataset in the returned dictionary.
_ds_all = ds_dict[list(ds_dict)[-1]]

# Region kept for the analysis: all longitudes, latitudes 35S-35N.
_region = dict(lon=slice(0, 360), lat=slice(-35, 35))

if model != 'CanESM2':
    ds_orig = _ds_all.sel(time=period, **_region)
elif experiment == 'historical':
    # CanESM2 is cut by position rather than by date (the model list notes that
    # date-based .sel fails for it). 43800 = 120 * 365 and 10950 = 30 * 365,
    # i.e. presumably 30 years of 365-day years starting 120 years into the
    # record - TODO confirm against the file's time axis.
    ds_orig = _ds_all.isel(time=slice(43800, 43800 + 10950)).sel(**_region)
elif experiment == 'rcp85':
    # Positions 64*365 .. 94*365: again a 30-year window of 365-day years.
    ds_orig = _ds_all.isel(time=slice(365 * 64, 365 * 94)).sel(**_region)

    
# print(ds_orig.hus.encoding) will show that missing values are represented as 1e+20, although importing the data with intake converts them to NaN

In [None]:
# Show the selected subset (the notebook renders the last expression of a cell).
ds_orig

In [None]:
# Regrid the selected data with the project helper. What `haveDsOut` switches
# and which method is applied are defined in myFuncs, not here; the names
# suggest conservative regridding onto an already available output grid
# - TODO confirm against myFuncs.regrid_conserv.
haveDsOut = True
ds_hus = myFuncs.regrid_conserv(ds_orig, haveDsOut) # path='', model'')

In [None]:
# Scale specific humidity by 1000 and label the result g/kg (the source data
# are described as kg/kg further down in this notebook). The units attribute
# is set on the product, i.e. on the array the rest of the notebook uses.
hus = ds_hus.hus * 1000
hus.attrs.update(units='g/kg')
hus

In [None]:
# Inspect the pressure-level coordinate values used for the vertical integration.
hus.plev.data

In [None]:
# Pressure-weighted column mean of humidity for every time step and grid cell.
import scipy.integrate  # load the submodule explicitly; `import scipy` alone may not on older SciPy

# Missing values are treated as zero humidity before integrating.
da = hus.fillna(0)
plev = hus.plev.data

# Simpson integration along axis 1, the pressure axis of (time, plev, lat, lon).
# `x` is passed by keyword, and `even='last'` is only used where SciPy still
# accepts it: the keyword was deprecated in SciPy 1.11 and later removed.
try:
    column_integral = scipy.integrate.simpson(da, x=plev, axis=1, even='last')
except TypeError:
    column_integral = scipy.integrate.simpson(da, x=plev, axis=1)

# The leading minus sign presumably compensates for plev running from high to
# low pressure, which makes the raw integral negative - confirm with the
# plev values. Dividing by plev[0] normalises by the first pressure level.
hus_vInt = xr.DataArray(
    data=-column_integral / plev[0],
    dims=['time', 'lat', 'lon'],
    coords={'time': hus.time.data, 'lat': hus.lat.data, 'lon': hus.lon.data},
    # integral over pressure divided by a pressure -> same units as `hus`
    attrs={'units': hus.attrs.get('units', 'g/kg')},
    )


# Why this is a mass weighting: the air mass of a layer is
# (pressure thickness / g) * surface area. g and the area appear in both the
# weighted sum and the total mass, so they cancel and only pressure remains.

# Units: the result carries the units of `hus` (g/kg here), not mm/day.
# A precipitable-water estimate in kg/m^2 (= mm of liquid water) would instead be
# the integral of humidity in kg/kg over pressure in Pa divided by g, without the
# normalisation by plev[0]; it is a column amount, not a rate per day.

In [None]:
# Map of the first time step of the vertically integrated humidity.
myPlots.plot_snapshot(
    hus_vInt.isel(time=0),
    'Greens',
    'massweighted vertically integrated humidity',
    model,
)

In [None]:
# Map of the time mean of the vertically integrated humidity (attributes kept).
myPlots.plot_snapshot(
    hus_vInt.mean(dim='time', keep_attrs=True),
    'Greens',
    'time mean humidty vInt',
    model,
)

In [None]:
# as function
def get_hus_snapshot_tMean(hus):
    """Return the first-time-step map and the time-mean map of column humidity.

    The humidity is integrated over pressure with Simpson's rule and divided
    by the first pressure level, giving a pressure-weighted column mean.

    Parameters
    ----------
    hus : xarray.DataArray
        Humidity with `time`, `plev`, `lat` and `lon` coordinates, with
        pressure on axis 1, i.e. ordered (time, plev, lat, lon).

    Returns
    -------
    tuple of xarray.DataArray
        ``(snapshot, time_mean)``: the column mean at ``time=0`` and its mean
        over ``time``, both on (lat, lon).

    Notes
    -----
    Missing values are treated as zero humidity. The result keeps the units
    of `hus` (falling back to 'g/kg', the units this notebook assigns); the
    previous 'mm/day' label did not match the computation.
    """
    import scipy.integrate  # local import so the submodule is guaranteed to be loaded

    plev = hus.plev.data
    filled = hus.fillna(0)

    # `even` was deprecated in SciPy 1.11 and later removed; keep the original
    # 'last' treatment where it is still accepted, otherwise use the default.
    try:
        column_integral = scipy.integrate.simpson(filled, x=plev, axis=1, even='last')
    except TypeError:
        column_integral = scipy.integrate.simpson(filled, x=plev, axis=1)

    # The minus sign presumably offsets a high-to-low pressure ordering - confirm.
    hus_vInt = xr.DataArray(
        data=-column_integral / plev[0],
        dims=['time', 'lat', 'lon'],
        coords={'time': hus.time.data, 'lat': hus.lat.data, 'lon': hus.lon.data},
        attrs={'units': hus.attrs.get('units', 'g/kg')},
        )
    return hus_vInt.isel(time=0), hus_vInt.mean(dim='time', keep_attrs=True)

In [None]:
# Time series of the cos(latitude)-weighted spatial mean of hus_vInt.
aWeights = np.cos(np.deg2rad(hus.lat))
f, ax = plt.subplots(figsize=(15, 4))

hus_vInt.weighted(aWeights).mean(
    dim=('lat', 'lon'), keep_attrs=True
).plot(ax=ax, label='spatial mean hus')
ax.set_title(f'spatial mean, massweighted vertically integrated humidity, model:{model} exp:{experiment}')
plt.tight_layout()
plt.legend()

#hus_vInt_sMean= hus_vInt.weighted(aWeights).mean(dim=('lat','lon'), keep_attrs=True)

In [None]:
# as function
def calc_hus_sMean(hus):
    """Return the area-weighted spatial mean of column humidity per time step.

    The humidity is integrated over pressure with Simpson's rule, divided by
    the first pressure level (pressure-weighted column mean), and then
    averaged over `lat` and `lon` with cos(latitude) weights.

    Parameters
    ----------
    hus : xarray.DataArray
        Humidity with `time`, `plev`, `lat` and `lon` coordinates, with
        pressure on axis 1, i.e. ordered (time, plev, lat, lon).

    Returns
    -------
    xarray.DataArray
        One value per time step.

    Notes
    -----
    The division by ``plev[0]`` was missing here although the notebook cell
    this function wraps, and `get_hus_snapshot_tMean`, both apply it; without
    it the result was larger by a factor of ``plev[0]``. Missing values are
    treated as zero humidity. The result keeps the units of `hus` (falling
    back to 'g/kg', the units this notebook assigns) instead of 'mm/day'.
    """
    import scipy.integrate  # local import so the submodule is guaranteed to be loaded

    aWeights = np.cos(np.deg2rad(hus.lat))
    plev = hus.plev.data
    filled = hus.fillna(0)

    # `even` was deprecated in SciPy 1.11 and later removed; keep the original
    # 'last' treatment where it is still accepted, otherwise use the default.
    try:
        column_integral = scipy.integrate.simpson(filled, x=plev, axis=1, even='last')
    except TypeError:
        column_integral = scipy.integrate.simpson(filled, x=plev, axis=1)

    # The minus sign presumably offsets a high-to-low pressure ordering - confirm.
    hus_vInt = xr.DataArray(
        data=-column_integral / plev[0],
        dims=['time', 'lat', 'lon'],
        coords={'time': hus.time.data, 'lat': hus.lat.data, 'lon': hus.lon.data},
        attrs={'units': hus.attrs.get('units', 'g/kg')},
        )
    return hus_vInt.weighted(aWeights).mean(dim=('lat', 'lon'), keep_attrs=True)

## saving

In [None]:
# Optional export of example fields to NetCDF; nothing happens while `save` is False.
# NOTE(review): 'hus_day' stores the first time step of `hus` itself, while
# 'hus_tMean' is derived from the vertically integrated `hus_vInt` - confirm
# that this mix is intended.
# NOTE(review): the file name has no separator between 'examples' and the
# experiment name.
save = False
if save:
    folder = f'/g/data/k10/cb4968/cmip5/{model}'
    os.makedirs(folder, exist_ok=True)

    fileName = f'{model}_hus_examples{experiment}.nc'
    path = f'{folder}/{fileName}'
    # remove an earlier output first so the file is written from scratch
    if os.path.exists(path):
        os.remove(path)

    examples = xr.Dataset({
        'hus_day': hus.isel(time=0),
        'hus_tMean': hus_vInt.mean(dim='time', keep_attrs=True),
    })
    examples.to_netcdf(path)

In [None]:
# Optional export of the first four time steps of `hus` as a small test file
# ("for local" use); nothing happens while `save` is False.
# NOTE(review): this cell writes under .../data/cmip5/ whereas the other save
# cells use .../cmip5/ - confirm which folder is intended.
save = False
if save:
    folder = f'/g/data/k10/cb4968/data/cmip5/{model}'
    os.makedirs(folder, exist_ok=True)

    fileName = f'{model}_hus4_{experiment}.nc'
    path = f'{folder}/{fileName}'
    # remove an earlier output first so the file is written from scratch
    if os.path.exists(path):
        os.remove(path)

    first_four = xr.Dataset({'hus_4days': hus.isel(time=slice(0, 4))})
    first_four.to_netcdf(path)

In [None]:
# Optional export of the area-weighted spatial mean of `hus_vInt` per time
# step; nothing happens while `save` is False. Relies on `aWeights` from the
# spatial-mean plotting cell.
# NOTE(review): the file name has no separator between 'sMean' and the
# experiment name.
save = False
if save:
    folder = f'/g/data/k10/cb4968/cmip5/{model}'
    os.makedirs(folder, exist_ok=True)
    fileName = f'{model}_hus_sMean{experiment}.nc'
    path = f'{folder}/{fileName}'
    # remove an earlier output first so the file is written from scratch
    if os.path.exists(path):
        os.remove(path)

    spatial_mean = hus_vInt.weighted(aWeights).mean(dim=('lat', 'lon'), keep_attrs=True)
    xr.Dataset({'hus_sMean': spatial_mean}).to_netcdf(path)