## CMIP5 - precipitation extremes and aggregation

In [None]:
import xarray as xr
import numpy as np
import skimage.measure as skm

import os
import timeit
from datetime import date

## functions

### regridder

In [None]:
# Reference dataset: FGOALS-g2 daily precipitation (historical, r1i1p1).
# One day of its 30S-30N grid is kept in `ds_regrid`; `regridder` reads the
# lat/lon coordinates of that dataset as the common target grid.
# NOTE(review): `intake` is not imported in the visible import cell — confirm
# it is made available elsewhere before this cell runs.
ds_dict = (
    intake.cat.nci['esgf']
    .cmip5
    .search(
        model_id='FGOALS-g2',
        experiment='historical',
        time_frequency='day',
        realm='atmos',
        ensemble='r1i1p1',
        variable='pr',
    )
    .to_dataset_dict()
)

# use the last dataset returned by the catalogue search
ds_regrid = list(ds_dict.values())[-1].sel(
    time='1970-01-01',
    lon=slice(0, 360),
    lat=slice(-30, 30),
)


def regridder(ds_orig):
    """Interpolate ``ds_orig`` onto the lat/lon grid of the global ``ds_regrid``.

    A new bilinear, longitude-periodic ``xe.Regridder`` is built on every call
    from the grid of ``ds_orig`` to the target grid, and then applied to
    ``ds_orig``.

    NOTE(review): ``xe`` is not imported in the visible import cell
    (presumably ``import xesmf as xe``) — confirm.

    Parameters
    ----------
    ds_orig : dataset to interpolate (passed unchanged to ``xe.Regridder``).

    Returns
    -------
    The result of applying the regridder to ``ds_orig``.
    """
    # target grid holds only the coordinates of the reference dataset
    target_grid = xr.Dataset({
        "lat": (["lat"], ds_regrid.lat.data),
        "lon": (["lon"], ds_regrid.lon.data),
    })
    interpolate = xe.Regridder(ds_orig, target_grid, 'bilinear', periodic=True)
    return interpolate(ds_orig)

### pr_examples

In [None]:
def get_pr_examples(var3d):
    """Extract example precipitation fields and optionally write them to disk.

    Two fields are derived from ``var3d``: the first time step (``pr_day``)
    and the mean over the ``time`` dimension (``pr_tMean``).

    Reads the globals ``save``, ``model``, ``experiment`` and ``folder``.
    When ``save`` is truthy, any existing
    ``<folder>/<model>_pr_example_<experiment>.nc`` is deleted and the two
    fields are written there as a netCDF file. Returns ``None``.
    """
    examples = {
        # snapshot of the first daily scene
        'pr_day': var3d.isel(time=0),
        # time mean of precipitation
        'pr_tMean': var3d.mean(dim='time', keep_attrs=True),
    }

    if not save:
        return

    out_name = model + '_pr_example_' + experiment + '.nc'
    out_path = folder + '/' + out_name
    # remove a previous output first so the file is written from scratch
    if os.path.exists(out_path):
        os.remove(out_path)

    xr.Dataset(examples).to_netcdf(out_path)
    

### rx1day, rx5day 

In [None]:
def get_rxday(var3d):
    """Compute yearly maxima of 1-day and 5-day precipitation and optionally save.

    ``rx1day`` is the maximum of ``var3d`` within each ``'Y'`` resampling bin.
    ``rx5day`` is the yearly maximum of the mean over ``'5D'`` resampling bins.

    NOTE(review): the ETCCDI Rx5day index is usually defined from a running
    5-day precipitation total; here 5-day resampling bins and their mean are
    used — confirm this is intended.

    Reads the globals ``save``, ``model``, ``experiment`` and ``folder``.
    When ``save`` is truthy, any existing
    ``<folder>/<model>_pr_rxday_<experiment>.nc`` is deleted and both fields
    are written there. Returns ``None``.
    """
    def yearly_max(field):
        # maximum within each yearly resampling bin
        return field.resample(time='Y').max(dim='time')

    rx1day = yearly_max(var3d)
    rx5day = yearly_max(var3d.resample(time='5D').mean(dim='time'))

    if not save:
        return

    out_name = model + '_pr_rxday_' + experiment + '.nc'
    out_path = folder + '/' + out_name
    if os.path.exists(out_path):
        os.remove(out_path)

    xr.Dataset({'rx1day': rx1day, 'rx5day': rx5day}).to_netcdf(out_path)

### extreme percentiles

In [None]:
def get_high_percentiles(var3d):
    """Compute the 97th, 99th and 99.9th spatial percentiles and optionally save.

    Each percentile is taken over the ``lat`` and ``lon`` dimensions of
    ``var3d`` (attributes kept), and the ``quantile`` coordinate is dropped
    from the result.

    Reads the globals ``save``, ``model``, ``experiment`` and ``folder``.
    When ``save`` is truthy, any existing
    ``<folder>/<model>_pr_extreme_<experiment>.nc`` is deleted and the three
    fields are written there. Returns ``None``.
    """
    def spatial_quantile(q):
        # percentile over the horizontal domain, without the scalar coordinate
        value = var3d.quantile(q, dim=('lat', 'lon'), keep_attrs=True)
        return value.drop('quantile', dim=None)

    pr_97 = spatial_quantile(0.97)
    pr_99 = spatial_quantile(0.99)
    pr_999 = spatial_quantile(0.999)

    if not save:
        return

    out_name = model + '_pr_extreme_' + experiment + '.nc'
    out_path = folder + '/' + out_name
    if os.path.exists(out_path):
        os.remove(out_path)

    xr.Dataset({'pr_97': pr_97, 'pr_99': pr_99, 'pr_999': pr_999}).to_netcdf(out_path)
    

### convective object properties

In [None]:
# objects that touch across lon=0, lon=360 boundary are the same object, array(lat, lon)
def connect_boundary(array):
    """Merge labels that meet across the first/last column, in place.

    For every row whose first and last column both hold a positive label,
    all cells carrying the larger of the two labels are relabelled with the
    smaller one. ``array`` is modified in place; nothing is returned.
    """
    n_rows = np.shape(array)[0]
    for row in range(n_rows):
        west, east = array[row, 0], array[row, -1]
        if not (west > 0 and east > 0):
            continue
        if west == east:
            # already the same object
            continue
        array[array == max(west, east)] = min(west, east)

In [None]:
def get_object_props(var3d, aream, latm, lonm, lat, lon): #(lon in 0-360)
    """Compute per-object mean precipitation and area, and optionally save them.

    Objects are connected regions (8-connectivity) of grid boxes whose value
    is at or above the threshold, where the threshold is the time mean of the
    97th spatial percentile of ``var3d``. Objects touching both the first and
    last longitude column are merged by ``connect_boundary``.

    Reads the globals ``n_days`` (number of time steps processed), ``save``,
    ``model``, ``experiment`` and ``folder``.

    Parameters
    ----------
    var3d : field with ``time``, ``lat`` and ``lon`` dimensions.
    aream : grid-box areas; multiplied against a (lat, lon, n_objects) mask,
        so it must broadcast to that shape (callers pass (lat, lon, 1)).
    latm, lonm, lat, lon : accepted for signature compatibility; unused here.

    Returns
    -------
    None. When ``save`` is truthy, any existing
    ``<folder>/<model>_pr_objects_<experiment>.nc`` is deleted and ``o_pr``
    and ``o_area`` are written there.
    """
    conv_threshold = var3d.quantile(0.97, dim=('lat', 'lon')).mean(dim=('time'))

    pr_chunks, area_chunks = [], []
    for i in range(n_days):  # len(precip.time)):
        scene = var3d.isel(time=i)
        pr_day = np.expand_dims(scene, axis=2)

        L = skm.label(scene.where(scene >= conv_threshold, 0) > 0, background=0, connectivity=2)
        connect_boundary(L)
        labels = np.unique(L)[1:]

        # A scene without any object contributes nothing; np.stack would
        # raise on an empty list.
        if len(labels) == 0:
            continue

        obj3d = np.stack([(L == label) for label in labels], axis=2) * 1

        obj_area = np.sum(obj3d * aream, axis=(0, 1))
        pr_chunks.append(np.sum(obj3d * pr_day * aream, axis=(0, 1)) / obj_area)
        area_chunks.append(obj_area)

    # concatenate once instead of growing arrays inside the loop
    o_pr = np.concatenate(pr_chunks) if pr_chunks else np.array([])
    o_area = np.concatenate(area_chunks) if area_chunks else np.array([])

    o_pr = xr.DataArray(o_pr, attrs=dict(description="area weighted mean pr in object", units="mm/day"))
    o_area = xr.DataArray(o_area, attrs=dict(description="area of object", units="km$^2$"))

    if save:
        fileName = model + '_pr_objects_' + experiment + '.nc'
        path = folder + '/' + fileName
        if os.path.exists(path):
            os.remove(path)

        xr.Dataset({'o_pr': o_pr, 'o_area': o_area}).to_netcdf(path)
    
    

### aggregation index

In [None]:
# Great circle distance (Haversine formula) (lon in 0-360)
def hav_dist(lat1, lon1, lat2, lon2, R):
    """Return the great-circle distance between two points (Haversine formula).

    Parameters
    ----------
    lat1, lon1, lat2, lon2 : scalars or arrays in degrees; arrays must be
        broadcastable against each other.
    R : sphere radius; the result is in the same unit as ``R``.

    Returns
    -------
    ``2 * R * arcsin(sqrt(h))`` with ``h`` the haversine of the central angle.
    """
    lat1 = np.deg2rad(lat1)
    lon1 = np.deg2rad(lon1 - 180)
    lat2 = np.deg2rad(lat2)
    lon2 = np.deg2rad(lon2 - 180)

    # Haversine of the central angle theta: h = sin(theta / 2)^2
    h = np.sin((lat2 - lat1) / 2) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin((lon2 - lon1) / 2) ** 2

    # Floating-point rounding can push h marginally outside [0, 1] for
    # (nearly) antipodal points, which would make arcsin return NaN.
    h = np.clip(h, 0.0, 1.0)

    # theta = (great circle distance) / R  ->  d = 2 * R * arcsin(sqrt(h))
    return 2 * R * np.arcsin(np.sqrt(h))

In [None]:
def calculate_rome(array, labels, aream, latm, lonm, lat, lon, R):
    """Return the scene-mean ROME over all unique pairs of labelled objects.

    For every unique pair of objects the contribution is
    ``A_large + min(A_small, (A_small / d)**2)``, where ``A_large`` and
    ``A_small`` are the larger and smaller object area and ``d`` is the
    shortest great-circle distance between the two objects (from
    ``hav_dist``). With a single object the result is that object's area;
    with no objects the result is NaN.

    NOTE(review): the pair formula appears to follow the ROME definition of
    Retsch et al. (2020) — confirm against the paper.

    Parameters
    ----------
    array : 2-D (lat, lon) integer label field.
    labels : 1-D sequence of the labels in ``array`` to include.
    aream : grid-box areas, shape (lat, lon) or (lat, lon, 1).
    latm, lonm : mesh-grid latitude / longitude in degrees, shape (lat, lon)
        or (lat, lon, k); only the first slice along the last axis is used.
    lat, lon : 1-D latitude / longitude coordinates in degrees.
    R : sphere radius passed to ``hav_dist``.

    Returns
    -------
    Mean of the pair contributions (or the single object's area, or NaN).
    """
    grid_shape = np.shape(array)
    n_labels = len(labels)

    if n_labels == 0:
        # no objects: same value np.mean([]) would give, without the warning
        return np.nan

    # 2-D areas so that boolean / integer indexing with (row, col) works
    area2d = np.asarray(aream).reshape(grid_shape)

    if n_labels == 1:
        return np.sum(area2d[array == labels[0]])

    lat = np.asarray(lat)
    lon = np.asarray(lon)
    latm = np.asarray(latm)
    lonm = np.asarray(lonm)
    # (lat, lon, 1) coordinate grids; broadcasting replaces explicit tiling
    lat_grid = latm[:, :, 0:1] if latm.ndim == 3 else latm[:, :, np.newaxis]
    lon_grid = lonm[:, :, 0:1] if lonm.ndim == 3 else lonm[:, :, np.newaxis]

    # grid-box indices and total area of every object, computed once
    pixels = [np.nonzero(array == label) for label in labels]
    areas = [np.sum(area2d[rows, cols]) for rows, cols in pixels]

    rome_pairs = []
    for idx in range(n_labels - 1):
        rows_i, cols_i = pixels[idx]

        # distance from each grid box of object i (last axis) to every point
        # in the domain (first two axes)
        p_hav = hav_dist(
            lat[rows_i][np.newaxis, np.newaxis, :],
            lon[cols_i][np.newaxis, np.newaxis, :],
            lat_grid,
            lon_grid,
            R,
        )

        # shortest distance from object i to every point in the domain
        p_dist = np.amin(p_hav, axis=2)

        for jdx in range(idx + 1, n_labels):
            rows_j, cols_j = pixels[jdx]

            large_area = np.maximum(areas[idx], areas[jdx])
            small_area = np.minimum(areas[idx], areas[jdx])

            # shortest distance between object i and object j
            shortest = np.amin(p_dist[rows_j, cols_j])

            rome_pairs.append(large_area + np.minimum(small_area, (small_area / shortest) ** 2))

    return np.mean(rome_pairs)
                    

In [None]:
def get_aggregation_index(var3d, aream, latm, lonm, lat, lon, R, n_days, model):
    """Compute per-scene aggregation metrics and optionally save them.

    For each of the first ``n_days`` time steps, grid boxes at or above the
    threshold (time mean of the 97th spatial percentile of ``var3d``) are
    labelled into objects (8-connectivity, merged across the longitude
    boundary by ``connect_boundary``). Per scene the following are stored:

    * ``scene_oNumber`` - number of objects,
    * ``scene_areaf``   - cos(lat)-weighted fraction of boxes at/above threshold,
    * ``ROME``          - ``calculate_rome`` over all objects,
    * ``ROME_n``        - ``calculate_rome`` over the ``n = 8`` largest objects.

    Reads the globals ``save``, ``experiment`` and ``folder``.

    Parameters
    ----------
    var3d : field with ``time``, ``lat`` and ``lon`` dimensions.
    aream, latm, lonm, lat, lon, R : forwarded to ``calculate_rome``;
        ``aream`` is also multiplied against a (lat, lon, n_objects) mask.
    n_days : number of time steps to process.
    model : model name used in the output file name.

    Returns
    -------
    None. When ``save`` is truthy, any existing
    ``<folder>/<model>_pr_aggScene_<experiment>.nc`` is deleted and the four
    series are written there.
    """
    conv_threshold = var3d.quantile(0.97, dim=('lat', 'lon')).mean(dim=('time'))

    n = 8
    aWeights = np.cos(np.deg2rad(var3d.lat))
    aWeights.name = "weights"

    scene_oNumber, scene_areaf, ROME, ROME_n = [], [], [], []
    for i in range(n_days):  # len(precip.time)):
        scene = var3d.isel(time=i)
        conv_day = (scene.where(scene >= conv_threshold, 0) > 0) * 1

        L = skm.label(conv_day, background=0, connectivity=2)
        connect_boundary(L)

        scene_areaf.append(conv_day.weighted(aWeights).mean(dim=('lat', 'lon')))

        labels_all = np.unique(L)[1:]
        scene_oNumber.append(len(labels_all))

        rome_all = calculate_rome(L, labels_all, aream, latm, lonm, lat, lon, R)
        ROME.append(rome_all)

        if len(labels_all) <= n:
            # Every object is among the n largest, so the value is the one
            # already computed (the original referenced an undefined name
            # `labels` on this branch).
            ROME_n.append(rome_all)
        else:
            # labels of the n largest objects by area
            obj3d = np.stack([(L == label) for label in labels_all], axis=2) * 1
            o_areaL = np.sum(obj3d * aream, axis=(0, 1))
            labels_n = labels_all[o_areaL.argsort()[-n:]]
            ROME_n.append(calculate_rome(L, labels_n, aream, latm, lonm, lat, lon, R))

    scene_oNumber = xr.DataArray(scene_oNumber)
    scene_areaf = xr.DataArray(scene_areaf, attrs=dict(description="areafraction of convection from percentile threshold"))
    ROME = xr.DataArray(ROME)
    ROME_n = xr.DataArray(ROME_n, attrs=dict(description="ROME calculated from n= {} largest objects in scene".format(n)))

    if save:
        fileName = model + '_pr_aggScene_' + experiment + '.nc'
        path = folder + '/' + fileName
        if os.path.exists(path):
            os.remove(path)

        xr.Dataset({'scene_oNumber': scene_oNumber, 'ROME': ROME, 'ROME_n': ROME_n, 'scene_areaf': scene_areaf}).to_netcdf(path)
    
    

### tas_examples

In [None]:
def get_tas_examples(var3d):
    """Extract example ``tas`` fields and optionally write them to disk.

    Two fields are derived from ``var3d``: the first time step (``tas_day``)
    and the mean over the ``time`` dimension (``tas_tMean``).

    Reads the globals ``save``, ``model``, ``experiment`` and ``folder``.
    When ``save`` is truthy, any existing
    ``<folder>/<model>_tas_example_<experiment>.nc`` is deleted and the two
    fields are written there. Returns ``None``.
    """
    examples = {
        # snapshot of the first time step
        'tas_day': var3d.isel(time=0),
        # time mean of tas
        'tas_tMean': var3d.mean(dim='time', keep_attrs=True),
    }

    if not save:
        return

    out_name = model + '_tas_example_' + experiment + '.nc'
    out_path = folder + '/' + out_name
    if os.path.exists(out_path):
        os.remove(out_path)

    xr.Dataset(examples).to_netcdf(out_path)

### hus examples

In [None]:
def get_hus_examples(var3d, model):
    """Extract vertically averaged example ``hus`` fields and optionally save.

    The vertical dimension is named ``lev`` for model ``'IPSL-CM5A-MR'`` and
    ``plev`` for every other model. ``hus_day`` is the first time step
    averaged over that dimension; ``hus_tMean`` is the mean over the vertical
    dimension and ``time``.

    Reads the globals ``save``, ``experiment`` and ``folder``. When ``save``
    is truthy, any existing ``<folder>/<model>_hus_example_<experiment>.nc``
    is deleted and both fields are written there. Returns ``None``.
    """
    vert_dim = 'lev' if model == 'IPSL-CM5A-MR' else 'plev'

    # snapshot of the first daily scene, averaged in the vertical
    hus_day = var3d.isel(time=0).mean(dim=vert_dim, keep_attrs=True)

    # mean over the vertical dimension and time
    hus_tMean = var3d.mean(dim=(vert_dim, 'time'), keep_attrs=True)

    if not save:
        return

    out_name = model + '_hus_example_' + experiment + '.nc'
    out_path = folder + '/' + out_name
    if os.path.exists(out_path):
        os.remove(out_path)

    xr.Dataset({'hus_day': hus_day, 'hus_tMean': hus_tMean}).to_netcdf(out_path)

In [None]:
# Scratch cell: checks that a string equality branch is taken.
# It binds the module-level name `a` and prints 'executes'.
a = 'a'
if a == 'a':
    print('executes')

### hus daily

In [None]:
def get_hus_daily(var3d, lat):
    """Compute the cos(lat)-weighted domain mean of ``hus`` and optionally save.

    The mean is taken over the vertical dimension, ``lat`` and ``lon``. The
    vertical dimension is ``lev`` when the global ``model`` is
    ``'IPSL-CM5A-MR'`` and ``plev`` otherwise.

    Reads the globals ``model``, ``save``, ``experiment`` and ``folder``.
    When ``save`` is truthy, any existing
    ``<folder>/<model>_hus_daily_<experiment>.nc`` is deleted and
    ``hus_sMean`` is written there. Returns ``None``.

    Parameters
    ----------
    var3d : field with a vertical, ``lat`` and ``lon`` dimension.
    lat : latitude in degrees; must support a ``name`` attribute, since the
        weights derived from it are named.
    """
    # area weights proportional to cos(latitude)
    aWeights = np.cos(np.deg2rad(lat))
    aWeights.name = "aWeights"

    vert_dim = 'lev' if model == 'IPSL-CM5A-MR' else 'plev'
    hus_sMean = var3d.weighted(aWeights).mean(dim=(vert_dim, 'lat', 'lon'), keep_attrs=True)

    if not save:
        return

    out_name = model + '_hus_daily_' + experiment + '.nc'
    out_path = folder + '/' + out_name
    if os.path.exists(out_path):
        os.remove(out_path)

    xr.Dataset({'hus_sMean': hus_sMean}).to_netcdf(out_path)

## run

In [None]:
# Run configuration.
# `models` lists the model ids the run loop iterates over; commented-out
# entries are skipped. The trailing notes are the author's remarks per model.
models = [
        # 'IPSL-CM5A-MR', # 1 # hus lev instead of plev
        # 'GFDL-CM3',     # 2
        # 'GISS-E2-H',    # 3
        # 'bcc-csm1-1',   # 4
        # 'CNRM-CM5',     # 5
        # 'CCSM4',        # 6 # hus possibly wrong ensemble
        # 'HadGEM2-AO',   # 7
        # 'BNU-ESM',      # 8 # tas did not work
        'EC-EARTH',     # 9 # tas did not work
        'FGOALS-g2',    # 10 # tas slicing
        'MPI-ESM-MR',   # 11
        'CMCC-CM',      # 12
        'inmcm4',       # 13
        'NorESM1-M',    # 14
        'CanESM2',      # 15 # pr indexing time period
        'MIROC5',       # 16
        'HadGEM2-CC',   # 17
        'MRI-CGCM3',    # 18
        'CESM1-BGC'     # 19
        ]

# Experiment switches read by the run loop; if both are True the rcp85
# settings are applied last and therefore win.
historical = True
rcp85 = False

# Switches for the precipitation metrics (used by the commented-out pr loop).
pr_examples = False
rxday = False
high_percentiles = False
object_props = False
aggregation_index = False

# Switch for the tas metrics (used by the commented-out tas loop).
tas_examples = False

# Switches for the hus metrics computed in the hus run loop.
hus_examples = True
hus_daily = True

# Global flag read inside the get_* functions: write results to netCDF when True.
save = True

In [None]:
# # precipitation extremes and aggregation
# for model in models:
#     start = timeit.default_timer()

#     ensemble = 'r1i1p1'
    
#     if historical:
#         experiment = 'historical'
#         period=slice('1970-01','1999-12') #'1970-01-01','1999-12-31'


#         if model == 'GISS-E2-H':
#             ensemble = 'r6i1p1'


#     if rcp85:
#         experiment = 'rcp85'
#         period=slice('2070-01','2099-12')

#         if model == 'GISS-E2-H':
#             ensemble = 'r2i1p1'



#     ds_dict = intake.cat.nci['esgf'].cmip5.search(
#                                             model_id = model, 
#                                             experiment = experiment,
#                                             time_frequency = 'day', 
#                                             realm = 'atmos', 
#                                             ensemble = ensemble, 
#                                             variable= 'pr').to_dataset_dict()

#     ds_pr = ds_dict[list(ds_dict.keys())[-1]].sel(time=period, lon=slice(0,360),lat=slice(-30,30))
#     precip = regridder(ds_pr).pr*60*60*24
#     precip.attrs['units']= 'mm/day'
#     n_days = 2 #len(precip.time)

#     R = 6371.0 #km
#     lat = precip.lat
#     lon = precip.lon
#     lonm, latm = np.meshgrid(lon, lat)
#     dlon = lon[1]-lon[0]
#     dlat = lat[1]-lat[0]
#     aream = np.cos(np.deg2rad(latm))*np.float64(dlon*dlat*R**2*(np.pi/180)**2)
    
#     aream = np.expand_dims(aream,axis=2)
#     latm = np.expand_dims(latm,axis=2)
#     lonm = np.expand_dims(lonm,axis=2)


#     folder = '/g/data/k10/cb4968/cmip5/' + model
#     os.makedirs(folder, exist_ok=True)


#     if pr_examples:
#         get_pr_examples(precip)

#     if rxday:
#         get_rxday(precip)

#     if high_percentiles:
#         get_high_percentiles(precip)

#     if object_props:
#         get_object_props(precip, aream, latm, lonm, lat, lon)

#     if aggregation_index:    
#         get_aggregation_index(precip, aream, latm, lonm, lat, lon, R, n_days, model)
    


#     stop = timeit.default_timer()
#     print('model: {} took {} minutes to finsih'.format(model, (stop-start)/60))






In [None]:
# tas
# for model in models:
#     start = timeit.default_timer()

#     if historical:
#         experiment = 'historical'
#         period=slice('1970-01','1999-12') #'1970-01-01','1999-12-31'
#         ensemble = 'r1i1p1'

#         if model == 'GISS-E2-H':
#             ensemble = 'r6i1p1'


#     if rcp85:
#         experiment = 'rcp85'
#         period=slice('2070-01','2099-12')

#         if model == 'GISS-E2-H':
#             ensemble = 'r2i1p1'



#     ds_dict = intake.cat.nci['esgf'].cmip5.search(
#                                             model_id = model, 
#                                             experiment = experiment,
#                                             time_frequency = 'mon', 
#                                             realm = 'atmos', 
#                                             ensemble = ensemble, 
#                                             variable= 'tas').to_dataset_dict()

#     ds_tas = ds_dict[list(ds_dict.keys())[-1]].sel(time=period, lon=slice(0,360),lat=slice(-30,30))
#     tas = regridder(ds_tas).tas - 273.15
#     tas.attrs['units']= 'deg(C)'
#     n_days = 2 #len(precip.time)

#     R = 6371.0 #km
#     lat = tas.lat
#     lon = tas.lon
#     lonm, latm = np.meshgrid(lon, lat)
#     dlon = lon[1]-lon[0]
#     dlat = lat[1]-lat[0]
#     aream = np.cos(np.deg2rad(latm))*np.float64(dlon*dlat*R**2*(np.pi/180)**2)
    
#     aream = np.expand_dims(aream,axis=2)
#     latm = np.expand_dims(latm,axis=2)
#     lonm = np.expand_dims(lonm,axis=2)


#     folder = '/g/data/k10/cb4968/cmip5/' + model
#     os.makedirs(folder, exist_ok=True)


#     if tas_examples:
#         get_tas_examples(tas)
    


#     stop = timeit.default_timer()
#     print('model: {} took {} minutes to finsih'.format(model, (stop-start)/60))


In [None]:
# hus
# For every model: load daily specific humidity (hus) for the selected
# experiment and period, regrid it to the common grid, and compute the
# enabled hus metrics. The names bound here (model, experiment, folder,
# n_days, ...) are module-level and are read by the get_* functions.
# NOTE(review): `intake` is not imported in the visible import cell — confirm
# it is made available elsewhere.
for model in models:
    start = timeit.default_timer()

    # Default ensemble member. Set before the experiment switches so it is
    # defined (and not stale from a previous iteration) for every experiment,
    # not only for the historical one.
    ensemble = 'r1i1p1'

    if historical:
        experiment = 'historical'
        period = slice('1970-01', '1999-12')  # '1970-01-01','1999-12-31'

        if model == 'GISS-E2-H':
            ensemble = 'r6i1p1'

    if rcp85:
        experiment = 'rcp85'
        period = slice('2070-01', '2099-12')

        if model == 'GISS-E2-H':
            ensemble = 'r2i1p1'

    ds_dict = intake.cat.nci['esgf'].cmip5.search(
                                            model_id = model,
                                            experiment = experiment,
                                            time_frequency = 'day',
                                            realm = 'atmos',
                                            ensemble = ensemble,
                                            variable= 'hus').to_dataset_dict()

    # last dataset of the search result, restricted to the period and 30S-30N
    ds_hus = ds_dict[list(ds_dict.keys())[-1]].sel(time=period, lon=slice(0,360),lat=slice(-30,30))
    # keep only values below 1e+20; everything else becomes NaN
    # (presumably masks the missing-value fill — confirm)
    ds_hus = ds_hus.where(ds_hus.hus < 1e+20)
    # scale by 1000 and label as g/kg
    hus = regridder(ds_hus).hus*1000
    hus.attrs['units']= 'g/kg'
    n_days = 2 #len(precip.time)

    # grid geometry: mesh-grid coordinates and grid-box areas
    R = 6371.0 #km
    lat = hus.lat
    lon = hus.lon
    lonm, latm = np.meshgrid(lon, lat)
    dlon = lon[1]-lon[0]
    dlat = lat[1]-lat[0]
    aream = np.cos(np.deg2rad(latm))*np.float64(dlon*dlat*R**2*(np.pi/180)**2)

    # add a trailing axis of length one: (lat, lon) -> (lat, lon, 1)
    aream = np.expand_dims(aream,axis=2)
    latm = np.expand_dims(latm,axis=2)
    lonm = np.expand_dims(lonm,axis=2)

    folder = '/g/data/k10/cb4968/cmip5/' + model
    os.makedirs(folder, exist_ok=True)

    if hus_examples:
        get_hus_examples(hus, model)

    if hus_daily:
        get_hus_daily(hus, lat)

    stop = timeit.default_timer()
    print('model: {} took {} minutes to finish'.format(model, (stop-start)/60))

# saving

In [None]:
# prepare for saving
# Wrap the per-scene series in DataArrays carrying description/units metadata.
# NOTE(review): `numberIndex` and `areaf` are not assigned anywhere else in
# the visible source; this looks like a leftover cell — confirm before running.
numberIndex = xr.DataArray(
    data=numberIndex,
    attrs={"description": "Number of objects in scene", "units": "Nb"},
)

areaf = xr.DataArray(
    data=areaf,
    attrs={"description": "areafraction covered by convection in scene", "units": ""},
)

In [None]:
# prepare for saving
# Wrap the per-object series in DataArrays carrying description/units metadata.
# NOTE(review): `o_pr` and `o_area` are only assigned inside get_object_props
# in the visible source; this looks like a leftover cell — confirm before running.
o_pr = xr.DataArray(
    data = o_pr,
    attrs=dict(description="area weighted mean pr in object", units="mm/day"))

# built from o_area (the original passed o_pr here, storing precipitation
# values under the "area of object" description)
o_area = xr.DataArray(
    data = o_area,
    attrs=dict(description="area of object", units="km$^2$"))

In [None]:
# object properties
# Disabled leftover cell: `save` is forced to False, so the branch below never runs.
# NOTE: this rebinds the global `save` flag that the get_* functions read, so
# running this cell also turns off saving inside those functions.
# NOTE(review): o_lat and o_lon are not defined anywhere in the visible source.
save = False
if save:
    fileName = model + '_pr_objects_' + experiment + '.nc'
    path = folder + '/' + fileName
    # remove a previous output before writing the new file
    if os.path.exists(path):
        os.remove(path)

    xr.Dataset({'o_lat': o_lat, 'o_lon': o_lon, 'o_pr': o_pr, 'o_area': o_area}).to_netcdf(path) 

In [None]:
# aggregation index
# Disabled leftover cell: `save` is forced to False, so the branch below never runs.
# NOTE: this rebinds the global `save` flag that the get_* functions read, so
# running this cell also turns off saving inside those functions.
# NOTE(review): scene_oNumber, ROME, ROME_n and scene_areaf are only assigned
# inside get_aggregation_index in the visible source.
save = False
if save:
    fileName = model + '_pr_aggScene_' + experiment + '.nc'
    path = folder + '/' + fileName
    # remove a previous output before writing the new file
    if os.path.exists(path):
        os.remove(path)

    xr.Dataset({'scene_oNumber': scene_oNumber, 'ROME': ROME, 'ROME_n': ROME_n, 'scene_areaf': scene_areaf}).to_netcdf(path) 

## check

In [None]:
#model = 'GFDL-CM3'

In [None]:
#ds_local = xr.open_dataset('/g/data/k10/cb4968/cmip5/' + model + '/' + model + '_pr_example_' + experiment + '.nc')

In [None]:
#ds_local = xr.open_dataset('/g/data/k10/cb4968/cmip5/' + model + '/' + model + '_pr_rxday_' + experiment + '.nc')

In [None]:
#ds_local = xr.open_dataset('/g/data/k10/cb4968/cmip5/' + model + '/' + model + '_pr_extreme_' + experiment + '.nc')

In [None]:
#ds_local = xr.open_dataset('/g/data/k10/cb4968/cmip5/' + model + '/' + model + '_object_props_' + experiment + '.nc')

In [None]:
#ds_local = xr.open_dataset('/g/data/k10/cb4968/cmip5/' + model + '/' + model + '_aggregation_index_' + experiment + '.nc')