# Making a nice table 

In [1]:
import xarray as xr
xr.set_options(display_style='html')
import intake
import cftime
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import numpy as np
import pandas as pd
import datetime
import seaborn as sns

In [2]:
def global_mean(ds):
    """Return the cos(latitude)-weighted mean of ``ds`` along its latitude dimension.

    The latitude coordinate is looked up under the name ``'lat'`` first and
    ``'latitude'`` second. Its values are interpreted as degrees and converted
    to weights ``cos(lat)``. Only the latitude dimension is reduced; every other
    dimension of ``ds`` is left in place for the caller to average.

    Parameters
    ----------
    ds
        Object supporting ``name in ds``, ``ds[name]`` and
        ``ds.weighted(weights).mean(dim)`` (presumably an xarray Dataset or
        DataArray, judging by the ``weighted`` call).

    Returns
    -------
    Whatever ``ds.weighted(weights).mean(<latitude name>)`` returns.

    Raises
    ------
    ValueError
        If ``ds`` contains neither ``'lat'`` nor ``'latitude'``.
    """
    for la in ('lat', 'latitude'):
        if la in ds:
            break
    else:
        # Previously this path crashed with an unrelated AttributeError /
        # UnboundLocalError; fail with a message that names the real problem.
        raise ValueError(
            "global_mean: ds has no 'lat' or 'latitude' coordinate to weight by"
        )

    weights = np.cos(np.deg2rad(ds[la]))
    return ds.weighted(weights).mean(la)

In [4]:
# intake-esm catalog to read from.
# Alternative catalog locations (kept for reference):
#   https://storage.googleapis.com/cmip6/pangeo-cmip6.json
#   /mnt/craas1-ns9989k-geo4992/data/cmip6.json
cat_url = '/mnt/craas2-ns9988k/data/catalogs/cmip6.json'
col = intake.open_esm_datastore(cat_url)

# Variables to tabulate and the models to compare.
varl = ['tas', 'clt']
models = ['CESM2', 'CanESM5-1', 'EC-Earth3-Veg-LR', 'GISS-E2-2-H', 'E3SM-1-0']

# Narrow the catalog down to monthly ('Amon') historical output of the first
# ensemble member for the chosen models and variables.
cat = col.search(
    source_id=models,
    experiment_id=['historical'],
    table_id=['Amon'],
    variable_id=varl,
    member_id=['r1i1p1f1'],
)
cat.df.head()


Unnamed: 0,variable_id,table_id,source_id,experiment_id,member_id,grid_label,time_range,activity_id,institution_id,version,path,dcpp_init_year
0,tas,Amon,CanESM5-1,historical,r1i1p1f1,gn,185001-201412,CMIP,CCCma,v20190429,/mnt/craas2-ns9988k-dl-ns9560k/ESGF/CMIP6/CMIP...,
1,clt,Amon,E3SM-1-0,historical,r1i1p1f1,gr,185001-187412,CMIP,E3SM-Project,v20190913,/mnt/craas2-ns9988k-dl-ns9560k/ESGF/CMIP6/CMIP...,
2,clt,Amon,E3SM-1-0,historical,r1i1p1f1,gr,187501-189912,CMIP,E3SM-Project,v20190913,/mnt/craas2-ns9988k-dl-ns9560k/ESGF/CMIP6/CMIP...,
3,clt,Amon,E3SM-1-0,historical,r1i1p1f1,gr,190001-192412,CMIP,E3SM-Project,v20190913,/mnt/craas2-ns9988k-dl-ns9560k/ESGF/CMIP6/CMIP...,
4,clt,Amon,E3SM-1-0,historical,r1i1p1f1,gr,192501-194912,CMIP,E3SM-Project,v20190913,/mnt/craas2-ns9988k-dl-ns9560k/ESGF/CMIP6/CMIP...,


In [5]:
# Which of the requested models actually have entries in the search result?
pd.unique(cat.df['source_id'])

array(['CanESM5-1', 'E3SM-1-0', 'EC-Earth3-Veg-LR', 'GISS-E2-2-H',
       'CESM2'], dtype=object)

In [6]:
# Catalog columns used to group entries into datasets; the dataset keys are
# these values joined with '.'.
aggregation = cat.esmcat.aggregation_control
aggregation.groupby_attrs = [
    'activity_id',
    'experiment_id',
    'source_id',
    'table_id',
    'grid_label',
]
aggregation.groupby_attrs

['activity_id', 'experiment_id', 'source_id', 'table_id', 'grid_label']

### Create dictionary from the list of datasets we found
- This step may take several minutes so be patient!

In [7]:
# Load every group of catalog entries into a dictionary of datasets.
# `xarray_open_kwargs` replaces the deprecated `zarr_kwargs` alias; the options
# are forwarded to xarray when opening each file, and `use_cftime=True` asks it
# to decode time axes into cftime objects.
dset_dict = cat.to_dataset_dict(xarray_open_kwargs={'use_cftime': True})


--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.experiment_id.source_id.table_id.grid_label'


  var = coder.decode(var, name=name)
  var = coder.decode(var, name=name)


In [8]:
# Show the dataset keys that were produced.
list(dset_dict)

['CMIP.historical.CanESM5-1.Amon.gn',
 'CMIP.historical.GISS-E2-2-H.Amon.gn',
 'CMIP.historical.CESM2.Amon.gn',
 'CMIP.historical.E3SM-1-0.Amon.gr',
 'CMIP.historical.EC-Earth3-Veg-LR.Amon.gr']

### Simplify keys: keep only the model name (`source_id`)

In [9]:
# Re-key the datasets by model name only. The original keys look like
# 'activity_id.experiment_id.source_id.table_id.grid_label', so the model
# (source_id) is the third dot-separated field.
ds_dict = {}
for k, dset in dset_dict.items():
    mod = k.split('.')[2]
    ds_dict[mod] = dset

## Compute global mean (replace with whatever you want to calculate)

In [12]:
# Inspect one dataset (the last one in the dictionary) as a sanity check.
# The key is looked up explicitly so this cell does not depend on a loop
# variable left behind by another cell.
example_model = list(ds_dict)[-1]
ds_dict[example_model]

Unnamed: 0,Array,Chunk
Bytes,30.94 kiB,16 B
Shape,"(1980, 2)","(1, 2)"
Dask graph,1980 chunks in 336 graph layers,1980 chunks in 336 graph layers
Data type,object numpy.ndarray,object numpy.ndarray
"Array Chunk Bytes 30.94 kiB 16 B Shape (1980, 2) (1, 2) Dask graph 1980 chunks in 336 graph layers Data type object numpy.ndarray",2  1980,

Unnamed: 0,Array,Chunk
Bytes,30.94 kiB,16 B
Shape,"(1980, 2)","(1, 2)"
Dask graph,1980 chunks in 336 graph layers,1980 chunks in 336 graph layers
Data type,object numpy.ndarray,object numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.50 kiB,2.50 kiB
Shape,"(160, 2)","(160, 2)"
Dask graph,1 chunks in 821 graph layers,1 chunks in 821 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 2.50 kiB 2.50 kiB Shape (160, 2) (160, 2) Dask graph 1 chunks in 821 graph layers Data type float64 numpy.ndarray",2  160,

Unnamed: 0,Array,Chunk
Bytes,2.50 kiB,2.50 kiB
Shape,"(160, 2)","(160, 2)"
Dask graph,1 chunks in 821 graph layers,1 chunks in 821 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,5.00 kiB,5.00 kiB
Shape,"(320, 2)","(320, 2)"
Dask graph,1 chunks in 821 graph layers,1 chunks in 821 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 5.00 kiB 5.00 kiB Shape (320, 2) (320, 2) Dask graph 1 chunks in 821 graph layers Data type float64 numpy.ndarray",2  320,

Unnamed: 0,Array,Chunk
Bytes,5.00 kiB,5.00 kiB
Shape,"(320, 2)","(320, 2)"
Dask graph,1 chunks in 821 graph layers,1 chunks in 821 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,386.72 MiB,200.00 kiB
Shape,"(1, 1980, 160, 320)","(1, 1, 160, 320)"
Dask graph,1980 chunks in 496 graph layers,1980 chunks in 496 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 386.72 MiB 200.00 kiB Shape (1, 1980, 160, 320) (1, 1, 160, 320) Dask graph 1980 chunks in 496 graph layers Data type float32 numpy.ndarray",1  1  320  160  1980,

Unnamed: 0,Array,Chunk
Bytes,386.72 MiB,200.00 kiB
Shape,"(1, 1980, 160, 320)","(1, 1, 160, 320)"
Dask graph,1980 chunks in 496 graph layers,1980 chunks in 496 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,386.72 MiB,200.00 kiB
Shape,"(1, 1980, 160, 320)","(1, 1, 160, 320)"
Dask graph,1980 chunks in 496 graph layers,1980 chunks in 496 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 386.72 MiB 200.00 kiB Shape (1, 1980, 160, 320) (1, 1, 160, 320) Dask graph 1980 chunks in 496 graph layers Data type float32 numpy.ndarray",1  1  320  160  1980,

Unnamed: 0,Array,Chunk
Bytes,386.72 MiB,200.00 kiB
Shape,"(1, 1980, 160, 320)","(1, 1, 160, 320)"
Dask graph,1980 chunks in 496 graph layers,1980 chunks in 496 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [13]:
# One number per model and variable: the latitude-weighted mean over the
# selected period, then averaged over all remaining dimensions.
ds_vals_mod_var = {}
for mod, _ds in ds_dict.items():
    print(mod)
    period_mean = global_mean(_ds.sel(time=slice('1990', '2015'))).mean()
    # Variables a model does not provide are recorded as NaN.
    ds_vals_mod_var[mod] = {
        v: float(period_mean[v].values) if v in period_mean else np.nan
        for v in varl
    }
            

CanESM5-1
GISS-E2-2-H
CESM2
E3SM-1-0
EC-Earth3-Veg-LR


  x = np.divide(x1, x2, out)


In [14]:
# For every model and variable, compute several statistics over time of the
# zonal mean, each reduced to a single number with the latitude-weighted mean.
ds_vals_mod_var_meas = {}
for mod, _ds in ds_dict.items():
    # Select the period and take the zonal mean once; all statistics start here.
    _zonal = _ds.sel(time=slice('1990', '2015')).mean('lon')
    meas = {
        'mean': global_mean(_zonal.mean('time')),
        'median': global_mean(_zonal.median('time')),
        'std': global_mean(_zonal.std('time')),
    }

    ds_vals_mod_var_meas[mod] = {}
    for v in varl:
        ds_vals_mod_var_meas[mod][v] = {}
        for m in meas:
            # NaN marks "variable missing" as well as "could not be reduced".
            val = np.nan
            if v in meas[m]:
                try:
                    val = float(meas[m][v].squeeze().values)
                except Exception as err:
                    # Stay best-effort, but report the failure and let
                    # KeyboardInterrupt / SystemExit propagate (a bare
                    # `except:` would swallow those too).
                    print(f'{mod} / {v} / {m}: could not reduce to a scalar ({err!r}); storing NaN')
            ds_vals_mod_var_meas[mod][v][m] = val
            

## Extract units

In [15]:
# Take each variable's units from the first loaded model that provides that
# variable, instead of requiring one particular model to contain all of them.
# This assumes all models report a variable in the same units - TODO confirm.
# A variable without a `units` attribute (or absent from every model) gets ''.
u_dic = {
    v: next(
        (ds[v].attrs.get('units', '') for ds in ds_dict.values() if v in ds),
        '',
    )
    for v in varl
}
units = pd.DataFrame.from_dict(u_dic, orient='index', columns=['units'])

units

Unnamed: 0,units
tas,K
clt,%


In [16]:
import pandas as pd

In [17]:
# Table of values: one row per variable, one column per model, with the units
# column placed first.
model_columns = pd.DataFrame.from_dict(ds_vals_mod_var)
df_nice = pd.concat([units, model_columns], axis=1)

# Show floats with two decimals and thousands separators.
pd.options.display.float_format = '{:20,.2f}'.format
display(df_nice)

Unnamed: 0,units,CanESM5-1,GISS-E2-2-H,CESM2,E3SM-1-0,EC-Earth3-Veg-LR
tas,K,287.65,,287.94,287.47,287.54
clt,%,,,69.74,67.78,64.1


In [18]:
# One small frame per model (rows: statistic, columns: variable), each tagged
# with the model name so they can be stacked afterwards.
ls_meas = [
    pd.DataFrame.from_dict(per_variable).assign(model=model_name)
    for model_name, per_variable in ds_vals_mod_var_meas.items()
]

In [19]:
# Stack the per-model frames and index the result by (statistic, model).
# After reset_index() the statistic names live in the column called 'index'.
df_out = (
    pd.concat(ls_meas)
    .reset_index()
    .sort_values('index')
    .set_index(['index', 'model'])
)

In [20]:
# Two-decimal float formatting for the table shown below.
pd.options.display.float_format = '{:20,.2f}'.format

df_out

Unnamed: 0_level_0,Unnamed: 1_level_0,tas,clt
index,model,Unnamed: 2_level_1,Unnamed: 3_level_1
mean,CanESM5-1,287.65,
mean,GISS-E2-2-H,,
mean,CESM2,287.94,69.74
mean,E3SM-1-0,287.47,67.78
mean,EC-Earth3-Veg-LR,287.54,64.1
median,CanESM5-1,287.47,
median,GISS-E2-2-H,,
median,CESM2,287.92,70.04
median,E3SM-1-0,287.35,68.22
median,EC-Earth3-Veg-LR,287.29,64.48


In [21]:
# Same table with variables as rows and (statistic, model) as columns.
df_out.transpose()


index,mean,mean,mean,mean,mean,median,median,median,median,median,std,std,std,std,std
model,CanESM5-1,GISS-E2-2-H,CESM2,E3SM-1-0,EC-Earth3-Veg-LR,CanESM5-1,GISS-E2-2-H,CESM2,E3SM-1-0,EC-Earth3-Veg-LR,CanESM5-1,GISS-E2-2-H,CESM2,E3SM-1-0,EC-Earth3-Veg-LR
tas,287.65,,287.94,287.47,287.54,287.47,,287.92,287.35,287.29,3.7,,3.47,3.61,3.65
clt,,,69.74,67.78,64.1,,,70.04,68.22,64.48,,,5.73,5.48,6.35
