In [19]:
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt 
import pandas as pd
import intake 
import pprint
from src.LE_LoadAndMerge import *
import warnings
warnings.filterwarnings("ignore")
import sys  
sys.path.insert(0, '/home/jupyter/InternalVariability/AdaptationAnalysis')
from app.main.src.utils import *
import datetime


In [2]:
def drop_bounds_height(ds):
    """Return ``ds`` without its bounds/height coordinates.

    Coordinates like 'time_bounds' can lead to issues when merging, so every
    coordinate whose name contains ``'_bounds'``, ``'_bnds'`` or ``'height'``
    is removed.

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset to clean. Only the names found in ``ds.coords`` are inspected.

    Returns
    -------
    xarray.Dataset
        The result of ``ds.drop_vars`` applied to the matching names.
    """
    unwanted = ('_bounds', '_bnds', 'height')
    drop_vars = [
        vname for vname in ds.coords
        if any(token in vname for token in unwanted)
    ]
    # ``Dataset.drop`` is deprecated for removing variables; ``drop_vars`` is
    # the supported spelling of the same operation.
    return ds.drop_vars(drop_vars)

In [3]:
# CMIP6 catalog (JSON) hosted on Google Cloud Storage, opened as an
# intake-esm datastore that the searches below query.
url = 'https://storage.googleapis.com/cmip6/pangeo-cmip6.json'
raw_cat = intake.open_esm_datastore(url)

### ACCESS

In [4]:
# Daily 'tas' from ACCESS-ESM1-5 for the historical and ssp585 experiments.
cat = raw_cat.search(
    source_id='ACCESS-ESM1-5',
    experiment_id=['historical', 'ssp585'],
    table_id='day',
    variable_id='tas',
)

In [5]:
# Open every matching catalog entry as an xarray dataset (anonymous access).
dset = cat.to_dataset_dict(
    zarr_kwargs={'consolidated': True},
    storage_options={"anon": True},
);


--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'


In [6]:
# Dataset keys in alphabetical order (iterating a dict yields its keys).
keys = sorted(dset)

In [7]:
# Select each experiment by the experiment_id field of its key
# ('activity_id.institution_id.source_id.experiment_id.table_id.grid_label')
# rather than by its position in the sorted key list, so an unexpected extra
# or missing key cannot silently swap or mislabel the two datasets.
hist = dset[next(k for k in keys if k.split('.')[3] == 'historical')]
future = dset[next(k for k in keys if k.split('.')[3] == 'ssp585')]

In [8]:
# Target point; the longitude is wrapped onto the 0-360 range.
lat = 41.3
lon = (-72.5) % 360

In [9]:
# Reduce both experiments to the grid cell nearest the target point.
hist, future = (
    ds.sel(lat=lat, lon=lon, method='nearest') for ds in (hist, future)
)

In [10]:
# Rechunk both experiments: one ensemble member per chunk, 'time' set to -1.
hist, future = (
    ds.chunk({'member_id': 1, 'time': -1}) for ds in (hist, future)
)

In [11]:
%%time
# Load the future experiment one ensemble member at a time, then stack the
# loaded members back together along 'member_id'.
ds_future = []
for member_id in future.member_id:
    # .load() brings this single member's data into memory
    ds = future.sel(member_id = member_id).load()
    ds_future.append(ds)
ACCESS_future = xr.concat(ds_future,'member_id')

CPU times: user 4min 9s, sys: 1min 19s, total: 5min 28s
Wall time: 2min 56s


In [12]:
%%time
# Load the historical experiment one ensemble member at a time, then stack
# the loaded members back together along 'member_id'.
ds_hist = []
for member_id in hist.member_id:
    # .load() brings this single member's data into memory
    ds = hist.sel(member_id = member_id).load()
    ds_hist.append(ds)
ACCESS_hist = xr.concat(ds_hist,'member_id')

CPU times: user 7min 47s, sys: 2min 18s, total: 10min 6s
Wall time: 5min 25s


In [13]:
# Re-express longitude on the -180..180 range whenever the selected point
# lies outside it (i.e. it is still on the 0-360 convention).
if abs(ACCESS_hist.lon) > 180:
    ACCESS_hist = ACCESS_hist.assign_coords(
        lon=(ACCESS_hist.lon + 180) % 360 - 180
    )

if abs(ACCESS_future.lon) > 180:
    ACCESS_future = ACCESS_future.assign_coords(
        lon=(ACCESS_future.lon + 180) % 360 - 180
    )

In [14]:
# Strip the bounds/height coordinates from both loaded experiments.
ACCESS_hist, ACCESS_future = map(
    drop_bounds_height, (ACCESS_hist, ACCESS_future)
)

In [15]:
# The historical run has 30 members, and every member has data.
ACCESS_hist

In [17]:
# Count the finite 'tas' values along the time axis.
np.isfinite(ACCESS_hist['tas']).sum(dim='time')

In [16]:
# The future run has 10 members, but some of them start with NaNs.
# This may be because time extends out to 2300 - maybe some members don't start until later?
ACCESS_future

In [18]:
# Count the finite 'tas' values along the time axis.
np.isfinite(ACCESS_future['tas']).sum(dim='time')

Once we slice the future run in time, some of the members contain only NaNs.

In [19]:
# Restrict each run to its analysis window:
# historical 1920-2014, future 2015-2100.
ACCESS_hist_sliced = ACCESS_hist.sel(time=slice('1920', '2014'))
ACCESS_future_sliced = ACCESS_future.sel(time=slice('2015', '2100'))

In [20]:
# Inspect the future run after slicing to 2015-2100.
ACCESS_future_sliced

In [22]:
# Count the finite 'tas' values along the time axis of the sliced future run.
np.isfinite(ACCESS_future_sliced['tas']).sum(dim='time')

In [21]:
# Inspect the historical run after slicing to 1920-2014.
ACCESS_hist_sliced

In [23]:
# Count the finite 'tas' values along the time axis of the sliced historical run.
np.isfinite(ACCESS_hist_sliced['tas']).sum(dim='time')

In [24]:
# Only 5 ensemble members remain after slicing the data in time and dropping
# the members that contain NaNs.
ACCESS_future_sliced.dropna(dim='member_id')

### EC-Earth3

In [20]:
# Daily 'tas' from EC-Earth3 for the historical and ssp585 experiments.
cat = raw_cat.search(
    source_id='EC-Earth3',
    experiment_id=['historical', 'ssp585'],
    table_id='day',
    variable_id='tas',
)

In [21]:
# Show the catalog rows matched by the search.
cat.df

Unnamed: 0,activity_id,institution_id,source_id,experiment_id,member_id,table_id,variable_id,grid_label,zstore,dcpp_init_year,version
0,ScenarioMIP,EC-Earth-Consortium,EC-Earth3,ssp585,r6i1p1f1,day,tas,gr,gs://cmip6/CMIP6/ScenarioMIP/EC-Earth-Consorti...,,20200201
1,ScenarioMIP,EC-Earth-Consortium,EC-Earth3,ssp585,r9i1p1f1,day,tas,gr,gs://cmip6/CMIP6/ScenarioMIP/EC-Earth-Consorti...,,20200201
2,CMIP,EC-Earth-Consortium,EC-Earth3,historical,r9i1p1f1,day,tas,gr,gs://cmip6/CMIP6/CMIP/EC-Earth-Consortium/EC-E...,,20200201
3,ScenarioMIP,EC-Earth-Consortium,EC-Earth3,ssp585,r11i1p1f1,day,tas,gr,gs://cmip6/CMIP6/ScenarioMIP/EC-Earth-Consorti...,,20200201
4,CMIP,EC-Earth-Consortium,EC-Earth3,historical,r6i1p1f1,day,tas,gr,gs://cmip6/CMIP6/CMIP/EC-Earth-Consortium/EC-E...,,20200201
...,...,...,...,...,...,...,...,...,...,...,...
124,CMIP,EC-Earth-Consortium,EC-Earth3,historical,r17i1p1f1,day,tas,gr,gs://cmip6/CMIP6/CMIP/EC-Earth-Consortium/EC-E...,,20210120
125,CMIP,EC-Earth-Consortium,EC-Earth3,historical,r18i1p1f1,day,tas,gr,gs://cmip6/CMIP6/CMIP/EC-Earth-Consortium/EC-E...,,20210121
126,CMIP,EC-Earth-Consortium,EC-Earth3,historical,r21i1p1f1,day,tas,gr,gs://cmip6/CMIP6/CMIP/EC-Earth-Consortium/EC-E...,,20210223
127,CMIP,EC-Earth-Consortium,EC-Earth3,historical,r19i1p1f1,day,tas,gr,gs://cmip6/CMIP6/CMIP/EC-Earth-Consortium/EC-E...,,20210305


In [22]:
# Open every matching catalog entry as an xarray dataset (anonymous access).
dset = cat.to_dataset_dict(
    zarr_kwargs={'consolidated': True},
    storage_options={"anon": True},
);


--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'


In [23]:
# Dataset keys in alphabetical order (iterating a dict yields its keys).
keys = sorted(dset)

In [24]:
# Select each experiment by the experiment_id field of its key
# ('activity_id.institution_id.source_id.experiment_id.table_id.grid_label')
# rather than by its position in the sorted key list, so an unexpected extra
# or missing key cannot silently swap or mislabel the two datasets.
hist = dset[next(k for k in keys if k.split('.')[3] == 'historical')]
future = dset[next(k for k in keys if k.split('.')[3] == 'ssp585')]

In [25]:
# Target point; the longitude is wrapped onto the 0-360 range.
lat = 41.3
lon = (-72.5) % 360

In [26]:
# Reduce both experiments to the grid cell nearest the target point.
hist, future = (
    ds.sel(lat=lat, lon=lon, method='nearest') for ds in (hist, future)
)

In [27]:
# Rechunk both experiments: one ensemble member per chunk, 'time' set to -1.
hist, future = (
    ds.chunk({'member_id': 1, 'time': -1}) for ds in (hist, future)
)

In [28]:
# Inspect the point-selected, rechunked future dataset.
future

Unnamed: 0,Array,Chunk
Bytes,16 B,16 B
Shape,"(2,)","(2,)"
Count,11 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 16 B 16 B Shape (2,) (2,) Count 11 Tasks 1 Chunks Type float64 numpy.ndarray",2  1,

Unnamed: 0,Array,Chunk
Bytes,16 B,16 B
Shape,"(2,)","(2,)"
Count,11 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,16 B,16 B
Shape,"(2,)","(2,)"
Count,3 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 16 B 16 B Shape (2,) (2,) Count 3 Tasks 1 Chunks Type float64 numpy.ndarray",2  1,

Unnamed: 0,Array,Chunk
Bytes,16 B,16 B
Shape,"(2,)","(2,)"
Count,3 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,502.58 kB,502.58 kB
Shape,"(31411, 2)","(31411, 2)"
Count,4 Tasks,1 Chunks
Type,datetime64[ns],numpy.ndarray
"Array Chunk Bytes 502.58 kB 502.58 kB Shape (31411, 2) (31411, 2) Count 4 Tasks 1 Chunks Type datetime64[ns] numpy.ndarray",2  31411,

Unnamed: 0,Array,Chunk
Bytes,502.58 kB,502.58 kB
Shape,"(31411, 2)","(31411, 2)"
Count,4 Tasks,1 Chunks
Type,datetime64[ns],numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,7.29 MB,125.64 kB
Shape,"(58, 31411)","(1, 31411)"
Count,229000 Tasks,58 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 7.29 MB 125.64 kB Shape (58, 31411) (1, 31411) Count 229000 Tasks 58 Chunks Type float32 numpy.ndarray",31411  58,

Unnamed: 0,Array,Chunk
Bytes,7.29 MB,125.64 kB
Shape,"(58, 31411)","(1, 31411)"
Count,229000 Tasks,58 Chunks
Type,float32,numpy.ndarray


In [29]:
# Inspect the point-selected, rechunked historical dataset.
hist

Unnamed: 0,Array,Chunk
Bytes,16 B,16 B
Shape,"(2,)","(2,)"
Count,11 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 16 B 16 B Shape (2,) (2,) Count 11 Tasks 1 Chunks Type float64 numpy.ndarray",2  1,

Unnamed: 0,Array,Chunk
Bytes,16 B,16 B
Shape,"(2,)","(2,)"
Count,11 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,16 B,16 B
Shape,"(2,)","(2,)"
Count,3 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 16 B 16 B Shape (2,) (2,) Count 3 Tasks 1 Chunks Type float64 numpy.ndarray",2  1,

Unnamed: 0,Array,Chunk
Bytes,16 B,16 B
Shape,"(2,)","(2,)"
Count,3 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,964.26 kB,482.13 kB
Shape,"(60266, 2)","(60266, 1)"
Count,31 Tasks,2 Chunks
Type,datetime64[ns],numpy.ndarray
"Array Chunk Bytes 964.26 kB 482.13 kB Shape (60266, 2) (60266, 1) Count 31 Tasks 2 Chunks Type datetime64[ns] numpy.ndarray",2  60266,

Unnamed: 0,Array,Chunk
Bytes,964.26 kB,482.13 kB
Shape,"(60266, 2)","(60266, 1)"
Count,31 Tasks,2 Chunks
Type,datetime64[ns],numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,17.12 MB,241.06 kB
Shape,"(71, 60266)","(1, 60266)"
Count,562021 Tasks,71 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 17.12 MB 241.06 kB Shape (71, 60266) (1, 60266) Count 562021 Tasks 71 Chunks Type float32 numpy.ndarray",60266  71,

Unnamed: 0,Array,Chunk
Bytes,17.12 MB,241.06 kB
Shape,"(71, 60266)","(1, 60266)"
Count,562021 Tasks,71 Chunks
Type,float32,numpy.ndarray


In [62]:
%%time
# Load the future experiment one ensemble member at a time, then stack the
# loaded members back together along 'member_id'.
# NOTE: the result is bound to `future`, replacing the chunked dataset that
# this loop reads from.
ds_future = []
for member_id in future.member_id:
    # .load() brings this single member's data into memory
    ds = future.sel(member_id = member_id).load()
    ds_future.append(ds)
future = xr.concat(ds_future,'member_id')

CPU times: user 53min 50s, sys: 18min 16s, total: 1h 12min 7s
Wall time: 33min 9s


In [14]:
# Count the finite 'tas' values along the time axis.
np.isfinite(future['tas']).sum(dim='time')

Unnamed: 0,Array,Chunk
Bytes,464 B,8 B
Shape,"(58,)","(1,)"
Count,229174 Tasks,58 Chunks
Type,int64,numpy.ndarray
"Array Chunk Bytes 464 B 8 B Shape (58,) (1,) Count 229174 Tasks 58 Chunks Type int64 numpy.ndarray",58  1,

Unnamed: 0,Array,Chunk
Bytes,464 B,8 B
Shape,"(58,)","(1,)"
Count,229174 Tasks,58 Chunks
Type,int64,numpy.ndarray


In [63]:
# Inspect the future dataset.
future

In [30]:
%%time
# Load the historical experiment one ensemble member at a time, then stack
# the loaded members back together along 'member_id'.
ds_hist = []
for member_id in hist.member_id:
    # .load() brings this single member's data into memory
    ds = hist.sel(member_id = member_id).load()
    ds_hist.append(ds)
EC_earth_hist = xr.concat(ds_hist,'member_id')

CPU times: user 2h 22min 18s, sys: 58min 14s, total: 3h 20min 33s
Wall time: 1h 41min 33s


In [31]:
# Inspect the loaded historical dataset.
EC_earth_hist

In [32]:
# Count the finite 'tas' values along the time axis.
np.isfinite(EC_earth_hist['tas']).sum(dim='time')

In [33]:
# Restrict the loaded historical run to 1920-2014.
EC_earth_sliced = EC_earth_hist.sel(time=slice('1920','2014'))

In [34]:
# Count the finite 'tas' values along the time axis of the sliced historical run.
np.isfinite(EC_earth_sliced['tas']).sum(dim='time')

In [None]:
# Re-express longitude on the -180..180 range whenever the selected point
# lies outside it (i.e. it is still on the 0-360 convention).
# NOTE(review): the loaded historical data was stored in EC_earth_hist, while
# this cell works on `hist` - confirm which of the two is intended.
if abs(hist.lon) > 180:
    hist = hist.assign_coords(
        lon=(hist.lon + 180) % 360 - 180
    )

if abs(future.lon) > 180:
    future = future.assign_coords(
        lon=(future.lon + 180) % 360 - 180
    )

In [None]:
# Strip the bounds/height coordinates from both experiments.
hist, future = map(drop_bounds_height, (hist, future))