In [1]:
import sys
import logging

# Open the log file that will receive a copy of the notebook's output.
# Mode 'w' truncates any existing data.log; the third positional argument is
# open()'s buffering parameter.
# NOTE(review): the handle is never closed in the visible cells — confirm that is intended.
so = open("data.log", 'w', 10)
# Attach the log file as the `echo` attribute of both standard streams.
# NOTE(review): presumably relies on the kernel's stream objects mirroring writes
# to `echo` — confirm for the kernel version in use.
sys.stdout.echo = so
sys.stderr.echo = so

# Send the first handler of the IPython logger to the same file and log at INFO level.
get_ipython().log.handlers[0].stream = so
get_ipython().log.setLevel(logging.INFO)


In [2]:
#import packages
import matplotlib.pyplot as plt
from cartopy import crs as ccrs, feature as cfeature
import xarray as xr
import pandas as pd
import numpy as np

# Packages needed for CESM1 example
import pprint
import intake
import requests
import aiohttp
import s3fs
import dask

In [3]:
# URL of the original collection description file (catalog JSON) for CESM1 LENS.
cat_url = "https://ncar-cesm-lens.s3-us-west-2.amazonaws.com/catalogs/aws-cesm1-le.json"
# Open the catalog from that URL; `col` is the object later cells search.
col = intake.open_esm_datastore(cat_url)
# Leave the catalog as the last expression so the notebook displays its summary.
col

Unnamed: 0,unique
variable,78
long_name,75
component,5
experiment,4
frequency,6
vertical_levels,3
spatial_domain,5
units,25
start_time,12
end_time,13


In [4]:
def search_monthly_ocean(variable):
    """Search the catalog for monthly ocean output of a single variable.

    The search is restricted to the "HIST", "20C" and "RCP85" experiments.
    "HIST" is the 1850-1919 period, which is only in the first ensemble
    member; "20C" is 1920-2005, which is common across all the other members.

    Parameters
    ----------
    variable : str
        Catalog variable name to search for (e.g. "TEMP").

    Returns
    -------
    The catalog subset returned by ``col.search`` for that variable.
    """
    return col.search(
        frequency=["monthly"],
        component="ocn",
        variable=[variable],
        experiment=["HIST", "20C", "RCP85"],
    )


# One catalog subset per variable; the search criteria are otherwise identical.
col_ocntemp = search_monthly_ocean("TEMP")
col_ocnO2 = search_monthly_ocean("O2")
col_ocnsalt = search_monthly_ocean("SALT")
col_ocnsst = search_monthly_ocean("SST")


In [5]:
def load_dataset_dict(subset):
    """Load a catalog subset into a dictionary of xarray datasets.

    The keys of the loaded dictionary are printed so the reader can confirm
    that the expected experiments were returned for *this* subset.

    Parameters
    ----------
    subset
        A catalog search result (as returned by ``col.search``).

    Returns
    -------
    The dictionary returned by ``subset.to_dataset_dict``.
    """
    dsets = subset.to_dataset_dict(
        zarr_kwargs={"consolidated": True},
        storage_options={"anon": True},
    )
    # Print to make sure we are getting the correct set of keys.
    print(f"\nDataset dictionary keys:\n {dsets.keys()}")
    return dsets


# Each dictionary is loaded (and its own keys printed) by the same helper, so the
# SST load now reports the SST keys rather than repeating the SALT keys.
dsets_temp = load_dataset_dict(col_ocntemp)
dsets_o2 = load_dataset_dict(col_ocnO2)
dsets_salt = load_dataset_dict(col_ocnsalt)
dsets_sst = load_dataset_dict(col_ocnsst)


--> The keys in the returned dictionary of datasets are constructed as follows:
	'component.experiment.frequency'


Cannot find the ecCodes library
Cannot find the ecCodes library
Cannot find the ecCodes library



Dataset dictionary keys:
 dict_keys(['ocn.HIST.monthly', 'ocn.20C.monthly', 'ocn.RCP85.monthly'])

--> The keys in the returned dictionary of datasets are constructed as follows:
	'component.experiment.frequency'



Dataset dictionary keys:
 dict_keys(['ocn.RCP85.monthly', 'ocn.HIST.monthly', 'ocn.20C.monthly'])

--> The keys in the returned dictionary of datasets are constructed as follows:
	'component.experiment.frequency'



Dataset dictionary keys:
 dict_keys(['ocn.20C.monthly', 'ocn.RCP85.monthly', 'ocn.HIST.monthly'])

--> The keys in the returned dictionary of datasets are constructed as follows:
	'component.experiment.frequency'



Dataset dictionary keys:
 dict_keys(['ocn.20C.monthly', 'ocn.RCP85.monthly', 'ocn.HIST.monthly'])


In [6]:
# Pick out the "20C" and "RCP85" experiment datasets for each variable.
# Every pair must be read from its own variable's dictionary.

# TEMP
ds_20C_t = dsets_temp["ocn.20C.monthly"]
ds_RCP85_t = dsets_temp["ocn.RCP85.monthly"]

# O2
ds_20C_o = dsets_o2["ocn.20C.monthly"]
ds_RCP85_o = dsets_o2["ocn.RCP85.monthly"]

# SALT
ds_20C_s = dsets_salt["ocn.20C.monthly"]
ds_RCP85_s = dsets_salt["ocn.RCP85.monthly"]

# SST — read from dsets_sst (previously read from dsets_salt by mistake, which
# made the "sst" datasets silent copies of the SALT datasets).
ds_20C_sst = dsets_sst["ocn.20C.monthly"]
ds_RCP85_sst = dsets_sst["ocn.RCP85.monthly"]


In [7]:
# Join the 20C and RCP85 datasets of each variable end-to-end along the time dimension.
merge_ds_t = xr.concat(
    [ds_20C_t, ds_RCP85_t],
    dim='time',
)
merge_ds_o = xr.concat(
    [ds_20C_o, ds_RCP85_o],
    dim='time',
)
merge_ds_s = xr.concat(
    [ds_20C_s, ds_RCP85_s],
    dim='time',
)
merge_ds_sst = xr.concat(
    [ds_20C_sst, ds_RCP85_sst],
    dim='time',
)

In [8]:
# Pick a single grid cell by integer position from the merged O2 dataset
# (per the original note, this point is the one closest to the Channel Islands).
test_2 = merge_ds_o.isel(nlat=280, nlon=240)

# Show the selection as the cell output.
test_2

Unnamed: 0,Array,Chunk
Bytes,33.94 kiB,17.81 kiB
Shape,"(2172, 2)","(1140, 2)"
Dask graph,2 chunks in 5 graph layers,2 chunks in 5 graph layers
Data type,object numpy.ndarray,object numpy.ndarray
"Array Chunk Bytes 33.94 kiB 17.81 kiB Shape (2172, 2) (1140, 2) Dask graph 2 chunks in 5 graph layers Data type object numpy.ndarray",2  2172,

Unnamed: 0,Array,Chunk
Bytes,33.94 kiB,17.81 kiB
Shape,"(2172, 2)","(1140, 2)"
Dask graph,2 chunks in 5 graph layers,2 chunks in 5 graph layers
Data type,object numpy.ndarray,object numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,16.90 MiB,1.41 kiB
Shape,"(34, 2172, 60)","(1, 6, 60)"
Dask graph,12308 chunks in 6 graph layers,12308 chunks in 6 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 16.90 MiB 1.41 kiB Shape (34, 2172, 60) (1, 6, 60) Dask graph 12308 chunks in 6 graph layers Data type float32 numpy.ndarray",60  2172  34,

Unnamed: 0,Array,Chunk
Bytes,16.90 MiB,1.41 kiB
Shape,"(34, 2172, 60)","(1, 6, 60)"
Dask graph,12308 chunks in 6 graph layers,12308 chunks in 6 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [9]:
%%time

# Write the single-point O2 selection (test_2) to a NetCDF file in the working directory.
# Save file - don't forget the .nc extension!
# NOTE(review): the recorded run of this cell ended in KeyboardInterrupt; presumably the
# write is slow because it has to evaluate the many small dask chunks shown in the
# preview of test_2 — confirm before relying on the output file.
test_2.to_netcdf('20C_rcp85_o2.nc')



KeyboardInterrupt: 

In [None]:
# Re-open the file written above to verify its contents.
check = xr.open_dataset('20C_rcp85_o2.nc')

# Display the re-opened dataset.
check