# Datatree
An example of working with a datatree

## Imports 

In [4]:
import sys, os
from pathlib import Path
import numpy as np
#Import xarray datatree
import datatree
import xarray as xr
#Matplotlib is used for the temperature plots at the end of the notebook
import matplotlib.pyplot as plt

# Ask git for the repository root so the data path works from anywhere inside the repository
git_dir = Path(
    os.popen('git rev-parse --show-toplevel').read().strip()
)
# The NetCDF inputs opened below are read from <repo root>/tests/data
test_data_folder = git_dir / 'tests' / 'data'

## A datatree

A datatree is a collection of xarray Datasets organized in a tree structure. See the [DataTree docs](https://xarray-datatree.readthedocs.io/en/latest/) for more information.
It lets us store Datasets in a folder-like hierarchy and easily navigate and manipulate them. Unlike the variables inside a single xarray Dataset, the Datasets in a tree do not need to share the same dimensions.

In [5]:

# Synthetic "observation" dataset: two random fields on a coarse 10 x 10 x 10 (time, lat, lon) grid
np.random.seed(0)  # fixed seed so the random fields are the same on every run
obs_dims = ("time", "lat", "lon")
obs_shape = (10, 10, 10)
obs = xr.Dataset(
    # temperature is drawn before precipitation; keep this order to reproduce the seeded values
    data_vars=dict(
        temperature=(obs_dims, np.random.rand(*obs_shape)),
        precipitation=(obs_dims, np.random.rand(*obs_shape)),
    ),
    coords=dict(
        time=np.arange(10),
        lat=np.linspace(-90, 90, 10),
        lon=np.linspace(-180, 180, 10),
    ),
)

# Open all NetCDF files of each scenario in the test-data folder as one dask-backed dataset
# (historical first, then ssp245), combining the files by their coordinates.
ds, ds2 = (
    xr.open_mfdataset(
        f"{test_data_folder}/*{scenario}*.nc",
        combine='by_coords',
        chunks='auto',
    )
    for scenario in ("historical", "ssp245")
)

# Build the tree from path-like keys: both scenarios become children of a "model" node,
# while the synthetic observations sit in their own top-level "obs" node.
dt = datatree.DataTree.from_dict(
    {
        "model/historical": ds,
        "model/ssp245": ds2,
        "obs": obs,
    }
)
# Last expression of the cell: display the tree
dt

Unnamed: 0,Array,Chunk
Bytes,384 B,192 B
Shape,"(24, 2)","(12, 2)"
Dask graph,2 chunks in 5 graph layers,2 chunks in 5 graph layers
Data type,datetime64[ns] numpy.ndarray,datetime64[ns] numpy.ndarray
"Array Chunk Bytes 384 B 192 B Shape (24, 2) (12, 2) Dask graph 2 chunks in 5 graph layers Data type datetime64[ns] numpy.ndarray",2  24,

Unnamed: 0,Array,Chunk
Bytes,384 B,192 B
Shape,"(24, 2)","(12, 2)"
Dask graph,2 chunks in 5 graph layers,2 chunks in 5 graph layers
Data type,datetime64[ns] numpy.ndarray,datetime64[ns] numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,96.00 kiB,48.00 kiB
Shape,"(24, 256, 2)","(12, 256, 2)"
Dask graph,2 chunks in 7 graph layers,2 chunks in 7 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 96.00 kiB 48.00 kiB Shape (24, 256, 2) (12, 256, 2) Dask graph 2 chunks in 7 graph layers Data type float64 numpy.ndarray",2  256  24,

Unnamed: 0,Array,Chunk
Bytes,96.00 kiB,48.00 kiB
Shape,"(24, 256, 2)","(12, 256, 2)"
Dask graph,2 chunks in 7 graph layers,2 chunks in 7 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,192.00 kiB,96.00 kiB
Shape,"(24, 512, 2)","(12, 512, 2)"
Dask graph,2 chunks in 7 graph layers,2 chunks in 7 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 192.00 kiB 96.00 kiB Shape (24, 512, 2) (12, 512, 2) Dask graph 2 chunks in 7 graph layers Data type float64 numpy.ndarray",2  512  24,

Unnamed: 0,Array,Chunk
Bytes,192.00 kiB,96.00 kiB
Shape,"(24, 512, 2)","(12, 512, 2)"
Dask graph,2 chunks in 7 graph layers,2 chunks in 7 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,12.00 MiB,6.00 MiB
Shape,"(24, 256, 512)","(12, 256, 512)"
Dask graph,2 chunks in 5 graph layers,2 chunks in 5 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 12.00 MiB 6.00 MiB Shape (24, 256, 512) (12, 256, 512) Dask graph 2 chunks in 5 graph layers Data type float32 numpy.ndarray",512  256  24,

Unnamed: 0,Array,Chunk
Bytes,12.00 MiB,6.00 MiB
Shape,"(24, 256, 512)","(12, 256, 512)"
Dask graph,2 chunks in 5 graph layers,2 chunks in 5 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,384 B,192 B
Shape,"(24, 2)","(12, 2)"
Dask graph,2 chunks in 5 graph layers,2 chunks in 5 graph layers
Data type,datetime64[ns] numpy.ndarray,datetime64[ns] numpy.ndarray
"Array Chunk Bytes 384 B 192 B Shape (24, 2) (12, 2) Dask graph 2 chunks in 5 graph layers Data type datetime64[ns] numpy.ndarray",2  24,

Unnamed: 0,Array,Chunk
Bytes,384 B,192 B
Shape,"(24, 2)","(12, 2)"
Dask graph,2 chunks in 5 graph layers,2 chunks in 5 graph layers
Data type,datetime64[ns] numpy.ndarray,datetime64[ns] numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,96.00 kiB,48.00 kiB
Shape,"(24, 256, 2)","(12, 256, 2)"
Dask graph,2 chunks in 7 graph layers,2 chunks in 7 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 96.00 kiB 48.00 kiB Shape (24, 256, 2) (12, 256, 2) Dask graph 2 chunks in 7 graph layers Data type float64 numpy.ndarray",2  256  24,

Unnamed: 0,Array,Chunk
Bytes,96.00 kiB,48.00 kiB
Shape,"(24, 256, 2)","(12, 256, 2)"
Dask graph,2 chunks in 7 graph layers,2 chunks in 7 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,192.00 kiB,96.00 kiB
Shape,"(24, 512, 2)","(12, 512, 2)"
Dask graph,2 chunks in 7 graph layers,2 chunks in 7 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 192.00 kiB 96.00 kiB Shape (24, 512, 2) (12, 512, 2) Dask graph 2 chunks in 7 graph layers Data type float64 numpy.ndarray",2  512  24,

Unnamed: 0,Array,Chunk
Bytes,192.00 kiB,96.00 kiB
Shape,"(24, 512, 2)","(12, 512, 2)"
Dask graph,2 chunks in 7 graph layers,2 chunks in 7 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,12.00 MiB,6.00 MiB
Shape,"(24, 256, 512)","(12, 256, 512)"
Dask graph,2 chunks in 5 graph layers,2 chunks in 5 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 12.00 MiB 6.00 MiB Shape (24, 256, 512) (12, 256, 512) Dask graph 2 chunks in 5 graph layers Data type float32 numpy.ndarray",512  256  24,

Unnamed: 0,Array,Chunk
Bytes,12.00 MiB,6.00 MiB
Shape,"(24, 256, 512)","(12, 256, 512)"
Dask graph,2 chunks in 5 graph layers,2 chunks in 5 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


You can apply the same operations to a datatree as you would to a single xarray Dataset; each operation is applied to every dataset in the tree.

In [8]:

# Reduce every dataset in the tree over its "time" dimension in one call
avg = dt.mean(dim="time")
# Same tree as dt, but with the time dimension removed
avg


Unnamed: 0,Array,Chunk
Bytes,4.00 kiB,4.00 kiB
Shape,"(256, 2)","(256, 2)"
Dask graph,1 chunks in 9 graph layers,1 chunks in 9 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 4.00 kiB 4.00 kiB Shape (256, 2) (256, 2) Dask graph 1 chunks in 9 graph layers Data type float64 numpy.ndarray",2  256,

Unnamed: 0,Array,Chunk
Bytes,4.00 kiB,4.00 kiB
Shape,"(256, 2)","(256, 2)"
Dask graph,1 chunks in 9 graph layers,1 chunks in 9 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,8.00 kiB,8.00 kiB
Shape,"(512, 2)","(512, 2)"
Dask graph,1 chunks in 9 graph layers,1 chunks in 9 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 8.00 kiB 8.00 kiB Shape (512, 2) (512, 2) Dask graph 1 chunks in 9 graph layers Data type float64 numpy.ndarray",2  512,

Unnamed: 0,Array,Chunk
Bytes,8.00 kiB,8.00 kiB
Shape,"(512, 2)","(512, 2)"
Dask graph,1 chunks in 9 graph layers,1 chunks in 9 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,512.00 kiB,512.00 kiB
Shape,"(256, 512)","(256, 512)"
Dask graph,1 chunks in 7 graph layers,1 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 512.00 kiB 512.00 kiB Shape (256, 512) (256, 512) Dask graph 1 chunks in 7 graph layers Data type float32 numpy.ndarray",512  256,

Unnamed: 0,Array,Chunk
Bytes,512.00 kiB,512.00 kiB
Shape,"(256, 512)","(256, 512)"
Dask graph,1 chunks in 7 graph layers,1 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.00 kiB,4.00 kiB
Shape,"(256, 2)","(256, 2)"
Dask graph,1 chunks in 9 graph layers,1 chunks in 9 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 4.00 kiB 4.00 kiB Shape (256, 2) (256, 2) Dask graph 1 chunks in 9 graph layers Data type float64 numpy.ndarray",2  256,

Unnamed: 0,Array,Chunk
Bytes,4.00 kiB,4.00 kiB
Shape,"(256, 2)","(256, 2)"
Dask graph,1 chunks in 9 graph layers,1 chunks in 9 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,8.00 kiB,8.00 kiB
Shape,"(512, 2)","(512, 2)"
Dask graph,1 chunks in 9 graph layers,1 chunks in 9 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 8.00 kiB 8.00 kiB Shape (512, 2) (512, 2) Dask graph 1 chunks in 9 graph layers Data type float64 numpy.ndarray",2  512,

Unnamed: 0,Array,Chunk
Bytes,8.00 kiB,8.00 kiB
Shape,"(512, 2)","(512, 2)"
Dask graph,1 chunks in 9 graph layers,1 chunks in 9 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,512.00 kiB,512.00 kiB
Shape,"(256, 512)","(256, 512)"
Dask graph,1 chunks in 7 graph layers,1 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 512.00 kiB 512.00 kiB Shape (256, 512) (256, 512) Dask graph 1 chunks in 7 graph layers Data type float32 numpy.ndarray",512  256,

Unnamed: 0,Array,Chunk
Bytes,512.00 kiB,512.00 kiB
Shape,"(256, 512)","(256, 512)"
Dask graph,1 chunks in 7 graph layers,1 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [None]:
#Quick plot using an Xarray datatree: time-mean `tas` for each scenario and their difference
fig, ax = plt.subplots(1, 3, figsize=(15, 5))

# Nodes and variables of the tree are reachable with attribute access.
# Titles are set explicitly after plotting: a `label=` kwarg is only used by legends,
# and no legend is drawn here, so it would leave the panels unidentified.
avg.model.ssp245.tas.plot(ax=ax[0])
ax[0].set_title("ssp245")

avg.model.historical.tas.plot(ax=ax[1])
ax[1].set_title("historical")

#Plot the difference between the two scenarios
(avg.model.ssp245.tas-avg.model.historical.tas).plot(ax=ax[2])
ax[2].set_title("ssp245 - historical")

# Keep the colorbars and axis labels of neighbouring panels from overlapping
fig.tight_layout()
plt.show()