# Working with Arrays in a TileDB Group

This notebook contains an example of how to use the `Group` and `GroupSchema` classes from TileDB-CF to create, inspect, open, read, and write data to arrays in a TileDB group.

### Data

The following arrays will be created that share some of their dimensions:

* `dense_3d` is a dense array with dimensions `dense_x`, `dense_y` and `dense_t`
* `dense_2d` is a dense array with dimensions `dense_x` and `dense_y`
* `dense_1d` is a dense array with dimension `dense_t`
* `sparse_4d` is a sparse array with dimensions `sparse_x`, `sparse_y`, `sparse_z` and `sparse_t`
* `dense_axes_xy` is a dense array that contains the values of the `dense_x` and `dense_y` dimensions as `dense_x_data` and `dense_y_data` 
* `dense_axes_t` is a dense array that contains the values of the `dense_t` dimension as `dense_t_data`
* `sparse_axes` is a sparse array that contains the values of the `sparse_x`, `sparse_y`, `sparse_z` and `sparse_t` dimensions as `sparse_x_data`, `sparse_y_data`, `sparse_z_data` and `sparse_t_data`

### Packages

Import the libraries used in this notebook:

In [None]:
import time
import numpy as np
import pandas as pd 
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import tiledb
from tiledb.cf import Group, GroupSchema

## Create numpy arrays

Variables to set the size of the arrays:

In [None]:
# Edge length of the square x/y grid used by the dense arrays.
dense_size = 100
# Length of the t dimension.
t_size = 365
# Number of randomly generated cells for the sparse arrays.
sparse_size = 2_000

Functions used to create a dataset:

In [None]:
def ripple(x, y, t):
    """Evaluate ``sin(t * (x**2 + y**2)) / (t + 1)``.

    The arithmetic is delegated to NumPy, so scalars and arrays are both
    accepted.
    """
    squared_radius = x**2 + y**2
    phase = t * squared_radius
    return np.sin(phase) / (t + 1)

def ripple2(x, y, z, t):
    """Evaluate ``sin(t * (x**2 + y**2)) / (t + 1) + z``.

    Same wave term as the three-argument variant, shifted by ``z``.
    """
    squared_radius = x**2 + y**2
    damped_wave = np.sin(t * squared_radius) / (t + 1)
    return damped_wave + z

### Data for the dense arrays

In [None]:
# Evaluate ripple on the index grids that np.fromfunction generates for a
# (dense_size, dense_size, t_size) array.
dense_3d_shape = (dense_size, dense_size, t_size)
dense_3d_values = np.fromfunction(ripple, dense_3d_shape)

# Collapse the t axis for the 2D data and the x/y axes for the 1D data.
dense_2d_values = np.nanmean(dense_3d_values, axis=2)
dense_1d_values = np.mean(dense_3d_values, axis=(0, 1))

# Axis values 1..size for each dense dimension.
dense_x_values = np.arange(1, dense_size + 1)
dense_y_values = np.arange(1, dense_size + 1)
dense_t_values = np.arange(1, t_size + 1)

In [None]:
# Build the 3D ripple data on a (dense_size, dense_size, t_size) index grid.
grid_shape = (dense_size, dense_size, t_size)
dense_3d_values = np.fromfunction(ripple, grid_shape)

# Average over t for the 2D data, and over x and y for the 1D data.
dense_2d_values = np.nanmean(dense_3d_values, axis=2)
dense_1d_values = np.mean(dense_3d_values, axis=(0, 1))

### Data for the sparse arrays

In [None]:
# Random integer coordinates for the sparse cells: x, y and z are drawn from
# 1..dense_size and t from 1..t_size (randint's upper bound is exclusive).
sparse_x_values = np.random.randint(1, dense_size + 1, size=sparse_size)
sparse_y_values = np.random.randint(1, dense_size + 1, size=sparse_size)
sparse_z_values = np.random.randint(1, dense_size + 1, size=sparse_size)
sparse_t_values = np.random.randint(1, t_size + 1, size=sparse_size)

# One data value per generated coordinate.
sparse_4d_values = ripple2(
    sparse_x_values,
    sparse_y_values,
    sparse_z_values,
    sparse_t_values,
)

## Create the TileDB Group

### Create the GroupSchema 
* [ArraySchema](https://tiledb-inc-tiledb-py.readthedocs-hosted.com/en/stable/python-api.html#array-schema) 
* [Domain](https://tiledb-inc-tiledb-py.readthedocs-hosted.com/en/stable/python-api.html#domain)
* [Attributes](https://tiledb-inc-tiledb-py.readthedocs-hosted.com/en/stable/python-api.html#tiledb.Attr)
* [Dimensions](https://tiledb-inc-tiledb-py.readthedocs-hosted.com/en/stable/python-api.html#dimension) 

In [None]:
def _index_dim(name, upper):
    """Build a uint64 dimension with domain (1, upper) and tile extent 10."""
    return tiledb.Dim(name=name, domain=(1, upper), tile=10, dtype=np.uint64)


# Dimensions shared by the dense arrays.
dense_x = _index_dim("dense_x", dense_size)
dense_y = _index_dim("dense_y", dense_size)
dense_t = _index_dim("dense_t", t_size)

# Dimensions shared by the sparse arrays.
sparse_x = _index_dim("sparse_x", sparse_size)
sparse_y = _index_dim("sparse_y", sparse_size)
sparse_z = _index_dim("sparse_z", sparse_size)
sparse_t = _index_dim("sparse_t", t_size)

In [None]:
def _attrs(dtype, *names):
    """Return one ``tiledb.Attr`` of the given dtype for each name."""
    return [tiledb.Attr(name=name, dtype=dtype) for name in names]


# Array name -> array schema, in the order the arrays are added to the group.
array_schemas = {
    # Data arrays: a single float64 attribute each.
    "dense_3d": tiledb.ArraySchema(
        domain=tiledb.Domain(dense_x, dense_y, dense_t),
        attrs=_attrs(np.float64, "dense_3d_data"),
    ),
    "dense_2d": tiledb.ArraySchema(
        domain=tiledb.Domain(dense_x, dense_y),
        attrs=_attrs(np.float64, "dense_2d_data"),
    ),
    "dense_1d": tiledb.ArraySchema(
        domain=tiledb.Domain(dense_t),
        attrs=_attrs(np.float64, "dense_1d_data"),
    ),
    "sparse_4d": tiledb.ArraySchema(
        domain=tiledb.Domain(sparse_x, sparse_y, sparse_z, sparse_t),
        attrs=_attrs(np.float64, "sparse_4d_data"),
        sparse=True,
        allows_duplicates=True,
    ),
    # Axis arrays: uint64 attributes holding the dimension values.
    "dense_axes_xy": tiledb.ArraySchema(
        domain=tiledb.Domain(dense_x),
        attrs=_attrs(np.uint64, "dense_x_data", "dense_y_data"),
    ),
    "dense_axes_t": tiledb.ArraySchema(
        domain=tiledb.Domain(dense_t),
        attrs=_attrs(np.uint64, "dense_t_data"),
    ),
    "sparse_axes": tiledb.ArraySchema(
        domain=tiledb.Domain(sparse_x),
        attrs=_attrs(
            np.uint64,
            "sparse_x_data",
            "sparse_y_data",
            "sparse_z_data",
            "sparse_t_data",
        ),
        sparse=True,
        allows_duplicates=True,
    ),
}
group_schema = GroupSchema(array_schemas)

### Create the TileDB Group on disk

In [None]:
group_uri = "output/example_group"
# Create the group only when no TileDB object exists at the URI yet. The same
# variable is used for the check and the creation so the two cannot diverge.
if tiledb.object_type(group_uri) is None:
    Group.create(group_uri, group_schema)

### Write data to the TileDB Group

#### Dense arrays

Specify the name of the attribute (`attr`) to write the data to. 

In [None]:
# Each dense attribute name is unique within the group, so the attribute name
# alone is passed when opening the group for writing.
dense_writes = (
    ("dense_3d_data", dense_3d_values),
    ("dense_2d_data", dense_2d_values),
    ("dense_1d_data", dense_1d_values),
)
for attr_name, attr_values in dense_writes:
    with Group(group_uri, attr=attr_name, mode="w") as group:
        group.array[:] = attr_values

#### Sparse array

Specify the name of the attribute (`attr`) to write the data to. When writing to a sparse array, pass the coordinate values of each dimension as the index of `group.array[]`.

In [None]:
# Coordinates of the sparse cells, one array per dimension in domain order.
sparse_coords = (
    sparse_x_values,
    sparse_y_values,
    sparse_z_values,
    sparse_t_values,
)
with Group(group_uri, attr="sparse_4d_data", mode="w") as group:
    group.array[sparse_coords] = sparse_4d_values

#### Dimension arrays

Attributes can have the same name in multiple arrays. In this case, specify both the name of the `array` and the attribute (`attr`) to write the data to. 

Also note the difference in writing data to a dense and sparse array:

In [None]:
# Dense axis arrays: open one attribute at a time and assign the full range.
with Group(group_uri, array="dense_axes_xy", attr="dense_x_data", mode="w") as group:
    group.array[:] = dense_x_values

with Group(group_uri, array="dense_axes_xy", attr="dense_y_data", mode="w") as group:
    group.array[:] = dense_y_values

with Group(group_uri, array="dense_axes_t", attr="dense_t_data", mode="w") as group:
    group.array[:] = dense_t_values

# Sparse axis array: index by the cell positions 1..sparse_size and write all
# four attributes in one assignment through a name -> values mapping.
with Group(group_uri, array="sparse_axes", mode="w") as group:
    group.array[np.arange(sparse_size) + 1] = {
        "sparse_x_data": sparse_x_values,
        "sparse_y_data": sparse_y_values,
        "sparse_z_data": sparse_z_values,
        "sparse_t_data": sparse_t_values,
    }

#### Metadata

Write Group metadata: 

In [None]:
# Open the group itself (no array selected) to write group-level metadata.
with Group(group_uri, mode="w") as group:
    group.meta["description"] = "Example TileDB Group"
    group.meta["version"] = "1.0"
    # The key has no trailing space, so it can be looked up as "created on".
    # time.ctime() with no argument formats the current time.
    group.meta["created on"] = time.ctime()

Write Array metadata:

In [None]:
# dense_3d: one entry is written through the group's array_metadata and a
# second one directly on the metadata object of the opened array.
with Group(group_uri, array="dense_3d", mode="w") as group:
    array_level_meta = group.array_metadata
    array_level_meta["description"] = "Example 3D dense array with dimensions x, y and t"
    raw_array_meta = group.array.meta
    raw_array_meta["description 2"] = "The same example of a 3D dense array with dimensions x, y and t"

# dense_axes_xy: a single description entry.
with Group(group_uri, array="dense_axes_xy", mode="w") as group:
    array_level_meta = group.array_metadata
    array_level_meta["description"] = "Values for the x and y dimensions of the 3D dense array"

Write Attribute metadata:

In [None]:
# Selecting both the array and the attribute exposes that attribute's metadata.
with Group(
    group_uri,
    array="dense_axes_xy",
    attr="dense_x_data",
    mode="w",
) as group:
    x_attr_meta = group.attr_metadata
    x_attr_meta["description"] = "Values of x"

### Read data from the TileDB Group

#### Load the GroupSchema

In [None]:
# Load the schema of the group stored at group_uri; the bare expression on the
# last line makes the notebook display it.
group_schema = GroupSchema.load(group_uri)
group_schema

#### Read the metadata

Read the Group metadata keys and their values:

In [None]:
# Print every group-level metadata entry as "key: value".
with Group(group_uri) as group:
    group_meta = group.meta
    for key, value in group_meta.items():
        print(f"{key}: {value}")

* `array.meta.items()` is the metadata object for the entire array and includes the keys for attribute metadata
* `.array_metadata.items()` ignores any metadata that starts with `__tiledb_attr`

Read the metadata for the `dense_axes_xy` array with `array.meta.items()` or `.array_metadata.items()`:

In [None]:
# Print every entry of the metadata object of the opened dense_axes_xy array.
with Group(group_uri, array="dense_axes_xy") as group:
    raw_array_meta = group.array.meta
    for key, value in raw_array_meta.items():
        print(f"{key}: {value}")

In [None]:
# Print the entries exposed through the group's array_metadata for dense_axes_xy.
with Group(group_uri, array="dense_axes_xy") as group:
    array_level_meta = group.array_metadata
    for key, value in array_level_meta.items():
        print(f"{key}: {value}")

Alternatively, print the value of a metadata key with `.get_attr_metadata()`:

In [None]:
# Look up the metadata of one attribute by name and print its description.
with Group(group_uri, array="dense_axes_xy") as group:
    x_attr_meta = group.get_attr_metadata("dense_x_data")
    print(x_attr_meta["description"])

#### Read and visualise the data

In [None]:
# Read the whole dense_3d_data attribute of the dense_3d array.
with Group(group_uri, array="dense_3d", attr="dense_3d_data") as group:
    dense_3d_data = group.array[:]

# Read dense_axes_xy without selecting an attribute and pick out both axes
# from the result by attribute name.
with Group(group_uri, array="dense_axes_xy") as group:
    data = group.array[...]
    dense_x_data, dense_y_data = data["dense_x_data"], data["dense_y_data"]

In [None]:
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(12, 12))
# contourf(X, Y, Z) expects Z with shape (len(Y), len(X)), but the data is
# indexed [x, y, t], so each t slice is transposed before plotting.
# axes.flat walks the 2x2 grid row by row: [0,0], [0,1], [1,0], [1,1].
for ax, t_index in zip(axes.flat, (33, 66, 99, 95)):
    ax.contourf(dense_x_data, dense_y_data, dense_3d_data[:, :, t_index].T)

In [None]:
# Read the whole sparse_4d array and wrap the result in a DataFrame.
with Group(group_uri, array="sparse_4d") as group:
    sparse_4d_cells = group.array[...]
    df = pd.DataFrame(sparse_4d_cells)

df.head()

In [None]:
# Summary statistics, transposed so each column of df becomes a row.
summary = df.describe()
summary.transpose()

In [None]:
# Scatter plot of the sparse_4d_data values against the sparse_x column.
df.plot.scatter(x="sparse_x", y="sparse_4d_data");

In [None]:
# Read the whole dense_1d array and wrap the result in a DataFrame.
with Group(group_uri, array="dense_1d") as group:
    dense_1d_cells = group.array[...]
    df2 = pd.DataFrame(dense_1d_cells)

df2.head()

In [None]:
# Plot the columns of df2 (read from the dense_1d array) with pandas' default plot.
df2.plot();