# EOPF EO core containers and Zarr store

## Import and constants 
Import the Python dependencies.

In [1]:
import os
import shutil
import numpy as np
import xarray

from eopf.product.core import EOGroup, EOProduct, EOVariable
from eopf.product.conveniences import init_product
from eopf.product.store import EOZarrStore

Create the output folder if it does not exist, and remove any Zarr store left over from a previous run.

In [2]:
output_folder = "output"
output_filename = "product_demo_sprint1.zarr"

# Create the output folder if it is missing. Unlike os.mkdir wrapped in an
# `except FileExistsError: pass`, exist_ok=True still raises when the path
# exists but is not a directory, so a stray file named "output" is reported
# here instead of being silently accepted.
os.makedirs(output_folder, exist_ok=True)

# Remove the Zarr store left by a previous run so the demo starts from scratch;
# a missing store simply means there is nothing to clean up.
try:
    shutil.rmtree(f"{output_folder}/{output_filename}")
except FileNotFoundError:
    pass

Initialise a few data and coordinate arrays (Python lists, NumPy arrays and xarray DataArrays) for later insertion into the product.

In [3]:
# Placeholder so that displaying `product` works before the product is created.
product = None

# Variable payloads, built from both NumPy arrays and xarray DataArrays.
data_a = np.array([1, 1])
data_b = np.array([2])
data_c = xarray.DataArray([[3], [3]], dims=["time", "space"])
data_d = np.array([[4.1], [4.2], [4.3]])
data_e = xarray.DataArray(np.zeros(10), dims=["dim_group/dim_10"])

# Coordinate payloads: a NumPy array, a plain Python list and an xarray DataArray.
data_coord_time = np.array([1])
data_coord_space = [2]
data_coord_dim_10 = xarray.DataArray(np.arange(10))

## Creating and writing an EOProduct

Jupyter notebooks can display the content of our objects; rerun the following cell to see how the product changes after each step:

In [4]:
# Show the current state of `product`; it is still None until the product is created further down.
product

### Initialising a product and indexing a product

In [5]:
# Build a bare EOProduct bound to a Zarr store; it is only constructed and
# displayed here, not opened.
# The name used to carry an accidental trailing space ("product_empty ").
product_empty = EOProduct(
    "product_empty",
    store_or_path_url=EOZarrStore(f"{output_folder}/{output_filename}"),
)
product_empty



`init_product` initialises a product with its mandatory groups.
We link it to an EOZarrStore, open it in write mode, and check that the mandatory groups exist.

In [6]:
# Create the product through init_product and bind it to the Zarr store.
write_store = EOZarrStore(f"{output_folder}/{output_filename}")
product: EOProduct = init_product("product_written", store_or_path_url=write_store)
product.open(mode="w")

print(product)
print("product[measurements], product[coordinates]")
# Three ways of reaching the same groups: relative key, absolute key, attribute access.
print(product["measurements"], product["coordinates"])
print(product["/measurements"], product["/coordinates"])
print(product.measurements, product.coordinates)

[EOProduct]0x7f880f8ec550
product[measurements], product[coordinates]
[EOGroup]0x7f880f8d2c10 [EOGroup]0x7f880f8d2bb0
[EOGroup]0x7f880f8d2c10 [EOGroup]0x7f880f8d2bb0
[EOGroup]0x7f880f8d2c10 [EOGroup]0x7f880f8d2bb0


Note that we can use both relative and absolute paths.
An absolute path from any EOGroup/Product is equivalent to a relative path from the product.

In [7]:
print("product[coordinates][/measurements], product[measurements][/coordinates]")
# An absolute key is resolved from the product root, whichever group it is used on.
print(product["coordinates"]["/measurements"], product["measurements"]["/coordinates"])

# A relative key is resolved from the group itself, so sibling groups are not reachable.
try:
    nested = (
        product["/coordinates"]["measurements"],
        product["/measurements"]["coordinates"],
    )
except KeyError:
    print("product[/coordinates][measurements] : KeyError !")
else:
    print(*nested)

product[coordinates][/measurements], product[measurements][/coordinates]
[EOGroup]0x7f880f8d2c10 [EOGroup]0x7f880f8d2bb0
product[/coordinates][measurements] : KeyError !


### Adding groups

`add_group` can be used to add a group recursively to an EOProduct/EOGroup.

In [8]:
# Add a group using a path relative to the product.
product.add_group("group0")
# Add a nested group, passing coordinates and dimensions in the same call.
product.add_group("measurements/group1", coords={"time": data_coord_time}, dims=["space"])
product.add_group("measurements/group1/group2/group3") # We create both group2 and group3
# add_group called on a group, with a path relative to that group.
product["measurements"].add_group("group1/group2b") 
# Assign a dimension to group2, reached here through attribute access.
product.measurements.group1.group2.assign_dims(["dim_group/dim_10"])
# add_group called on a nested group, this time with an absolute path.
product.measurements["group1"].add_group("/measurements/group1/group2b/group3")

You can also add a group by using an EOGroup/EOProduct like a dictionary.
This does not create intermediate groups.

In [9]:
# Item assignment adds a group at the given path.
product["group0b"] = EOGroup()
product["group0b/groupb1"] = EOGroup()
# The key can also be a path relative to an existing group; this EOGroup is built with an explicit product and dims.
product["group0b"]["groupb1/groupb2"] = EOGroup(product=product, dims=["d1"])
try:
    # "non_existing_group" was never created, so this assignment is expected to raise KeyError.
    product["group0b/non_existing_group/groupb2b"] = EOGroup()
except KeyError:
    print("EOContainer[\"path\"] = EOContainer doesn't support adding multiple group recursivly with __add_item__.")
    

EOContainer["path"] = EOContainer doesn't support adding multiple group recursivly with __add_item__.


### Adding variables
The same applies to `add_variable`.

In [10]:
# add_variable called on a group, with a name relative to that group.
product.measurements["group1"].add_variable("variable_a", data=data_a)
# Relative path going through the existing group2.
product["measurements/group1"].add_variable("group2/variable_b", data=data_b)
# Absolute path; the dimension names are taken from the xarray DataArray.
product["measurements/group1"]["group2"].add_variable("/measurements/group1/group2/variable_c", data=data_c, dims=data_c.dims)
# NOTE(review): data_d is 2-D but only one dimension name is given here — confirm this is intended.
product.add_variable("measurements/group1/group2c/variable_d", dims=["c1"], data=data_d) # We create both group2c and variable_d.
# Item assignment also works for variables when the parent group exists.
product["measurements/group1"]["group2"]["variable_e"] = EOVariable(data=data_e)
try:
    # "non_existing_group" was never created, so this assignment is expected to raise KeyError.
    product["measurements/group1"]["non_existing_group/variable_e"]= EOVariable()
except KeyError:
    print("EOContainer[\"path\"] = EOObject doesn't support adding variable to non existing group with __add_item__.")


EOContainer["path"] = EOObject doesn't support adding variable to non existing group with __add_item__.


### Add and read coordinates
The `coordinates/` group contains the coordinates.

EOObjects can match their dimensions to these coordinates through their `coordinates` attribute.

In [11]:
# Register the coordinate variables, once by item assignment and once with add_variable.
product["coordinates/space"] = EOVariable(data=data_coord_space)
product.coordinates.add_variable("dim_group/dim_10", data=data_coord_dim_10)

# Print the coordinates seen by a group and by three of its variables.
for eo_path in (
    "measurements/group1/group2",
    "measurements/group1/group2/variable_b",
    "measurements/group1/group2/variable_c",
    "measurements/group1/group2/variable_e",
):
    print(product[eo_path].coordinates)


{'dim_group/dim_10': <weakproxy at 0x7f880f878630 to EOProduct at 0x7f880f8ec550> -> /->coordinates->dim_group -> dim_10}
{}
{'/time': <weakproxy at 0x7f880f878630 to EOProduct at 0x7f880f8ec550> -> /->coordinates -> time, '/space': <weakproxy at 0x7f880f878630 to EOProduct at 0x7f880f8ec550> -> /->coordinates -> space}
{}


## Add and read attributes
The `attrs` attribute gives access to an object's attribute dictionary.

In [12]:
# Attributes set on the product itself.
product.attrs["33"] = 4.2
product.attrs["test_key"] = "test_value"
product.attrs["test_none"] = None
print("product: ", product.attrs)

# The same keys set on a group...
group2 = product["measurements/group1/group2"]
group2.attrs["33"] = 4.3
group2.attrs["test_key"] = "test_value"
group2.attrs["test_none"] = None
print("group: ", group2.attrs)

# ...and on a variable.
variable_b = product["measurements/group1/group2/variable_b"]
variable_b.attrs["33"] = 4.3
variable_b.attrs["test_key"] = "test_value"
variable_b.attrs["test_none"] = None
print("variable: ", variable_b.attrs)

product:  {'33': 4.2, 'test_key': 'test_value', 'test_none': None}
group:  {'_EOPF_DIMENSIONS_PATHS': ['dim_group'], '_EOPF_DIMENSIONS': ['dim_10'], '33': 4.3, 'test_key': 'test_value', 'test_none': None}
variable:  {'33': 4.3, 'test_key': 'test_value', 'test_none': None}


### Write all not-yet-written content of the product

In [17]:
# Write the content of the product that has not been written yet (the product was opened with mode="w").
product.write()

## Reading an EOProduct

In [14]:
# Bind a new product to the path that was just written and open it read-only.
read_store = EOZarrStore(f"{output_folder}/{output_filename}")
product_r: EOProduct = EOProduct("product_read", store_or_path_url=read_store)
product_r.open(mode="r")
product_r

We can read the previous EOGroups from product_r.

In [18]:
# Print the two groups looked up on the read product, one per line.
print(
    product_r["measurements/group1"],
    product_r["measurements/group1/group2"],
    sep="\n",
)
# Leave the nested group as the last expression so the notebook displays it.
product_r["measurements/group1/group2"]

[EOGroup]0x7f880f7fd400
[EOGroup]0x7f880f8ab8b0


We print the internal xarray of our variable to check if the store correctly loaded the data :

In [19]:
# Data of the product that was written, for comparison with what is read back.
print(product["measurements/group1/variable_a"]._data)

# For each variable of the read product, print the internal xarray object and then its values.
for variable_path in (
    "measurements/group1/variable_a",
    "measurements/group1/group2/variable_b",
    "measurements/group1/group2/variable_c",
    "measurements/group1/group2c/variable_d",
    "measurements/group1/group2/variable_e",
):
    print(product_r[variable_path]._data)
    print(product_r[variable_path]._data.to_numpy())

<xarray.DataArray 'variable_a' (dim_0: 2)>
array([1, 1])
Dimensions without coordinates: dim_0
<xarray.DataArray 'variable_a' (dim_0: 2)>
dask.array<open_dataset-31b6fe2b553ec17cea6e9e50083bff77variable_a, shape=(2,), dtype=int64, chunksize=(2,), chunktype=numpy.ndarray>
Dimensions without coordinates: dim_0
[1 1]
<xarray.DataArray 'variable_b' (dim_0: 1)>
dask.array<open_dataset-15aa38b7ccb335c69da81994afe2aa00variable_b, shape=(1,), dtype=int64, chunksize=(1,), chunktype=numpy.ndarray>
Dimensions without coordinates: dim_0
[2]
<xarray.DataArray 'variable_c' (time: 2, space: 1)>
dask.array<open_dataset-15aa38b7ccb335c69da81994afe2aa00variable_c, shape=(2, 1), dtype=int64, chunksize=(2, 1), chunktype=numpy.ndarray>
Dimensions without coordinates: time, space
Attributes:
    _EOPF_DIMENSIONS:        ['time', 'space']
    _EOPF_DIMENSIONS_PATHS:  ['', '']
[[3]
 [3]]
<xarray.DataArray 'variable_d' (dim_0: 3, dim_1: 1)>
dask.array<open_dataset-3ec2ac0f6a2d8a670f1e9a385635e392variable_d, sh