# Installing the Zarr Backend for NWB

```
# Create and activate a dedicated conda environment for the Zarr backend
conda create -n nwbzarr python=3.9
conda activate nwbzarr
# Install build tools and pinned versions of the core dependencies
conda install Cython
conda install numpy==1.21.0
conda install pkgconfig
conda install h5py=3.3.0
conda install pandas==1.3.0
conda install python-dateutil==2.8.1
# Install PyNWB in editable mode from the add/zarrio branch
git clone --recurse-submodules https://github.com/NeurodataWithoutBorders/pynwb.git
cd pynwb
git checkout add/zarrio
pip install -e .
# Replace the installed hdmf with an editable install of the 1.0.3-zarr branch
# (presumably hdmf was pulled in as a PyNWB dependency by the step above -- confirm)
pip uninstall hdmf
# NOTE: this clone is created inside the pynwb checkout, because of the `cd pynwb` above
git clone --recurse-submodules https://github.com/hdmf-dev/hdmf.git
cd hdmf
git checkout 1.0.3-zarr
conda install --file requirements.txt
pip install -e .

```

To use this notebook, the following optional packages should also be installed:

```
pip install dandi
conda install jupyter
```

In [1]:
import os
import shutil

# Download a file from DANDI

In [2]:
from dandi.dandiapi import DandiAPIClient

# Dandiset and the path (within that dandiset) of the file used in this notebook.
dandiset_id = '000207'
filepath = "sub-1/sub-1_ses-1_ecephys+image.nwb"  # 5 MB file

with DandiAPIClient() as client:
    # Look the asset up in the draft version of the dandiset.
    dandiset = client.get_dandiset(dandiset_id, 'draft')
    asset = dandiset.get_asset_by_path(filepath)
    # Content URL of the asset as reported by DANDI (query string stripped).
    s3_path = asset.get_content_url(follow_redirects=1, strip_query=True)
    # Local file name: the last component of the asset's path.
    filename = os.path.basename(asset.path)

In [3]:
# Download the asset located in the previous cell and store it as `filename`.
# `filename` is a bare basename, so the path is relative to the current
# working directory.
asset.download(filename)

# Define output settings and clean up old files 

In [4]:
# Names of the files this notebook produces, derived from the downloaded file.
zarr_filename = "test_zarr_" + filename
hdf_filename = "test_hdf5_" + filename

# Delete converted outputs left over from previous runs of this notebook.
# The HDF5 output is removed with os.remove and the Zarr output with
# shutil.rmtree, so each path is paired with the matching removal function.
for stale_path, remove in ((hdf_filename, os.remove), (zarr_filename, shutil.rmtree)):
    if not os.path.exists(stale_path):
        continue
    print("Removing %s" % stale_path)
    remove(stale_path)

Removing test_hdf5_sub-1_ses-1_ecephys+image.nwb
Removing test_zarr_sub-1_ses-1_ecephys+image.nwb


# Convert the file to Zarr

In [5]:
from pynwb import NWBHDF5IO, NWBZarrIO

# Open the downloaded HDF5 file for reading and the Zarr target for writing in
# a single `with` statement, then export the former into the latter.
with NWBHDF5IO(filename, 'r', load_namespaces=False) as read_io, \
        NWBZarrIO(zarr_filename, mode='w', chunking=True) as export_io:
    export_io.export(src_io=read_io, write_args={'link_data': False})



# Read the Zarr file back in

In [6]:
# Read the converted Zarr file inside a context manager so the reader is
# closed when the block exits instead of staying open for the rest of the
# kernel session (the other cells already use their IO objects this way).
# NOTE(review): if data in `zf` needs to be accessed lazily, do that inside
# this block -- confirm whether Zarr-backed datasets stay readable after close.
with NWBZarrIO(zarr_filename, 'r') as zr:
    zf = zr.read()

  warn("Length of data does not match length of timestamps. Your data may be transposed. Time should be on "


# Convert the Zarr file back to HDF5

In [7]:
# Round trip: the Zarr file is now the source and a new HDF5 file the target.
with NWBZarrIO(zarr_filename, mode='r') as read_io, \
        NWBHDF5IO(hdf_filename, mode='w') as export_io:
    export_io.export(src_io=read_io, write_args={'link_data': False})

# Read the new HDF5 file back

Now our file has been converted from HDF5 to Zarr and back again to HDF5. Here we check that we can still read that file.

In [8]:
# Open the round-tripped HDF5 file read-only and build the NWB file object from it.
with NWBHDF5IO(hdf_filename, mode='r') as hr:
    hf = hr.read()

# Notes

The ZarrIO backend for NWB is under development as part of the following PRs on GitHub:

* **HDMF**: https://github.com/hdmf-dev/hdmf/pull/696

   * Related PR: https://github.com/hdmf-dev/hdmf/pull/697 — this PR contains all of the general changes to HDMF that were made to implement the Zarr backend. Once #697 is merged, #696 should be synced with dev again so that it only includes the changes that add Zarr itself.
   
* **PyNWB**: https://github.com/NeurodataWithoutBorders/pynwb/pull/1018