# Accessing HSDS from the HDF5 library

<div class="alert alert-block alert-danger">
This notebook will work only with a running HSDS instance. If you don't have one running, you can start one using the <a href="./05-HSDS.ipynb">HSDS notebook</a>.
</div>

## Introducing the HDF5 REST VOL connector

### Installing the HDF5 REST VOL connector

- We need a newer version of HDF5 than is available in the Debian repositories.  We'll install from source.
- We'll also need to install the HDF5 REST VOL connector from source.

<div class="alert alert-block alert-warning">
Cloning and compiling take three to five minutes, depending on the machine type.
</div>

In [None]:
%%bash
# Build HDF5 and the HDF5 REST VOL connector from source and install both
# under the same prefix.
set -e  # abort at the first failing step instead of installing a broken build

PREFIX=/home/vscode/.local

# build_from_source URL DIR
#   Clone URL into DIR (skipped when DIR is already a git checkout, so the
#   cell can be re-run), then configure, build and install it into $PREFIX.
build_from_source() {
    local url=$1 src=$2

    [ -d "$src/.git" ] || git clone "$url" "$src"
    mkdir -p "$src/build"

    # The subshell keeps the directory change local to this build.
    (
        cd "$src/build"
        # Redirection order matters: stderr is first pointed at the cell's
        # stdout, then stdout is discarded, so only warnings and errors show.
        cmake -DCMAKE_INSTALL_PREFIX="$PREFIX" ../ 2>&1 > /dev/null
        make -j 4 2>&1 > /dev/null
        make install 2>&1 > /dev/null
    )
}

build_from_source https://github.com/HDFGroup/hdf5.git build/hdf5
build_from_source https://github.com/HDFGroup/vol-rest.git build/rest-vol

### Modifying the C++ HDF5 example to use the REST VOL connector

By adding five lines and changing a single line of code, we can make our original C++ example "talk" to HSDS instead of the local filesystem.

- We must initialize the REST VOL connector (line 27)
- We must create a file access property list and configure it for the REST VOL connector (lines 28 and 29), then pass it to `H5Fcreate` (line 31)
- We must release the file access property (line 68) and terminate the REST VOL connector (line 71)

And that's it!

In [None]:
%%writefile src/ou_restvol.cpp
#include "docstring.hpp"
#include "ou_sampler.hpp"
#include "rest_vol_public.h"
#include "hdf5.h"
#include <iostream>
#include <vector>

using namespace std;

int main()
{ // Sample Ornstein-Uhlenbeck paths and store them, with descriptive attributes, via the HDF5 REST VOL
    const size_t path_count = 100, step_count = 1000;
    const double dt = 0.01, theta = 1.0, mu = 0.0, sigma = 0.1;
    // NOTE: the surrounding notebook text cites lines of this cell by number; keep line positions stable.
    cout << "Running with parameters:"
         << " paths=" << path_count << " steps=" << step_count
         << " dt=" << dt << " theta=" << theta << " mu=" << mu << " sigma=" << sigma << endl;

    vector<double> ou_process; // passed to ou_sampler, then written out as the path_count x step_count dataset
    ou_sampler(ou_process, path_count, step_count, dt, theta, mu, sigma);

    //
    // Write the sample paths to an HDF5 file using the HDF5 REST VOL!
    //

    H5rest_init(); // initialize the REST VOL connector
    auto fapl = H5Pcreate(H5P_FILE_ACCESS); // file access property list
    H5Pset_fapl_rest_vol(fapl); // configure the list to use the REST VOL connector

    auto file = H5Fcreate("/home/vscode/ou_restvol.h5", H5F_ACC_TRUNC, H5P_DEFAULT, fapl); // passing fapl is what routes this file to HSDS

    add_docstring(file, ".", "source", "https://github.com/HDFGroup/hdf5-tutorial");

    { // create & write the dataset
        hsize_t dimsf[] = {(hsize_t)path_count, (hsize_t)step_count};
        auto space = H5Screate_simple(2, dimsf, NULL);
        auto dataset = H5Dcreate(file, "/dataset", H5T_NATIVE_DOUBLE, space, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
        H5Dwrite(dataset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, ou_process.data()); // H5S_ALL twice: whole buffer -> whole dataset
        H5Dclose(dataset);
        H5Sclose(space);
    }

    { // make the file self-describing by adding a few attributes to `dataset`
        add_docstring(file, "dataset", "comment", "This dataset contains sample paths of an Ornstein-Uhlenbeck process.");
        add_docstring(file, "dataset", "Wikipedia", "https://en.wikipedia.org/wiki/Ornstein%E2%80%93Uhlenbeck_process");
        add_docstring(file, "dataset", "rows", "path");
        add_docstring(file, "dataset", "columns", "time");

        auto scalar = H5Screate(H5S_SCALAR);
        auto acpl = H5Pcreate(H5P_ATTRIBUTE_CREATE);
        H5Pset_char_encoding(acpl, H5T_CSET_UTF8); // some attribute names below contain non-ASCII characters
        // helper: create a scalar double attribute called `name` on "dataset" and write `value` into it
        auto set_attribute = [&](const string& name, const double& value) {
            auto attr = H5Acreate_by_name(file, "dataset", name.c_str(), H5T_NATIVE_DOUBLE, scalar, acpl, H5P_DEFAULT, H5P_DEFAULT);
            H5Awrite(attr, H5T_NATIVE_DOUBLE, &value);
            H5Aclose(attr);
        };
        set_attribute("dt", dt);
        set_attribute("θ", theta);
        set_attribute("μ", mu);
        set_attribute("σ", sigma);

        H5Pclose(acpl);
        H5Sclose(scalar);
    }

    H5Pclose(fapl); // release the file access property list
    H5Fclose(file);

    H5rest_term(); // terminate the REST VOL connector

    return 0;
}

In [None]:
%%bash
# Compile the C++ example against the locally installed HDF5 and REST VOL
# libraries, then run it against HSDS.
set -e            # do not run a stale binary when compilation fails
mkdir -p ./build  # the output directory may not exist yet

g++ -I/home/vscode/.local/include -L/home/vscode/.local/lib \
    ./src/ou_restvol.cpp ./src/docstring.cpp ./src/ou_sampler.cpp \
    -o ./build/ou_restvol \
    -lhdf5 -lhdf5_vol_rest -lcurl -ldl -Wl,-rpath=/home/vscode/.local/lib

# HSDS connection settings for the program started below.
# NOTE(review): presumably the credentials of the local tutorial HSDS instance; confirm before reuse elsewhere.
export HSDS_USERNAME=vscode
export HSDS_PASSWORD=vscode
export HSDS_ENDPOINT=http://localhost:5101
./build/ou_restvol

In [None]:
%%bash
# Inspect what the program above wrote: list /home/vscode/ou_restvol.h5 with hsls.
# NOTE(review): --showattrs presumably prints the attributes as well; this cell sets no HSDS_* variables, so hsls must already be configured.
hsls --showattrs /home/vscode/ou_restvol.h5

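## Improving performance with multi-dataset I/O

The C example below creates several datasets and then writes all of them with a single `H5Dwrite_multi` call and reads them back with a single `H5Dread_multi` call, instead of issuing one call per dataset. Finally, it verifies that the data read back matches what was written.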

In [None]:
%%writefile src/restvol.h
#ifndef RESTVOL_H
#define RESTVOL_H

// The macros below expand to fprintf() and exit(); include their declarations
// here so the header does not depend on what the including file pulls in.
#include <stdio.h>
#include <stdlib.h>

// Prefix prepended to the file name that is passed to H5Fcreate
#define HSDS_FILENAME_PREFIX "/home/vscode/"

// Size of the buffer used to format dataset names
#define FILENAME_BUFFER_SIZE 1024
// Number of datasets created, written and read back
#define NUM_DATASETS 2
// Number of integers per dataset; fully parenthesized so that the macro
// behaves as a single operand in any expression (e.g. `x % NUM_INTEGERS`)
#define NUM_INTEGERS ((1000 * 1000 * 100) / NUM_DATASETS)

// Print msg to stderr and terminate with exit status 1.
// Wrapped in do { } while (0) so that `if (cond) ERROR("...");` guards both
// statements; use it with a trailing semicolon.
#define ERROR(msg)                         \
    do                                     \
    {                                      \
        fprintf(stderr, "%s\n", (msg));    \
        exit(1);                           \
    } while (0)

// Print msg to stderr.
// The expansion deliberately keeps its own trailing semicolon: existing call
// sites write LOG("...") without one.
#define LOG(msg) \
    fprintf(stderr, "%s\n", (msg));

// For displaying log messages after the progress bars for dataset read/write
#define SCROLL_AMNT (NUM_DATASETS < 21 ? NUM_DATASETS : 21)

#endif

In [None]:
%%writefile src/restvol.c
#include "restvol.h"
#include "hdf5.h"
#include "rest_vol_public.h"

#include <stdlib.h>

int main(void)
{
    LOG("Initializing REST VOL...")
    H5rest_init();

    // Set a file access property to use the REST VOL
    hid_t fapl = H5Pcreate(H5P_FILE_ACCESS);
    H5Pset_fapl_rest_vol(fapl);

    LOG("Creating file...")
    hid_t file = H5I_INVALID_HID;
    if ((file = H5Fcreate(HSDS_FILENAME_PREFIX "my_file.h5", H5F_ACC_TRUNC, H5P_DEFAULT, fapl)) < 0)
    {
        ERROR("Failed to create file");
    }

    LOG("Setting up data...");
    fprintf(stderr, "  %d Datasets of %d integers each\n", NUM_DATASETS, NUM_INTEGERS);

    int *write_data[NUM_DATASETS];
    int *read_data[NUM_DATASETS];

    hid_t dspace_ids[NUM_DATASETS];
    hid_t dset_ids[NUM_DATASETS];
    hid_t type_ids[NUM_DATASETS];
    hid_t sel_space_ids[NUM_DATASETS];

    for (size_t i = 0; i < NUM_DATASETS; i++)
    {
        dspace_ids[i] = H5Screate_simple(1, (hsize_t[]){NUM_INTEGERS}, NULL);

        char dset_name[FILENAME_BUFFER_SIZE];
        snprintf(dset_name, FILENAME_BUFFER_SIZE, "%s%zu", "dset", i);

        if ((dset_ids[i] = H5Dcreate(file, dset_name, H5T_NATIVE_INT, dspace_ids[i],
                                     H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT)) < 0)
        {
            ERROR("Failed to create dataset");
        }

        type_ids[i] = H5T_NATIVE_INT;
        sel_space_ids[i] = H5S_ALL;

        if ((write_data[i] = calloc(NUM_INTEGERS, sizeof(int))) == NULL)
        {
            ERROR("Failed to allocate memory");
        }

        if ((read_data[i] = calloc(NUM_INTEGERS, sizeof(int))) == NULL)
        {
            ERROR("Failed to allocate memory");
        }

        for (size_t j = 0; j < NUM_INTEGERS; j++)
            write_data[i][j] = i * 10000 + j;
    }

    LOG("Writing data...");

    if (H5Dwrite_multi(NUM_DATASETS, dset_ids, type_ids, sel_space_ids, sel_space_ids,
                       H5P_DEFAULT, (const void **)write_data) < 0)
    {
        ERROR("Failed to perform multi write");
    }

    for (size_t i = 0; i < SCROLL_AMNT; i++)
        fprintf(stderr, "\n");

    LOG("Write complete. Reading data...");
    fflush(stderr);

    if (H5Dread_multi(NUM_DATASETS, dset_ids, type_ids, sel_space_ids, sel_space_ids,
                      H5P_DEFAULT, (void **)read_data) < 0)
    {
        ERROR("Failed to perform multi read");
    }

    for (size_t i = 0; i < SCROLL_AMNT; i++)
        fprintf(stderr, "\n");

    LOG("Verifying read correctness...");

    for (size_t i = 0; i < NUM_DATASETS; i++)
        for (size_t j = 0; j < NUM_INTEGERS; j++)
        {
            if (read_data[i][j] != write_data[i][j])
            {
                fprintf(stderr, "At read_data[%zu][%zu], %d != expected %d\n",
                        i, j, read_data[i][j], write_data[i][j]);
                exit(1);
            }
        }

    LOG("Multi read/write complete!")

    H5Pclose(fapl);
    H5Fclose(file);

    LOG("Terminating REST VOL...");
    H5rest_term();

    return 0;
}

In [None]:
%%bash
# Compile the multi-dataset C example against the locally installed HDF5 and
# REST VOL libraries, then run it against HSDS.
set -e            # do not run a stale binary when compilation fails
mkdir -p ./build  # the output directory may not exist yet

gcc -I/home/vscode/.local/include -L/home/vscode/.local/lib \
    ./src/restvol.c \
    -o ./build/restvol \
    -lhdf5 -lhdf5_vol_rest -lcurl -ldl -Wl,-rpath=/home/vscode/.local/lib

# HSDS connection settings for the program started below.
# NOTE(review): presumably the credentials of the local tutorial HSDS instance; confirm before reuse elsewhere.
export HSDS_USERNAME=vscode
export HSDS_PASSWORD=vscode
export HSDS_ENDPOINT=http://localhost:5101
./build/restvol