# Using `tables_io` to write to an hdf5 file from multiple places

In [20]:
# Standard imports
import os
from collections import OrderedDict
import numpy as np
import tables_io
#from ..tests.testUtils import make_test_data
import astropy.table as apTable
from mpi4py import MPI

## Set up 

### Some test data.

OK, let's make some test data and write it to an HDF5 file.

In [13]:
def make_test_data(nrow=1000, vect_size=20, mat_size=5):
    """Build a pair of astropy tables filled with test data.

    Parameters
    ----------
    nrow : int, optional
        Number of rows in the main table.
    vect_size : int, optional
        Length of each entry in the ``vect`` column.
    mat_size : int, optional
        Side length of each square entry in the ``matrix`` column.

    Returns
    -------
    dict
        ``data`` : table with uniformly distributed random columns
        ``scalar`` (nrow,), ``vect`` (nrow, vect_size) and
        ``matrix`` (nrow, mat_size, mat_size), plus metadata keys
        ``a``, ``b`` and ``c``.
        ``md`` : 21-row table with a column ``a`` of ones and a column
        ``b`` of zeros, and metadata key ``small``.
    """
    # Passing the target shape directly draws the same values as drawing a
    # flat array and reshaping it afterwards.
    scalar = np.random.uniform(size=nrow)
    vect = np.random.uniform(size=(nrow, vect_size))
    matrix = np.random.uniform(size=(nrow, mat_size, mat_size))

    table = apTable.Table(dict(scalar=scalar, vect=vect, matrix=matrix))
    # Metadata values of mixed kinds: an int, None and a list.
    table.meta["a"] = 1
    table.meta["b"] = None
    table.meta["c"] = [3, 4, 5]

    small_table = apTable.Table(dict(a=np.ones(21), b=np.zeros(21)))
    small_table.meta["small"] = True
    return dict(data=table, md=small_table)

In [14]:
# One batch of test tables: data['data'] is the main table, data['md'] the small one.
data = make_test_data()

### Some utility functions to get the output data shape

In [10]:
def get_shapes_and_type(aDict):
    """Map each key of ``aDict`` to a ``(shape, dtype)`` pair.

    Every value must expose ``.shape`` and ``.dtype`` attributes. The
    result is an ``OrderedDict`` that keeps the iteration order of
    ``aDict``.
    """
    layout = OrderedDict()
    for name, column in aDict.items():
        layout[name] = (column.shape, column.dtype)
    return layout

def modify_shape(shapeIn, length):
    """Return ``shapeIn`` as a tuple with its first entry replaced by ``length``.

    ``shapeIn`` itself is not modified; an empty ``shapeIn`` raises
    ``IndexError``.
    """
    dims = [*shapeIn]
    dims[0] = length  # only the leading entry changes
    return tuple(dims)

def set_lengths(aDict, length):
    """Return a copy of a ``{key: (shape, dtype)}`` mapping with resized shapes.

    Each shape has its first entry replaced by ``length`` (via
    ``modify_shape``); the second element of each value is passed through
    unchanged. The result is an ``OrderedDict`` in the order of ``aDict``.
    """
    resized = OrderedDict()
    for name, spec in aDict.items():
        resized[name] = (modify_shape(spec[0], length), spec[1])
    return resized

## Create the file

### Here we get the output shapes and data types

In [15]:
# Take each column's shape and dtype from the in-memory table and set the
# leading dimension to 10000 -- the ten 1000-row chunks written further down.
dout = {'data':set_lengths(get_shapes_and_type(data['data']), 10000)}

In [16]:
# Show the per-group {column: (shape, dtype)} layout that is passed on when allocating the file.
print(dout)

{'data': OrderedDict({'scalar': ((10000,), dtype('float64')), 'vect': ((10000, 20), dtype('float64')), 'matrix': ((10000, 5, 5), dtype('float64'))})}


### Here we allocate the file

In [17]:
# Remove any output left over from a previous run. The file does not exist on
# a first run (and another process may already have removed it), so a missing
# file is not an error.
try:
    os.unlink('test_multi_write.hdf5')
except FileNotFoundError:
    pass
from tables_io import hdf5
# Allocate the same file that was removed above and that is read back at the
# end of the notebook; `dout` supplies the per-group column shapes and dtypes.
groups, fout = hdf5.initialize_HDF5_write('test_multi_write.hdf5', comm=MPI.COMM_WORLD, **dout)

## Write the file

### Here we fill the file

In [18]:
# Fill the file in ten consecutive 1000-row slices, generating a fresh batch
# of test data for each slice.
for i in range(10):
    start, end = i * 1000, (i + 1) * 1000
    data = make_test_data()
    hdf5.write_dict_to_HDF5_chunk(groups, data, start, end)

### Here we write the metadata and close the file

In [19]:
# `data` is the last batch generated by the write loop; the columns of its
# small 'md' table are passed as keyword arguments under the group name 'md'.
hdf5.finalize_HDF5_write(fout, 'md', **data['md'])

## Test the file

### Here we read the file in chunks

In [None]:
# Read the 'data' group back 1000 rows at a time and print, for each chunk,
# its first two elements followed by the shapes of the three columns.
# NOTE(review): this cell uses `tables_io.io.iterHdf5ToDict` while the write
# cells use the `tables_io.hdf5` snake_case API -- confirm the name exists in
# the installed tables_io version.
for chunks in tables_io.io.iterHdf5ToDict('test_multi_write.hdf5', 1000, 'data'):
    first, second, columns = chunks[0], chunks[1], chunks[2]
    column_shapes = (columns['matrix'].shape, columns['vect'].shape, columns['scalar'].shape)
    print("%i:%i %s %s %s" % ((first, second) + column_shapes))