In [11]:
##
##
## Please run this notebook only after HDF5 1.10.7 (hdf5-1.10.7) is installed correctly on your system.
## Do not use HDF5 1.12.*: its filter function is broken.
##

#Uncomment the two lines below to install h5py
#import sys
#!{sys.executable} -m pip install h5py



import os
import h5py


## das_example.h5 is a subset of westSac_180104233031.tdms.h5 (channels 99-120)
orignial_das_file="./das_example.h5"
compressed_das_file="./das_example_compressed.h5"

# 62016 is the HDF5 filter ID of H5TurboPFor
compression_id=62016

# Open both files through context managers so they are always closed, even when
# the write fails (e.g. "ValueError: Unknown compression filter number: 62016").
# A handle left open in "w" mode would otherwise make re-running this cell fail
# until the kernel is restarted.
with h5py.File(orignial_das_file, 'r') as das_f:
    # original (uncompressed) dataset
    das_dset_orig = das_f['/Acoustic']

    # arguments for H5TurboPFor
    ## - compression_args[0]: data element type, 0 - short type
    ## - compression_args[1]: pre-processing method: zigzag (1), abs (2), plusabsmin (3)
    ## - compression_args[2-3]: chunk size (here: the first two dimensions of the dataset)
    compression_args=(0, 1, das_dset_orig.shape[0], das_dset_orig.shape[1])
    print(compression_args)

    # Create the compressed file and write the data; the compression happens
    # inside the write. The whole dataset is stored as a single chunk, matching
    # the chunk size passed to the filter in compression_args[2-3].
    with h5py.File(compressed_das_file, "w") as das_f_compressed:
        das_dset_compressed = das_f_compressed.create_dataset(
            "/Acoustic",
            chunks=das_dset_orig.shape,
            data=das_dset_orig,
            compression=compression_id,
            compression_opts=compression_args,
        )
# Leaving the blocks closes the compressed file first, then the original file.

#inspect the result
##First, let's see the compression ratio with "h5dump -pH"
##       it is 1.611:1 
print("\n======================================")
print("Let's see the compression ratio : ")
print("======================================")
!h5dump -pH ./das_example_compressed.h5

##Then, we can compare the differennce between the orginal file and the compressed file
##      with the h5diff command
##          "0 differences found" because  H5TurboPFor is lossless compression
##      Note: 
##         (0) h5diff command compare data element by element
##         (1) This step calls the decompression method automatically
##         (2) It may call decompression multiple times because the h5diff read data in small chunk.
print("\n======================================")
print("Let's verify the results: ")
print("======================================")

!h5diff -v das_example_compressed.h5 das_example.h5



#Troubleshooting:
#
#        ValueError: Unknown compression filter number: 62016
#
#Please run "source setup.sh" in a terminal (not within jupyter-notebook), then restart jupyter-notebook from that same terminal:

# > cd ...../H5TurboPFor
# > source setup.sh
# > jupyter-notebook



(0, 1, 30000, 21)

Let's see the compression ratio : 
HDF5 "./das_example_compressed.h5" {
GROUP "/" {
   DATASET "Acoustic" {
      DATATYPE  H5T_STD_I16LE
      DATASPACE  SIMPLE { ( 30000, 21 ) / ( 30000, 21 ) }
      STORAGE_LAYOUT {
         CHUNKED ( 30000, 21 )
         SIZE 668260 (1.885:1 COMPRESSION)
      }
      FILTERS {
         USER_DEFINED_FILTER {
            FILTER_ID 62016
            COMMENT TurboPFor-Integer-Compression: https://github.com/dbinlbl/H5TurboPFor
            PARAMS { 0 1 30000 21 }
         }
      }
      FILLVALUE {
         FILL_TIME H5D_FILL_TIME_ALLOC
         VALUE  H5D_FILL_VALUE_DEFAULT
      }
      ALLOCATION_TIME {
         H5D_ALLOC_TIME_INCR
      }
   }
}
}

Let's verify the results: 

file1     file2
---------------------------------------
    x      x    /              
    x      x    /Acoustic      

group  : </> and </>
0 differences found
dataset: </Acoustic> and </Acoustic>
0 differences found
