In [1]:
import loompy
import numpy as np
import h5py
import random
import time

# open file with custom chunk cache settings
def open_HDF5(filename, cache=1, libver='earliest'):
    """Open an HDF5 file read/write with a custom raw-data chunk cache size.

    Parameters
    ----------
    filename : bytes
        Path of the file; it is handed straight to the low-level
        ``h5py.h5f.open`` (the notebook always passes ``bytes``).
    cache : int
        Chunk cache size in MiB. The value written to the property list is
        ``1024*1024*cache`` bytes, so 0 requests a zero-byte cache.
    libver : str
        ``'earliest'`` or ``'latest'``: lower bound of the HDF5 format
        version used for newly written objects. The upper bound is always
        the latest version.

    Returns
    -------
    h5py.File
        High-level wrapper around the opened file id. The caller is
        responsible for closing it.

    Raises
    ------
    ValueError
        If ``libver`` is not one of the two supported names.
    """
    libver_low = {
        'earliest': h5py.h5f.LIBVER_EARLIEST,
        'latest': h5py.h5f.LIBVER_LATEST,
    }
    if libver not in libver_low:
        raise ValueError("libver must be 'earliest' or 'latest', got %r" % (libver,))

    propfaid = h5py.h5p.create(h5py.h5p.FILE_ACCESS)
    settings = list(propfaid.get_cache())
    # get_cache() returns the defaults [0, 521, 1048576, 0.75];
    # index 2 is the chunk cache size in bytes.
    settings[2] = 1024*1024*cache
    propfaid.set_cache(*settings)

    # libver must be set on the access property list: h5py.File ignores its
    # mode/libver arguments when it is given an already-open file id.
    propfaid.set_libver_bounds(libver_low[libver], h5py.h5f.LIBVER_LATEST)

    fid = h5py.h5f.open(filename, flags=h5py.h5f.ACC_RDWR, fapl=propfaid)
    return h5py.File(fid)

# benchmark random access time
def time_random_access(hdf5_file, times=100):
    """Time reading `times` randomly chosen rows of ``/matrix`` and print it.

    Parameters
    ----------
    hdf5_file : mapping
        Object that yields a 2-D dataset under the key ``"/matrix"``
        (an open ``h5py.File`` in this notebook).
    times : int
        Number of single-row reads to perform.

    Prints the total elapsed wall-clock time once, formatted as ``"%f s"``.
    Returns nothing.
    """
    rmax = hdf5_file["/matrix"].shape[0]-1
    start = time.perf_counter()
    for _ in range(times):
        idx = random.randint(0, rmax)  # randint is inclusive on both ends
        # The dataset is looked up on every iteration on purpose, so the
        # lookup cost stays part of what is being measured.
        hdf5_file['/matrix'][idx,:]
    end = time.perf_counter()
    print("%f s" % (end-start))
    
# benchmark sequential access time (fancy indexing)
def time_sequential_access(hdf5_file, rows=1000, output=None):
    """Time one contiguous read of the first `rows` rows of ``/matrix``.

    Parameters
    ----------
    hdf5_file : mapping
        Object that yields a 2-D dataset under the key ``"/matrix"``
        (an open ``h5py.File`` in this notebook).
    rows : int
        Upper bound of the row slice ``0:rows`` that is read.
    output : list of str, optional
        List the formatted timing (``"%f s"``) is appended to. A fresh list
        is created for each call when omitted, so results never accumulate
        in a shared default.

    Returns
    -------
    list of str
        The list that received the timing line (``output`` itself when one
        was passed in).
    """
    if output is None:
        output = []
    start = time.perf_counter()
    hdf5_file['/matrix'][0:rows,:]
    end = time.perf_counter()
    output.append("%f s" % (end-start))
    return output
    
def bench_random_access(filename, rows=10):
    """Run the random-access benchmark for chunk cache sizes 1, 2, ..., 512.

    For every cache size the file is reopened with ``open_HDF5`` and
    ``time_random_access`` performs `rows` random single-row reads.

    Parameters
    ----------
    filename : bytes
        Path of the file, forwarded to ``open_HDF5``.
    rows : int
        Number of random row reads per cache size (passed to
        ``time_random_access`` as its ``times`` argument).

    Returns
    -------
    list of str
        The "cache size" / "row length" label lines, in the order they were
        produced. The timings themselves are not in this list:
        ``time_random_access`` prints them directly.
    """
    output = []
    for i in range(0, 10):
        cache_size = 1 << i
        output.append("cache size: %d" % cache_size)
        # `with` closes the file even if a read raises.
        with open_HDF5(filename, cache_size) as file:
            output.append("row length: %d" % (file["/matrix"].shape[1]-1))
            time_random_access(file, rows)
    return output
        
        
def bench_sequential_access(filename, rows=100):
    """Run the sequential-read benchmark for chunk cache sizes 0, 1, 2, ..., 512.

    The file is reopened with ``open_HDF5`` for every cache size and
    ``time_sequential_access`` reads the slice ``0:rows`` once. All lines are
    collected first and printed at the end.

    Parameters
    ----------
    filename : bytes
        Path of the file, forwarded to ``open_HDF5``.
    rows : int
        Number of leading rows read per measurement.

    Prints a "row length" line followed by a "cache size" line and a timing
    line for each cache size. Returns nothing.
    """
    output = []

    # NOTE(review): this reports shape[1]-1, one less than the number of
    # columns; kept as-is so results stay comparable with the recorded
    # outputs. Confirm whether the -1 is intended.
    with open_HDF5(filename, 1) as file:
        output.append("row length: %d" % (file["/matrix"].shape[1]-1))

    # Cache size 0 first, then the powers of two 1..512.
    for cache_size in [0] + [1 << i for i in range(0, 10)]:
        output.append("cache size: %d" % cache_size)
        # `with` closes the file even if the read raises.
        with open_HDF5(filename, cache_size) as file:
            time_sequential_access(file, rows, output)

    for line in output:
        print(line)

In [11]:
# Sequential-read benchmark on cortex.loom, reading the row slice 0:25000.
filename = b"/home/job/loom-datasets/Published/cortex.loom"
bench_sequential_access(filename, rows=25000)

row length: 3004
cache size: 0
0.340922 s
cache size: 1
0.331973 s
cache size: 2
0.314102 s
cache size: 4
0.302156 s
cache size: 8
0.305439 s
cache size: 16
0.297736 s
cache size: 32
0.304083 s
cache size: 64
0.305899 s
cache size: 128
0.301489 s
cache size: 256
0.301215 s
cache size: 512
0.298512 s


In [13]:
# Sequential-read benchmark on Forebrain_E9-E18.5.loom, reading the row slice 0:1000.
filename = b"/home/job/loom-datasets/Build 161109/Forebrain_E9-E18.5.loom"
bench_sequential_access(filename, rows=1000)

row length: 44871
cache size: 0
0.779106 s
cache size: 1
0.776456 s
cache size: 2
0.794281 s
cache size: 4
0.778804 s
cache size: 8
0.790101 s
cache size: 16
0.809911 s
cache size: 32
0.792409 s
cache size: 64
0.818961 s
cache size: 128
0.783704 s
cache size: 256
0.775080 s
cache size: 512
0.837825 s


In [None]:
# Sequential-read benchmark on Oligos_All.loom, reading the row slice 0:10000.
filename = b"/home/job/loom-datasets/Adolescent2/Oligos_All.loom"
bench_sequential_access(filename, rows=10000)

In [9]:
# Compare ten partial-row reads through a dataset handle fetched once
# against ten reads that look up "/matrix" on the file each time.
# The `with` block closes the file even if one of the reads raises.
with open_HDF5(b"/home/job/loom-datasets/Build 161109/Forebrain_E9-E18.5.loom", 100, 'latest') as file:
    matrix = file["/matrix"]

    # Variant 1: reuse the dataset handle.
    start = time.perf_counter()
    for i in range(0, 10):
        t = matrix[i*100,(i+1)*100:]
    end = time.perf_counter()
    print(end-start)

    # Variant 2: look the dataset up on every read.
    start = time.perf_counter()
    for i in range(0, 10):
        t = file["/matrix"][i*100,(i+1)*100:]
    end = time.perf_counter()
    print(end-start)

0.4201705899999979
0.4296598740002082


In [4]:
# Open test_loom.loom with a 100 MiB chunk cache; the handle stays open in `file`.
# NOTE(review): presumably a file that reproduces a problem (it lives under "Buggy/") - confirm.
filename = b"/home/job/loom-datasets/Buggy/test_loom.loom"
file = open_HDF5(filename, cache=100, libver='latest')

In [2]:
# Connect to cortex.loom through loompy, close the connection right away,
# then ask the notebook to display the connection object.
# NOTE(review): `ds` is displayed after close(); presumably this cell checks
# how a closed connection renders - confirm before reordering.
filename = b"/home/job/loom-datasets/Published/cortex.loom"
ds = loompy.connect(filename)
ds.close()
ds

In [20]:
# Render the connection's HTML representation while the connection is open,
# then display both the captured HTML and the connection object itself.
# Uses whatever `filename` the previously executed cell left behind.
from IPython.display import display, HTML

# `out` is not read anywhere in the visible notebook.
out = None
ds2 = None
with loompy.connect(filename) as ds:
    # Capture the HTML inside the with block, before the block exits.
    output = ds._repr_html_()
    # Keep a reference to the connection so it can be displayed after the block.
    ds2 = ds
display(HTML(output))
# NOTE(review): ds2 is displayed after the with block has exited; presumably
# the connection is closed by then, which would explain the
# "Not a location (Invalid object id)" error in the recorded output - confirm.
ds2

0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27
,,,,,,,,,,,,,,,,Age,21.0,20.0,20.0,21.0,25.0,20.0,25.0,23.0,21.0,21.0,...
,,,,,,,,,,,,,,,,BackSPIN_level_0_group,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...
,,,,,,,,,,,,,,,,BackSPIN_level_1_group,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...
,,,,,,,,,,,,,,,,BackSPIN_level_2_group,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...
,,,,,,,,,,,,,,,,BackSPIN_level_3_group,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...
,,,,,,,,,,,,,,,,CellID,1772071015_C02,1772071017_G12,1772071017_A05,1772071014_B06,1772067065_H06,1772071017_E02,1772067065_B07,1772067060_B09,1772071014_E04,1772071015_D04,...
,,,,,,,,,,,,,,,,Class,interneurons,interneurons,interneurons,interneurons,interneurons,interneurons,interneurons,interneurons,interneurons,interneurons,...
,,,,,,,,,,,,,,,,Diameter,0.0,9.56,11.1,11.7,11.0,11.9,11.3,10.9,12.9,11.2,...
,,,,,,,,,,,,,,,,Group,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...
,,,,,,,,,,,,,,,,Sex,1.0,-1.0,-1.0,1.0,1.0,-1.0,1.0,-1.0,1.0,1.0,...


ValueError: Not a location (Invalid object id)

<loompy.loompy.LoomConnection at 0x7f50ac6e76d8>