In [120]:
import time
from tempfile import NamedTemporaryFile, TemporaryFile

import numpy
import numpy as np
import h5py

import qcodes
from qcodes import (
    initialise_or_create_database_at, load_or_create_experiment, 
    Measurement, Parameter,
    load_by_id
)
from qcodes.dataset.data_export import get_data_by_id

In [17]:
# Reserve a unique path for the benchmark database.
# NamedTemporaryFile is used rather than TemporaryFile because only the
# former guarantees that `.name` is a filesystem path on every platform
# (on POSIX, TemporaryFile().name is a file descriptor number, not a path).
# Closing the handle removes the empty placeholder file, so the database
# is created from scratch at that path.
temp_db_file = NamedTemporaryFile(suffix='.db')
temp_db_file.close()
initialise_or_create_database_at(temp_db_file.name)
load_or_create_experiment('save_load_speed_benchmarking', 'sqlite3_from_qcodes')

save_load_speed_benchmarking#sqlite3_from_qcodes#1@C:\Users\a-miasta\AppData\Local\Temp\tmpouuqbdsp.db
------------------------------------------------------------------------------------------------------

# Saving and loading speed

Let's define a generator function that simulates a measurement process by yielding dummy data of a predefined shape.

In [18]:
# Number of points along each of the 4 dimensions of the dummy data;
# the total number of data points is n_pts_in_dim**4.
n_pts_in_dim = 20

In [19]:
def produce_measurement_data(n_pts_in_dim, rng=None):
    """
    Generator standing in for the code that acquires measurement data.

    For the sake of example it yields random dummy data spanning
    4 dimensions with `n_pts_in_dim` points per dimension, i.e.
    `n_pts_in_dim**4` data points in total, delivered in
    `n_pts_in_dim**2` iterations of `n_pts_in_dim**2` points each.

    Args:
        n_pts_in_dim: number of points along each of the 4 dimensions.
        rng: optional `numpy.random.Generator` used to draw the random
            values, e.g. `np.random.default_rng(seed)` for reproducible
            data. When omitted, numpy's global random state is used.

    Yields:
        Tuple `(s1_val, s2_val, magn_vals, phas_vals)` obtained at a
        single "measurement" iteration: `s1_val` and `s2_val` are the
        integer outer-loop indices, while `magn_vals` and `phas_vals`
        are 1D arrays of length `n_pts_in_dim**2` holding the flattened
        mesh grid of two random vectors.
    """
    # Without an explicit generator, draw from numpy's global random state.
    draw = np.random.rand if rng is None else rng.random

    for s1_val in range(n_pts_in_dim):
        for s2_val in range(n_pts_in_dim):
            # Two random vectors are expanded into a 2D grid, so that every
            # (s1, s2) setting comes with n_pts_in_dim**2 inner points.
            magn_vals, phas_vals = np.meshgrid(
                draw(n_pts_in_dim),
                draw(n_pts_in_dim),
            )

            yield s1_val, s2_val, magn_vals.ravel(), phas_vals.ravel()

## Via QCoDeS DataSet

In [20]:
# The four parameters differ only in name, label and unit; all of them are
# created without get/set commands.
s1, s2, magn, phas = (
    Parameter(name, label=label, unit=unit, get_cmd=None, set_cmd=None)
    for name, label, unit in (
        ('s1', 'Setting 1', 'V'),
        ('s2', 'Setting 2', 'V'),
        ('magn', 'Magnitude', 'V'),
        ('phas', 'Phase', 'deg'),
    )
)

### with 'numeric' type

In [84]:
meas = Measurement()

# Independent parameters first, then the dependent ones that use them
# as setpoints (default parameter type).
for independent in (s1, s2):
    meas.register_parameter(independent)
for dependent in (magn, phas):
    meas.register_parameter(dependent, setpoints=(s1, s2))

In [85]:
# NOTE(review): presumably the interval, in seconds, between writes of the
# accumulated results to the database — confirm against the QCoDeS docs.
meas.write_period = 10

In [86]:
%%timeit
# Saves the dummy data into a QCoDeS dataset registered with the default
# parameter type and reports how long it took.
# NOTE(review): names assigned inside a %%timeit cell (datasaver, dataset,
# run_id, t0/t1) are not expected to persist in the notebook namespace, and
# every timing repetition starts a new run (see the run ids in the output).

with meas.run() as datasaver:
    
    # The clock starts after the run has been opened ...
    t0_dataset_numeric = time.perf_counter()
    
    for s1_val, s2_val, magn_vals, phas_vals in produce_measurement_data(n_pts_in_dim):
        
        datasaver.add_result((s1, s1_val), (s2, s2_val), (magn, magn_vals),
                             (phas, phas_vals))
    
# ... and stops after the `with` block has exited, so whatever the run's
# context manager does on exit is part of the measured interval.
t1_dataset_numeric = time.perf_counter()
print(f"Data saving to dataset with 'numeric' paramtype took {t1_dataset_numeric-t0_dataset_numeric} s")

dataset = datasaver.dataset
run_id = dataset.run_id

Starting experimental run with id: 44
Data saving to dataset with 'numeric' paramtype took 3.8605869999992137 s
Starting experimental run with id: 45
Data saving to dataset with 'numeric' paramtype took 4.151191499999186 s
Starting experimental run with id: 46
Data saving to dataset with 'numeric' paramtype took 4.8023025000002235 s
Starting experimental run with id: 47
Data saving to dataset with 'numeric' paramtype took 5.033625399999437 s
Starting experimental run with id: 48
Data saving to dataset with 'numeric' paramtype took 5.1586685000002035 s
Starting experimental run with id: 49
Data saving to dataset with 'numeric' paramtype took 4.366736900001342 s
Starting experimental run with id: 50
Data saving to dataset with 'numeric' paramtype took 4.045378900000287 s
Starting experimental run with id: 51
Data saving to dataset with 'numeric' paramtype took 3.8398373999989417 s
4.65 s ± 483 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [121]:
# Run 44 is the first run reported in the output of the 'numeric' saving cell.
# NOTE(review): the id is hard-coded, so it only matches the database and
# session that produced the recorded outputs.
dataset = load_by_id(44)

In [122]:
%%timeit
# Loads all parameters of the 'numeric' dataset at once.
# Returns a list of rows, as in sqlite (each row is a list where every item
# corresponds to a column).
data = dataset.get_data(*dataset.parameters.split(','))

2.53 s ± 71 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [82]:
# another way is to use get_values and obtain values of each parameter one by one;
# but get_data_by_id uses it already, so let's not repeat it
# data = dataset.get_values

In [88]:
%%timeit
# returns list of lists of dicts with data and metadata;
# the first list contains elements for each dependent parameter;
# second list contains all the independent parameters, 
# and the last element is the dependent parameter
data = get_data_by_id(datasaver.dataset.run_id)

330 ms ± 6.19 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [118]:
%%timeit
# Same loading call, addressed by the hard-coded id of the first 'numeric'
# run from the saving cell's output.
data = get_data_by_id(44)

6.85 s ± 1.25 s per loop (mean ± std. dev. of 7 runs, 1 loop each)


### with 'array' type

In [89]:
meas = Measurement()

# Same registration as for the 'numeric' case, except that the dependent
# parameters are stored with paramtype='array'.
for independent in (s1, s2):
    meas.register_parameter(independent)
for dependent in (magn, phas):
    meas.register_parameter(dependent, setpoints=(s1, s2), paramtype='array')

In [90]:
# Same write period as in the 'numeric' benchmark, so the two are comparable.
# NOTE(review): presumably expressed in seconds — confirm against the QCoDeS docs.
meas.write_period = 10

In [94]:
# %%timeit
# (magic left disabled: the cell runs once, and `datasaver`, `dataset` and
# `run_id` stay available to the cells below)

# Saves the dummy data into a QCoDeS dataset whose dependent parameters
# are registered with paramtype='array' and reports how long it took.
with meas.run() as datasaver:
    
    # The clock starts after the run has been opened ...
    t0_dataset_array = time.perf_counter()
    
    for s1_val, s2_val, magn_vals, phas_vals in produce_measurement_data(n_pts_in_dim):
        
        datasaver.add_result((s1, s1_val), (s2, s2_val), (magn, magn_vals),
                             (phas, phas_vals))
    
# ... and stops after the `with` block has exited, so whatever the run's
# context manager does on exit is part of the measured interval.
t1_dataset_array = time.perf_counter()
print(f"Data saving to dataset with 'array' paramtype took {t1_dataset_array-t0_dataset_array} s")

dataset = datasaver.dataset
run_id = dataset.run_id

Starting experimental run with id: 60
Data saving to dataset with 'array' paramtype took 0.3064309000001231 s


In [123]:
# Run 60 is the run reported in the output of the 'array' saving cell.
# NOTE(review): the id is hard-coded, so it only matches the database and
# session that produced the recorded outputs.
dataset = load_by_id(60)

In [124]:
%%timeit
# Loads all parameters of the 'array' dataset at once.
# Returns a list of rows, as in sqlite (each row is a list where every item
# corresponds to a column).
data = dataset.get_data(*dataset.parameters.split(','))

264 ms ± 5.77 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [82]:
# another way is to use get_values and obtain values of each parameter one by one;
# but get_data_by_id uses it already, so let's not repeat it
# data = dataset.get_values

In [125]:
%%timeit
# Loads the 'array' dataset via get_data_by_id.
# Returns a list of lists of dicts with data and metadata;
# the first list contains elements for each dependent parameter;
# the second list contains all the independent parameters,
# and the last element is the dependent parameter.
# The explicitly loaded `dataset` is used here rather than `datasaver`:
# data = get_data_by_id(datasaver.dataset.run_id)
data = get_data_by_id(dataset.run_id)

334 ms ± 6.45 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## Via numpy npy file

In [109]:
# %%timeit

# Reserve a unique file path for the output. NamedTemporaryFile is used
# rather than TemporaryFile because only the former guarantees that `.name`
# is a filesystem path on every platform (on POSIX, TemporaryFile().name is
# a file descriptor number). Closing the handle removes the placeholder.
outfile = NamedTemporaryFile()
outfile.close()

t0_npy = time.perf_counter()

# Preallocate the whole result: one row per dimension (s1, s2, magn, phas),
# one column per data point.
results_np = np.zeros((4, n_pts_in_dim**4))
last_index = 0

for s1_val, s2_val, magn_vals, phas_vals in produce_measurement_data(n_pts_in_dim):
    
    n_pts = len(magn_vals)

    # The scalar settings are broadcast over the whole chunk of points
    results_np[0, last_index:last_index+n_pts] = s1_val
    results_np[1, last_index:last_index+n_pts] = s2_val
    results_np[2, last_index:last_index+n_pts] = magn_vals
    results_np[3, last_index:last_index+n_pts] = phas_vals

    last_index += n_pts
            
# The reserved name has no '.npy' extension, so np.save appends one;
# the file on disk is therefore `outfile.name + '.npy'`.
np.save(outfile.name, results_np)

t1_npy = time.perf_counter()
print(f"Data saving to numpy npy file took {t1_npy-t0_npy} s")

Data saving to numpy npy file took 0.05319410000083735 s


In [110]:
%%timeit
# The saving cell passed a name without the '.npy' extension to np.save,
# hence the extension is added here to address the file on disk.
data = np.load(outfile.name+'.npy')

5.88 ms ± 901 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


## Via numpy npy file and memmap

In [111]:
# %%timeit

# Reserve a unique file path for the output. NamedTemporaryFile is used
# rather than TemporaryFile because only the former guarantees that `.name`
# is a filesystem path on every platform (on POSIX, TemporaryFile().name is
# a file descriptor number). Closing the handle removes the placeholder.
outfile = NamedTemporaryFile()
outfile.close()

t0_npy_mm = time.perf_counter()

# Create the npy file up front and write into it through a memory map:
# one row per dimension (s1, s2, magn, phas), one column per data point.
results_mm = np.lib.format.open_memmap(
    outfile.name, mode='w+', shape=(4, n_pts_in_dim**4))

last_index = 0

for s1_val, s2_val, magn_vals, phas_vals in produce_measurement_data(n_pts_in_dim):
    
    n_pts = len(magn_vals)

    # The scalar settings are broadcast over the whole chunk of points
    results_mm[0, last_index:last_index+n_pts] = s1_val
    results_mm[1, last_index:last_index+n_pts] = s2_val
    results_mm[2, last_index:last_index+n_pts] = magn_vals
    results_mm[3, last_index:last_index+n_pts] = phas_vals

    last_index += n_pts
            
del results_mm  # closes the file and performs final flushing

t1_npy_mm = time.perf_counter()
print(f"Data saving to numpy npy file via memory map took {t1_npy_mm-t0_npy_mm} s")

Data saving to numpy npy file via memory map took 0.06714089999877615 s


In [113]:
%%timeit
# The memory-mapped file was created at exactly `outfile.name`, so no
# extension is added here.
data = np.load(outfile.name)

4.85 ms ± 513 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


## Via hdf5 file (directly to the file)

In [116]:
# %%timeit

# Reserve a unique file path for the output. NamedTemporaryFile is used
# rather than TemporaryFile because only the former guarantees that `.name`
# is a filesystem path on every platform (on POSIX, TemporaryFile().name is
# a file descriptor number). Closing the handle removes the placeholder.
outfile = NamedTemporaryFile()
outfile.close()

t0_hdf5_2 = time.perf_counter()

last_index = 0

with h5py.File(outfile.name, 'w') as f:
    # h5py creates single-precision (float32) datasets unless told otherwise;
    # request float64 explicitly so that this benchmark stores the same amount
    # of data, at the same precision, as the numpy-based ones.
    ds = f.create_dataset('results', shape=(4, n_pts_in_dim**4), dtype='float64')

    for s1_val, s2_val, magn_vals, phas_vals in produce_measurement_data(n_pts_in_dim):

        n_pts = len(magn_vals)

        # Every assignment below is written straight into the file's dataset
        ds[0, last_index:last_index+n_pts] = s1_val
        ds[1, last_index:last_index+n_pts] = s2_val
        ds[2, last_index:last_index+n_pts] = magn_vals
        ds[3, last_index:last_index+n_pts] = phas_vals

        last_index += n_pts

# The clock stops after the file has been closed by the `with` block
t1_hdf5_2 = time.perf_counter()
print(f"Data saving to hdf5 file took {t1_hdf5_2-t0_hdf5_2} s")

Data saving to hdf5 file took 0.4443922000009479 s


In [128]:
%%timeit
# Read the whole 'results' dataset into an in-memory numpy array while the
# file is still open.
with h5py.File(outfile.name, 'r') as f:
    data = np.array(f['results'], copy=True)

5.04 ms ± 84.3 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


## Via hdf5 file (from allocated numpy array)

In [34]:
%%timeit

outfile = TemporaryFile()
outfile.close()

t0_hdf5 = time.perf_counter()

results_np = np.zeros((4, n_pts_in_dim**4))
last_index = 0

for s1_val, s2_val, magn_vals, phas_vals in produce_measurement_data(n_pts_in_dim):
            
    n_pts = len(magn_vals)

    results_np[0, last_index:last_index+n_pts] = s1_val
    results_np[1, last_index:last_index+n_pts] = s2_val
    results_np[2, last_index:last_index+n_pts] = magn_vals
    results_np[3, last_index:last_index+n_pts] = phas_vals

    last_index += n_pts
            
with h5py.File(outfile.name, 'w') as f:
    ds = f.create_dataset('results', data=results_np)

t1_hdf5 = time.perf_counter()
print(f"Data saving to hdf5 file took {t1_hdf5-t0_hdf5} s")

Data saving to hdf5 file took 0.04720199999997021 s
Data saving to hdf5 file took 0.061583200000086435 s
Data saving to hdf5 file took 0.04233099999987644 s
Data saving to hdf5 file took 0.04147600000010243 s
Data saving to hdf5 file took 0.04036139999993793 s
Data saving to hdf5 file took 0.06866820000004736 s
Data saving to hdf5 file took 0.041715699999940625 s
Data saving to hdf5 file took 0.04046949999997196 s
Data saving to hdf5 file took 0.040578400000185866 s
Data saving to hdf5 file took 0.039881400000012945 s
Data saving to hdf5 file took 0.04231320000008054 s
Data saving to hdf5 file took 0.059740500000089014 s
Data saving to hdf5 file took 0.042064800000161995 s
Data saving to hdf5 file took 0.04004609999992681 s
Data saving to hdf5 file took 0.04069900000013149 s
Data saving to hdf5 file took 0.04015419999996084 s
Data saving to hdf5 file took 0.05341750000002321 s
Data saving to hdf5 file took 0.05221260000007533 s
Data saving to hdf5 file took 0.051638200000070356 s
Data 