# Example 4: Reading and Writing Benchmark

## Hardware Specification for Rerun

Desktop workstation with 2x AMD EPYC 7702 64-core processors (128 physical and 256 logical cores in total) and 1024 GB of DDR4 memory, running Ubuntu 22.04 LTS.

In [1]:
# This notebook shows how the reading and writing times are measured
import molli as ml
import timeit

N = 3  # NOTE(review): not referenced by the cells visible here; confirm whether it is still needed

# ml.aux.assert_molli_version_min("1.0.0b2")

# Source conformer library that supplies the ensembles for the benchmarks
_clib = ml.ConformerLibrary("bpa_aligned.clib")

# Pull every ensemble into an in-memory dict up front, so that the
# write benchmark measures only the writing speed and not the cost
# of reading from the source library.
with _clib.reading():
    ensembles = dict(_clib.items())

These are the different collection formats (storage backends) available in `molli`.

In [2]:

def _dir_col(path, overwrite=False):
    """Open ``path`` as a writable mol2 collection using ``DirCollectionBackend``.

    Parameters
    ----------
    path
        Location of the collection, passed through to ``ml.storage.Collection``.
    overwrite : bool, default False
        Forwarded unchanged to ``ml.storage.Collection``.

    Returns
    -------
    ml.storage.Collection
        Collection whose values are converted to and from ``.mol2`` text.
    """

    def _decode(raw):
        # Stored payload -> text -> ConformerEnsemble
        return ml.ConformerEnsemble.loads_mol2(raw.decode())

    def _encode(ensemble):
        # ConformerEnsemble -> mol2 text -> encoded payload
        return ml.ConformerEnsemble.dumps_mol2(ensemble).encode()

    return ml.storage.Collection(
        path,
        backend=ml.storage.DirCollectionBackend,
        value_decoder=_decode,
        value_encoder=_encode,
        ext=".mol2",
        readonly=False,
        overwrite=overwrite,
    )


def _zip_col(path, overwrite=False):
    """Open ``path`` as a writable mol2 collection using ``ZipCollectionBackend``.

    Parameters
    ----------
    path
        Location of the collection, passed through to ``ml.storage.Collection``.
    overwrite : bool, default False
        Forwarded unchanged to ``ml.storage.Collection``.

    Returns
    -------
    ml.storage.Collection
        Collection whose values are converted to and from ``.mol2`` text.
    """

    def _mol2_to_ensemble(payload):
        # Decode the stored payload to text, then parse it as mol2
        return ml.ConformerEnsemble.loads_mol2(payload.decode())

    def _ensemble_to_mol2(ens):
        # Serialize to mol2 text, then encode it for storage
        return ml.ConformerEnsemble.dumps_mol2(ens).encode()

    return ml.storage.Collection(
        path,
        backend=ml.storage.ZipCollectionBackend,
        value_decoder=_mol2_to_ensemble,
        value_encoder=_ensemble_to_mol2,
        ext=".mol2",
        readonly=False,
        overwrite=overwrite,
    )


def _ukv_col(path, overwrite=False):
    """Open ``path`` as a writable ``ml.ConformerLibrary``.

    Parameters
    ----------
    path
        Location of the library file, passed through to ``ml.ConformerLibrary``.
    overwrite : bool, default False
        Forwarded unchanged to ``ml.ConformerLibrary``.

    Returns
    -------
    ml.ConformerLibrary
        Library opened with ``readonly=False``.
    """
    library_options = {"readonly": False, "overwrite": overwrite}
    return ml.ConformerLibrary(path, **library_options)

## Writing Times

In [3]:
# Note: bpa_test_deflate5.zip is left out of this loop because
# you cannot write into the compressed format.
write_targets = [
    (_ukv_col, "bpa_test.clib"),
    (_zip_col, "bpa_test.zip"),
    (_dir_col, "bpa_test"),
]

# `prep` and `path` must keep these exact names: the timeit setup string
# resolves them through globals().
for prep, path in write_targets:
    write_timer = timeit.Timer(
        stmt="""with library.writing():\n    for k, v in ensembles.items(): library[k]=v""",
        setup="""library = prep(path, overwrite=True)""",
        globals=globals(),
    )

    # Five repetitions of a single execution each. timeit runs the setup
    # statement once per repetition, so every run reopens the collection
    # with overwrite=True before the timed write starts.
    clib_write_times = write_timer.repeat(repeat=5, number=1)

    print("Writing times", path, min(clib_write_times), clib_write_times, flush=True)


Writing times bpa_test.clib 0.3480684249952901 [0.3782551420008531, 0.34833620399876963, 0.35106149199418724, 0.3510784639947815, 0.3480684249952901]
Writing times bpa_test.zip 93.47247516000061 [93.87680340299266, 94.93543296400458, 93.47247516000061, 93.61168274100055, 94.20502133599075]
Writing times bpa_test 93.41699376401084 [94.56352856599551, 93.70587370399153, 93.74676171099418, 93.41699376401084, 96.13699470600113]


## Reading Times

In [4]:
# Note: bpa_test_deflate5.zip is not produced by the writing benchmark.
# NOTE(review): it appears to be made by compressing the "bpa_test" directory
# after that directory has been written -- confirm how the archive is created.
read_targets = [
    (_ukv_col, "bpa_test.clib"),
    (_zip_col, "bpa_test.zip"),
    (_zip_col, "bpa_test_deflate5.zip"),
    (_dir_col, "bpa_test"),
]

# `prep` and `path` must keep these exact names: the timeit setup string
# resolves them through globals().
for prep, path in read_targets:
    read_timer = timeit.Timer(
        stmt="""with library.reading():\n    for k, v in library.items(): pass""",
        setup="""library = prep(path, overwrite=False)""",
        globals=globals(),
    )

    # Five repetitions of a single execution each; the timed statement
    # iterates over every item of the collection and discards it.
    clib_read_times = read_timer.repeat(repeat=5, number=1)

    print("Read times", path, min(clib_read_times), clib_read_times, flush=True)

Read times bpa_test.clib 1.0547445749980398 [1.0947346960019786, 1.0660112299956381, 1.0552394859987544, 1.0547445749980398, 1.0610972559952643]
Read times bpa_test.zip 154.67337175300054 [154.67337175300054, 166.21870341600152, 157.39443366299383, 160.86328307799704, 163.08083603800333]
Read times bpa_test_deflate5.zip 160.31052669300698 [160.31052669300698, 168.26776486101153, 162.00970906899602, 172.00367274400196, 174.04174041900842]
Read times bpa_test 162.5466837839922 [162.5466837839922, 171.23339160499745, 165.0368733159994, 165.7297141720046, 167.20620242299628]
