In [None]:
import sys
sys.path.append("..")
import proteka

In [None]:
import numpy as np
import mdtraj as md

In [None]:
# ala2 test data
ala2_top = md.load_topology("./example_dataset_files/ala2_all_atom.pdb")
# `np.load` on an .npz archive returns a lazy NpzFile that keeps a file handle open;
# the `with` block closes it as soon as the arrays have been read into memory
with np.load("./example_dataset_files/ala2_all_atom.npz") as dts:
    coords_raw = dts["coords"] # unit: A
    forces_raw = dts["forces"] # unit: kcal/mol/A
# one time stamp per frame, 500 ps apart; the frame count follows the data
# (assumes the first axis of `coords` is the frame axis - TODO confirm)
time_raw = np.arange(len(coords_raw)) * 500 # unit: ps

In [None]:
from proteka.quantity import Quantity # for wrapping arrays with non-standard units

## Simple use case:

In [None]:
# Build the Ensemble for the ala2 data
## arrays in non-default units are wrapped in `Quantity` together with their unit string
ala2_coords = Quantity(coords_raw, "A")
ala2_extras = {"forces": Quantity(forces_raw, "kcal/mol/A")}
ala2_meta = {"forcefield": "AMBER ff99SB-ILDN"}
## name, topology and coordinates go in positionally; the rest by keyword
ens = proteka.dataset.Ensemble(
    "ala2",
    ala2_top,
    ala2_coords,
    quantities=ala2_extras,
    metadata=ala2_meta,
)

In [None]:
# __repr__ of the ensemble, shown next to the return value of `list_quantities()`
# (a bare tuple as the last expression makes the notebook display both)
ens, ens.list_quantities()

In [None]:
# topology held by the ensemble
# (`ala2_top` was passed in at construction; presumably this is that topology - verify)
ens.top

In [None]:
# size of the ensemble: the `n_frames` and `n_atoms` attributes
ens.n_frames, ens.n_atoms

In [None]:
# retrieve coords and forces: print each array's shape next to the unit reported for it
for qty_name in ("coords", "forces"):
    print(getattr(ens, qty_name).shape, ens.get_unit(qty_name))

## Get `mdtraj.Trajectory`

In [None]:
# get an mdtraj.Trajectory for all data available
# (one call for the whole ensemble; `ens.get_mdtrajs()` is the per-trajectory variant)
ens.get_all_in_one_mdtraj()

In [None]:
## when unit cell information was saved, the mdtraj will automatically include them
# Every frame gets the same cubic box, so the (n_frames, 3) arrays are filled directly.
# `np.repeat(row, n).reshape(n, 3)` would only give correct rows while all three
# entries of `row` are equal; `np.full` has no such trap and follows the frame count.
ens.cell_lengths = np.full((ens.n_frames, 3), 2.7222, dtype=np.float32)  # box edge; presumably nm - verify
ens.cell_angles = np.full((ens.n_frames, 3), 90.0, dtype=np.float32)  # right angles
t = ens.get_all_in_one_mdtraj()
# box vectors of the frame at index 1
t.unitcell_vectors[1]

In [None]:
# when data comes from multiple trajectories
## four consecutive 500-frame windows: "trj0" -> slice(0, 500), ..., "trj3" -> slice(1500, 2000)
ens.register_trjs(
    {f"trj{idx}": slice(500 * idx, 500 * (idx + 1)) for idx in range(4)}
)
ens.n_trjs, ens.trj_n_frames
# the HDF5 file keeps track of this info automatically, just like the other `Quantity`s

In [None]:
## independent trajectories can be retrieved
## (presumably one per range registered through `ens.register_trjs(...)` - verify)
ens.get_mdtrajs()

## Unit and Quantity

In [None]:
# unit system: [L]ength, [M]ass, [T]ime and [E]nergy
## `end="\n\n"` leaves one blank line between the two printed sections
print("Unit system:", ens.unit_system, end="\n\n")
# these units are the defaults for the builtin quantities (and for storage)
print("Builtin quantities (shape and units):")
ens.unit_system.builtin_quantities

In [None]:
# Init a builtin quantity from a `Quantity` that carries its own unit
print("Builtin unit for velocities:", ens.get_unit("velocities"))
# Mock velocities: seeded so that Restart & Run All reproduces the same numbers,
# and shaped after the ensemble instead of a hard-coded (2000, 22, 3)
velos_rng = np.random.default_rng(0)
velos_raw = velos_rng.random((ens.n_frames, ens.n_atoms, 3)) * 100. # assume they are in unit A/ps
## it can be as simple as this
ens.velocities = Quantity(velos_raw, "A/ps")

In [None]:
## or, when you are sure the unit is compatible, assign the bare array without a `Quantity` wrapper
## (dividing by 10 rescales the A/ps values to nm/ps - presumably the builtin
##  velocity unit printed in the previous cell; verify before relying on it)
ens.velocities = velos_raw / 10.

In [None]:
# Retrieve a Quantity
# (indexing the ensemble by name; the object is kept in `forces_q` and displayed)
forces_q = ens["forces"]
forces_q

In [None]:
## this is useful when we need the value in another unit instead of the storage one
## round-trip check: expressed in kcal/mol/A again, the stored forces must match the raw input array
force_q_in_original_unit = forces_q.in_unit_of("kcal/mol/A")
assert np.allclose(force_q_in_original_unit, forces_raw)

In [None]:
# special Quantity: strings
# a plain Python string can be assigned like any other quantity
ens.some_string = "Hello, World!"
# NOTE(review): the "Type:" line prints the object returned by `ens["some_string"]`,
# not `type(...)` of it - confirm that the label matches the intended output
print("Type:", ens["some_string"])
# attribute access is printed as the content
print("Content:", ens.some_string)

In [None]:
## insider note: `top` and `trjs` are handled as special Quantities that hold serialized JSON strings

## Metadata

In [None]:
# metadata
# limited by the HDF5 format to at most 64 KB!
# best practice: store plain strings, serialized JSON, etc.
ens.metadata["temperature_in_K"] = 300
# display the metadata mapping, now including the new entry
ens.metadata

## Save to and load from an HDF5 file

In [None]:
# write everything so far to an HDF5 group named after the ensemble
import h5py
## mode "w" creates the file, replacing any existing one at that path
with h5py.File("./example_dataset_files/temp_storage.h5", mode="w") as h5_out:
    ens.write_to_hdf5(h5_out, ens.name)

In [None]:
# read it back
## open read-only and rebuild an Ensemble from the "ala2" group
with h5py.File("./example_dataset_files/temp_storage.h5", mode="r") as h5_in:
    ala2_group = h5_in["ala2"]
    ens2 = proteka.dataset.Ensemble.from_hdf5(ala2_group)

In [None]:
# do your own checks to see whether everything is in place
# (here: display the reloaded ensemble together with its "coords" quantity)
ens2, ens2["coords"]

In [None]:
# special case: asking for a unit system other than the storage one
# makes the loader convert units automatically
with h5py.File("./example_dataset_files/temp_storage.h5", mode="r") as h5_in:
    ala2_group = h5_in["ala2"]
    ens3 = proteka.dataset.Ensemble.from_hdf5(
        ala2_group,
        unit_system="A-g/mol-ps-kcal/mol",
    )
# in this unit system the forces are expected to match the raw kcal/mol/A input
assert np.allclose(ens3.forces, forces_raw)

In [None]:
# cleanup: delete the temporary HDF5 file written by this notebook
import os
temp_h5_path = "./example_dataset_files/temp_storage.h5"
os.unlink(temp_h5_path)