# Trajectory featurization and metrics calculation

In [27]:
import sys
sys.path.append("..")


import numpy as np

In [28]:
from proteka.metrics.utils  import generate_grid_polymer
from proteka.dataset  import Ensemble, Quantity
from proteka.metrics import StructuralIntegrityMetrics, EnsembleQualityMetrics
from proteka.metrics import Featurizer

### Generate a test ensemble

For demonstration, we generate trajectories that consist of atoms placed on a cubic grid. The bond length is constant and equal to `grid_size`. Self-crossings and clashes are allowed.

In [31]:
# Settings shared by both synthetic ensembles.
grid_size = 0.38  # grid spacing; coordinates are tagged as "nm" below
n_frames = 1000
n_atoms = 5

# Both trajectories are generated with exactly the same arguments.
# NOTE(review): no random seed is set in this notebook; if
# generate_grid_polymer is stochastic, the outputs below will differ
# between runs -- confirm and seed if reproducibility is needed.
polymer_kwargs = dict(n_frames=n_frames, n_atoms=n_atoms, grid_size=grid_size)

# Ensemble that will be evaluated by the metrics.
traj_target = generate_grid_polymer(**polymer_kwargs)
target = Ensemble(
    "CAgrid",
    traj_target.top,
    Quantity(traj_target.xyz, "nm"),
)

# Ensemble that the target is compared against.
traj_reference = generate_grid_polymer(**polymer_kwargs)
reference = Ensemble(
    "CAgrid",
    traj_reference.top,
    Quantity(traj_reference.xyz, "nm"),
)

In [32]:
# Show which quantities the reference ensemble holds before any
# featurization has been applied.
reference.list_quantities()

['coords', 'top']

To add features to an ensemble, create a `Featurizer` object and pass it the corresponding ensemble. New features are then calculated and added to the ensemble.

In [33]:
### Trajectory featurization
# Bind a Featurizer to the reference ensemble; the add_* calls below store
# their results on that ensemble rather than returning them here.
featurizer = Featurizer(reference)
# Adds the 'ca_ca_pseudobonds' quantity (see the listing printed below).
featurizer.add_ca_bonds()
# Adds the 'ca_ca_end2end_distance' quantity (see the listing printed below).
featurizer.add_end2end_distance()
# List the quantities again to confirm that the new features were attached.
reference.list_quantities()

['coords', 'top', 'ca_ca_pseudobonds', 'ca_ca_end2end_distance']

In [34]:
# Structural-integrity metrics are computed on a single ensemble; no
# reference is passed.
sim = StructuralIntegrityMetrics()
# TODO: implement selection of metrics to compute
# Calling the metrics object on the target ensemble returns the dictionary
# displayed below (clash count and z-score statistics).
sim(target)

{'N clashes': 5417, 'max z-score': 0.0, 'rms z-score': 0.0}

In [35]:
# Ensemble-quality metrics object; it is called with a target and a
# reference ensemble.
eqm = EnsembleQualityMetrics()

In [36]:
# Evaluate the target ensemble against the reference; the result displayed
# below is a dictionary keyed by metric name ('d end2end, KL divergence').
eqm(target, reference=reference)

{'d end2end, KL divergence': 0.008117496638929756}