# Trajectory featurization and metrics calculation

In [1]:
import sys
import numpy as np
# Put the parent directory on the module search path before the project
# imports in the next cell run.
# NOTE(review): presumably this lets `proteka` be imported from a source
# checkout without installing it -- confirm against the repository layout.
sys.path.append("..")

In [2]:
from proteka.metrics.utils  import generate_grid_polymer
from proteka.dataset  import Ensemble, Quantity
from proteka.metrics import StructuralIntegrityMetrics, EnsembleQualityMetrics
from proteka.metrics import Featurizer

### Generate a test ensemble

For demonstration, we generate trajectories that consist of atoms placed on a cubic grid. The bond length is constant and equal to `grid_size`. Self-crossings and clashes are allowed.

In [3]:
# Parameters shared by both synthetic trajectories.
n_atoms = 5
n_frames = 1000
grid_size = 0.4  # grid spacing; per the text above this is also the bond length

# "Target" ensemble: the one that gets evaluated by the metrics below.
traj_target = generate_grid_polymer(
    grid_size=grid_size,
    n_atoms=n_atoms,
    n_frames=n_frames,
)
target = Ensemble(
    "CAgrid",
    traj_target.top,
    Quantity(traj_target.xyz, "nm"),  # coordinates tagged with the unit string "nm"
)

# "Reference" ensemble: a second, separately generated trajectory built with
# the same parameters; it is the baseline the target is compared against.
# NOTE(review): the generator is presumably stochastic (the recorded
# divergences are non-zero); if so, the outputs are not reproducible
# without a fixed seed -- confirm how generate_grid_polymer draws samples.
traj_reference = generate_grid_polymer(
    grid_size=grid_size,
    n_atoms=n_atoms,
    n_frames=n_frames,
)
reference = Ensemble(
    "CAgrid",
    traj_reference.top,
    Quantity(traj_reference.xyz, "nm"),
)

In [4]:
# Show which quantities the freshly built ensemble holds before any
# featurization; the recorded output lists only 'coords' and 'top'.
reference.list_quantities()

['coords', 'top']

To add features to an ensemble, one needs to create a `Featurizer` object and pass it the corresponding ensemble. New features are calculated and added to the ensemble.

In [5]:
# Trajectory featurization: bind a Featurizer to the reference ensemble and
# request two features. Each `add_*` call stores its result on the ensemble
# itself, as the recorded output shows ('ca_bonds' and 'end2end_distance'
# appear next to 'coords' and 'top').
featurizer = Featurizer(reference)
featurizer.add_ca_bonds()
featurizer.add_end2end_distance()
# Last expression of the cell: display the updated list of quantities.
reference.list_quantities()

['coords', 'top', 'ca_bonds', 'end2end_distance']

In [6]:
# Build the structural-integrity metrics object and evaluate it on the target
# ensemble alone (no reference is passed). The recorded result is a dict with
# the keys 'N clashes', 'max z-score' and 'rms z-score'.
sim = StructuralIntegrityMetrics()
sim(target)

{'N clashes': 597, 'max z-score': 0.4000008, 'rms z-score': 0.40000024}

In [7]:
# Build the ensemble-quality metrics object with default settings; it is
# applied to the target/reference pair in the next cell.
eqm = EnsembleQualityMetrics()

In [8]:
# Compare the target ensemble against the reference ensemble. The recorded
# result is a dict of KL / JS divergences for the end-to-end distance, Rg and
# CA distances; the TICA entries are None in this run.
# NOTE(review): why the TICA entries are None is not visible here -- confirm
# whether TICA needs extra input or configuration.
eqm(target, reference=reference)

{'d end2end, KL divergence': 0.01533468984195132,
 'Rg, KL divergence': 0.02755840409977273,
 'CA distance, KL divergence': 0.0015145113020085257,
 'CA distance, JS divergence': 0.00035620680646996863,
 'TICA, KL divergence': None,
 'TICA, JS divergence': None}