This notebook should be run separately in order to ensure efficient memory usage. \
The results can then be imported into another notebook for visualization purposes.

In [1]:
# Import SBMOpenMM library
import sbmOpenMM

# Import PyEMMA library
import pyemma.util.contexts

# Import plotting tools
%matplotlib inline
import matplotlib.pyplot as plt

# Import numpy 
import numpy as np

# Import system library
import os

### Get trajectories

In [2]:
# Store aligned trajectory files: every *.dcd file found one directory level
# below `folder`, visited in sorted order so the list is reproducible.
# NOTE: `folder` is an absolute, machine-specific path; adjust it when running elsewhere.
folder = '/home/martin/Projects/FoxP/AA/FoldingSimulation_Tf/kx/'
trajectory_files = []
for d in sorted(os.listdir(folder)):
    # os.path.join keeps the paths valid whether or not `folder` ends with a separator
    run_dir = os.path.join(folder, d)
    if not os.path.isdir(run_dir):
        continue
    for f in sorted(os.listdir(run_dir)):
        if f.endswith('.dcd'):
            trajectory_files.append(os.path.join(run_dir, f))
print(trajectory_files)

['/home/martin/Projects/FoxP/AA/FoldingSimulation_Tf/kx/01/01_trajectory.dcd', '/home/martin/Projects/FoxP/AA/FoldingSimulation_Tf/kx/02/02_trajectory.dcd', '/home/martin/Projects/FoxP/AA/FoldingSimulation_Tf/kx/03/03_trajectory.dcd', '/home/martin/Projects/FoxP/AA/FoldingSimulation_Tf/kx/04/04_trajectory.dcd', '/home/martin/Projects/FoxP/AA/FoldingSimulation_Tf/kx/05/05_trajectory.dcd', '/home/martin/Projects/FoxP/AA/FoldingSimulation_Tf/kx/06/06_trajectory.dcd', '/home/martin/Projects/FoxP/AA/FoldingSimulation_Tf/kx/07/07_trajectory.dcd', '/home/martin/Projects/FoxP/AA/FoldingSimulation_Tf/kx/08/08_trajectory.dcd', '/home/martin/Projects/FoxP/AA/FoldingSimulation_Tf/kx/09/09_trajectory.dcd', '/home/martin/Projects/FoxP/AA/FoldingSimulation_Tf/kx/10/10_trajectory.dcd', '/home/martin/Projects/FoxP/AA/FoldingSimulation_Tf/kx/11/11_trajectory.dcd', '/home/martin/Projects/FoxP/AA/FoldingSimulation_Tf/kx/12/12_trajectory.dcd', '/home/martin/Projects/FoxP/AA/FoldingSimulation_Tf/kx/13/13_tr

### Calculate Native contacts

In [3]:
# Define an SBMOpenMM All-Atom SBM without parameters, forces, and system attributes
# (default_parameters=False). In the cells shown here the model is only used to read
# its atoms and its native contact list, which is loaded from `contact_file`.
structure_file = '../input/FoxP_monomer.pdb'
contact_file = '../input/FoxP_monomer.contacts'
AA_sbmModel = sbmOpenMM.models.getAllAtomModel(structure_file, contact_file, default_parameters=False)

Generating AA SBM for structure file ../input/FoxP_monomer.pdb

Setting up geometrical parameters:
_________________________________
Removing hydrogens from topology
Added 747 atoms
Added 767 bonds
Added 1038 angles
Added 855 torsions
Added 166 impropers
Added 357 planars
Reading contacts from contact file: ../input/FoxP_monomer.contacts
Added 822 native contacts



In [4]:
def getCAcontacts(AA_sbmModel, residue_index=False):
    """
    Get per-residue native contacts from an All-Atom SBM.

    The atom-level native contacts of the model are collapsed into unique
    residue pairs, and each residue is then labelled either by its
    alpha-carbon atom or by the residue itself.

    Parameters
    ----------
    AA_sbmModel : object
        All-atom SBM exposing ``contacts`` (an iterable of atom pairs) and
        ``atoms`` (an iterable of atoms). Each atom must provide ``name``,
        ``index`` and ``residue.index``.
    residue_index : bool, optional
        If False (default), residues are labelled with the index of their
        'CA' atom plus one. If True, they are labelled with the residue
        index plus one.

    Returns
    -------
    numpy.ndarray
        Integer array of shape (n_pairs, 2), one row per unique residue
        pair, sorted by residue indexes. An empty contact list yields an
        array of shape (0, 2).

    Raises
    ------
    KeyError
        If a residue taking part in a contact has no atom named 'CA'.

    Notes
    -----
    The returned values are shifted by +1 with respect to the model's own
    ``index`` attributes. Subtract one before handing them to tools that
    expect the model's original (unshifted) indexes.
    """

    # Collapse atom-level contacts into a set of unique residue pairs
    residue_pairs = {
        (c[0].residue.index, c[1].residue.index) for c in AA_sbmModel.contacts
    }

    # Map each residue index to its label: CA atom index + 1, or residue index + 1
    label_of_residue = {}
    for atom in AA_sbmModel.atoms:
        if atom.name != 'CA':
            continue
        base = atom.residue.index if residue_index else atom.index
        label_of_residue[atom.residue.index] = base + 1

    # Translate the sorted residue pairs into label pairs
    ca_pairs = [
        (label_of_residue[first], label_of_residue[second])
        for first, second in sorted(residue_pairs)
    ]

    # reshape(-1, 2) keeps the (n, 2) layout even when there are no contacts
    return np.array(ca_pairs, dtype=int).reshape(-1, 2)

In [5]:
# Get list of per-residue native contacts based on the SBM alpha-carbons indexes.
# One row per unique residue pair; residue_index is left at its default (False),
# so each value is the CA atom's index in the model plus one.
ca_native_contacts = getCAcontacts(AA_sbmModel)

### Featurization

In [None]:
# Define CA atom native distances as the feature
ca_native_contacts_feat = pyemma.coordinates.featurizer(structure_file)
# getCAcontacts() returns atom indexes shifted by +1, while add_distances()
# expects 0-based atom indexes of the featurizer topology. Shift them back so
# the distances are measured between the CA atoms themselves and not between
# the atoms that follow them.
# NOTE(review): assumes structure_file has the same atom ordering as the SBM
# model used to build the contact list - confirm.
ca_native_contacts_feat.add_distances(ca_native_contacts - 1, periodic=False)
# Load the distance features for all trajectories
ca_native_contacts_data = pyemma.coordinates.load(trajectory_files, features=ca_native_contacts_feat)

HBox(children=(FloatProgress(value=0.0, description='Obtaining file info', layout=Layout(flex='2'), max=15.0, …

HBox(children=(FloatProgress(value=0.0, description='getting output of FeatureReader', layout=Layout(flex='2')…

### Select lag time and calculate TICAs

In [None]:
# Define a lag time for TICA calculation
# (the same value is reused as the MSM lag time further down).
# NOTE(review): presumably expressed in trajectory frames - confirm.
lag_time = 33
# Fit TICA on the native-contact distance features
tica = pyemma.coordinates.tica(ca_native_contacts_data, lag=lag_time)
# Get the input data projected onto the TICA components
tica_output = tica.get_output()

### Calculate clusters for MSM 

In [None]:
# Select the number of clusters to build the MSM
n_clusters = 1000
# Calculate cluster in TICA space using the k-means algorithm
# NOTE(review): stride=10 presumably means only every 10th frame is used to fit
# the cluster centers - confirm.
# NOTE(review): no seed is passed here, so the clustering may differ between
# runs - confirm whether a fixed seed is wanted for reproducibility.
cluster = pyemma.coordinates.cluster_kmeans(tica_output, k=n_clusters, max_iter=100, stride=10)

### Chapman - Kolmogorov (CK) test

In [None]:
# Calculate Bayesian MSM
# from the discrete trajectories of the k-means clustering, using the same lag
# value as the TICA step; dt_traj='20 ps' gives the physical time of one
# trajectory step.
msm = pyemma.msm.bayesian_markov_model(cluster.dtrajs, lag=lag_time, dt_traj='20 ps')

In [None]:
# Define the number of states for the CK test
nstates = 2
# Run CK test
# on the Bayesian MSM estimated above; the result is plotted in the next cell.
cktest = msm.cktest(nstates)

In [None]:
# Plot the CK test; dt=0.02 with units='ns' corresponds to the 20 ps step
# declared for the MSM (dt_traj='20 ps').
pyemma.plots.plot_cktest(cktest, dt=0.02, units='ns')
# Create the target directory first so savefig does not fail on a fresh checkout
os.makedirs('output', exist_ok=True)
plt.savefig('output/ck_test.png')