In [1]:
# Generic libraries
import numpy as np
import pandas as pd
import scipy as sp
import tqdm
import seaborn as sns
from itertools import product
import inspect
import multiprocessing
import time
import os
import glob
import ipympl

%matplotlib widget
import matplotlib.pyplot as plt
plt.style.use('seaborn-deep')

from mpl_toolkits.mplot3d import Axes3D
import matplotlib.ticker as mtick
from collections import namedtuple
from tabulate import tabulate
from IPython.display import Latex
from IPython.display import HTML
from IPython.core.pylabtools import figsize
from matplotlib import rc


In [2]:
# MDAnalysis
import MDAnalysis as mda
from MDAnalysis.analysis import encore
from MDAnalysis.analysis.encore.clustering import ClusteringMethod as clm
from MDAnalysis.analysis.encore.dimensionality_reduction import DimensionalityReductionMethod as drm

In [3]:
## Import common data (e.g. pdb and psf files)
from pathlib import Path
simdir = Path('../data/00-external/')
PDB = simdir / '5y36.pdb'
GRO = simdir / '5Y36_box.gro'

# set paths for output
plotpath = "../plots/"
datapath = "../data/analysis/"
output_name = "prod_global_"


In [4]:
## import full trajectory data 
trajdir = '../data/04-prod/'
XTC  = trajdir + 'cas9_prod_pbc_all.xtc'

## universe creation
u = mda.Universe(str(GRO), str(XTC))
#u_new = u.copy() # used as a reference in memory for mapping calculations
#ref0 = mda.Universe(str(GRO))
print(u.select_atoms("protein or name MG").n_atoms, 'protein atoms')

# other info
box = u.dimensions
print('box dimensions:', box)
nframes = u.trajectory.n_frames
print(nframes, 'frames')
dt = u.trajectory.dt # ns per frame, equal to nstxout*0.002
time = np.arange(0, dt*nframes, dt)
t_ns = time / 1000
print(t_ns[-1], 'ns')


22526 protein atoms
box dimensions: [183.38486  183.38486  183.38478   60.000008  60.000008  90.      ]
5901 frames
295.0 ns


In [5]:
# we need this because XTCReader objects don't support the timeseries attribute
from MDAnalysis.analysis.base import AnalysisFromFunction
from MDAnalysis.coordinates.memory import MemoryReader

coordinates = AnalysisFromFunction(lambda ag: ag.positions.copy(),
                                   u.atoms).run().results
coordinates
#u2 = mda.Universe(PDB, coordinates, format=MemoryReader)

#u_all = u.select_atoms("all")
#u_r = mda.Merge(u_all).load_new(
#         AnalysisFromFunction(lambda ag: ag.positions.copy(), u_all).run().results,
#    format=MemoryReader, in_memory=True)


{'timeseries': array([[[154.6     ,  99.01    ,  85.11001 ],
        [155.36002 ,  99.450005,  85.600006],
        [153.77    ,  99.590004,  85.04001 ],
        ...,
        [140.39    , 115.55    ,  60.340004],
        [135.15001 , 147.51001 ,  54.25    ],
        [145.04001 , 116.90001 ,  59.750004]],

       [[154.47    ,  95.72    ,  83.32001 ],
        [153.51001 ,  95.840004,  83.590004],
        [154.72002 ,  94.75    ,  83.36    ],
        ...,
        [140.22    , 115.45    ,  59.360004],
        [133.68001 , 147.15001 ,  52.960007],
        [144.81001 , 117.79    ,  58.320004]],

       [[155.09001 ,  93.670006,  80.92    ],
        [155.66    ,  93.87    ,  81.740005],
        [154.16    ,  94.00001 ,  81.13    ],
        ...,
        [140.28001 , 115.11001 ,  58.450005],
        [134.19    , 146.12001 ,  52.240005],
        [145.13    , 117.22    ,  58.07    ]],

       ...,

       [[100.990005,  13.14    , 108.48    ],
        [101.21001 ,  12.750001, 107.58    ],
       

In [6]:
km1 = clm.KMeans(12,  # no. clusters
                 init = 'k-means++',  # default
                 algorithm="auto")    # default

km2 = clm.KMeans(6,  # no. clusters
                 init = 'k-means++',  # default
                 algorithm="auto")    # default

km3 = clm.KMeans(3,  # no. clusters
                 init = 'k-means++',  # default
                 algorithm="auto")    # default

In [None]:
ces_conv2 = encore.ces_convergence(u2,  # universe
                                  10,  # window size
                                  select='name CA',
                                  clustering_method=[km1, km2, km3]
                                 )
ces_conv2.shape


In [None]:
labels = ['12 clusters', '6 clusters', '3 clusters']

ces_fig2, ces_ax2 = plt.subplots()
for data, label in zip(ces_conv2.T, labels):
    plt.plot(data, label=label)
ces_ax2.set_xlabel('Window')
ces_ax2.set_ylabel('Jensen-Shannon divergence')
plt.legend()

In [16]:
dir(u.trajectory)

['OtherWriter',
 'Writer',
 '_Timestep',
 '__class__',
 '__del__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__enter__',
 '__eq__',
 '__exit__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__next__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_apply_limits',
 '_apply_transformations',
 '_auxs',
 '_check_for_aux',
 '_file',
 '_frame',
 '_frame_to_ts',
 '_load_offsets',
 '_read_frame',
 '_read_frame_with_aux',
 '_read_next_timestep',
 '_read_offsets',
 '_reopen',
 '_sliced_iter',
 '_sub',
 '_transformations',
 '_ts_kwargs',
 '_writer',
 '_xdr',
 'add_auxiliary',
 'add_transformations',
 'aux_list',
 'check_slice_indices',
 'close',
 'convert_forces_from_native',
 'convert_forces_to_native',
 'convert_pos_from_nat