In [66]:
import re
from glob import glob
import mdtraj as md
import numpy as np
from os.path import join

# Introduction

This workbook collates information and cleans the trajectories of Mal-L. Only the highly flexible region, residues 374 - 459 are dealt with here. 




## Align and save

In [1]:
!ln -s /Volumes/REA/MD/MAL_L/reactant-region-374-459/ reactant
!ln -s /Volumes/REA/MD/MAL_L/ts-region-374-459/ transition_state

In [70]:
!mkdir reactant/aligned
!mkdir transition_state/aligned

mkdir: reactant/aligned: File exists


In [71]:
def clean_name(fn):
    nums = [int(x) for x in re.findall("[0-9]+", fn)]
    
    return 'run_{0:02d}_{1:03d}-{2:03d}ns.xtc'.format(*nums)

Get the names of trajectories.  These are split to remove the anomolous parts.  This was done by MC. 

In [72]:
r_trajs_fn = glob('reactant/trajectories/*.xtc')
ts_trajs_fn = glob('reactant/trajectories/*.xtc')

Load the data

In [73]:
r_top = md.load('reactant/top-374-459.pdb')
ts_top = md.load('transition_state/top.pdb')

In [74]:
r_trajs = [md.load(x, top=r_top) for x in r_trajs_fn]
ts_trajs = [md.load(x, top=ts_top) for x in ts_trajs_fn]


Align the trajectories to the crystal structure

In [76]:
ix = r_top.top.select('name CA')
ix2 = ts_top.top.select('name CA')
print(np.allclose(ix, ix2))

True


In [77]:
r_trajs_aligned = [traj.superpose(r_top, atom_indices=ix) for traj in r_trajs]
ts_trajs_aligned = [traj.superpose(r_top, atom_indices=ix) for traj in ts_trajs]

In [78]:
_ = [r_trajs_aligned[i].save_xtc(join('reactant/aligned', clean_name(r_trajs_fn[i]))) for i in range(len(r_trajs_aligned))]
_ = [ts_trajs_aligned[i].save_xtc(join('transition_state/aligned', clean_name(ts_trajs_fn[i]))) for i in range(len(ts_trajs_aligned))]

## Simulation time

There's no timestep information in the trajectories (?) however, from the number of frames and the file name we can see that the timestep is 0.1ns

In [97]:
print(T_r[0])
print(r_trajs_fn[0])

4500
reactant/trajectories/run-1-60ns-510ns.xtc


The distribution of timesteps are: 

In [88]:
T_r = np.array([x.n_frames for x in r_trajs_aligned])
T_ts = np.array([x.n_frames for x in ts_trajs_aligned])

In [100]:
np.unique(T_r, return_counts=True)

(array([ 500, 1000, 2100, 2400, 2900, 4500]), array([2, 2, 5, 5, 2, 3]))

In [101]:
np.unique(T_ts, return_counts=True)

(array([ 500, 1000, 2100, 2400, 2900, 4500]), array([2, 2, 5, 5, 2, 3]))

Total simulation time is is 4.48 microsseconds for each simulation. 

In [104]:
np.sum(T_r)*0.1

4480.0

In [105]:
np.sum(T_ts)*0.1

4480.0