In [1]:
import mdtraj as md
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import numpy as np
import pandas as pd

This notebook creates active site trajectories based on two different criteria. 
1. Core-Active-Site: This definition is the six residues from the ... paper. 
2. Extended-Active-Site: This includes the residues on either side of each core residue; they are needed for calculating the phi/psi angles.

The extended active site is calculated from PyMOL because it is difficult to select by residue with `mdtraj`. Both definitions have waters included by default. The trajectories will be subset by atom index, so I have to work these indices out by calculating the differences between the residue numbers in the two topologies.

In [2]:
# Load the single-frame MD structure and tabulate its atoms; only the first
# value returned by to_dataframe() is kept.
traj_top = md.load('../data/MD/2agy_final_min-stripped_1frame.pdb')
traj_df, _ = traj_top.topology.to_dataframe()

# Load the crystal structure, keep only the atoms matched by 'not water',
# and tabulate them the same way.
xtal_with_water = md.load('../data/MD/2agy_c36_state0.pdb')
non_water_ix = xtal_with_water.topology.select('not water')
xtal = xtal_with_water.atom_slice(non_water_ix)
xtal_df, _ = xtal.topology.to_dataframe()


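Find the sequence number (`resSeq`) of the TTW residue in each topology; these are the reference points for calculating the differences between the two numbering schemes.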

In [3]:
# Distinct resSeq values of the residues named TTW in the MD topology
ttw_in_traj = traj_df['resName'] == 'TTW'
traj_df.loc[ttw_in_traj, 'resSeq'].unique()

array([399, 872])

In [4]:
# Distinct resSeq values of the residues named TTW in the crystal topology
ttw_in_xtal = xtal_df['resName'] == 'TTW'
xtal_df.loc[ttw_in_xtal, 'resSeq'].unique()

array([109])

## Core Active Site

In [5]:
# Core active-site residues in crystal numbering, expressed as offsets from
# the TTW residue (resSeq 109 in the crystal topology).
core_xtal_resseq = np.array([82, 109, 84, 172, 128, 160])
xtal_ttw_resseq = 109
as_res_num_diff = core_xtal_resseq - xtal_ttw_resseq
as_res_num_diff

array([-27,   0, -25,  63,  19,  51])

Get indices in trajectory topology

In [6]:
# Shift the offsets onto each TTW residue of the MD topology (resSeq 399 and
# 872) and collect the row labels of traj_df for the matching atoms.
in_core_d = traj_df['resSeq'].isin(as_res_num_diff + 399)
in_core_h = traj_df['resSeq'].isin(as_res_num_diff + 872)
as_d_ix = list(traj_df.index[in_core_d])
as_h_ix = list(traj_df.index[in_core_h])

Save the sliced structure as a PDB to use as a topology file

In [9]:
# Write one PDB per index set, containing only the selected atoms of traj_top.
for atom_ix, pdb_path in (
    (as_d_ix, '../data/MD/trajectories/as_d_top.pdb'),
    (as_h_ix, '../data/MD/trajectories/as_h_top.pdb'),
):
    traj_top.atom_slice(atom_ix).save(pdb_path)

In [13]:
# NOTE(review): this cell is disabled; the output recorded below it shows it
# was run to completion once. It slices with as_d_ix / as_h_ix, which are
# reassigned later in the notebook for the extended active site, so if it is
# re-enabled it must run while those names still hold the core indices.
# for i in range(100):
#     path = '/Volumes/JGI/AAHD/round_1/{}ns/100ns-production-stripped.xtc'.format(i+1)
#     traj = md.load(path, top='../data/MD/2agy_final_min-stripped_1frame.pdb')
#     traj.atom_slice(as_d_ix).save('../data/MD/trajectories/as_d_{}.xtc'.format(i+1))
#     traj.atom_slice(as_h_ix).save('../data/MD/trajectories/as_h_{}.xtc'.format(i+1))
#     print(i, end=', ')

0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 

## Extended Active Site

In [10]:
# Extended active-site residues in crystal numbering, expressed as offsets
# from the TTW residue (resSeq 109 in the crystal topology).
ext_xtal_resseq = np.array([
    81, 82, 83,
    108, 109, 110,
    84, 85,
    171, 172, 173,
    127, 128, 129,
    159, 160, 161,
])
xtal_ttw_resseq = 109
as_res_num_diff = ext_xtal_resseq - xtal_ttw_resseq
as_res_num_diff

array([-28, -27, -26,  -1,   0,   1, -25, -24,  62,  63,  64,  18,  19,
        20,  50,  51,  52])

Get indices in trajectory topology

In [12]:
# Shift the offsets onto each TTW residue of the MD topology (resSeq 399 and
# 872) and collect the row labels of traj_df for the matching atoms.
in_ext_d = traj_df['resSeq'].isin(as_res_num_diff + 399)
in_ext_h = traj_df['resSeq'].isin(as_res_num_diff + 872)
as_d_ix = list(traj_df.index[in_ext_d])
as_h_ix = list(traj_df.index[in_ext_h])

Save the sliced structure as a PDB to use as a topology file

In [13]:
# Write one PDB per index set, containing only the selected atoms of traj_top.
for atom_ix, pdb_path in (
    (as_d_ix, '../data/MD/trajectories/ext_as_d_top.pdb'),
    (as_h_ix, '../data/MD/trajectories/ext_as_h_top.pdb'),
):
    traj_top.atom_slice(atom_ix).save(pdb_path)

In [14]:
# Slice each of the 100 production runs down to the atoms in as_d_ix and
# as_h_ix and write one .xtc per run and index set.
# The topology already parsed into `traj_top` (loaded from the same PDB file)
# is passed to md.load, so the PDB is not re-read and re-parsed on every
# iteration.
# NOTE(review): the input path is an absolute mount point; consider moving it
# to a configurable constant.
for i in range(100):
    run = i + 1  # input directories and output files are numbered from 1
    path = '/Volumes/JGI/AAHD/round_1/{}ns/100ns-production-stripped.xtc'.format(run)
    traj = md.load(path, top=traj_top)
    traj.atom_slice(as_d_ix).save('../data/MD/trajectories/ext_as_d_{}.xtc'.format(run))
    traj.atom_slice(as_h_ix).save('../data/MD/trajectories/ext_as_h_{}.xtc'.format(run))
    print(i, end=', ')  # progress marker

END
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 

In [16]:
# Number of atom indices currently held in as_h_ix
len(as_h_ix)

255

In [24]:
# Rows of traj_df for the first residue offset, shifted onto resSeq 399
first_resseq_d = as_res_num_diff[0] + 399
traj_df.loc[traj_df['resSeq'] == first_resseq_d, :]

Unnamed: 0,serial,name,element,resSeq,resName,chainID,segmentID
5734,5735,N,N,371,CYS,0,
5735,5736,H,H,371,CYS,0,
5736,5737,CA,C,371,CYS,0,
5737,5738,HA,H,371,CYS,0,
5738,5739,CB,C,371,CYS,0,
5739,5740,HB3,H,371,CYS,0,
5740,5741,HB2,H,371,CYS,0,
5741,5742,SG,S,371,CYS,0,
5742,5743,C,C,371,CYS,0,
5743,5744,O,O,371,CYS,0,


In [22]:
# Rows of traj_df for the first residue offset, shifted onto resSeq 872
first_resseq_h = as_res_num_diff[0] + 872
traj_df.loc[traj_df['resSeq'] == first_resseq_h, :]

Unnamed: 0,serial,name,element,resSeq,resName,chainID,segmentID
12943,12944,N,N,844,CYS,0,
12944,12945,H,H,844,CYS,0,
12945,12946,CA,C,844,CYS,0,
12946,12947,HA,H,844,CYS,0,
12947,12948,CB,C,844,CYS,0,
12948,12949,HB3,H,844,CYS,0,
12949,12950,HB2,H,844,CYS,0,
12950,12951,SG,S,844,CYS,0,
12951,12952,HG,H,844,CYS,0,
12952,12953,C,C,844,CYS,0,
