In [1]:
import pyemma as pe
import pandas as pd
import numpy as np
import mdtraj as md
import matplotlib.pyplot as plt
%matplotlib inline
from pathlib import Path, PurePath
import itertools as it
import seaborn as sns

In [10]:
# Root directory holding the MD data; every input and output path below is built from it.
data_dir = '/Users/robertarbon/OneDrive - University of Bristol/Research/AADH/data/MD/'

# 100 trajectories for each of the two states ('d' first, then 'h'), numbered from 1.
traj_names = ['as_%s_%d.xtc' % (state, idx) for state in ('d', 'h') for idx in range(1, 101)]
# The extended trajectories share the same names with an 'ext_' prefix.
ext_traj_names = ['ext_' + name for name in traj_names]

traj_paths = ['{0}trajectories/{1}'.format(data_dir, name) for name in traj_names]
ext_traj_paths = ['{0}trajectories/{1}'.format(data_dir, name) for name in ext_traj_names]

# Output file names: the trajectory name with 'xtc' swapped for 'npy'.
npy_names = [name.replace('xtc', 'npy') for name in traj_names]
# NOTE(review): this is derived from traj_names, not ext_traj_names, so it is
# identical to npy_names (no 'ext_' prefix) -- confirm whether that is intended
# before changing it, since it determines the names of the saved files.
ext_npy_names = [name.replace('xtc', 'npy') for name in traj_names]

# both D and H are approximately the same.
xtal_path = data_dir + '2agy_as_h.pdb'


In [3]:
# Load the structure at xtal_path; it is the reference for the RMSD feature and
# supplies the atom/residue selections used by the other features.
xtal = md.load(xtal_path)

In [4]:
# Load every trajectory in traj_paths, using the xtal_path PDB as the topology.
# All of them are kept in memory at once in this list.
trajs = [md.load(x, top=xtal_path) for x in traj_paths]

In [5]:
# Indices of the atoms matching 'mass > 2' in the reference topology
# (intended to pick out the heavy, i.e. non-hydrogen, atoms).
heavy_ix = list(xtal.top.select('mass > 2'))


## RMSD

In [25]:
# Feature: RMSD of every frame to the reference structure `xtal`,
# computed over the atoms listed in `heavy_ix`.
feature = 'rmsd'

# Each feature is written to its own directory under data_dir.
out_path = '{0}{1}'.format(data_dir, feature)
out = Path(out_path)
out.mkdir(exist_ok=True, parents=True)
for i, traj in enumerate(trajs):
    # One .npy file per trajectory, named after the trajectory.
    rmsd = md.rmsd(traj, xtal, atom_indices=heavy_ix)
    out_i = out / npy_names[i]
    np.save(out_i, rmsd)
    

## Heavy contacts

In [41]:
# Feature: residue-residue contact distances using the 'closest-heavy' scheme.
feature = 'contact_heavy'
# Every unordered pair of residue indices (i < j) in the reference topology.
res_pairs = np.asarray(list(it.combinations(range(xtal.n_residues), 2)))

# Each feature is written to its own directory under data_dir.
out_path = '{0}{1}'.format(data_dir, feature)
out = Path(out_path)
out.mkdir(exist_ok=True, parents=True)
for i, traj in enumerate(trajs):
    # compute_contacts returns (distances, residue_pairs); only the distances are saved.
    out_f, _ = md.compute_contacts(traj, scheme='closest-heavy', contacts=res_pairs)
    out_i = out / npy_names[i]
    np.save(out_i, out_f)


## Alpha contacts

In [42]:
# Feature: residue-residue contact distances using the 'ca' scheme.
feature = 'contact_alpha'
# Every unordered pair of residue indices (i < j) in the reference topology.
res_pairs = np.asarray(list(it.combinations(range(xtal.n_residues), 2)))

# Each feature is written to its own directory under data_dir.
out_path = '{0}{1}'.format(data_dir, feature)
out = Path(out_path)
out.mkdir(exist_ok=True, parents=True)
for i, traj in enumerate(trajs):
    # compute_contacts returns (distances, residue_pairs); only the distances are saved.
    out_f, _ = md.compute_contacts(traj, scheme='ca', contacts=res_pairs)
    out_i = out / npy_names[i]
    np.save(out_i, out_f)

## Interatomic distances

In [111]:

# Feature: distances between every pair of atoms listed in heavy_ix.
feature = 'interatomic_dist'
# Every unordered pair of heavy-atom indices; the count grows quadratically with len(heavy_ix).
atom_pairs = np.asarray(list(it.combinations(heavy_ix, 2)))

# Each feature is written to its own directory under data_dir.
out_path = '{0}{1}'.format(data_dir, feature)
out = Path(out_path)
out.mkdir(exist_ok=True, parents=True)
for i, traj in enumerate(trajs):
    # One .npy file per trajectory, named after the trajectory.
    out_f = md.compute_distances(traj, atom_pairs)
    out_i = out / npy_names[i]
    np.save(out_i, out_f)

## Dihedrals

We need the extended trajectories for this because the phi/psi backbone dihedrals are defined using atoms from neighbouring residues. Note that there is a mistake in the PDB:
C81 has an extra proton (the disulphide bridge patch didn't work here!).

In [37]:
# Load the extended trajectories. The first 100 paths are read with the 'd'
# topology file and the remaining ones with the 'h' topology file; the result
# is a single list in the same order as ext_traj_paths.
ext_trajs = []
for ext_top_path, paths in (
    (data_dir + 'trajectories/ext_as_d_top.pdb', ext_traj_paths[:100]),
    (data_dir + 'trajectories/ext_as_h_top.pdb', ext_traj_paths[100:]),
):
    ext_trajs = ext_trajs + [md.load(x, top=ext_top_path) for x in paths]

The chi dihedrals aren't defined for TTW, so I'll base my definitions on Trp. Open the structure in PyMOL to check this. 

In [35]:
# Atom-name quadruplets defining the side-chain dihedrals of the TTW residue.
# The order of the names within each quadruplet is the order of the atoms
# along the dihedral (a, b, c, d) and must be preserved.
ttw_chi1 = ['N', 'CA', 'CX1', 'CX2']
ttw_chi2 = ['CA', 'CX1', 'CX2', 'CD1']
# chi3-chi6 are consecutive four-atom windows along this chain:
# 'CZ2', 'CH2', 'NT', 'CI2', 'C1', 'CW1', 'CW2'
ttw_chi3 = ['CZ2', 'CH2', 'NT', 'CI2']
ttw_chi4 = ['CH2', 'NT', 'CI2', 'C1']
ttw_chi5 = ['NT', 'CI2', 'C1', 'CW1']
ttw_chi6 = ['CI2', 'C1', 'CW1', 'CW2']
ttw_chi = np.array([ttw_chi1, ttw_chi2, ttw_chi3, ttw_chi4, ttw_chi5, ttw_chi6 ])


def _ttw_dihedral_indices(top, res_seq, quadruplets):
    """Translate atom-name quadruplets into atom-index quadruplets.

    Each atom name is looked up on its own so that the indices come back in
    the order given by the dihedral definition. A single combined selection
    ('name A or name B or ...') returns the matches in topology order instead,
    which only gives the right dihedral if the topology happens to list the
    four atoms in bonded order.

    Parameters
    ----------
    top : topology object exposing ``select(selection_string)``
    res_seq : int
        ``resSeq`` of the residue that carries the atoms.
    quadruplets : iterable of 4-sequences of str
        Atom names, in dihedral order.

    Returns
    -------
    np.ndarray of int, shape (len(quadruplets), 4)

    Raises
    ------
    ValueError
        If an atom name does not match exactly one atom in the residue.
    """
    rows = []
    for names in quadruplets:
        row = []
        for name in names:
            hits = top.select('resSeq {0} and name {1}'.format(res_seq, name))
            if len(hits) != 1:
                raise ValueError(
                    "expected exactly one atom named '{0}' in resSeq {1}, found {2}".format(
                        name, res_seq, len(hits)))
            row.append(int(hits[0]))
        rows.append(row)
    return np.array(rows)


# The TTW residue has a different resSeq in the 'd' and 'h' topologies.
d_ttw_chi = _ttw_dihedral_indices(ext_trajs[0].top, 399, ttw_chi)
h_ttw_chi = _ttw_dihedral_indices(ext_trajs[-1].top, 872, ttw_chi)

In [102]:
# Feature: backbone (phi, psi) and side-chain (chi1, chi2) dihedrals of every
# residue that has them, plus the custom TTW chi dihedrals.
feature = 'dihedrals'
accumulate = []  # keeps the raw dihedral array of every trajectory in memory
out_path = data_dir+feature
out = Path(out_path)
out.mkdir(parents=True, exist_ok=True)

# Trajectories with index below this use the 'd' TTW atom indices (d_ttw_chi);
# the rest use the 'h' ones (h_ttw_chi).
n_d_trajs = 100

for i, traj in enumerate(ext_trajs):

    # The md.compute_* helpers return (atom_indices, angles); only the angles are kept.
    phi = md.compute_phi(traj)[1]
    psi = md.compute_psi(traj)[1]
    chi1 = md.compute_chi1(traj)[1]
    chi2 = md.compute_chi2(traj)[1]

    # Use names distinct from `ttw_chi` so the atom-name table defined in an
    # earlier cell is not overwritten by an array of angles.
    ttw_indices = d_ttw_chi if i < n_d_trajs else h_ttw_chi
    ttw_angles = md.compute_dihedrals(traj, ttw_indices)

    # Columns: all phi, then psi, chi1, chi2, then the TTW chi angles.
    dihed = np.concatenate([phi, psi, chi1, chi2, ttw_angles], axis=1)
    accumulate.append(dihed)

    # Save raw angles
    out_i = out.joinpath(ext_npy_names[i])
    np.save(file=out_i, arr=dihed)

    # Save the sin/cos transform as well (all sines first, then all cosines);
    # unlike the raw angles it has no jump at the periodic boundary.
    dihed_sc = np.concatenate([np.sin(dihed), np.cos(dihed)], axis=1)
    out_i_sc = out.joinpath(ext_npy_names[i].replace('.npy', '_sincos.npy'))
    np.save(file=out_i_sc, arr=dihed_sc)

  indices = np.vstack(x for x in indices if x.size)[id_sort]
