`pytraj` is a Python wrapper of `cpptraj`. `pytraj` was written with the hope that it will introduce flexibility in customizing data analysis workflows.

Outline
    * Trajectory object
    * Frame object
    * Perform Action with frame/traj
    * Topology object
    * AtomMask object
         + AtomMask follows AMBER syntax for picking atoms
           (see the Amber14 manual, http://ambermd.org/doc12/Amber14.pdf, pages 327-330).
         + Indexing differs between an integer mask and a character mask: compare frame[0] and frame[top(':1')].
    * Full example


# Trajectory objects

### * Two types of Trajectory objects

There are two types of trajectory objects in pytraj: one is immutable (TrajectoryIterator) and the other is mutable (Trajectory).
TrajectoryIterator is a thin wrapper of the cpptraj Trajectory class that provides efficiency in loading large data files.
When the `load` method is called on this read-only object (TrajReadOnly), not all data is loaded into memory.

In contrast, Trajectory is a mutable trajectory object. It was designed to hold frame objects in memory.

In [1]:
import numpy as np
from pytraj import io as mdio
from pytraj import Trajectory, DataSetList, DataFileList

traj = mdio.load(filename="../tests/data/md1_prod.Tc5b.x", top="../tests/data/Tc5b.top")
farray = Trajectory(filename="../tests/data/md1_prod.Tc5b.x", top="../tests/data/Tc5b.top")
# those files can be found in
# $PYTRAJHOME/tests/data

In [2]:
print (traj)

<Trajectory with 10 frames, 304 atoms/frame>
           


In [3]:
print (farray)

<Trajectory with 10 frames, 304 atoms/frame>
           


In [4]:
print ("traj object is immutable")

# print x-coordinate of 1st atom in 1st frame
print (traj[0, 0, 0])

# try to change its x-coordinate: pytraj does not allow this, it will raise Error saying user should use Trajectory
#traj[0, 0, 0] = 100.

# print x-coordinate of 1st atom in 1st frame
#print (traj[0, 0, 0])

traj object is immutable
-16.492


In [5]:
print ("farray object is mutable")

# print x-coordinate of 1st atom in 1st frame
print (farray[0, 0, 0])

# try to change its x-coordinate
farray[0, 0, 0] = 100.

# print x-coordinate of 1st atom in 1st frame
print (farray[0, 0, 0])

farray object is mutable
-16.492
100.0


### * iterate trajectory

In [6]:
# pytraj provides a fast and easy way to iterate over trajectories

# iterate from the 0-th frame to the 8-th frame, taking every 2nd frame (stride=2)
# NOTE(review): the output below shows 5 frames, so `stop` appears to be inclusive here — confirm
for frame in traj(start=0, stop=8, stride=2):
    print (frame)

<Frame with 304 atoms>
<Frame with 304 atoms>
<Frame with 304 atoms>
<Frame with 304 atoms>
<Frame with 304 atoms>


In [7]:
# we can shorten by this
for frame in traj(0, 8, 2):
    print (frame)

<Frame with 304 atoms>
<Frame with 304 atoms>
<Frame with 304 atoms>
<Frame with 304 atoms>
<Frame with 304 atoms>


In [8]:
# we can combine iterating with stripping atoms too

# iterate and strip all but CA atoms
for frame in traj(0, 8, 2, '@CA'):
    print (frame)

<Frame with 20 atoms>
<Frame with 20 atoms>
<Frame with 20 atoms>
<Frame with 20 atoms>
<Frame with 20 atoms>


### * Extract coordinates/frame from Trajectory

In [9]:
# pytraj provides several convenient ways to extract atom coordinates/frames from a trajectory object.

# numpy-like indexing: [frame index, atom index, coordinate]
# x, y and z coordinates of the 1st atom in the 1st frame
print (traj[0, 0, :])

[-16.492  12.434 -11.018]


In [10]:
# x, y, z coordinates of all CA atoms for the 1st and 2nd frames
#print (traj[:2]['@CA'])

# indexing with a frame slice and then an atom-mask string gives a new trajectory:
# the command below results in a Trajectory object with 2 frames containing only CA atoms
# NOTE(review): the original comment referred to a `:frame` keyword that is not used here — confirm
f0 = traj[:2]['@CA']
print (f0)

<Trajectory with 2 frames, 20 atoms/frame>
           


# Frame object

Just like in cpptraj, the Frame object is the workhorse of pytraj.

In [11]:
frame0 = traj[0]

frame_methods_and_attributes = [att for att in dir(frame0) if not att.startswith("__")]
print ("frame_methods_and_attributes")
print ()
print (frame_methods_and_attributes)

frame_methods_and_attributes

['VCenterOfMass', 'VGeometricCenter', 'add_by_mask', 'append_vec3', 'append_xyz', 'atoms', 'box', 'box_crd', 'boxview', 'buffer1d', 'buffer2d', 'calc_angle', 'calc_dihedral', 'calc_distance', 'calc_inertia', 'calc_temperature', 'center_on_origin', 'check_coords_invalid', 'clear_atoms', 'coords', 'copy', 'dist_rmsd', 'divide', 'fit_to', 'frame_iter', 'get_subframe', 'get_top', 'has_box', 'has_vel', 'i_address', 'info', 'is_empty', 'mass', 'n_atoms', 'n_repdims', 'neg_translate', 'py_free_mem', 'rmsd', 'rmsd_centered_ref', 'rmsd_nofit', 'rotate', 'rotate_with_matrix', 'same_coords_as', 'save', 'scale', 'set_axis_of_rotation', 'set_box_angles', 'set_coords', 'set_coords_by_map', 'set_frame', 'set_frame_from_mask', 'set_frame_m', 'set_frame_v', 'set_frame_x_m', 'set_from_crd', 'set_nobox', 'set_top', 'shape', 'size', 'strip_atoms', 'swap_atoms', 't_address', 'temperature', 'time', 'to_ndarray', 'tolist', 'trans_rot_trans', 'translate', 'update_atom', 'update_a

In [12]:
# frame0 behaves like a 2D array with shape (n_atoms, 3)
print ('shape: ', frame0.shape)
print ('atom coords: ', frame0[0])

# to avoid a data copy when working with numpy, one can access the Frame's buffer
arr0 = np.asarray(frame0[:])

# updating arr0 also updates the frame coords (compare the two lines printed below)
print ('before updating arr0: ', frame0[0])
arr0[0, 0] = 100.
print ('after updating arr0: ', frame0[0])

shape:  (304, 3)
atom coords:  [-16.492  12.434 -11.018]
before updating arr0:  [-16.492  12.434 -11.018]
after updating arr0:  [ 100.      12.434  -11.018]


In [13]:
# extracting Frame coords with given mask. 
# 1st way: extract coords of CA atoms

print (frame0[traj.top("@CA")])

[[-17.146  12.069  -9.756]
 [-15.774   9.648  -7.087]
 [-16.761   7.688  -3.829]
 [-15.228   5.335  -1.254]
 [-15.974   4.229   2.418]
 [-14.016   1.006   2.992]
 [-12.158  -1.711   1.129]
 [-10.459  -5.137   2.023]
 [ -9.524  -7.464  -0.965]
 [ -8.858 -11.163  -0.287]
 [ -7.71  -12.693   3.106]
 [ -5.13  -11.608   5.838]
 [ -1.73   -9.971   5.185]
 [  1.379  -9.977   7.51 ]
 [  4.659  -7.949   7.677]
 [  8.367  -8.606   6.79 ]
 [ 11.671  -7.68    8.742]
 [ 12.513  -3.997   8.439]
 [ 15.912  -2.833   6.877]
 [ 19.12   -3.114   8.925]]


In [14]:
# 2nd way
# need to set Topology for frame to use AtomMask
frame0.set_top(traj.top)
frame0["@CA"]

array([[-17.146,  12.069,  -9.756],
       [-15.774,   9.648,  -7.087],
       [-16.761,   7.688,  -3.829],
       [-15.228,   5.335,  -1.254],
       [-15.974,   4.229,   2.418],
       [-14.016,   1.006,   2.992],
       [-12.158,  -1.711,   1.129],
       [-10.459,  -5.137,   2.023],
       [ -9.524,  -7.464,  -0.965],
       [ -8.858, -11.163,  -0.287],
       [ -7.71 , -12.693,   3.106],
       [ -5.13 , -11.608,   5.838],
       [ -1.73 ,  -9.971,   5.185],
       [  1.379,  -9.977,   7.51 ],
       [  4.659,  -7.949,   7.677],
       [  8.367,  -8.606,   6.79 ],
       [ 11.671,  -7.68 ,   8.742],
       [ 12.513,  -3.997,   8.439],
       [ 15.912,  -2.833,   6.877],
       [ 19.12 ,  -3.114,   8.925]])

In [15]:
# using cpptraj's mask for accessing frame coords
# coords of the 10th residue: string masks use AMBER's 1-based numbering
# (i.e. residue index 9 when counting from 0)
print ("coords of residue 10: ", frame0[":10"])

coords of residue 10:  [[ -9.386  -9.873  -0.657]
 [-10.344  -9.807  -0.969]
 [ -8.858 -11.163  -0.287]
 [ -9.514 -11.931  -0.696]
 [ -7.851 -11.243  -0.697]
 [ -8.742 -11.387   1.292]
 [ -9.38  -10.654   2.081]]


# Perform Action with frame/traj

In [16]:
# how about doing analysis with a `frame` or trajectory object in pytraj?
# import the `action` dictionary holding all supported action keywords
from pytraj import adict
keys = adict.keys()
print (keys)

print ()
print ("number of supported actions in pytraj = %s" % len(keys))

['angle', 'areapermol', 'atomiccorr', 'atomicfluct', 'atommap', 'autoimage', 'average', 'bounds', 'box', 'center', 'channel', 'checkchirality', 'checkstructure', 'closest', 'clusterdihedral', 'contacts', 'createcrd', 'density', 'diffusion', 'dihedral', 'dihedralscan', 'dipole', 'distance', 'distrmsd', 'dnaiontracker', 'dssp', 'energy', 'filterbydata', 'fixatomorder', 'gist', 'grid', 'gridfreeenergy', 'hbond', 'image', 'jcoupling', 'lessplit', 'lie', 'makestructure', 'mask', 'matrix', 'minimage', 'molsurf', 'multidihedral', 'multivector', 'nastruct', 'nativecontacts', 'nmrrst', 'orderparameter', 'outtraj', 'pairdist', 'pairwise', 'principal', 'projection', 'pucker', 'radgyr', 'radial', 'randomizeions', 'replicatecell', 'rmsd', 'rotate', 'runningavg', 'scale', 'secstruct', 'setvelocity', 'spam', 'stfc_diffusion', 'strip', 'surf', 'surf_LCPO', 'surf_lcpo', 'symmetricrmsd', 'temperature', 'translate', 'unwrap', 'vector', 'velocityautocorr', 'volmap', 'volume', 'watershell']

number of supp

In [17]:
# to get the data from `action`, you can create DataSetList object
dslist = DataSetList()

# create `action` you want to perform
# example: calculate molecular surface
act = adict['molsurf']

# perform action to calculate surf for only CA atom for farray
act("@CA", farray, dslist=dslist)

# get the data
d0 = dslist[0]
print (d0[:])

<MemoryView of 'array' object>


In [18]:
# pytraj also provides a shorter way to perform common actions:
# prefix the action keyword with `calc_` (here: `molsurf` -> `calc_molsurf`)
from pytraj.common_actions import calc_molsurf

# get the data
d1 = calc_molsurf(farray, "@CA")
print (d1[:])

# how about skipping frames?
# we use the frame iterator: farray(start, stop, stride)
# the topology object must be provided because the iterator does not hold it.
# calculate molsurf for every 2nd frame, starting from the 0-th frame and stopping at the 6-th frame
d2 = calc_molsurf(farray(0, 6, 2), "@CA", farray.top)
print (d2[:])

# pytraj can also process a list mixing trajectory, frame-iterator and frame objects
trajlist = [traj, farray(0, 6, 2), traj[-1]]
d3 = calc_molsurf(trajlist, '@CA', farray.top)
print (d3[:])

<MemoryView of 'array' object>
<MemoryView of 'array' object>
<MemoryView of 'array' object>


### * save traj with different format

In [19]:
# saving traj with different format?
# AMBER netcdf
traj.save("./output/test.nc", overwrite=True)

# CHARMM dcd format
traj.save("./output/test.dcd", overwrite=True)

# save a list of trajs
mdio.write_traj("./output/combo_trajs.x", [traj, traj, traj[:3], traj[:0]], traj.top, overwrite=True)

!ls ./output/test.nc
!ls ./output/test.dcd
!ls ./output/combo_trajs.x

### * Common Actions

In [20]:
from pytraj import common_actions

cdict = common_actions.__dict__
calc_list = [act for act in cdict if act.startswith("calc")]
print (calc_list)
print ([act for act in cdict if act.startswith("do_")])

['calc_dih', 'calc_atomicfluct', 'calc_molsurf', 'calc_volume', 'calc_angle', 'calc_protein_score', 'calc_energies', 'calc_vector', 'calc_center_of_mass', 'calc_radial', 'calc_jcoupling', 'calc_distrmsd', 'calc_dihedral', 'calc_rdf', 'calc_radgyr', 'calc_multidihedral', 'calc_matrix', 'calc_watershell', 'calc_COG', 'calc_pairwise_rmsd', 'calc_mindist', 'calc_COM', 'calc_volmap', 'calc_score', 'calc_center_of_geometry', 'calc_distance', 'calc_rmsd', 'calc_temperatures', 'calc_multivector', 'calc_dssp', 'calculate']
['do_autoimage', 'do_translation', 'do_clustering', 'do_rotation', 'do_scaling']


In [21]:
from pytraj.common_actions import do_rotation

frame0 = traj[0]
print (frame0[0])

# apply do_rotation to frame0
# NOTE(review): "x 20 y 60 z 80" presumably gives rotation angles about the x, y and z axes — confirm
do_rotation(frame0, "x 20 y 60 z 80", traj.top)

# coords of frame0 were updated (compare with the values printed before the rotation)
print (frame0[0])

[ 100.      12.434  -11.018]
[-32.31040201  89.26079595 -35.56049646]


# Topology object

In [22]:
from pytraj import Topology
top = Topology("../tests/data/Tc5b.top")

In [23]:
print (top)

<Topology with 1 mols, 20 residues, 304 atoms, 310 bonds, non-PBC>


In [24]:
print ("get first atom in Topology")
print (top[0])

print ()
print ("get all atoms with mask CA")
print (top['@CA'])

get first atom in Topology
<N-atom, resnum=0, n_bonds=4>

get all atoms with mask CA
[<CA-atom, resnum=0, n_bonds=4>, <CA-atom, resnum=1, n_bonds=4>, <CA-atom, resnum=2, n_bonds=4>, <CA-atom, resnum=3, n_bonds=4>, <CA-atom, resnum=4, n_bonds=4>, <CA-atom, resnum=5, n_bonds=4>, <CA-atom, resnum=6, n_bonds=4>, <CA-atom, resnum=7, n_bonds=4>, <CA-atom, resnum=8, n_bonds=4>, <CA-atom, resnum=9, n_bonds=4>, <CA-atom, resnum=10, n_bonds=4>, <CA-atom, resnum=11, n_bonds=4>, <CA-atom, resnum=12, n_bonds=4>, <CA-atom, resnum=13, n_bonds=4>, <CA-atom, resnum=14, n_bonds=4>, <CA-atom, resnum=15, n_bonds=4>, <CA-atom, resnum=16, n_bonds=4>, <CA-atom, resnum=17, n_bonds=4>, <CA-atom, resnum=18, n_bonds=4>, <CA-atom, resnum=19, n_bonds=4>]


In [25]:
print ("loop all CA to get information")
for atom in top['@CA']:
    print (atom.name, atom.resnum)

loop all CA to get information
CA   0
CA   1
CA   2
CA   3
CA   4
CA   5
CA   6
CA   7
CA   8
CA   9
CA   10
CA   11
CA   12
CA   13
CA   14
CA   15
CA   16
CA   17
CA   18
CA   19


In [26]:
# how about creating a new Topology from new atoms?

from pytraj.core import Atom
pseudotop = Topology()

# create atoms named "CH", "CO", "CB" with types "CX", "CY", "CZ" respectively
# (dummy example); all of them are added to residue 0, named "TEST"

for name, ntype in zip(["CH", "CO", "CB"], ["CX", "CY", "CZ"]):
    atom = Atom(name, ntype)
    pseudotop.add_atom(atom, resid=0, resname="TEST")

In [27]:
# get info for pseudotop
print (pseudotop)
print (pseudotop.n_atoms)

for atom in pseudotop:
    print (atom)

<Topology with 0 mols, 1 residues, 3 atoms, 0 bonds, non-PBC>
3
<CH-atom, resnum=0, n_bonds=0>
<CO-atom, resnum=0, n_bonds=0>
<CB-atom, resnum=0, n_bonds=0>


# AtomMask

In [28]:
# The AtomMask object is another important class of pytraj/cpptraj. It provides an easy and convenient way to select atoms.
# One can create/access an AtomMask object in different ways.
# Traditionally:

from pytraj import AtomMask

# create an AtomMask object with the chosen '@CA' mask
atm = AtomMask('@CA')

In [29]:
print (dir(atm))

print (atm.mask_string)

['__class__', '__delattr__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__gt__', '__hash__', '__init__', '__iter__', '__le__', '__lt__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '_indices_view', 'add_atom', 'add_atom_range', 'add_atoms', 'add_mask_at_position', 'add_selected_indices', 'atoms_in_char_mask', 'back', 'brief_mask_info', 'clear_selected', 'convert_to_char_mask', 'convert_to_int_mask', 'indices', 'invert_mask', 'is_char_mask', 'is_empty', 'mask_expression', 'mask_info', 'mask_string', 'mask_string_was_set', 'n_atoms', 'n_selected', 'num_atoms_in_common', 'print_mask_atoms', 'reset_mask', 'selected_indices', 'setup_char_mask', 'setup_int_mask']
b'@CA'


In [30]:
# or use shortcut

atm0 = traj.top("@CA")

In [31]:
print (atm0)

<pytraj.AtomMask.AtomMask object at 0x2aaac8efd0a8>


In [32]:
# access useful data

# number of selected atoms
print (atm0.n_atoms)

20


In [33]:
# get indices of selected atoms
print (atm0.selected_indices())

array('i', [4, 18, 37, 58, 77, 94, 118, 137, 159, 171, 178, 193, 199, 210, 221, 228, 260, 274, 288, 294])


In [34]:
# When using `string` as mask, the index number starts from 1 (to be compatible with AMBER), otherwise the index starts from 0.

# choose CA at residue 2 and 3 
atm0 = traj.top(":2-3@CA")
print (atm0.selected_indices())

indices = atm0.selected_indices()

array('i', [18, 37])


In [35]:
print (traj.top.trunc_res_atom_name(indices[0]))
print (traj.top.trunc_res_atom_name(indices[1]))

LEU_2@CA
TYR_3@CA


# Full example

In [36]:
# 1
"""This script shows how to extract frames having the same temperature
from a replica exchange MD run. You can do it with cpptraj, but this shows how easy
it is to write a new script with pytraj
# TODO : check typos for DOCo
"""

import unittest
from array import array
from glob import glob
from pytraj.base import *
from pytraj import io as mdio
from pytraj.utils.check_and_assert import assert_almost_equal

def get_frames_same_T():
    """Collect the frames at T = 492.2 from a set of replica-exchange runs.

    Loads every ``rem.nc.*`` replica trajectory found under
    ``../tests/data/Test_RemdTraj/``, copies each frame whose ``temperature``
    equals 492.2 into a pre-sized ``Trajectory`` at that frame's own index,
    and then compares the result frame by frame with the cpptraj-generated
    reference file ``temp0.crd.492.20``.

    Takes no arguments and returns nothing: the outcome is reported through
    ``print`` and checked with ``assert`` / ``assert_almost_equal``.
    """
    # The topology file is the same for every replica and is needed again
    # after the loading loop, so it is defined once up front instead of
    # being re-assigned inside the loop.
    topfile = "../tests/data/Test_RemdTraj/ala2.99sb.mbondi2.parm7"
    target_temperature = 492.2

    # create a list of all remd trajs
    flist = glob("../tests/data/Test_RemdTraj/rem.nc.*")

    # load every replica trajectory
    trajlist = [mdio.load(fh, topfile) for fh in flist]

    # make the Trajectory instance that holds the 492.2 T frames;
    # n_frames slots are reserved so frames can be assigned by index below
    n_frames = trajlist[0].n_frames
    f4922 = Trajectory(n_frames=n_frames)

    assert f4922.n_frames == n_frames
    f4922.top = trajlist[0].top.copy()

    # extract frames having T = 492.2
    for traj in trajlist:
        for idx, frame in enumerate(traj):
            # NOTE(review): exact float comparison kept from the original;
            # assumes the stored temperature equals 492.2 exactly — confirm
            if frame.temperature == target_temperature:
                # we don't use `append` since we want to make sure
                # frames are in the order of simulation time
                f4922[idx] = frame

    # make sure f4922 only holds frames having T = 492.2 K; the expected
    # array is sized from n_frames rather than hard-coded to 10 entries
    expected_temperatures = array('d', [target_temperature] * n_frames)
    assert f4922.temperatures == expected_temperatures

    # make sure we reproduce cpptraj output
    cpptraj = mdio.load("../tests/data/Test_RemdTraj/temp0.crd.492.20", topfile)
    for idx, framepy in enumerate(f4922):
        # fetch the reference frame once per iteration
        ref_frame = cpptraj[idx]
        assert_almost_equal(framepy.coords, ref_frame.coords)
        print("rmsd between pytraj Frame and cpptraj Frame = %s " % framepy.rmsd(ref_frame))

    print ()
    print(f4922[5].coords[:10])
    print(cpptraj[5].coords[:10])

    print("YES, we can reproduce cpptraj output")

In [37]:
get_frames_same_T()

rmsd between pytraj Frame and cpptraj Frame = 0.0004537857325777221 
rmsd between pytraj Frame and cpptraj Frame = 0.00040011085473567147 
rmsd between pytraj Frame and cpptraj Frame = 0.0004965490165713683 
rmsd between pytraj Frame and cpptraj Frame = 0.0004901392777290847 
rmsd between pytraj Frame and cpptraj Frame = 0.00048039576004936676 
rmsd between pytraj Frame and cpptraj Frame = 0.0004474136708874317 
rmsd between pytraj Frame and cpptraj Frame = 0.0004878846620305841 
rmsd between pytraj Frame and cpptraj Frame = 0.0005227524683447184 
rmsd between pytraj Frame and cpptraj Frame = 0.00046504114463576655 
rmsd between pytraj Frame and cpptraj Frame = 0.0004950090575904039 

array('d', [8.037311553955078, 5.207451820373535, 1.3513193130493164, 7.069351673126221, 5.673007011413574, 1.1658153533935547, 7.176811695098877, 6.072388648986816, 0.15732811391353607, 6.865853786468506])
array('d', [8.037, 5.207, 1.351, 7.069, 5.673, 1.166, 7.177, 6.072, 0.157, 6.866])
YES, we can repr

In [38]:
""" 2: calc_pairwise_rmsd
You can do it with cpptraj, but this shows how easy
it is to write a new script with pytraj"""

import numpy as np
from pytraj.base import *
from time import time

TRAJ = Trajectory()
TRAJ.top = Topology("../tests/data/Tc5b.top")
TRAJ.load("../tests/data/md1_prod.Tc5b.x")

def calc_pairwise_rmsd():
    """Return the all-against-all rmsd matrix of the frames in ``TRAJ``.

    Each frame of the module-level ``TRAJ`` is stripped with the mask
    ``"!@CA"`` and appended to a fresh ``Trajectory``; entry ``[i, j]`` of
    the returned square numpy array is ``frame_i.rmsd(frame_j)``.
    """
    ca_frames = Trajectory()
    ca_frames.top = TRAJ.top

    # strip each frame with the "!@CA" mask, then collect it
    for current in TRAJ:
        current.strip_atoms("!@CA", TRAJ.top.copy())
        ca_frames.append(current)

    n_frames = ca_frames.size
    rmsd_matrix = np.empty(shape=(n_frames, n_frames))

    # fill the matrix one row at a time
    for row, frame_a in enumerate(ca_frames):
        for col, frame_b in enumerate(ca_frames):
            rmsd_matrix[row, col] = frame_a.rmsd(frame_b)

    return rmsd_matrix

arr0 = calc_pairwise_rmsd()

In [39]:
print (arr0[:20])

[[  5.41057859e-07   9.75457220e+00   9.65628956e+00   1.11727631e+01
    1.09077360e+01   1.03929239e+01   1.11727298e+01   1.10058707e+01
    1.05837293e+01   1.14651283e+01]
 [  9.75457220e+00   0.00000000e+00   3.27419669e+00   4.04657903e+00
    5.57790805e+00   7.13030493e+00   6.37631614e+00   6.18558755e+00
    6.78389254e+00   5.24499039e+00]
 [  9.65628956e+00   3.27419669e+00   1.84678237e-07   3.84257042e+00
    4.69312503e+00   6.14501885e+00   5.26026237e+00   5.12229179e+00
    6.00964523e+00   4.73110002e+00]
 [  1.11727631e+01   4.04657903e+00   3.84257042e+00   1.06624030e-07
    3.65342118e+00   5.55976251e+00   5.26531127e+00   5.06034737e+00
    5.43988366e+00   3.43892531e+00]
 [  1.09077360e+01   5.57790805e+00   4.69312503e+00   3.65342118e+00
    0.00000000e+00   3.47111248e+00   3.01260132e+00   3.16157971e+00
    3.50381802e+00   3.16548984e+00]
 [  1.03929239e+01   7.13030493e+00   6.14501885e+00   5.55976251e+00
    3.47111248e+00   2.55675442e-07   3.02214