In [1]:
import mdtraj as md
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d

# Basics

## Load trajectory

In [2]:
# Trajectory files that don't contain topology information need a separate
# topology file, supplied through the `top` keyword argument
traj_path = '/DURF_datasets/triad_molecule/triad_dataset.nc'
top_path = '/DURF_datasets/triad_molecule/triad_forcefield_ground.prmtop'
traj = md.load(traj_path, top=top_path)
print(traj)

<mdtraj.Trajectory with 100000 frames, 207 atoms, 1 residues, and unitcells>


## Slice the frames

In [3]:
# A Trajectory is sliced along its frames with the usual list syntax
first_ten = traj[:10]      # first ten frames
print(first_ten)
print(first_ten[::2])      # every second frame of those ten

<mdtraj.Trajectory with 10 frames, 207 atoms, 1 residues, and unitcells>
<mdtraj.Trajectory with 5 frames, 207 atoms, 1 residues, and unitcells>


## Save traj back to disk

In [4]:
# The output format is chosen from the file extension.
# Keep only every 10,000th frame so the rest of the tutorial never has to touch
# the full dataset.
# Save as HDF5 (.h5): this is the file the next cell loads, and it is loaded
# there without a separate `top` argument, which an .xyz file could not support.
traj[::10000].save('/DURF_datasets/triad_molecule/for_tutorial.h5')

In [2]:
# Load the small tutorial subset instead of the full dataset.
# `traj` is rebound here, so every later cell works on the subset.
# No `top` argument is passed: the .h5 file is loaded on its own.
data_dir = '/DURF_datasets/triad_molecule'
traj_file = f'{data_dir}/for_tutorial.h5'
traj = md.load(traj_file)

## # of atoms, residues, and frames

In [6]:
# Atom, residue and frame counts, one per line
print(traj.n_atoms, traj.n_residues, traj.n_frames, sep='\n')

207
1
10


## Cartesian coordinates

In [7]:
# Coordinates live in the ndarray `traj.xyz`, indexed as (frame, atom, axis)
print(traj.xyz.shape)
# Coordinate of the 8th atom in the 5th frame (indices are zero-based)
frame_idx, atom_idx = 4, 7
print(tuple(traj.xyz[frame_idx, atom_idx]))

(10, 207, 3)
(5.6700797, 3.2196846, 4.0595813)


# Topology Object

## Topology
The `Topology` of a `Trajectory` contains all the connectivity information of your system and specific chain, residue, and atom information

In [8]:
# Keep a handle on the topology for the cells below;
# the bare `top` on the last line makes the notebook display its summary
top = traj.topology
top

<mdtraj.Topology with 1 chains, 1 residues, 207 atoms, 247 bonds at 0x254a158dc40>

## Select an atom / loop through atoms

In [9]:
# Atoms are addressed by zero-based index, so index 3 is the 4th atom
print(top.atom(3))
# First 20 atoms (fewer if the topology is smaller than that)
n_shown = min(20, top.n_atoms)
print([top.atom(idx) for idx in range(n_shown)])

TRI0-C4
[TRI0-C1, TRI0-C2, TRI0-C3, TRI0-C4, TRI0-C5, TRI0-C6, TRI0-C7, TRI0-C8, TRI0-C9, TRI0-C10, TRI0-C11, TRI0-C12, TRI0-C13, TRI0-C14, TRI0-C15, TRI0-C16, TRI0-C17, TRI0-C18, TRI0-C19, TRI0-C20]


## Properties of atoms
- index
- name
- element
    - mass
    - name
    - symbol
    - number (atomic)
    - radius
- n_bonds

In [10]:
# Number of bonds of the 5th atom (zero-based index 4)
atom = top.atom(4)
atom.n_bonds

3

# Analysis

## RMSD Analysis
Basic syntax: `md.rmsd(target, reference, frame=0, atom_indices=None)`

The 3rd and 4th arguments are optional. `frame` selects which frame of `reference` to compare against (it defaults to 0, the first frame). `atom_indices` restricts the calculation to a subset of atoms.

### Pairwise RMSD
Compute the RMSD between every pair of conformations

In [11]:
# Square matrix holding the RMSD between every pair of frames.
# One md.rmsd call returns one value per frame (a full row),
# so n_frames calls cover all the combinations.
n_frames = traj.n_frames
distances = np.empty((n_frames, n_frames))
for ref_frame in range(n_frames):
    # Row `ref_frame`: RMSD of every frame to frame `ref_frame`
    distances[ref_frame, :] = md.rmsd(traj, traj, ref_frame)
pd.DataFrame(distances)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0.0,0.366514,0.530243,0.685211,0.637866,0.47429,0.621992,0.575354,0.438588,0.491408
1,0.366516,0.0,0.505941,0.719797,0.472186,0.459605,0.551277,0.476775,0.446234,0.509999
2,0.530243,0.505941,0.0,0.580467,0.527394,0.439379,0.614798,0.524209,0.430268,0.466283
3,0.685212,0.719797,0.580466,0.0,0.872281,0.768772,1.01884,0.913994,0.656724,0.881711
4,0.637864,0.472183,0.527395,0.872281,0.0,0.372797,0.469098,0.350479,0.419442,0.523004
5,0.47429,0.459604,0.439379,0.768772,0.372797,0.0,0.45491,0.503081,0.256254,0.494223
6,0.621992,0.55127,0.614797,1.01884,0.469098,0.45491,0.0,0.431566,0.594019,0.384573
7,0.575354,0.476771,0.52421,0.913994,0.350475,0.503084,0.431566,0.0,0.545555,0.308
8,0.438588,0.446234,0.430268,0.656724,0.419443,0.256256,0.594019,0.545555,0.0,0.560115
9,0.491408,0.509999,0.466284,0.881711,0.523004,0.494223,0.384573,0.308,0.560116,0.0


RMSD is symmetric: swapping the target and the reference gives the same value (up to floating-point rounding). The matrix is therefore symmetric about its diagonal, and almost half of the values computed above are redundant. A version that computes only the upper triangle and mirrors it:

In [12]:
# Compute only the upper triangle (frame i against the later frames),
# leaving the diagonal and lower triangle at zero ...
n_frames = traj.n_frames
distances = np.zeros((n_frames, n_frames))
for i in range(n_frames):
    distances[i, i + 1:] = md.rmsd(traj[i + 1:], traj, i)
# ... then mirror it: the lower triangle is still zero, so adding the
# transpose copies every value across the diagonal
distances = distances + distances.T
pd.DataFrame(distances)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0.0,0.366515,0.530243,0.685212,0.637864,0.47429,0.621992,0.575353,0.438589,0.491407
1,0.366515,0.0,0.505941,0.719797,0.472186,0.459604,0.551277,0.476775,0.446234,0.509999
2,0.530243,0.505941,0.0,0.580467,0.527394,0.439379,0.614798,0.524209,0.430268,0.466283
3,0.685212,0.719797,0.580467,0.0,0.872281,0.768772,1.01884,0.913994,0.656724,0.881711
4,0.637864,0.472186,0.527394,0.872281,0.0,0.372797,0.4691,0.350479,0.419442,0.523004
5,0.47429,0.459604,0.439379,0.768772,0.372797,0.0,0.45491,0.503081,0.256254,0.494223
6,0.621992,0.551277,0.614798,1.01884,0.4691,0.45491,0.0,0.431566,0.594019,0.384573
7,0.575353,0.476775,0.524209,0.913994,0.350479,0.503081,0.431566,0.0,0.545555,0.308
8,0.438589,0.446234,0.430268,0.656724,0.419442,0.256254,0.594019,0.545555,0.0,0.560115
9,0.491407,0.509999,0.466283,0.881711,0.523004,0.494223,0.384573,0.308,0.560115,0.0


## Distances between atom pairs
Syntax: `md.compute_distances(traj, atom_pairs)`

The atom pairs must be a 2D array of shape (n_pairs, 2), one row per pair

You can also pick a single frame or a slice of the trajectory with traj\[index\] or traj\[start:stop\]

In [13]:
# Compute the distance from the first atom to the last atom in each frame
# The pair list is nested ([[0, 206]], not [0, 206]) because each row is one atom pair;
# the result has one row per frame and one column per pair
md.compute_distances(traj, [[0,206]]) # Instead of [0, 206]

array([[3.5096152],
       [4.2567015],
       [3.9282374],
       [3.0221667],
       [4.611256 ],
       [3.9118419],
       [4.876852 ],
       [4.7752786],
       [3.7427042],
       [4.284359 ]], dtype=float32)

## Center of mass
Syntax: `md.compute_center_of_mass(traj)`

In [14]:
# Compute the center of mass for the 1st frame
# traj[0] is still a (one-frame) trajectory, so the result is a 2D array of shape (1, 3)
md.compute_center_of_mass(traj[0])

array([[ 0.06516466, -0.27059172,  0.24396148]])

## Bond angles
Syntax: `md.compute_angles(traj, angle_indices)`

Since three points make an angle, each row of `angle_indices` holds three atom indices, so the array must have shape (n_angles, 3)

The order of the indices matters: the angle is measured at the middle atom, so \[(1,2,3) and (1,3,2)\] give different angles

The output values are **in radians**

In [15]:
# Compute the bond angle for the first three atoms with different combinations
# Each inner list is one angle; the result has one row per frame and one column per angle
md.compute_angles(traj, [[0,1,2], [0,2,1]])

array([[0.85182655, 1.9585959 ],
       [0.8671579 , 1.9493879 ],
       [0.8898469 , 1.9029696 ],
       [0.8415426 , 1.9722351 ],
       [0.8533788 , 1.9675242 ],
       [0.8862655 , 1.9257556 ],
       [0.89262086, 1.9146466 ],
       [0.88158923, 1.929238  ],
       [0.9173416 , 1.8782334 ],
       [0.9590764 , 1.8350362 ]], dtype=float32)

## Dihedral angles
Syntax: `md.compute_dihedrals(traj, indices)`

The angle is between the planes spanned by the **first three atoms and the last three atoms**, i.e. a torsion around the bond **between the middle two atoms**. Each row of `indices` therefore holds four atom indices, so the array must have shape (n_dihedrals, 4)

The order of the indices matters: different orderings such as \[(1,2,3,4) and (1,3,2,4)\] define different dihedral angles

The output values are **in radians**

In [16]:
# Compute the dihedral angle for the first four atoms with different combinations
# Each inner list is one dihedral; the result has one row per frame and one column per dihedral
md.compute_dihedrals(traj, [[0,1,2,3], [2,0,1,3]])

array([[0.38780627, 0.24913691],
       [0.32706693, 0.22300279],
       [0.3452081 , 0.24714601],
       [0.32840657, 0.20750956],
       [0.36724725, 0.23700854],
       [0.35468683, 0.25511026],
       [0.35093868, 0.2326066 ],
       [0.36644608, 0.26423943],
       [0.32642913, 0.23825598],
       [0.34445974, 0.25956428]], dtype=float32)

In [17]:
# Coordinates of the second frame (index 1), shown for comparison with the
# same frame after the superpose call below
traj[1].xyz

array([[[ 1.57919323e+00, -3.22780997e-01,  8.24259222e-01],
        [ 1.18679440e+00, -4.87803817e-02,  3.73079240e-01],
        [ 1.13837922e+00, -1.33300215e-01,  5.77075899e-01],
        [ 1.24749863e+00, -1.91269785e-01,  7.83605993e-01],
        [ 1.38234818e+00,  3.64261270e-02,  2.58797586e-01],
        [ 1.27946579e+00, -5.58561683e-02,  2.69635618e-01],
        [ 1.30045664e+00,  1.41506284e-01,  4.62674081e-01],
        [ 1.24078763e+00,  3.47409844e-02,  7.12875783e-01],
        [ 1.63070834e+00, -1.08296797e-01,  8.60405862e-01],
        [ 1.39326727e+00,  1.40234083e-01,  3.55101049e-01],
        [ 1.46625531e+00, -2.55641848e-01,  8.72481763e-01],
        [ 1.71751797e+00, -8.84408206e-02,  3.04578722e-01],
        [ 1.49838364e+00, -3.12604308e-02,  2.13548139e-01],
        [ 1.82328093e+00, -1.46913916e-01,  5.15432298e-01],
        [ 1.76151431e+00,  7.66950985e-03,  6.93304956e-01],
        [ 1.80565035e+00, -1.17143556e-01,  6.46088064e-01],
        [ 1.63274968e+00

In [18]:
# Superpose the trajectory onto itself. The call changes `traj`'s coordinates
# in place (compare traj[1].xyz before and after) and returns a trajectory,
# which is why a Trajectory repr is displayed.
traj.superpose(traj)

<mdtraj.Trajectory with 10 frames, 207 atoms, 1 residues, and unitcells at 0x254a1543430>

In [19]:
# Same frame as displayed before the superpose call; the coordinates now differ
traj[1].xyz

array([[[ 6.17751479e-01, -1.37892151e+00,  9.97196198e-01],
        [ 1.54745251e-01, -1.01454413e+00,  7.04842329e-01],
        [ 3.75182420e-01, -1.03510416e+00,  6.59158647e-01],
        [ 5.59357822e-01, -1.18423581e+00,  7.01127172e-01],
        [-1.50748668e-02, -1.14389050e+00,  8.18826258e-01],
        [ 6.11658506e-02, -1.02874207e+00,  8.06724310e-01],
        [ 1.02873057e-01, -1.23412752e+00,  6.25799656e-01],
        [ 3.75037670e-01, -1.25759792e+00,  5.71617603e-01],
        [ 5.18022299e-01, -1.53989017e+00,  8.78423095e-01],
        [ 2.63869483e-03, -1.24932885e+00,  7.25345433e-01],
        [ 6.35540783e-01, -1.35340178e+00,  8.60739827e-01],
        [ 4.32020538e-02, -1.34684193e+00,  1.11112916e+00],
        [-3.06010284e-02, -1.17280626e+00,  9.56724763e-01],
        [ 2.31664747e-01, -1.49535859e+00,  1.14971352e+00],
        [ 2.97524512e-01, -1.61229086e+00,  9.46393251e-01],
        [ 3.23371768e-01, -1.55982316e+00,  1.07420087e+00],
        [-2.49219220e-03