## Distributed parallel RMSD calculation with pytraj 

#### Load Python modules

In [6]:
import pytraj as pt
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
import pickle

#### Change directory to the location of the trajectory file.

In [None]:
# Work from the production-run directory so the relative file names used in the following cells resolve there.
%cd ~/scratch/workshop/pdb/6N4O/simulation/sim_pmemd/4-production

#### Create the Python script "rmsd.py" that will be run as an MPI job

In [None]:
%%file rmsd.py

import pytraj as pt
import pickle
from mpi4py import MPI

# Grab the MPI world communicator and this process's rank within it.
comm = MPI.COMM_WORLD
rank = comm.rank

# Load the trajectory together with its topology, plus the reference structure.
trajectory = pt.iterload('mdcrd_nowat.nc', top='prmtop_nowat.parm7')
reference = pt.load('inpcrd_nowat.pdb')

# Call the pmap_mpi function for MPI.
# We don't need to specify the number of CPUs here,
# because the script is launched with srun.
rmsd_kwargs = {'mask': '@C,N,O', 'ref': reference}
data = pt.pmap_mpi(pt.rmsd, trajectory, **rmsd_kwargs)

# pmap_mpi sends the data to rank 0, so only rank 0 writes it to disk.
if rank == 0:
    with open("rmsd.dat", "wb") as out_file:
        pickle.dump(data, out_file)


#### Run MPI job on the cluster

In [9]:
# Launch rmsd.py under srun; no process count is given on this command line.
# NOTE(review): presumably srun takes the task count from the surrounding Slurm allocation — confirm.
!srun python rmsd.py

#### Load data

In [10]:
# Read back the pickled results that rmsd.py wrote to "rmsd.dat".
# NOTE: unpickling can execute arbitrary code; only load files produced by your own job.
with open("rmsd.dat", "rb") as fp:
    data = pickle.load(fp)

# Index the mapping directly so a missing dataset raises KeyError right here,
# instead of .get() silently returning None and failing later when plotting.
rmsd = data['RMSD_00001']

#### Define the time axis for the RMSD plot

In [None]:
# 2000 evenly spaced values from 0 to 1.999, i.e. a spacing of 0.001.
# NOTE(review): assumes the RMSD series has exactly 2000 entries — confirm against the trajectory.
time = np.linspace(start=0, stop=1.999, num=2000)

#### Set seaborn plot theme parameters

In [12]:
# Apply seaborn's default theme with the "darkgrid" axes style in a single call.
sns.set_theme(style="darkgrid")

#### Plot RMSD

In [None]:
# Plot the RMSD series against the time axis.
plt.plot(time, rmsd)
plt.xlabel("Time, ns")
# Raw string: "\A" is not a valid Python escape sequence, so the non-raw literal
# triggers an invalid-escape warning; the text handed to matplotlib is unchanged.
plt.ylabel(r"RMSD, $ \AA $")