# embarrassingly parallel

In [1]:
# requires: mpi4py, python2.7
# mpi4py does not work with python3.4 yet (AFAIK; mpi4py was installed from conda)

In [2]:
# MPI version: launch the script on 8 processes with mpirun and time the whole run
!time mpirun -n 8 python ./rmsd_mpi_example.py


real	0m1.510s
user	0m9.670s
sys	0m2.299s


In [3]:
# Serial version: run the script in a single python process and time it
!time python ./rmsd_serial_example.py


real	0m10.528s
user	0m7.455s
sys	0m3.069s


In [4]:
# mdtraj OpenMP version: export OMP_NUM_THREADS=8 for the command, then time the script

! export OMP_NUM_THREADS=8 && time python ./rmsd_openmp_mdtraj.py


real	0m18.311s
user	0m18.045s
sys	0m2.598s


In [5]:
# speed-up: ~7x (serial pytraj vs. MPI pytraj)
# speed-up: ~10x vs. the mdtraj OpenMP version
# make sure the serial, MPI and mdtraj runs all give the same RMSD values
import numpy as np
rmsd_serial = np.loadtxt("serial_rmsd.txt")
rmsd_mpi = np.loadtxt("mpi_rmsd.txt")
# mdtraj uses `nm` units, so the values need to be multiplied by 10
rmsd_openmp_mdtraj = np.loadtxt("rmsd_mdtraj_openmp.txt") * 10
# array_equal is True only when the shapes match and *every* element is equal;
# `np.any(a == b)` would already pass on a single matching value (e.g. the
# leading 0.0 that both arrays share).
print (np.array_equal(rmsd_serial, rmsd_mpi))
# compare with a tolerance here since mdtraj uses f4 (pytraj: f8)
np.testing.assert_almost_equal(rmsd_mpi, rmsd_openmp_mdtraj, decimal=2)
print (rmsd_serial[:-10])
print (rmsd_mpi[:-10])

True
[  0.           2.7885276    3.54482596 ...,  10.25132034  10.10238168
  10.12056193]
[  0.           2.7885276    3.54482596 ...,  10.25132034  10.10238168
  10.12056193]


# Code: mpi

# %load rmsd_mpi_example.py
``` python
"""Calculate RMSD for 8 replica trajs using 8 cores.
Reference frame is the 1st frame of remd.x.000
(embarrassingly parallel)

System: 17443 atoms, 1000 frames, netcdf, 8 replicas (000 to 007), 200Mb/replica
mpirun -n 8 python rmsd_mpi_example.py
"""

import numpy as np
from pytraj import io
from pytraj import Frame
import pytraj.common_actions as pyca
from mpi4py import MPI

# One MPI process per replica: the rank selects which trajectory file to load.
comm = MPI.COMM_WORLD
rank = comm.rank
size = comm.size

root_dir = "../../tests/data/nogit/remd/"
# Zero-pad the rank to three digits (000, 001, 002, ...) so the name is still
# well-formed on 10 or more ranks (assuming the replica files keep the same
# three-digit naming). root_dir already ends with "/", so no extra separator.
fname = root_dir + "remd.x.%03d" % rank
top_name = root_dir + "myparm.top"

traj = io.load(fname, top_name)
n_atoms = traj.top.n_atoms
n_frames = traj.n_frames

# The reference is the first frame of the trajectory loaded by rank 0; every
# other rank only prepares an empty float64 buffer of matching shape.
is_root = (rank == 0)
if is_root:
    ref = traj[0]
    ref_xyz = np.asarray(ref.xyz, dtype=np.float64)
else:
    ref = None
    ref_xyz = np.empty((n_atoms, 3), dtype=np.float64)

# Collective call: every rank must reach it. Rank 0 sends its coordinates,
# the other ranks receive them into their buffer.
comm.Bcast([ref_xyz, MPI.DOUBLE], root=0)

if not is_root:
    # Non-root ranks rebuild a Frame from the received coordinates.
    ref = Frame()
    ref.append_xyz(ref_xyz)

_ref = ref.copy()

def rmsd_mpi(traj, _ref):
    """Compute the RMSD of `traj` against `_ref` using the "@CA" mask.

    Thin wrapper around `pyca.calc_rmsd`; the topology passed along is
    `traj.top`. Returns whatever `pyca.calc_rmsd` returns.
    """
    return pyca.calc_rmsd("@CA", traj, traj.top, _ref)

# Each rank computes the RMSD values for its own replica.
arr0 = rmsd_mpi(traj, _ref)

# Lowercase `gather` returns a list with one entry per rank on the root and
# None on the other ranks, so no receive buffer has to be allocated first
# (the previous `np.empty` pre-allocation was overwritten immediately).
data = comm.gather(arr0, root=0)
if rank == 0:
    all_rmsd = np.asarray(data).flatten()
    np.savetxt("mpi_rmsd.txt", all_rmsd)

    # make sure to reproduce serial version
    # YES
    #sarr = np.empty((size, traj.n_frames))
    #REF = None
    #for i in range(size):
    #    fname = "remd.x.00" + str(i)
    #    straj = io.load(fname, traj.top)
    #    if i == 0:
    #        REF = straj[0]
    #    sarr[i] = straj.calc_rmsd("@CA", REF)
    # use np.all (not np.any): every value has to match, not just one
    #print(np.all(all_rmsd == sarr.flatten()))
```

# Code: serial

# %load rmsd_serial_example.py
```python
"""Calculate RMSD for 8 replica trajs using 1 core.
Reference frame is the 1st frame of remd.x.000

System: 17443 atoms, 1000 frames, netcdf, 8 replicas (000 to 007), 200Mb/replica

python rmsd_serial_example.py
"""

import numpy as np
from pytraj import io
import pytraj.common_actions as pyca

root_dir = "../../tests/data/nogit/remd/"
# Topology file that is actually loaded below. It is built once here instead
# of on every loop iteration (previously `top_name` pointed at a different
# file and was never used).
top_name = root_dir + "myparm.parm7"

# 8 replicas, 1000 frames each
size = 8
n_frames = 1000
sarr = np.empty((size, n_frames))
REF = None
for i in range(size):
    # Zero-padded replica index: 000, 001, 002, 003 ...
    # root_dir already ends with "/", so no extra separator is needed.
    fname = root_dir + "remd.x.%03d" % i
    straj = io.load(fname, top_name)
    if i == 0:
        # The reference is the first frame of the first replica.
        REF = straj[0]
    sarr[i] = straj.calc_rmsd("@CA", REF)
np.savetxt("./serial_rmsd.txt", sarr.flatten())
```

# Code: mdtraj openmp

# %load rmsd_openmp_mdtraj.py

```python
"""Calculate RMSD for 8 replica trajs using openmp with 8 cores
Reference frame is the 1st frame of remd.x.000

System: 17443 atoms, 1000 frames, netcdf, 8 replicas (000 to 007), 200Mb/replica

python rmsd_openmp_mdtraj.py
"""

import numpy as np
import mdtraj as md

root_dir = "../../tests/data/nogit/remd/"

# One row of RMSD values per replica (8 replicas, 1000 frames each).
size = 8
sarr = np.empty((size, 1000))
REF = None

for replica in range(size):
    traj_path = root_dir + "/remd.x.00" + str(replica)
    parm_path = root_dir + "/myparm.parm7"
    replica_traj = md.load_netcdf(traj_path, top=parm_path)
    ca_atoms = replica_traj.top.select("name CA")
    if REF is None:
        # Only happens for the first replica: its first frame is the reference.
        REF = replica_traj[0]
    sarr[replica] = md.rmsd(replica_traj, REF, frame=0, atom_indices=ca_atoms)
np.savetxt("rmsd_mdtraj_openmp.txt", sarr.flatten())
```