#### Note

In order to use 4 cores, you need to install `ipython` and run below command in seperate terminal. 

```sh
ipcluster start -n 4 --engines=MPIEngineSetLauncher
```

In [1]:
# start ipython client
from IPython.parallel import Client
clients = Client()
# make sure that we DO use 4 cores
clients.ids

[0, 1, 2, 3]

In [2]:
# enter parallel mode. From now, we execute commands in all 4 nodes
%autopx

%autopx enabled


In [3]:
# import mpi4py to get rank (I dont know how to get rank from ipython yet).
from mpi4py import MPI
rank = MPI.COMM_WORLD.rank
print (rank)

[stdout:0] 1
[stdout:1] 2
[stdout:2] 0
[stdout:3] 3


In [4]:
# load `TrajectoryIterator` to each node
# we are loading 4 replica trajs, 200 M each.

import pytraj as pt
traj = pt.iterload(["./data/nogit/remd/remd.x.000",
                    "./data/nogit/remd/remd.x.001",
                    "./data/nogit/remd/remd.x.002",
                    "./data/nogit/remd/remd.x.004"], "./data/nogit/remd/myparm.parm7")
print (traj)

[stdout:0] 
<pytraj.TrajectoryIterator with 4000 frames: <Topology with 17443 atoms, 5666 residues, 5634 mols, 17452 bonds, PBC with box type = truncoct>>
           
[stdout:1] 
<pytraj.TrajectoryIterator with 4000 frames: <Topology with 17443 atoms, 5666 residues, 5634 mols, 17452 bonds, PBC with box type = truncoct>>
           
[stdout:2] 
<pytraj.TrajectoryIterator with 4000 frames: <Topology with 17443 atoms, 5666 residues, 5634 mols, 17452 bonds, PBC with box type = truncoct>>
           
[stdout:3] 
<pytraj.TrajectoryIterator with 4000 frames: <Topology with 17443 atoms, 5666 residues, 5634 mols, 17452 bonds, PBC with box type = truncoct>>
           


In [5]:
# since we want to use 4 cores to speed up our calculation. We need to split `traj` into 4 chunks
# saying we want to do `autoimage`, then `rmsfit` to first frame, then calculate phi values for residue 4 and 5, then
# we just need to create 4 independent iterators in each node with different (start, stop, stride)
# NOTE: to make this notebook look nice, I used stride=200 to get less data.
print (traj.split_iterators(n_chunks=4, stride=200, rank=rank, autoimage=True, rmsfit=(traj[0], '@CA')))

[stdout:0] 
<pytraj.core.frameiter.FrameIter with start=1000, stop=2000, stride=200 
autoimage=True, rmsfit=(<Frame with 17443 atoms>, '@CA')> 

[stdout:1] 
<pytraj.core.frameiter.FrameIter with start=2000, stop=3000, stride=200 
autoimage=True, rmsfit=(<Frame with 17443 atoms>, '@CA')> 

[stdout:2] 
<pytraj.core.frameiter.FrameIter with start=0, stop=1000, stride=200 
autoimage=True, rmsfit=(<Frame with 17443 atoms>, '@CA')> 

[stdout:3] 
<pytraj.core.frameiter.FrameIter with start=3000, stop=4000, stride=200 
autoimage=True, rmsfit=(<Frame with 17443 atoms>, '@CA')> 



In [6]:
# now try to calc_phi for residue 4 and 5 (index starts from 1 if using string)
# we use dtype='dict' to keep track the data
pt.calc_phi(traj.split_iterators(n_chunks=4, stride=200, rank=rank, autoimage=True, rmsfit=(traj[0], '@CA')), 
           'resrange 4-5',
           dtype='dict')

[0;31mOut[0:4]: [0m
OrderedDict([(u'phi:4', array([-47.35086751, -66.74990664, -57.28938819, -55.2935864 , -65.96906479])), (u'phi:5', array([  87.14510798,  125.22372104,  -67.75719088,  -76.00990448,
         63.81106866]))])

[0;31mOut[1:4]: [0mOrderedDict([(u'phi:4', array([-48.5240012 , -67.52593951, -48.66738046, -64.49397896, -47.37284612])), (u'phi:5', array([-66.23860371, -72.52066004,  68.06294711, -71.22785313, -85.01771935]))])

[0;31mOut[2:4]: [0m
OrderedDict([(u'phi:4', array([-76.54410603, -82.41555541, -72.73910819, -32.18948005, -60.7393818 ])), (u'phi:5', array([  68.45212642, -115.87636793,   88.48026246,  -65.84427223,
        -75.06110822]))])

[0;31mOut[3:4]: [0m
OrderedDict([(u'phi:4', array([-68.97014011, -57.75613538, -56.87883718, -68.34210047, -66.88863395])), (u'phi:5', array([ 165.9800418 , -178.27815588,  179.87481015,  -65.04365968,
        120.21027024]))])

In [7]:
# scattering data is not really easy to manipulate, so just `gather` them. In this case, we create a variable, like `data`
# to hold the results
# (we just re-do the caculation)
data = pt.calc_phi(traj.split_iterators(n_chunks=4, stride=200, rank=rank, autoimage=True, rmsfit=(traj[0], '@CA')), 
                   'resrange 4-5', 
                   dtype='dict')
print (data) # for each node

[stdout:0] 
OrderedDict([(u'phi:4', array([-47.35086751, -66.74990664, -57.28938819, -55.2935864 , -65.96906479])), (u'phi:5', array([  87.14510798,  125.22372104,  -67.75719088,  -76.00990448,
         63.81106866]))])
[stdout:1] OrderedDict([(u'phi:4', array([-48.5240012 , -67.52593951, -48.66738046, -64.49397896, -47.37284612])), (u'phi:5', array([-66.23860371, -72.52066004,  68.06294711, -71.22785313, -85.01771935]))])
[stdout:2] 
OrderedDict([(u'phi:4', array([-76.54410603, -82.41555541, -72.73910819, -32.18948005, -60.7393818 ])), (u'phi:5', array([  68.45212642, -115.87636793,   88.48026246,  -65.84427223,
        -75.06110822]))])
[stdout:3] 
OrderedDict([(u'phi:4', array([-68.97014011, -57.75613538, -56.87883718, -68.34210047, -66.88863395])), (u'phi:5', array([ 165.9800418 , -178.27815588,  179.87481015,  -65.04365968,
        120.21027024]))])


In [8]:
# we need to exit from `parallel` mode to go to local node so we can gather data
%autopx

%autopx disabled


In [9]:
from pytraj.parallel import gather
all_data = gather('data', clients, restype='dict')
print (all_data)

{u'phi:4': array([-47.35086751, -66.74990664, -57.28938819, -55.2935864 ,
       -65.96906479, -48.5240012 , -67.52593951, -48.66738046,
       -64.49397896, -47.37284612, -76.54410603, -82.41555541,
       -72.73910819, -32.18948005, -60.7393818 , -68.97014011,
       -57.75613538, -56.87883718, -68.34210047, -66.88863395]), u'phi:5': array([  87.14510798,  125.22372104,  -67.75719088,  -76.00990448,
         63.81106866,  -66.23860371,  -72.52066004,   68.06294711,
        -71.22785313,  -85.01771935,   68.45212642, -115.87636793,
         88.48026246,  -65.84427223,  -75.06110822,  165.9800418 ,
       -178.27815588,  179.87481015,  -65.04365968,  120.21027024])}


In [11]:
# if we like pandas and want to convert to pandas.DataFrame, just
import pandas as pd
df = pd.DataFrame(all_data)
print (df)
print (type(df))

        phi:4       phi:5
0  -47.350868   87.145108
1  -66.749907  125.223721
2  -57.289388  -67.757191
3  -55.293586  -76.009904
4  -65.969065   63.811069
5  -48.524001  -66.238604
6  -67.525940  -72.520660
7  -48.667380   68.062947
8  -64.493979  -71.227853
9  -47.372846  -85.017719
10 -76.544106   68.452126
11 -82.415555 -115.876368
12 -72.739108   88.480262
13 -32.189480  -65.844272
14 -60.739382  -75.061108
15 -68.970140  165.980042
16 -57.756135 -178.278156
17 -56.878837  179.874810
18 -68.342100  -65.043660
19 -66.888634  120.210270
<class 'pandas.core.frame.DataFrame'>


### See also

[Using IPython for parallel computing](http://ipython.org/ipython-doc/2/parallel/magics.html)