In [1]:
import MDAnalysis as mda
from MDAnalysis.analysis import rms

prefix = "/home/marinegor/modelling/destab/2020-08-06_MD"
traj, top = f"{prefix}/md_0_1.xtc", f"{prefix}/md_0_1.gro"

u = mda.Universe(top, traj)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from MDAnalysis.analysis.align import AverageStructure
avg = AverageStructure(mobile=u).run(backend='multiprocessing', n_workers=16)

ValueError: backend='multiprocessing' is not in self.available_backends=['local'] for class AverageStructure

In [None]:
AverageStructure.available_backends

['local']

In [3]:
%%time

from MDAnalysis.analysis.align import AverageStructure
avg = AverageStructure(mobile=u).run(step=100)

ref = avg.results.universe

CPU times: user 4.77 s, sys: 15.5 ms, total: 4.79 s
Wall time: 4.79 s


In [4]:
groupselections = ("protein", "backbone", "name CA")

R = rms.RMSD(
    u,  # universe to align
    ref,  # reference universe or atomgroup
    groupselections=groupselections,
    # select="backbone",  # group to superimpose and calculate RMSD
)


In [9]:
%%time

R.run()

KeyboardInterrupt: 

In [10]:
R.run(verbose=True)

  8%|â–Š         | 7713/100001 [00:21<04:16, 360.32it/s]


KeyboardInterrupt: 

In [12]:
R.available_backends

('local', 'dask', 'multiprocessing', 'dask.distributed')

In [13]:
%%time

R_multiprocessing = R.run(
    backend='multiprocessing', n_workers=4
)

self.n_workers=4, len(computations)=4
CPU times: user 101 ms, sys: 55.9 ms, total: 156 ms
Wall time: 1min 14s


In [14]:
%%time

R_multiprocessing = R.run(
    backend='multiprocessing', n_workers=16
)

self.n_workers=16, len(computations)=16
CPU times: user 630 ms, sys: 258 ms, total: 888 ms
Wall time: 54.4 s


In [5]:
%%time

R_dask = R.run(
    backend='dask', n_workers=16
)

self.n_workers=16, len(computations)=16
CPU times: user 493 ms, sys: 314 ms, total: 807 ms
Wall time: 1min 48s


In [16]:
%%time

R_dask = R.run(
    backend='dask.distributed', n_workers=16
)

ValueError: Backend or client is not set properly: self.backend='dask.distributed', self.client=None

In [5]:
from dask.distributed import Client, LocalCluster

cluster = LocalCluster(n_workers=16, 
                       threads_per_worker=1,
                       memory_limit='30Gb')
client = Client(cluster)

In [10]:
from dask.distributed import Client

client = Client()

Perhaps you already have a cluster running?
Hosting the HTTP server on port 33445 instead


In [6]:
cluster.dashboard_link

'http://127.0.0.1:8787/status'

In [7]:
%%time

R_distributed = R.run(client=client)

self.n_workers=16, len(computations)=16
CPU times: user 24.4 s, sys: 3.5 s, total: 27.9 s
Wall time: 1min


In [11]:
%%time

R_distributed = R.run(client=client, n_parts=96)

self.n_workers=16, len(computations)=96
CPU times: user 25.8 s, sys: 5.56 s, total: 31.4 s
Wall time: 1min 21s


In [None]:
import numpy as np


def dropna(arr: np.ndarray) -> np.ndarray:
    return arr[~np.isnan(arr)]


runs = {
    "local": R,
    "multiprocessing": R_multiprocessing,
    "dask": R_dask,
    "dask.distributed": R_distributed,
}
ref_run = R

true_rmsd = dropna(R.results.rmsd)
for key, run in runs.items():
    rmsd = dropna(run.results.rmsd)
    assert np.all(true_rmsd == rmsd)
