## find-landmarks.ipynb

In [None]:
"""Cluster based on RMSD between conformations

msmbuilder autogenerated template version 2
created 2017-03-03T10:36:10.027417
please cite msmbuilder in any publications


"""
import mdtraj as md

from msmbuilder.cluster import MiniBatchKMedoids
from msmbuilder.io import load_meta, itertrajs, save_generic, backup

## Set up parameters

In [None]:
# Clusterer used to pick landmark conformations.
#   n_clusters   -- number of medoids (landmarks) to find
#   metric       -- distance between conformations; 'rmsd' as in the
#                   template's docstring ("Cluster based on RMSD")
#   random_state -- fixed seed so that Restart & Run All reproduces the
#                   same landmarks; change or remove it to re-sample
kmed = MiniBatchKMedoids(
    n_clusters=500,
    metric='rmsd',
    random_state=42,
)

## Load

In [None]:
# Project metadata table. Later cells read its 'nframes', 'traj_fn' and
# 'top_fn' columns and index rows positionally with .iloc.
meta = load_meta()

## Try to limit RAM usage

In [None]:
def guestimate_stride(total_frames=None, n_clusters=None,
                      frames_per_cluster=10):
    """Guess a frame stride that keeps the fitting set small.

    The stride is chosen so that roughly ``frames_per_cluster`` frames
    remain per cluster after striding, and is never smaller than 1.

    Parameters
    ----------
    total_frames : int, optional
        Total number of frames available. Defaults to the sum of the
        'nframes' column of the notebook-level ``meta``.
    n_clusters : int, optional
        Number of clusters that will be fit. Defaults to
        ``kmed.n_clusters`` from the notebook-level clusterer.
    frames_per_cluster : int, optional
        Target number of frames kept per cluster (default 10, the value
        the template previously hard-coded).

    Returns
    -------
    stride : int
        Builtin ``int`` >= 1.
    """
    # Fall back to the notebook globals only when the caller did not
    # supply explicit values, so a bare guestimate_stride() still works.
    if total_frames is None:
        total_frames = meta['nframes'].sum()
    if n_clusters is None:
        n_clusters = kmed.n_clusters

    wanted_frames = n_clusters * frames_per_cluster
    # int(...) turns a NumPy integer (from the column sum) into a plain
    # Python int before it is handed on as a stride.
    stride = max(1, int(total_frames // wanted_frames))
    print("Since we have", total_frames, "frames, we're going to stride by",
          stride, "during fitting, because this is probably adequate for",
          n_clusters, "clusters")
    return stride

## Fit

In [None]:
# Fit on a strided copy of every trajectory. The key yielded alongside
# each trajectory by itertrajs is not needed here; only the trajectories
# are collected into the list passed to fit().
kmed.fit(list(
    traj
    for _key, traj in itertrajs(meta, stride=guestimate_stride())
))
# Report what the fitted clusterer has to say about itself.
print(kmed.summarize())

## Save

In [None]:
# Write the fitted clusterer to disk under 'clusterer.pickl' using
# msmbuilder's save_generic helper.
save_generic(kmed, 'clusterer.pickl')

## Save centroids

In [None]:
def frame(traj_i, frame_i):
    """Load a single frame from one of the trajectories listed in ``meta``.

    Parameters
    ----------
    traj_i : int
        Positional (0-based) row of ``meta`` identifying the trajectory.
    frame_i : int
        Index of the frame, passed straight to ``md.load_frame``.

    Returns
    -------
    The object returned by ``md.load_frame`` for that frame.
    """
    # kmedoids numbers trajectories with 0-based, contiguous integers,
    # so the row is looked up by position (.iloc) rather than by label.
    meta_row = meta.iloc[traj_i]
    traj_path = meta_row['traj_fn']
    topology_path = meta_row['top_fn']
    return md.load_frame(traj_path, frame_i, top=topology_path)


# Each entry of kmed.cluster_ids_ unpacks to (trajectory index, frame index)
# for one medoid; load those frames and join them into a single trajectory.
# Topology checking is switched off for the join.
centroids = md.join((frame(ti, fi) for ti, fi in kmed.cluster_ids_),
                    check_topology=False)

# Use one name for both the backup and the save so the two can never
# drift apart (previously save() repeated the literal file name).
centroids_fn = 'centroids.xtc'
backup(centroids_fn)
centroids.save(centroids_fn)