In [1]:
 import optunity
import optunity.metrics
import sklearn.svm
import numpy as np

In [3]:
from sklearn.pipeline import Pipeline
from msmbuilder.feature_selection import FeatureSelector, VarianceThreshold
from msmbuilder.preprocessing import RobustScaler
from msmbuilder.decomposition import tICA
from msmbuilder.cluster import MiniBatchKMeans
from msmbuilder.msm import MarkovStateModel
from msmbuilder.io import save_generic
from features import tica_features



In [5]:
import mdtraj as md

# Load the 28 trajectory files (numbered from 1 on disk), each read against
# the same topology PDB.
trajs = [
    md.load('data/trajectory-{}.xtc'.format(i + 1), top='data/fs-peptide.pdb')
    for i in range(28)
]



In [48]:
# NOTE(review): `to_ns` looks like a frames-to-nanoseconds factor and 40 a lag
# in ns — confirm the units. The resulting lag is int(40 / 0.5) == 80.
to_ns = 0.5
msm_lag = int(40 / to_ns)

# End-to-end pipeline that starts from raw trajectories: featurize, drop
# features via the variance threshold, scale, project with tICA, cluster,
# and finally build the Markov state model.
pipe = Pipeline(steps=[
    ('features', FeatureSelector(features=tica_features, which_feat=['alpha_angle'])),
    ('variance_cut', VarianceThreshold()),
    ('scaling', RobustScaler()),
    ('tica', tICA(kinetic_mapping=True, n_components=2, lag_time=20)),
    ('cluster', MiniBatchKMeans(random_state=42)),
    ('msm', MarkovStateModel(lag_time=msm_lag, verbose=False, n_timescales=2)),
])

In [49]:
# Name -> featurizer lookup, so a featurizer can be applied outside a pipeline.
features = dict(tica_features)

# Same stages as `pipe` minus the leading 'features' step: this pipeline
# expects data that has already been featurized. Despite the name, the
# variance threshold step is still present.
pipe_novar = Pipeline(steps=[
    ('variance_cut', VarianceThreshold()),
    ('scaling', RobustScaler()),
    ('tica', tICA(kinetic_mapping=True, n_components=2, lag_time=20)),
    ('cluster', MiniBatchKMeans(random_state=42)),
    ('msm', MarkovStateModel(lag_time=msm_lag, verbose=False, n_timescales=2)),
])

In [83]:
def msm(x_train, y_train, x_test, y_test, n_clusters, which_feat='alpha_angle'):
    """Fit `pipe_novar` on featurized training data and score it on test data.

    Parameters
    ----------
    x_train, x_test
        Training and held-out inputs handed to the selected featurizer.
    y_train, y_test
        Unused; accepted only so the signature has the (x, y) train/test shape.
    n_clusters
        Number of clusters for the 'cluster' step. Truncated with `int()`, so
        float values (e.g. from a continuous optimizer) are accepted.
    which_feat
        Key into the module-level `features` dict choosing the featurizer.

    Returns
    -------
    The value of `pipe_novar.score` on the featurized test data.

    Notes
    -----
    Mutates the shared, module-level `pipe_novar` (sets its cluster count and
    refits it).
    """
    feat = features[which_feat]
    f_train = feat.fit_transform(x_train)
    # Only transform the held-out split: fitting the featurizer on it again
    # would let test data influence the featurization if the featurizer
    # carries any fitted state.
    f_test = feat.transform(x_test)

    pipe_novar.set_params(cluster__n_clusters=int(n_clusters))
    pipe_novar.fit(f_train)
    return pipe_novar.score(f_test)
    


In [51]:
# Sanity check: train on the first 14 trajectories, score on the rest.
# A float cluster count is passed on purpose; `msm` truncates it with int().
msm(
    trajs[:14],
    y_train=None,
    x_test=trajs[14:],
    y_test=None,
    n_clusters=100.5,
    which_feat='alpha_angle',
)

2.1103557595255729

In [82]:
def msm2(x_train, y_train, x_test, y_test, n_clusters, which_feat='alpha_angle'):
    """Fit the full `pipe` on raw training data and score it on test data.

    Parameters
    ----------
    x_train, x_test
        Training and held-out inputs passed straight to the pipeline.
    y_train, y_test
        Unused; accepted only so the signature has the (x, y) train/test shape.
    n_clusters
        Number of clusters for the 'cluster' step. Truncated with `int()`, so
        float values are accepted.
    which_feat
        Feature name (or list of names) for the pipeline's 'features' step.
        Previously this argument was ignored and the selection made when
        `pipe` was built was always used; the default matches that selection.

    Returns
    -------
    The value of `pipe.score` on `x_test`.

    Notes
    -----
    Mutates the shared, module-level `pipe` (sets parameters and refits it).
    """
    # The 'features' step is configured with a list of names, so wrap a bare
    # string rather than letting it be iterated character by character.
    selected = [which_feat] if isinstance(which_feat, str) else list(which_feat)
    pipe.set_params(features__which_feat=selected,
                    cluster__n_clusters=int(n_clusters))

    pipe.fit(x_train)
    return pipe.score(x_test)

In [68]:
# Manual two-fold evaluation: fit on one half, score on the other, then swap
# the halves and report the average of the two scores.
s1, s2 = (
    msm2(fit_half, y_train=None, x_test=score_half, y_test=None,
         n_clusters=100.5, which_feat='alpha_angle')
    for fit_half, score_half in ((trajs[:14], trajs[14:]), (trajs[14:], trajs[:14]))
)
print(np.mean([s1, s2]))

2.39579820765


## Optimize cluster numbers

In [84]:
# Two-fold cross-validation over `trajs`; fold scores are averaged.
# NOTE: this rebinds the name `msm`, replacing the plain function defined
# earlier in the notebook.
@optunity.cross_validated(x=trajs, num_folds=2, aggregator=optunity.cross_validation.mean)
def msm(x_train, x_test, n_clusters, which_feat='alpha_angle'):
    """Score `pipe_novar` on one cross-validation fold.

    Parameters
    ----------
    x_train, x_test
        Fold splits; callers of the decorated function do not pass these.
    n_clusters
        Number of clusters for the 'cluster' step. Truncated with `int()`, so
        float values proposed by an optimizer are accepted.
    which_feat
        Key into the module-level `features` dict choosing the featurizer.

    Returns
    -------
    The value of `pipe_novar.score` on the featurized test split.

    Notes
    -----
    Mutates the shared, module-level `pipe_novar` (sets its cluster count and
    refits it).
    """
    feat = features[which_feat]
    f_train = feat.fit_transform(x_train)
    # Only transform the held-out split: fitting the featurizer on it again
    # would let test data influence the featurization if the featurizer
    # carries any fitted state.
    f_test = feat.transform(x_test)

    pipe_novar.set_params(cluster__n_clusters=int(n_clusters))
    pipe_novar.fit(f_train)
    return pipe_novar.score(f_test)


In [85]:
# Cross-validated score for a single setting; the train/test splits are
# filled in by the decorator, so only the hyperparameters are passed.
msm(n_clusters=100.5, which_feat='alpha_angle')

2.446667247780862

In [86]:
# Search n_clusters in [100, 200] for the best cross-validated score, using
# 10 evaluations and optunity's default solver choice.
opt_ncluster, info, _ = optunity.maximize(
    msm,
    num_evals=10,
    n_clusters=[100, 200],
)


In [None]:
# `optunity.maximize` takes the solver through its `solver_name` keyword; any
# other keyword (such as the previous `solver=`) is read as a box constraint
# for a hyperparameter of that name.
# NOTE(review): confirm 'bayesopt' appears in optunity.available_solvers()
# for the installed version — TODO confirm.
opt_ncluster, info, _ = optunity.maximize(
    msm,
    num_evals=10,
    solver_name='bayesopt',
    n_clusters=[100, 200],
)
