Comparing algorithms:
* Cross-validation 
* Random states
* Grid search
    * Cannot use sklearn's grid search because the algorithms need all of the data to perform clustering
* Score functions
* Synthetic data (shift, scale, etc.)

In [1]:
import numpy as np
from matplotlib import pyplot as plt

from datetime import datetime

from sklearn.datasets import make_checkerboard
from sklearn.datasets import samples_generator as sg

from sklearn.metrics import consensus_score

import cluster

In [2]:
class timer:
    """Context manager that prints the wall-clock time spent in its body.

    On exit it prints ``runtime: <elapsed>`` and stores the same value in
    ``self.elapsed`` so callers can also use it programmatically::

        with timer() as t:
            work()
        t.elapsed  # difference of two ``datetime.now()`` readings
    """

    def __init__(self):
        # Clock source; ``now()`` is read on enter and again on exit.
        self.datetime = datetime
        self.tic = None
        self.elapsed = None

    def __enter__(self):
        """Record the start time and return the timer itself."""
        self.tic = self.datetime.now()
        # Returning self lets ``with timer() as t`` bind the timer, not None.
        return self

    def __exit__(self, *args, **kwargs):
        """Store and print the elapsed time; exceptions are not suppressed."""
        self.elapsed = self.datetime.now() - self.tic
        print('runtime: {}'.format(self.elapsed))

In [3]:
def synth_experiment(model):
    """Time a biclustering model on synthetic checkerboard data and score it.

    A 5000 x 500 checkerboard with (4, 3) clusters and noise=10 is generated
    unshuffled, then shuffled with a fixed random state so every model sees
    the same data.

    Parameters
    ----------
    model : object
        Must expose ``fit_transform(data)``; its return value is passed to
        ``consensus_score`` as the found biclusters.

    Returns
    -------
    score
        The value of ``consensus_score`` between the model's biclusters and
        the ground-truth row/column indicators, reordered to match the
        shuffled data.
    """
    n_clusters = (4, 3)

    target, rows, columns = make_checkerboard(
        shape=(5000, 500), n_clusters=n_clusters, noise=10,
        shuffle=False, random_state=0)

    # NOTE(review): ``_shuffle`` is a private sklearn helper; confirm it is
    # available in the sklearn version this notebook runs against.
    data, row_idx, col_idx = sg._shuffle(target, random_state=0)

    # Time only the model fit: scoring is evaluation overhead and would
    # otherwise inflate the reported runtime of the algorithm.
    with timer():
        biclusters = model.fit_transform(data)

    # Ground-truth indicators are permuted with the same indices as the data.
    score = consensus_score(
        biclusters, (rows[:, row_idx], columns[:, col_idx])
    )

    return score

In [22]:
def exper_experiment(model):
    """Placeholder for timing biclustering algorithms on experimental data.

    Not implemented yet: ``model`` is ignored and ``None`` is returned.
    """
    return None

## Setup

In [None]:
# Pairs of (dataset, per-dataset parameter overrides); an empty dict means
# no overrides for that dataset.
# NOTE(review): the dataset names below are not defined in the visible part
# of this notebook -- confirm they are created before this cell is run.
datasets = [
    (
        noisy_circles,
        {
            'damping': 0.77,
            'preference': -240,
            'quantile': 0.2,
            'n_clusters': 2,
        },
    ),
    (
        noisy_moons,
        {
            'damping': 0.75,
            'preference': -220,
            'n_clusters': 2,
        },
    ),
    (varied, {'eps': 0.18, 'n_neighbors': 2}),
    (aniso, {'eps': 0.15, 'n_neighbors': 2}),
    (blobs, {}),
    (no_structure, {}),
]

## Cheng Church

In [23]:
#score = synth_experiment(cluster.ChengChurch())
#print('CC score: ', score)

## Plaid

In [24]:
# Run the synthetic experiment with the Plaid model and print its score.
plaid_model = cluster.Plaid()
score = synth_experiment(plaid_model)
print('Plaid score: ', score)

runtime: 0:01:11.318843
Plaid score:  0.166666666667


## Spectral bi

In [25]:
# Run the synthetic experiment with the Spectral model in 'bi' mode.
spectral_bi_model = cluster.Spectral(model='bi')
score = synth_experiment(spectral_bi_model)
print('Spectral bi score: ', score)

runtime: 0:00:01.902937
Spectral bi score:  0.625400641026


## Spectral co

In [26]:
# Run the synthetic experiment with the Spectral model in 'co' mode.
# The label previously read 'Spectral bi score: ' (copy-paste slip); any
# recorded output from before this fix still carries the old label.
score = synth_experiment(cluster.Spectral(model='co'))
print('Spectral co score: ', score)

runtime: 0:00:00.217611
Spectral bi score:  0.208564998676


## xMotifs

In [27]:
# Run the synthetic experiment with the XMotifs model and print its score.
xmotifs_model = cluster.XMotifs()
score = synth_experiment(xmotifs_model)
print('xMotifs score: ', score)

runtime: 0:00:26.817454
xMotifs score:  0.0


## CPB

In [4]:
# Run the synthetic experiment with the CPB model and print its score.
cpb_model = cluster.CPB()
score = synth_experiment(cpb_model)
print('CPB score: ', score)

runtime: 0:00:05.910645
CPB score:  0.0316666666667
