In [None]:
# Show plots in Jupyter notebooks
%matplotlib inline

# Reload modules whenever they change
# (for development purposes)
%load_ext autoreload
%autoreload 2

# Make clusterking package available even without installation
import sys
sys.path = ["../../"] + sys.path

In [None]:
import clusterking as ck
from clusterking.stability.subsamplestability import SubSampleStabilityTester
from clusterking.stability.ccfom import *
from clusterking.stability.ccpreprocessor import *
from clusterking.cluster import HierarchyCluster
import numpy as np
import pandas as pd
import tqdm

In [None]:
# Open the data stored at output/tutorial_basics.sql as a `ck.Data` object.
# `d` is the input handed to the stability tests in the cells below.
# NOTE(review): presumably this file is written by the basics tutorial notebook
# and must exist before this cell is run; confirm.
d = ck.Data("output/tutorial_basics.sql")

## SubSampleStabilityTester

In [None]:
# Stability tester for the single-fraction study.
# NOTE(review): 0.999 is presumably the fraction of sample points kept in each
# subsample, and 1000 the number of subsampling repetitions; confirm.
ssst = SubSampleStabilityTester()
ssst.set_fraction(0.999)
ssst.set_repeat(repeat=1000)

# Figures of merit evaluated by the tester. The names given here are the
# column names used when inspecting the result table further down.
ssst.add_fom(
    MatchingClusters(name="MatchingClusters", preprocessor=TrivialClusterMatcher())
)
ssst.add_fom(DeltaNClusters(name="DeltaNClusters"))

In [None]:
# Cluster worker that is passed, together with the data, to the stability tests.
# NOTE(review): max_d is presumably the distance cut-off used by the
# hierarchical clustering; confirm against the HierarchyCluster docs.
c = HierarchyCluster()
c.set_max_d(0.2)

In [None]:
# Run the configured stability test on data `d` with cluster worker `c`.
# The result object exposes a table `r.df`, which the next cells inspect.
r = ssst.run(d, c)

In [None]:
# Histograms of both figures of merit, drawn side by side (one row, two
# columns) and normalised via density=True. The trailing semicolon keeps the
# returned axes array out of the cell output.
r.df.hist(
    column=["MatchingClusters", "DeltaNClusters"],
    layout=(1, 2),
    figsize=(15, 5),
    density=True,
);

In [None]:
# Summary statistics for the columns of the result table.
r.df.describe()

In [None]:
# Column-wise mean of the figures of merit in the result table.
# The columns are addressed by the names the FOMs were registered under
# ("MatchingClusters", "DeltaNClusters"); no FOM is registered as "fom",
# so indexing that column would fail.
r.df[["MatchingClusters", "DeltaNClusters"]].mean()

## SubSampleStabilityVsFraction

In [None]:
from clusterking.stability.subsamplestability import SubSampleStabilityVsFraction

In [None]:
# Fresh tester for the fraction scan (rebinds `ssst`). No fraction is set on it
# here; a list of values is passed to `sssvf.run` instead.
ssst = SubSampleStabilityTester()
ssst.set_repeat(100)

# Same two figures of merit as before; their names become the column names
# used when plotting the scan result.
ssst.add_fom(
    MatchingClusters(name="MatchingClusters", preprocessor=TrivialClusterMatcher())
)
ssst.add_fom(DeltaNClusters(name="DeltaNClusters"))

In [None]:
# Driver object for the fraction scan; it is run in the next cell with the
# data, the cluster worker, the tester and a list of values.
sssvf = SubSampleStabilityVsFraction()

In [None]:
# Run the scan over five evenly spaced values from 0.7 to 0.999 (both ends
# included). This rebinds `r`, replacing the single-fraction result from above.
# NOTE(review): the values are presumably the subsample fractions; confirm.
r = sssvf.run(
    d,
    c,
    ssst,
    list(np.linspace(start=0.7, stop=0.999, num=5)),
)

In [None]:
# Scatter plot of the "MatchingClusters" figure of merit against "fraction".
# The trailing semicolon suppresses the Axes repr in the cell output, matching
# the histogram cell earlier in the notebook.
r.df.plot.scatter(x="fraction", y="MatchingClusters");

## Noisy sampling

In [None]:
# Scanner object whose sampling points are configured in the following cells.
s = ck.scan.Scanner()

In [None]:
# Define the sampling points for the two parameters "a" and "b".
# NOTE(review): each tuple is presumably (minimum, maximum, number of points),
# i.e. a 10 x 10 equidistant grid on [-1, 1]; confirm.
s.set_spoints_equidist({"a": (-1, 1, 10), "b": (-1, 1, 10)})

In [None]:
# Keep a handle on the sampling points before noise is added.
# NOTE(review): this stores whatever `s.spoints` returns without copying it. If
# that is the scanner's internal array and `add_spoints_noise` modifies it in
# place, this name will show the noisy points too; confirm, and copy if needed.
original_spoints = s.spoints

In [None]:
# Add "gauss" noise to the sampling points.
# NOTE(review): with sigma=0.0 the noise presumably has no spread, so every
# point would simply be shifted by the mean of 1.0; confirm this is intended.
s.add_spoints_noise(
    "gauss",
    mean=1.0,
    sigma=0.0,
)

In [None]:
# Display the sampling points after the noise step.
s.spoints