In [None]:
# Render matplotlib figures inline, directly below the cell that creates them
%matplotlib inline

# Load the autoreload extension and reload imported modules whenever their
# source changes (for development purposes; not required to run the notebook)
%load_ext autoreload
%autoreload 2

# Make clusterking package available even without installation by putting the
# directory two levels up (presumably the repository root) at the front of the
# module search path.
# The membership check keeps this cell idempotent: re-running it no longer
# prepends another duplicate entry, and sys.path is modified in place instead
# of being rebound to a new list.
import sys
if "../../" not in sys.path:
    sys.path.insert(0, "../../")

In [None]:
import clusterking as ck
from clusterking.stability.subsamplestability import SubSampleStabilityTester
from clusterking.stability.fom import *
from clusterking.stability.ccpreprocessor import *
from clusterking.cluster import HierarchyCluster
import numpy as np
import pandas as pd
import tqdm

In [None]:
# Create the Data object from the SQL file in the output directory
# (presumably written by the basics tutorial, judging by the file name -- verify)
d = ck.Data("output/tutorial_basics.sql")

## SubSampleStabilityTester

In [None]:
# Set up the subsample stability test with fraction 0.999 and 100 repetitions
# (the fraction is presumably the share of sample points kept in each
# subsample -- verify against the SubSampleStabilityTester documentation)
ssst = SubSampleStabilityTester()
ssst.set_fraction(0.999)
ssst.set_repeat(100)

# Register the figures of merit; the names given here are the column labels
# used when the result DataFrame is plotted further below
for stability_fom in (
    MatchingClusters(
        preprocessor=TrivialClusterMatcher(), name="MatchingClusters"
    ),
    DeltaNClusters(name="DeltaNClusters"),
):
    ssst.add_fom(stability_fom)

In [None]:
# Clustering worker that is passed to the stability testers below
c = HierarchyCluster()
# Set the max_d parameter to 0.2 (presumably the maximal distance used to
# cut the hierarchy into clusters -- verify against HierarchyCluster docs)
c.set_max_d(0.2)

In [None]:
# Run the configured stability test with data `d` and clustering worker `c`;
# the result object exposes its values as a DataFrame via `r.df` (used below)
r = ssst.run(d, c)

In [None]:
# Density-normalized histograms of both figures of merit, side by side
# (the trailing semicolon hides the textual repr of the returned axes)
r.df.hist(
    ["MatchingClusters", "DeltaNClusters"],
    density=True,
    layout=(1, 2),
    figsize=(15, 5),
);

In [None]:
# Summary statistics of the result DataFrame of the run above
r.df.describe()

## SubSampleStabilityVsFraction

In [None]:
from clusterking.stability.subsamplestability import SubSampleStabilityVsFraction

In [None]:
# Fresh tester with 100 repetitions. No fraction is set here; a list of
# fractions is handed to SubSampleStabilityVsFraction.run further below
# (presumably that run sets the fraction itself -- verify).
ssst = SubSampleStabilityTester()
ssst.set_repeat(repeat=100)

# Register the same two figures of merit as in the previous section
for stability_fom in (
    MatchingClusters(
        preprocessor=TrivialClusterMatcher(), name="MatchingClusters"
    ),
    DeltaNClusters(name="DeltaNClusters"),
):
    ssst.add_fom(stability_fom)

In [None]:
# Worker that is run below with the tester `ssst` and a list of fractions
sssvf = SubSampleStabilityVsFraction()

In [None]:
# Fractions to test: 5 evenly spaced values from 0.7 to 0.999
fractions = list(np.linspace(0.7, 0.999, 5))

# Run with data `d`, clustering worker `c` and tester `ssst`.
# NOTE: this rebinds `r`, replacing the result of the previous section.
r = sssvf.run(d, c, ssst, fractions)

In [None]:
# Scatter plot of the MatchingClusters figure of merit against the fraction.
# The trailing semicolon suppresses the textual repr of the returned Axes
# object, consistent with the other plotting cells in this notebook.
r.df.plot.scatter("fraction", "MatchingClusters");

## Noisy sampling

In [None]:
from clusterking.stability.noisysamplestability import NoisySampleStabilityTester

In [None]:
import flavio
import numpy as np

# Scanner in the space of Wilson coefficients, configured with scale 5,
# EFT "WET" and basis "flavio" (the unit of the scale is not visible here,
# presumably GeV -- verify against the WilsonScanner documentation)
s = ck.scan.WilsonScanner(scale=5, eft="WET", basis="flavio")

def dBrdq2(w, q):
    """Return flavio's prediction for the observable ``dBR/dq2(B+->Dtaunu)``.

    Both arguments are forwarded unchanged to ``flavio.np_prediction``.

    :param w: Passed as the second argument of ``flavio.np_prediction``
        (presumably the Wilson coefficient object supplied by the scanner
        -- confirm against the flavio documentation).
    :param q: Passed as the third argument (presumably the q2 value at which
        the distribution is evaluated -- confirm).
    :return: Whatever ``flavio.np_prediction`` returns for these inputs.
    """
    observable = "dBR/dq2(B+->Dtaunu)"
    return flavio.np_prediction(observable, w, q)

# Distribution function to scan: dBrdq2, with normalization switched on.
# The binning is given by 10 evenly spaced values between 3.2 and 11.6
# (presumably bin edges, i.e. 9 bins -- verify).
q2_binning = np.linspace(3.2, 11.6, 10)
s.set_dfunction(
    dBrdq2,
    binning=q2_binning,
    normalize=True,
    variable="q2",  # only sets name of variable
)

# Equidistant sample points, identical for all three Wilson coefficients
# (the tuple is presumably (min, max, number of points) -- verify)
s.set_spoints_equidist(
    {
        coefficient: (-1, 1, 3)
        for coefficient in (
            "CVL_bctaunutau",
            "CSL_bctaunutau",
            "CT_bctaunutau",
        )
    }
)

In [None]:
# Start from a Data object created without a file (presumably filled by the
# scanner during the noisy-sample run below -- verify).
# NOTE: this rebinds `d`, which held the data loaded from
# "output/tutorial_basics.sql" and is used by the subsample stability cells;
# re-run the loading cell before re-running those cells.
d = ck.Data()

In [None]:
# Clustering worker for the noisy-sample test; same configuration
# (max_d = 0.2) as the one used for the subsample stability tests
c = HierarchyCluster()
c.set_max_d(0.2)

In [None]:
# Noisy-sample stability test: 10 repetitions with "gauss" noise
# (mean 0, sigma 0.02)
nsst = NoisySampleStabilityTester()
nsst.set_repeat(10)
nsst.set_noise("gauss", mean=0.0, sigma=0.02)

# Register the same two figures of merit as for the subsample tests
for stability_fom in (
    MatchingClusters(
        preprocessor=TrivialClusterMatcher(), name="MatchingClusters"
    ),
    DeltaNClusters(name="DeltaNClusters"),
):
    nsst.add_fom(stability_fom)

In [None]:
# Run the noisy-sample stability test with data `d`, scanner `s` and
# clustering worker `c`.
# NOTE: this rebinds `r`, replacing the subsample stability result.
r = nsst.run(d, s, c)

In [None]:
# Display the result DataFrame of the noisy-sample run
r.df

In [None]:
# Compare the "cluster" columns of two cached data objects directly with a
# MatchingClusters figure of merit.
# NOTE(review): this relies on the private attribute `_cached_data` and on
# hard-coded indices; index 9 presumably ties this cell to
# nsst.set_repeat(10) -- confirm how many entries `_cached_data` holds.
clusters_first = r._cached_data[0].df["cluster"]
clusters_other = r._cached_data[9].df["cluster"]
MatchingClusters(
    preprocessor=TrivialClusterMatcher()
).run(clusters_first, clusters_other).fom

In [None]:
# Scatter plot of the clusters of cached data object 1 in the space of the
# three Wilson coefficients (trailing semicolon hides the returned object)
r._cached_data[1].plot_clusters_scatter(
    [
        "CSL_bctaunutau",
        "CT_bctaunutau",
        "CVL_bctaunutau",
    ]
);

In [None]:
# Same scatter plot for cached data object 0, for comparison with the
# plot of cached data object 1
r._cached_data[0].plot_clusters_scatter(
    [
        "CSL_bctaunutau",
        "CT_bctaunutau",
        "CVL_bctaunutau",
    ]
);

In [None]:
# Display the DataFrame of cached data object 0
# (accesses the private attribute `_cached_data` of the result)
r._cached_data[0].df