In [1]:
import caspailleur as csp
from paspailleur import pattern_structures as PS

In [14]:
import matplotlib.pyplot as plt
import numpy as np
from tqdm.notebook import tqdm

In [3]:
from sklearn import cluster, datasets, mixture
from sklearn.neighbors import kneighbors_graph
from sklearn.preprocessing import StandardScaler

In [4]:
# Build the toy dataset: scikit-learn's "noisy circles" sample.
# The sample count is large enough to be interesting but small enough
# that the cells below do not take long to run.
n_samples, seed = 500, 30  # the seed is passed as random_state below
noisy_circles = datasets.make_circles(
    n_samples=n_samples,
    noise=0.05,
    factor=0.5,
    random_state=seed,
)

In [5]:
# Unpack the two parts returned by make_circles (per scikit-learn's docs:
# the sample coordinates and their labels).
X, y = noisy_circles

# Keep six decimals per coordinate.
X = np.round(X, decimals=6)

# Display the per-column minimum and maximum to see the value range.
X.min(axis=0), X.max(axis=0)

(array([-1.069165, -1.154058]), array([1.06957 , 1.103701]))

In [6]:
# For every column of X, lay down 11 evenly spaced border values running from
# that column's minimum to its maximum (11 borders -> 10 equal-width bins).
borders_per_axis = [
    np.linspace(column.min(), column.max(), num=11)
    for column in X.T
]
borders_per_axis

[array([-1.069165e+00, -8.552915e-01, -6.414180e-01, -4.275445e-01,
        -2.136710e-01,  2.025000e-04,  2.140760e-01,  4.279495e-01,
         6.418230e-01,  8.556965e-01,  1.069570e+00]),
 array([-1.154058 , -0.9282821, -0.7025062, -0.4767303, -0.2509544,
        -0.0251785,  0.2005974,  0.4263733,  0.6521492,  0.8779251,
         1.103701 ])]

In [7]:
# One interval pattern structure per axis, built from that axis' border values.
# ndigits=6 is the same number of decimals X was rounded to earlier in the
# notebook. NOTE(review): exact meaning of `ndigits` to be confirmed against
# the paspailleur documentation.
basic_pss = [
    PS.IntervalPS(values=axis_borders, ndigits=6)
    for axis_borders in borders_per_axis
]

# Combine the per-axis structures into a single Cartesian pattern structure.
ps_cart = PS.CartesianPS(basic_pss)

# Materialise the preprocessed data as a list; later cells call len(data).
data = [*ps_cart.preprocess_data(X)]

In [8]:
# Sanity check: the extent of the intent computed over the whole dataset
# must contain exactly as many elements as the dataset itself.
assert sum(1 for _ in ps_cart.extent(data, ps_cart.intent(data))) == len(data)

In [10]:
# iter_attributes yields pairs; only the second element (the extent) is kept.
# min_support=0.1 is forwarded to the pattern structure as given.
attr_extents = [
    extent
    for _attribute, extent in ps_cart.iter_attributes(data, min_support=0.1)
]
print(len(attr_extents))

34


In [13]:
%%time
stable_extents = csp.mine_equivalence_classes.list_stable_extents_via_gsofia(
    attr_extents, n_objects=len(data), min_delta_stability=0.01, min_supp=0.1, use_tqdm=True, n_attributes=len(attr_extents)
)
stable_extents = sorted(stable_extents, key=lambda ext: ext.count(), reverse=True)
print(len(stable_extents))

  0%|          | 0/34 [00:00<?, ?it/s]

1397
CPU times: user 79.3 ms, sys: 77.3 ms, total: 157 ms
Wall time: 216 ms


In [15]:
%%time
stable_intents = [ps_cart.intent(data, ext.search(True)) for ext in tqdm(stable_extents)]

  0%|          | 0/1397 [00:00<?, ?it/s]

CPU times: user 653 ms, sys: 19.6 ms, total: 673 ms
Wall time: 675 ms


In [16]:
# Show a human-readable rendering of one mined intent (index 3 of
# stable_intents, i.e. the fourth entry of the list sorted by extent.count()).
ps_cart.verbalize(stable_intents[3])

'0: [-1.07, 0.86], 1: [-1.15, 1.10]'