In [1]:
import pandas as pd
import sklearn
from sklearn.datasets import load_breast_cancer
from sklearn import ensemble
from gale import create_mapper
import kmapper as km
import numpy as np

data = load_breast_cancer()

X= pd.DataFrame(data.data, columns = data.feature_names)

y = data.target
column_names=data.feature_names
X= X.to_numpy()
model = ensemble.IsolationForest(random_state=1729)
model.fit(X)
lens1 = model.decision_function(X).reshape((X.shape[0], 1))
mapperForLens = km.KeplerMapper(verbose=0)

# Create another 1-D lens with L2-norm
lens2 = mapperForLens.fit_transform(X, projection="l2norm")

# Combine lenses pairwise to get a 2-D lens i.e. [Isolation Forest, L^2-Norm] lens
lens = np.c_[lens1, lens2]



In [2]:
#create list of mapper outputs and lenses
mapper1 = create_mapper(X, lens, resolution=15, gain=0.7, dist_thresh=0.5, clusterer=sklearn.cluster.KMeans(n_clusters=2,random_state=3471))
mapper2 = create_mapper(X, lens, resolution=15, gain=0.4, dist_thresh=0.5, clusterer=sklearn.cluster.KMeans(n_clusters=4,random_state=3471))
    

In [6]:
color_values = lens [:,0] - lens[:,0].min()


In [10]:
import glob
from mountaineer import Mountaineer

In [11]:
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [12]:
#visualize
test = Mountaineer()
mapper_outputs=[mapper1, mapper2]
lenses = [color_values, color_values]
test.visualize(X,y, mapper_outputs, lenses, column_names, 'UMAP') #Projection method- 'UMAP' or 'TSNE'

In [159]:

# Create a custom 1-D lens with Isolation Forest

# Define the simplicial complex
scomplex = mapperForLens.map(lens,
                      X,
                      cover=km.Cover(n_cubes=15,
                                     perc_overlap=0.7),
                      clusterer=sklearn.cluster.KMeans(n_clusters=2,
                                                       random_state=3471))

In [160]:
len(mapper1["nodes"])

276

In [43]:
len(scomplex["nodes"])

276

In [44]:
len(color_values)

569

In [25]:
color_values

array([-8.09852507e-02,  6.88871376e-02,  7.01782870e-02, -1.17732910e-01,
        5.33050529e-02,  8.07802788e-02,  9.72840015e-02,  8.51364559e-02,
        6.31368426e-02, -4.62381665e-02,  9.96137154e-02,  9.71349344e-02,
       -1.02191976e-01,  9.67946916e-02,  4.05253619e-02,  5.48460492e-02,
        1.07296913e-01,  7.36242058e-02,  4.32761557e-02,  1.30917946e-01,
        1.32273642e-01,  1.05193022e-01,  1.02062006e-02,  3.12302262e-02,
        6.11632894e-02, -1.53791939e-02,  5.60360812e-02,  7.43478461e-02,
        7.77448524e-02,  1.07075191e-01,  5.24482021e-02,  4.66877759e-02,
        6.66646760e-02,  4.79251222e-02,  7.34355562e-02,  9.24437926e-02,
        1.21070248e-01,  9.76808850e-02,  1.06231171e-02,  1.01122259e-01,
        1.21727617e-01,  9.07820143e-02, -8.81820638e-02,  1.11461717e-01,
        1.03982622e-01,  5.75248001e-02,  4.42334292e-02,  9.13248291e-02,
        1.40193819e-01,  1.34323635e-01,  1.23196987e-01,  1.24264991e-01,
        1.37897550e-01,  

In [26]:
counter = 0
for node in scomplex['links']:
    counter+=len(node)

In [27]:
counter

3965

In [29]:
scomplex["links"]

defaultdict(list,
            {'cube3_cluster0': ['cube16_cluster0'],
             'cube3_cluster1': ['cube13_cluster0', 'cube16_cluster1'],
             'cube8_cluster0': ['cube9_cluster0',
              'cube21_cluster0',
              'cube22_cluster0',
              'cube36_cluster0',
              'cube37_cluster0'],
             'cube8_cluster1': ['cube9_cluster1',
              'cube21_cluster1',
              'cube22_cluster1',
              'cube36_cluster1',
              'cube37_cluster1',
              'cube38_cluster1',
              'cube49_cluster1',
              'cube50_cluster1',
              'cube51_cluster1'],
             'cube9_cluster0': ['cube21_cluster0',
              'cube22_cluster0',
              'cube36_cluster0',
              'cube37_cluster0'],
             'cube9_cluster1': ['cube21_cluster1',
              'cube22_cluster1',
              'cube36_cluster1',
              'cube37_cluster1',
              'cube38_cluster1',
              'cube49_clust

In [31]:
counter=0
for key in scomplex["links"]:
    counter+=len(scomplex["links"][key])
print (counter)

2867


In [33]:
counter=0
for key in scomplex["links"]:
    counter+=len(mapper1["links"][key])
print (counter)

2867
