### Main functionality of k-cluster module

In [1]:
import kcluster
import numpy as np
from scipy.spatial.distance import pdist, squareform

#### Test case: 100 random points in the unit square

In [2]:
N = 100
d = 2
P = np.random.random((N,d))

#### For inputing a distance matrix

##### returning only persistence diagram

In [3]:
# create distance matrix
D = squareform(pdist(P))

# return persistence diagram for k=4
PD = kcluster.persistenceDiagram(D,k=4)
print(PD)

[[0.05750362 0.05904843]
 [0.04345583 0.07449347]
 [0.07329119 0.08410519]
 [0.09147484 0.09640639]
 [0.06837715 0.09902108]
 [0.09989753 0.10942252]
 [0.08790118 0.11001198]
 [0.04330672 0.11004628]
 [0.04279317 0.11292701]
 [0.0438699  0.12527136]
 [0.09664967 0.14024834]
 [0.04000611        inf]]


##### returning persistence diagram and filtration

In [4]:
# F is the function on the vertices, E is the new MST with 
# correct edge weights
PD,F,E = kcluster.persistenceDiagram(D,k=4,return_filtration=True)
print(PD)

[[0.05750362 0.05904843]
 [0.04345583 0.07449347]
 [0.07329119 0.08410519]
 [0.09147484 0.09640639]
 [0.06837715 0.09902108]
 [0.09989753 0.10942252]
 [0.08790118 0.11001198]
 [0.04330672 0.11004628]
 [0.04279317 0.11292701]
 [0.0438699  0.12527136]
 [0.09664967 0.14024834]
 [0.04000611        inf]]


##### For getting clusters

In [5]:
# We first need a threshold
## say we want 5 clusters with multiplicative weights
## death/birth
alpha = kcluster.getThreshold(PD,5) 
print(f"alpha multiplicative: {alpha}")

## say we want 5 clusters with additive weights
## death - birth
alpha_st = kcluster.getThreshold(PD,5,multiplicative=False) 
print(f"alpha additive: {alpha_st}")


alpha multiplicative: 1.582667068167511
alpha additive: 0.03731815571911512


In [6]:
# getting clusters
clstrs_multiplicative, clstr_list = kcluster.getClusters(F,E,alpha)
print(f"number of clusters: {len(clstr_list)}")

# cluster list is just a lookup table for the cluster representatives to 0..n-1
# where n is the number of clusters - if you want an ordered cluster list for 
# testing it can be useful to run 
clstrs_ordered = [clstr_list[i] for i in clstrs_multiplicative]
print(clstrs_ordered)

number of clusters: 5
[0, 0, 4, 1, 0, 0, 0, 1, 3, 1, 0, 1, 1, 0, 1, 1, 1, 0, 2, 1, 0, 0, 1, 0, 1, 3, 2, 4, 0, 1, 1, 2, 1, 1, 0, 3, 1, 0, 1, 1, 0, 1, 4, 1, 1, 1, 4, 0, 2, 0, 0, 0, 1, 1, 4, 3, 1, 0, 4, 2, 1, 1, 1, 0, 1, 0, 1, 1, 4, 3, 2, 1, 4, 1, 2, 1, 0, 0, 0, 0, 4, 1, 1, 4, 0, 1, 1, 4, 0, 1, 3, 1, 3, 0, 1, 1, 1, 0, 3, 1]


In [7]:
# getting clusters with additive threshold
clstrs_additive, clstr_list_a = kcluster.getClusters(F,E,alpha_st,multiplicative=False)
print(f"number of clusters: {len(clstr_list_a)}")

# cluster list is just a lookup table for the cluster representatives to 0..n-1
# where n is the number of clusters - if you want an ordered cluster list for 
# testing it can be useful to run 
clstrs_ordered_a = [clstr_list_a[i] for i in clstrs_additive]
print(clstrs_additive)

number of clusters: 5
[ 0. 27. 27.  7. 27. 27. 27.  7. 25.  7. 27.  7.  7. 27.  7.  7.  7. 27.
 18.  7. 27. 27.  7. 27.  7. 25. 18. 27. 27.  7.  7. 18.  7.  7. 27. 25.
  7. 27.  7.  7. 27.  7. 27.  7.  7.  7. 27.  0. 18. 27.  0. 27.  7.  7.
 27. 25.  7. 27. 27. 18.  7.  7.  7. 27.  7. 27.  7.  7. 27. 25. 18.  7.
 27.  7. 18.  7.  0. 27. 27. 27. 27.  7.  7. 27. 27.  7.  7. 27. 27.  7.
 25.  7. 25. 27.  7.  7.  7. 27. 25.  7.]


#### You can also put in  a graph directly 

##### First we create a graph

In [8]:
from scipy.spatial import Delaunay
# this makes a Delaunay graph
def createGraph(P):
    dlnay = Delaunay(P)
    tri = dlnay.simplices
    edges = np.vstack((tri[:,[0,1]],tri[:,[1,2]],tri[:,[0,2]]))
    edges.sort(axis=1)
    edges = np.unique(edges, axis=0)

    E = []
   
    for i in range(edges.shape[0]):
        dist = np.sqrt(np.sum((P[edges[i,0],:] - P[edges[i,1],:])**2))
        E.append((edges[i,0],edges[i,1], dist))
    return E

In [9]:
# create graph
G = createGraph(P)

# given a list of tuples (int,int, float), we can compute the MST
E = kcluster.computeMST(G)

# and compute the persistence diagram
PD_Graph = kcluster.persistenceDiagramGraph(E,4)
print(PD_Graph)

[[0.05750362 0.05904843]
 [0.04345583 0.07449347]
 [0.07329119 0.08410519]
 [0.09147484 0.09640639]
 [0.06837715 0.09902108]
 [0.09989753 0.10942252]
 [0.08790118 0.11001198]
 [0.04330672 0.11004628]
 [0.04279317 0.11292701]
 [0.0438699  0.12527136]
 [0.09664967 0.14024834]
 [0.04000611        inf]]


In [12]:
# the functionality is the same as for the distance matrix
PD_Graph,F,E_new = kcluster.persistenceDiagramGraph(E,4,return_filtration=True)

alpha = kcluster.getThreshold(PD,3) 
print(f"alpha multiplicative: {alpha}")

# the function is getClustersGraph 
clstrs_graph, clstr_list = kcluster.getClustersGraph(F,E_new,alpha)
print(clstr_list)

alpha multiplicative: 2.589996205942634
{25.0: 0, 18.0: 1, 7.0: 2}
