In [1]:
import clusim.clugen as clugen
import clusim.sim as sim
from clusim.clustering import Clustering, print_clustering

In [2]:
# Build a random clustering of 7 elements partitioned into 2 clusters.
# NOTE(review): no random seed is set in this notebook, so the clustering
# printed below presumably differs from run to run — confirm.
c1 = clugen.make_random_clustering(
    n_elements=7,
    n_clusters=2,
    random_model='num',
)
print_clustering(c1)

0134|256


In [3]:
# Now mimic an instance where the algorithm doesn't cluster all 10 elements:
# elements 7, 8 and 9 exist but are assigned to no cluster (empty lists).
elm2clu_dict = c1.to_elm2clu_dict()
elm2clu_dict.update({7: [], 8: [], 9: []})

# Building a Clustering from this dict raises an UnassignedElementError.
# The exception *is* the demonstration, so catch it and display it instead of
# letting the traceback abort a "Restart Kernel -> Run All" of the notebook.
try:
    c2 = Clustering().from_elm2clu_dict(elm2clu_dict)
except Exception as err:
    # Caught broadly because the specific error class is not imported in this
    # notebook; the class name is printed so an unexpected error stays visible.
    print("{}: {}".format(type(err).__name__, err))

UnassignedElementError: There are 3 elements unassigned to a cluster.

In [5]:
# Repair option 1: gather all of the missing elements into one new cluster
# (new_cluster_type='giant').
elm2clu_dict2 = clugen.cluster_missing_elements(
    list(range(10)),
    c1.to_elm2clu_dict(),
    new_cluster_type='giant',
)
c2 = Clustering().from_elm2clu_dict(elm2clu_dict2)
print_clustering(c2)

# Repair option 2: give each missing element its own cluster
# (new_cluster_type='singleton').
elm2clu_dict3 = clugen.cluster_missing_elements(
    list(range(10)),
    c1.to_elm2clu_dict(),
    new_cluster_type='singleton',
)
c3 = Clustering().from_elm2clu_dict(elm2clu_dict3)
print_clustering(c3)

0134|256|897
0134|256|8|9|7


In [6]:
# Also note that all similarity comparisons must be between Clusterings that
# share the same element set.
#
# c1 covers 7 elements while c3 covers 10, so comparing them produces a
# ClusteringSimilarityError. The exception is the point of this cell, so it is
# caught and displayed rather than left to end the notebook with a traceback.
try:
    sim.rand_index(c1, c3)
except Exception as err:
    # Caught broadly because the specific error class is not imported in this
    # notebook; the class name is printed so an unexpected error stays visible.
    print("{}: {}".format(type(err).__name__, err))

ClusteringSimilarityError: The element sets must be the same for both clusterings.