Skip to content

Commit

Permalink
🆕 add Chinese Whisper algorithm
Browse files Browse the repository at this point in the history
  • Loading branch information
GiulioRossetti committed Jul 21, 2020
1 parent 1e280d2 commit 6a9005e
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 2 deletions.
48 changes: 47 additions & 1 deletion cdlib/algorithms/crisp_partition.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
from cdlib.algorithms.internal.Markov import markov
from karateclub import EdMot
import markov_clustering as mc
from chinese_whispers import chinese_whispers as cw, aggregate_clusters

import networkx as nx

Expand All @@ -45,7 +46,7 @@
__all__ = ["louvain", "leiden", "rb_pots", "rber_pots", "cpm", "significance_communities", "surprise_communities",
"greedy_modularity", "der", "label_propagation", "async_fluid", "infomap", "walktrap", "girvan_newman", "em",
"scan", "gdmp2", "spinglass", "eigenvector", "agdl", "frc_fgsn", "sbm_dl", "sbm_dl_nested",
"markov_clustering", "edmot"]
"markov_clustering", "edmot", "chinese_whispers"]


def girvan_newman(g_original, level):
Expand Down Expand Up @@ -1043,6 +1044,51 @@ def markov_clustering(g_original, expansion=2, inflation=2, loop_value=1, iterat
'convergence_check_frequency': convergence_check_frequency})


def chinese_whispers(g_original, weighting='top', iterations=20, seed=None):
    """
    Chinese Whispers is a randomized hard (crisp) graph clustering algorithm.
    Every node starts in its own cluster; then, for a fixed number of iterations, the nodes are visited in random
    order and each one adopts the label that carries the highest total weight among its neighbours.
    The number of clusters is not required in advance.

    :param g_original: input graph; it is converted to a networkx Graph through ``convert_graph_formats``
    :param weighting: edge weighting scheme. Available modalities: ['top', 'lin', 'log']
    :param iterations: number of iterations
    :param seed: random seed
    :return: NodeClustering object

    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> coms = algorithms.chinese_whispers(G)

    :References:

    Biemann, C.: Chinese Whispers: an Efficient Graph Clustering Algorithm and its Application to Natural Language Processing Problems. In: Proceedings of TextGraphs, 73–80 (2006)

    Ustalov, D., Panchenko, A., Biemann, C., Ponzetto, S.P.: `Watset: Local-Global Graph Clustering with Applications in Sense and Frame Induction.`_ Computational Linguistics 45(3), 423–479 (2019)

    .. note:: Reference implementation: https://github.com/nlpub/chinese-whispers-python
    """

    g = convert_graph_formats(g_original, nx.Graph)
    g, maps = nx_node_integer_mapping(g)

    try:
        # cw() labels the nodes of g; aggregate_clusters() then groups the nodes sharing a label.
        cw(g, weighting=weighting, iterations=iterations, seed=seed)

        # Largest communities first; sorted() is stable, so ties keep the aggregation order.
        clusters = sorted(aggregate_clusters(g).values(), key=len, reverse=True)

        if maps is not None:
            # Translate the integer ids back to the caller's original node names.
            coms = [[maps[n] for n in cluster] for cluster in clusters]
        else:
            coms = [list(cluster) for cluster in clusters]
    finally:
        # NOTE(review): nx_node_integer_mapping presumably relabels the graph in place when it returns
        # a mapping - confirm. Restoring the names in `finally` keeps the graph intact even when the
        # clustering step raises (e.g. on an unknown weighting scheme).
        if maps is not None:
            nx.relabel_nodes(g, maps, copy=False)

    return NodeClustering(coms, g_original, "Chinese Whispers",
                          method_parameters={'weighting': weighting, 'iterations': iterations, 'seed': seed})


def edmot(g_original, component_count=2, cutoff=10):
"""
The algorithm first creates the graph of higher order motifs. This graph is clustered by the Louvain method.
Expand Down
19 changes: 19 additions & 0 deletions cdlib/test/test_community_discovery_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -489,3 +489,22 @@ def test_percomvc(self):
if len(coms.communities) > 0:
self.assertEqual(type(coms.communities[0]), list)
self.assertEqual(type(coms.communities[0][0]), int)

def test_chinese_whispers(self):
    """Run chinese_whispers on two graphs and check the container and node-label types of the result."""
    # Each scenario pairs a graph factory with the node type expected in the communities.
    # Factories are called lazily so graphs are built in the same order as the algorithm runs.
    scenarios = (
        (get_string_graph, str),
        (nx.karate_club_graph, int),
    )

    for build_graph, node_type in scenarios:
        graph = build_graph()
        result = algorithms.chinese_whispers(graph)

        self.assertEqual(type(result.communities), list)
        if len(result.communities) == 0:
            continue

        first_community = result.communities[0]
        self.assertEqual(type(first_community), list)
        if len(first_community) > 0:
            self.assertEqual(type(first_community[0]), node_type)
1 change: 1 addition & 0 deletions docs/reference/cd_algorithms/node_clustering.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ Methods in this subclass returns as result a ``NodeClustering`` object instance.
agdl
aslpaw
async_fluid
chinese_whispers
cpm
der
edmot
Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,5 @@ eva_lcd
karateclub>=1.0.0
bimlpa
ASLPAw
markov_clustering
markov_clustering
chinese_whispers

0 comments on commit 6a9005e

Please sign in to comment.