Skip to content

Commit

Permalink
Merge 18d3f18 into 47d53b1
Browse files Browse the repository at this point in the history
  • Loading branch information
dsalvaz committed Sep 12, 2019
2 parents 47d53b1 + 18d3f18 commit 620022a
Show file tree
Hide file tree
Showing 11 changed files with 181 additions and 16 deletions.
1 change: 1 addition & 0 deletions cdlib/__init__.py
@@ -1,3 +1,4 @@
from cdlib.classes.node_clustering import NodeClustering
from cdlib.classes.edge_clustering import EdgeClustering
from cdlib.classes.fuzzy_node_clustering import FuzzyNodeClustering
from cdlib.classes.attr_node_clustering import AttrNodeClustering
1 change: 1 addition & 0 deletions cdlib/algorithms/__init__.py
@@ -1,3 +1,4 @@
from .edge_clustering import *
from .crisp_partition import *
from .overlapping_partition import *
from .attribute_clustering import *
63 changes: 63 additions & 0 deletions cdlib/algorithms/attribute_clustering.py
@@ -0,0 +1,63 @@
try:
import igraph as ig
except ModuleNotFoundError:
ig = None

import Eva

from collections import defaultdict
from cdlib import AttrNodeClustering

import networkx as nx

from cdlib.utils import convert_graph_formats

__all__ = ['eva']

def eva(g, labels, weight='weight', resolution=1., randomize=False, alpha=0.5):
    """
    The Eva algorithm extends the Louvain approach in order to deal with the attributes of the nodes (aka Louvain Extended to Vertex Attributes).
    It optimizes - combining them linearly - two quality functions, a structural and a clustering one, namely the modularity and the purity.
    A parameter alpha tunes the importance of the two functions: a high value of alpha favors the clustering criterion instead of the structural one.

    :param g: a networkx/igraph object
    :param labels: dictionary mapping each node to a dictionary of its attributes (attribute name -> value), in the format accepted by ``networkx.set_node_attributes``
    :param weight: str, optional the key in graph to use as weight. Default to 'weight'
    :param resolution: double, optional Will change the size of the communities, default to 1.
    :param randomize: boolean, optional Will randomize the node evaluation order and the community evaluation order to get different partitions at each call, default False
    :param alpha: a value assumed in [0,1] tuning the importance of modularity and purity criteria
    :return: AttrNodeClustering object

    :Example:

    >>> from cdlib.algorithms import eva
    >>> import networkx as nx
    >>> import random
    >>> l1 = ['A', 'B', 'C', 'D']
    >>> l2 = ["E", "F", "G"]
    >>> g = nx.barabasi_albert_graph(100, 5)
    >>> labels = dict()
    >>> for node in g.nodes():
    ...     labels[node] = {"l1": random.choice(l1), "l2": random.choice(l2)}
    >>> communities = eva(g, labels, alpha=0.8)

    :References:

    1. (publication reference to be added)

    .. note:: Reference implementation: https://github.com/GiulioRossetti/Eva/tree/master/Eva
    """

    g = convert_graph_formats(g, nx.Graph)
    # Attach the attribute dictionaries to the nodes so Eva can read them.
    # NOTE(review): if convert_graph_formats returns the very same object for
    # networkx inputs, this also modifies the caller's graph - confirm.
    nx.set_node_attributes(g, labels)

    coms, coms_labels = Eva.eva_best_partition(g, weight=weight, resolution=resolution,
                                               randomize=randomize, alpha=alpha)

    # Reshaping the results: invert the node -> community mapping into
    # community -> list of member nodes.
    coms_to_node = defaultdict(list)
    for n, c in coms.items():
        coms_to_node[c].append(n)

    # The grouped values are already freshly built lists, so no per-community
    # copy is needed.
    coms_eva = list(coms_to_node.values())
    return AttrNodeClustering(coms_eva, g, coms_labels, "Eva",
                              method_parameters={"weight": weight, "resolution": resolution,
                                                 "randomize": randomize, "alpha": alpha})
1 change: 1 addition & 0 deletions cdlib/classes/__init__.py
@@ -1,3 +1,4 @@
from .node_clustering import NodeClustering
from .edge_clustering import EdgeClustering
from .fuzzy_node_clustering import FuzzyNodeClustering
from .attr_node_clustering import AttrNodeClustering
26 changes: 26 additions & 0 deletions cdlib/classes/attr_node_clustering.py
@@ -0,0 +1,26 @@
from cdlib.classes.node_clustering import NodeClustering
from cdlib import evaluation


class AttrNodeClustering(NodeClustering):
    """Attribute Node Communities representation.

    :param communities: list of communities
    :param graph: a networkx/igraph object
    :param coms_labels: dictionary specifying for each community the frequency of the attribute values
    :param method_name: community discovery algorithm name
    :param method_parameters: configuration for the community discovery algorithm used
    :param overlap: boolean, whether the partition is overlapping or not
    """

    def __init__(self, communities, graph, coms_labels, method_name, method_parameters=None, overlap=False):
        super().__init__(communities, graph, method_name, method_parameters, overlap)
        # Per-community attribute-value frequencies, kept for purity evaluation.
        self.coms_labels = coms_labels

    def purity(self):
        """Purity is the product of the frequencies of the most frequent labels carried by the nodes within the communities

        :return: FitnessResult object
        """
        # evaluation.purity takes the clustering object and reads its
        # ``coms_labels`` attribute itself; handing it the bare dictionary
        # would fail with AttributeError.
        return evaluation.purity(self)
5 changes: 3 additions & 2 deletions cdlib/classes/node_clustering.py
Expand Up @@ -297,8 +297,9 @@ def avg_odf(self, **kwargs):
>>> from cdlib.algorithms import louvain
>>> g = nx.karate_club_graph()
>>> communities = louvain(g)
>>> mod = communities.avg_odf()
>>>
>>> communities = eva(g, alpha=alpha)
>>> pur = communities.purity()
"""
if self.__check_graph():
Expand Down
53 changes: 40 additions & 13 deletions cdlib/evaluation/fitness.py
Expand Up @@ -5,13 +5,13 @@
import numpy as np
import scipy
from cdlib.evaluation.internal.link_modularity import cal_modularity
import Eva

__all__ = ["FitnessResult", "link_modularity", "normalized_cut", "internal_edge_density", "average_internal_degree",
"fraction_over_median_degree", "expansion", "cut_ratio", "edges_inside", "flake_odf", "avg_odf", "max_odf",
"triangle_participation_ratio", "modularity_density", "z_modularity", "erdos_renyi_modularity",
"newman_girvan_modularity", "significance", "surprise", "conductance", "size", "avg_embeddedness",
"scaled_density", "avg_distance", "hub_dominance", "avg_transitivity"]

"scaled_density", "avg_distance", "hub_dominance", "avg_transitivity", "purity"]

# FitnessResult = namedtuple('FitnessResult', ['min', 'max', 'mean', 'std'])
FitnessResult = namedtuple('FitnessResult', 'min max score std')
Expand Down Expand Up @@ -84,7 +84,7 @@ def scaled_density(graph, communities, **kwargs):
"""

return __quality_indexes(graph, communities,
lambda graph, coms: nx.density(nx.subgraph(graph, coms))/ nx.density(graph), **kwargs)
lambda graph, coms: nx.density(nx.subgraph(graph, coms)) / nx.density(graph), **kwargs)


def avg_distance(graph, communities, **kwargs):
Expand Down Expand Up @@ -131,7 +131,7 @@ def hub_dominance(graph, communities, **kwargs):

return __quality_indexes(graph, communities,
lambda graph, coms: max([x[1] for x in
list(nx.degree(nx.subgraph(graph, coms)))])/(len(coms) - 1),
list(nx.degree(nx.subgraph(graph, coms)))]) / (len(coms) - 1),
**kwargs)


Expand Down Expand Up @@ -382,7 +382,6 @@ def edges_inside(graph, community, **kwargs):
1. Radicchi, F., Castellano, C., Cecconi, F., Loreto, V., & Parisi, D. (2004). Defining and identifying communities in networks. Proceedings of the National Academy of Sciences, 101(9), 2658-2663.
"""


return __quality_indexes(graph, community, pq.PartitionQuality.edges_inside, **kwargs)


Expand Down Expand Up @@ -622,7 +621,7 @@ def erdos_renyi_modularity(graph, communities, **kwargs):
c = nx.subgraph(graph, community)
mc = c.number_of_edges()
nc = c.number_of_nodes()
q += mc - (m*nc*(nc - 1)) / (n*(n-1))
q += mc - (m * nc * (nc - 1)) / (n * (n - 1))

return FitnessResult(score=(1 / m) * q)

Expand Down Expand Up @@ -710,8 +709,8 @@ def z_modularity(graph, communities, **kwargs):
for node in c:
dc += c.degree(node)

mmc += (mc/m)
dc2m += (dc/(2*m))**2
mmc += (mc / m)
dc2m += (dc / (2 * m)) ** 2

res = 0
try:
Expand Down Expand Up @@ -759,10 +758,10 @@ def surprise(graph, communities, **kwargs):
q += mc
qa += scipy.special.comb(nc, 2, exact=True)
try:
q = q/m
qa = qa/scipy.special.comb(n, 2, exact=True)
q = q / m
qa = qa / scipy.special.comb(n, 2, exact=True)

sp = m*(q*np.log(q/qa) + (1-q)*np.log2((1-q)/(1-qa)))
sp = m * (q * np.log(q / qa) + (1 - q) * np.log2((1 - q) / (1 - qa)))
except ZeroDivisionError:
pass

Expand Down Expand Up @@ -792,7 +791,7 @@ def significance(graph, communities, **kwargs):
m = graph.number_of_edges()

binom = scipy.special.comb(m, 2, exact=True)
p = m/binom
p = m / binom

q = 0

Expand All @@ -805,7 +804,35 @@ def significance(graph, communities, **kwargs):
binom_c = scipy.special.comb(nc, 2, exact=True)
pc = mc / binom_c

q += binom_c * (pc * np.log(pc/p) + (1-pc)*np.log((1-pc)/(1-p)))
q += binom_c * (pc * np.log(pc / p) + (1 - pc) * np.log((1 - pc) / (1 - p)))
except ZeroDivisionError:
pass
return FitnessResult(score=q)


def purity(communities):
    """Purity is the product of the frequencies of the most frequent labels carried by the nodes within the communities

    :param communities: AttrNodeClustering object
    :return: FitnessResult object

    Example:

    >>> from cdlib.algorithms import eva
    >>> from cdlib import evaluation
    >>> import networkx as nx
    >>> import random
    >>> l1 = ['A', 'B', 'C', 'D']
    >>> l2 = ["E", "F", "G"]
    >>> g = nx.barabasi_albert_graph(100, 5)
    >>> labels = dict()
    >>> for node in g.nodes():
    ...     labels[node] = {"l1": random.choice(l1), "l2": random.choice(l2)}
    >>> communities = eva(g, labels, alpha=0.5)
    >>> pur = evaluation.purity(communities)

    :References:

    1. (publication reference to be added)
    """
    # The score is delegated to the Eva package, which works on the
    # per-community label frequencies stored on the clustering object.
    pur = Eva.purity(communities.coms_labels)
    return FitnessResult(score=pur)
1 change: 1 addition & 0 deletions cdlib/test/__init__.py
Expand Up @@ -7,3 +7,4 @@
from .test_partitions_comparisons import *
from .test_utils import *
from .test_viz_network import *
from .test_attributeclustering import *
23 changes: 23 additions & 0 deletions cdlib/test/test_attributeclustering.py
@@ -0,0 +1,23 @@
import unittest
from cdlib import algorithms
import networkx as nx
import random

class AttrCommunityDiscoveryTests(unittest.TestCase):
    """Smoke tests for the attribute-aware community discovery algorithms."""

    def test_eva(self):
        """eva must return its communities as a list of lists of int node ids."""
        l1 = ['one', 'two', 'three', 'four']
        l2 = ["A", "B", "C"]
        g = nx.barabasi_albert_graph(100, 5)
        labels = dict()

        # Give every node one random value for each of the two attributes.
        for node in g.nodes():
            labels[node] = {"l1": random.choice(l1), "l2": random.choice(l2)}

        coms = algorithms.eva(g, labels, alpha=0.5)

        # assertIsInstance is the idiomatic type check and gives a clearer
        # failure message than comparing type objects for equality.
        self.assertIsInstance(coms.communities, list)
        if coms.communities:
            self.assertIsInstance(coms.communities[0], list)
            self.assertIsInstance(coms.communities[0][0], int)
22 changes: 21 additions & 1 deletion cdlib/test/test_fitness_functions.py
@@ -1,10 +1,12 @@
import unittest
from cdlib.algorithms import louvain
from cdlib.algorithms import louvain, eva
import networkx as nx
import numpy as np
import random
from cdlib import evaluation



class FitnessFunctionsTests(unittest.TestCase):

def test_link_modularity(self):
Expand Down Expand Up @@ -66,3 +68,21 @@ def test_pquality_indexes(self):
for idx in indexes:
res = idx(g, communities)
self.assertIsInstance(res, evaluation.FitnessResult)

def test_purity(self):
    """Purity of an Eva clustering is a FitnessResult with a score in [0, 1]."""
    first_values = ['one', 'two', 'three', 'four']
    second_values = ["A", "B", "C"]
    graph = nx.barabasi_albert_graph(100, 5)

    # One random value per attribute for every node of the graph.
    node_labels = {
        node: {"l1": random.choice(first_values), "l2": random.choice(second_values)}
        for node in graph.nodes()
    }

    clustering = eva(graph, node_labels, alpha=0.8)
    result = evaluation.purity(clustering)

    self.assertIsInstance(result, evaluation.FitnessResult)
    self.assertGreaterEqual(result.score, 0)
    self.assertLessEqual(result.score, 1)
1 change: 1 addition & 0 deletions requirements.txt
Expand Up @@ -16,3 +16,4 @@ pulp==1.6.*
pquality==0.0.7
seaborn==0.9.*
pandas==0.25.*
eva_lcd

0 comments on commit 620022a

Please sign in to comment.