-
Notifications
You must be signed in to change notification settings - Fork 0
/
louvain_level.rs
192 lines (167 loc) 路 7.79 KB
/
louvain_level.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
use louvain_domain::graph::Node;
use rand::seq::SliceRandom;
use rand::thread_rng;
use crate::{community_assignment::Community, louvain_graph::LouvainGraph, modularity::Modularity};
pub struct LouvainLevel<'a> {
graph: &'a LouvainGraph,
pub modularity: Modularity<'a>,
/// Threshold for modularity improvement in one pass. If the improvement
/// is smaller than this threshold, we stop the current iteration.
/// Set to 0.0 to ignore this threshold.
modularity_improvement_threshold: f64,
should_use_threshold: bool,
}
impl<'a> LouvainLevel<'a> {
pub fn new(graph: &'a LouvainGraph, epsilon_min: f64) -> Self {
let should_use_threshold = epsilon_min > 0.0;
Self {
graph,
modularity: Modularity::new(graph),
modularity_improvement_threshold: epsilon_min,
should_use_threshold,
}
}
/// Phase I of Louvain: Modularity Optimization
/// Optimizes modularity of the current graph, i.e. we calculate one level
/// of the Louvain hierarchy.
///
/// We repeatedly cycle through all nodes until there is no improvement
/// of modularity anymore and we've reached a local maximum.
/// For every node, we remove it from its community and move it to the
/// community of the neighbor for which the modularity gain (delta Q)
/// is maximal.
///
/// Returns whether there was an improvement in modularity in the current
/// level.
pub fn optimize_one_level(&mut self) -> bool {
let mut is_level_improvement = false;
// Prepare random access to nodes
let mut indices_shuffled: Vec<Node> = (0..self.graph.num_nodes()).collect();
indices_shuffled.shuffle(&mut thread_rng());
let mut quality = if self.should_use_threshold {
self.modularity.calc_singleton_modularity()
} else {
0.0 // dummy value
};
let mut is_improvement_in_one_graph_traversal;
let mut count_graph_traversals = 0;
loop {
is_improvement_in_one_graph_traversal = false;
let old_quality = quality;
for &node in &indices_shuffled {
let improvement = self.optimize_one_node(node);
if improvement {
is_improvement_in_one_graph_traversal = true;
is_level_improvement = true;
}
}
count_graph_traversals += 1;
// TODO: What is a good value here for "max_graph_traversals"?
// 50 is just chosen arbitrarily in the expectation that the
// actual number of graph traversals is much lower.
if count_graph_traversals > 50 {
panic!(
"The Louvain algorithm is somehow stuck in a local optimum
and cannot improve modularity anymore. Please report your specific
graph on GitHub, so we can investigate this issue."
);
}
if self.should_use_threshold {
quality = self.modularity.calc_singleton_modularity();
if (quality - old_quality) <= self.modularity_improvement_threshold {
break;
}
}
if !is_improvement_in_one_graph_traversal {
break;
}
}
is_level_improvement
}
/// Optimizes modularity for one node by removing it from its current
/// community and inserting it into the community of one of its neighbors,
/// so that modularity increase is maximal.
///
/// Returns whether there was an improvement in modularity, i.e.
/// whether the node was moved to another community or stayed in its
/// previous community.
fn optimize_one_node(&mut self, node: Node) -> bool {
let prev_community = self.modularity.assignment.get_community(node);
self.modularity
.assignment
.remove_node_from_its_community(node);
let new_community = self.find_best_community_to_move_into(node, prev_community);
self.modularity
.assignment
.insert_node_into_community(node, new_community);
new_community != prev_community
}
/// Computes the best community to move the current node into.
///
/// The default choice for the "best community" is the previous community
/// of the node if no improvement can be made otherwise (by moving into
/// a community of an adjacent node).
///
/// If there are multiple communities with the same gain, we choose
/// the first one we encounter.
fn find_best_community_to_move_into(&self, node: Node, prev_community: Community) -> Community {
let mut best_community = prev_community;
let mut best_gain = 0.0;
self.graph
.adjacent_nodes(node)
// TODO: explain difference between isolated nodes and
// singleton communities in docs.
.expect("Graph cannot contain isolated nodes, bug in Louvain Level algorithm")
.iter()
.for_each(|&adj_node| {
let target_community = self.modularity.assignment.get_community(adj_node);
let gain = self.modularity.gain(node, target_community);
if gain > best_gain {
best_community = target_community;
best_gain = gain;
}
});
best_community
}
/// Phase II of Louvain: Community Aggregation
/// Aggregates the current graph into a new graph, where each node
/// represents a community from the previous graph.
///
/// In the newly created, weighted graph:
/// - nodes are communities from Phase I (intuition: vertices from the
/// previous graph get merged into "super-vertices")
/// - edge weights are the sum of weights of edges between communities
/// - an edge within a community becomes two self-loops
pub fn get_next_level_graph(&mut self) -> LouvainGraph {
let num_communities = self.modularity.assignment.renumber_communities();
let mut next_graph = LouvainGraph::new(num_communities);
for (node, adj_node, weight) in self.graph.edges() {
let source_community = self.modularity.assignment.get_community(node);
let other_community = self.modularity.assignment.get_community(adj_node);
// Edges inside communities become self-loops in the next graph.
// The weights of these edges should be counted twice to make sure that
// m = 1/2 \sum_i,j A_ij is constant across the multiple graphs.
// We therefore need to double the diagonal matrix elements of the next graph
// corresponding to the weights of self-loops.
// However, self-loops that were self-loops in the current graph already
// do not fall under this reasoning, as they are not counted twice
// for each order of arguments in the sum.
let mut new_weight = weight;
if (source_community == other_community) && (node != adj_node) {
new_weight *= 2.0;
}
let adj_edges = next_graph.adjacent_edges(source_community);
if adj_edges.is_some() && adj_edges.unwrap().contains_key(&other_community) {
// Edge already in the next graph -> only adjust weight
// (There was already one node in this community connected
// to a node in the other community.)
next_graph.increase_edge_weight(source_community, other_community, new_weight);
} else {
// Edge not yet in the next graph -> Insert it
// Communities become the new vertices
next_graph.insert_edge(source_community, other_community, new_weight);
}
}
next_graph
}
}