/
probe_a_load.hpp
97 lines (92 loc) · 3.13 KB
/
probe_a_load.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#pragma once
#include <vector>
#include <cmath>
#include "data_structures/csr_matrix.hpp"
/**
* @brief This namespace contains required functions for
* the Probe a Load (PaL) partitioning algorithm. PaL partitioning
* is one of the algorithms that is used to solve
* mNC problem. Its dual solves the mLI problem
*/
#if defined(ENABLE_CPP_PARALLEL)
namespace sarma::probe_a_load {
#else
namespace sarma{
namespace probe_a_load {
#endif
/**
* @brief Implements the Probe a Load algorithm.
* This algorithm applies a greedy probe algorithm
* on diagonal direction by running binary searches
* over the possible range of maximum load targets.
*
* @param A Matrix
* @param P number of parts
* @tparam Ordinal data-type of indptr and indices structures in sarma::Matrix
* @tparam Value data-type of data structure in sarma::Matrix
* @return a cut vector
*/
template <typename Ordinal, typename Value>
std::vector<Ordinal> probe(const Matrix<Ordinal, Value> &A, const Value L, std::vector<Ordinal> *UB = nullptr) {
const auto sps = A.get_sps();
std::vector<Ordinal> p;
if (UB)
p.reserve(UB->size());
p.push_back(0);
while ((!UB && p.back() < sps.size()) || (UB && p.size() < UB->size())) {
auto l = p.back(), r = (UB ? (*UB)[p.size()] : sps.size());
while (l < r) {
const auto m = (l + 1) + (r - (l + 1)) / 2;
p.push_back(m);
const auto CL = std::max(sps.compute_maxload(p, p, p.size() - 1, true),
sps.compute_maxload(p, p, p.size() - 1, false));
if (CL <= L)
l = m;
else
r = m - 1;
p.pop_back();
}
p.push_back(l);
if (p.back() == p[p.size() - 2]) {
while(UB && p.size() < UB->size())
p.push_back(p.back());
break;
}
}
return p;
}
/**
* @brief Implements the dual of the Probe a Load (PaL) algorithm
* using Bound a Load procedure and applying PaL as the mNC algorithm.
* This algorithm applies a greedy probe algorithm
* on diagonal direction by running binary searches
* over the possible range of maximum load targets.
*
* @param A Matrix
* @param P number of parts
* @return a cut vector
*/
template <typename Ordinal, typename Value>
auto partition(const Matrix<Ordinal, Value> &A, const Ordinal P) {
const auto sps = A.get_sps();
const auto N = sps.size();
const auto M = sps.total_load();
auto l = (M - 1 + P * P) / P / P;
auto r = std::min(M, (M + P - 1) / P + N);
std::vector<Ordinal> UB(P + 1, N);
while (l + .5 < r) {
const auto m = l + (r - l) / 2;
const auto p = probe(A, m, &UB);
if (p.back() < N)
l = m + 1;
else {
r = m;
UB = p;
}
}
return probe(A, r, &UB);
}
}
#if !defined(ENABLE_CPP_PARALLEL)
} // nested namespace
#endif