From 31bc5ce7261da17dde59797854d7affc9448caec Mon Sep 17 00:00:00 2001
From: l-kotzur
Date: Wed, 15 Sep 2021 09:39:41 +0200
Subject: [PATCH 1/6] remove .vscode from version control

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index f88d4b6..68f9bf1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
 # directories
 .idea/
+.vscode/
 __pycache__/
 trash/
 *.pytest_cache/

From 76aafe62247ebb4ce9d9b8f7349549612496c3e0 Mon Sep 17 00:00:00 2001
From: l-kotzur
Date: Wed, 15 Sep 2021 09:40:29 +0200
Subject: [PATCH 2/6] correct naming of the tests

---
 test/test_k_medoids.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/test_k_medoids.py b/test/test_k_medoids.py
index 2347042..4cc5ab0 100644
--- a/test/test_k_medoids.py
+++ b/test/test_k_medoids.py
@@ -8,7 +8,7 @@
 
 
 
-def test_hierarchical():
+def test_k_medoids():
 
     raw = pd.read_csv(os.path.join(os.path.dirname(__file__),'..','examples','testdata.csv'), index_col = 0)
 
@@ -35,4 +35,4 @@ def test_hierarchical():
 
 
 if __name__ == "__main__":
-    test_hierarchical()
\ No newline at end of file
+    test_k_medoids()
\ No newline at end of file

From c3cbb896486c743210c47feb0eeb8788d7659b69 Mon Sep 17 00:00:00 2001
From: l-kotzur
Date: Fri, 17 Sep 2021 22:29:07 +0200
Subject: [PATCH 3/6] restructure exact k_medoids and isolate model initialization

---
 tsam/utils/k_medoids_exact.py | 119 ++++++++++++++++++++++--------------
 1 file changed, 70 insertions(+), 49 deletions(-)

diff --git a/tsam/utils/k_medoids_exact.py b/tsam/utils/k_medoids_exact.py
index a3d8c26..4ff5384 100644
--- a/tsam/utils/k_medoids_exact.py
+++ b/tsam/utils/k_medoids_exact.py
@@ -129,69 +129,90 @@ def _k_medoids_exact(self, distances, n_clusters):
         n_clusters : int, required
             Number of clusters.
         """
-        # Create model
-        M=pyomo.ConcreteModel()
+
+        # Create pyomo model
+        M = _setup_k_medoids(distances, n_clusters)
-        # get distance matrix
-        M.d = distances
+        # And solve
+        r_x, r_y, r_obj = _solve_given_pyomo_model(M, solver=self.solver)
-        # set number of clusters
-        M.no_k = n_clusters
+        return (r_y, r_x.T, r_obj)
-        # Distances is a symmetrical matrix, extract its length
-        length = distances.shape[0]
+def _setup_k_medoids(distances, n_clusters):
+    '''Define the k-medoids model with pyomo.
+    In the spatial aggregation community, it is referred to as the Hess model for political districting,
+    with an additional constraint on cluster sizes/populations.
+    (W Hess, JB Weaver, HJ Siegfeldt, JN Whelan, and PA Zitlau. Nonpartisan political redistricting by computer. Operations Research, 13(6):998–1006, 1965.)
+    '''
+    # Create model
+    M=pyomo.ConcreteModel()
-        # get indices
-        M.i = [j for j in range(length)]
-        M.j = [j for j in range(length)]
+    # get distance matrix
+    M.d = distances
-        # initialize vars
-        M.z = pyomo.Var(M.i, M.j, within=pyomo.Binary)
-        M.y = pyomo.Var(M.i, within=pyomo.Binary)
+    # set number of clusters
+    M.no_k = n_clusters
+    # Distances is a symmetrical matrix, extract its length
+    length = distances.shape[0]
-        # get objective
-        def objRule(M):
-            return sum(sum(M.d[i,j] * M.z[i,j] for j in M.j) for i in M.i)
-        M.obj=pyomo.Objective(rule=objRule)
+    # get indices
+    M.i = [j for j in range(length)]
+    M.j = [j for j in range(length)]
-        # s.t.
-        # Assign all candidates to clusters
-        def candToClusterRule(M,j):
-            return sum(M.z[i,j] for i in M.i) == 1
-        M.candToClusterCon = pyomo.Constraint(M.j, rule=candToClusterRule)
+    # initialize vars
+    # Decision variable: assignment of every candidate to every other candidate as its cluster center
+    M.z = pyomo.Var(M.i, M.j, within=pyomo.Binary)
-        # no of clusters
-        def noClustersRule(M):
-            return sum(M.y[i] for i in M.i) == M.no_k
-        M.noClustersCon = pyomo.Constraint(rule=noClustersRule)
-        # cluster relation
-        def clusterRelationRule(M,i,j):
-            return M.z[i,j] <= M.y[i]
-        M.clusterRelationCon = pyomo.Constraint(M.i,M.j, rule=clusterRelationRule)
+    # get objective
+    # Minimize the distance of every candidate to its cluster center
+    def objRule(M):
+        return sum(sum(M.d[i,j] * M.z[i,j] for j in M.j) for i in M.i)
+    M.obj=pyomo.Objective(rule=objRule)
+    # s.t.
+    # Assign every candidate to exactly one cluster
+    def candToClusterRule(M,j):
+        return sum(M.z[i,j] for i in M.i) == 1
+    M.candToClusterCon = pyomo.Constraint(M.j, rule=candToClusterRule)
-        # create optimization problem
-        optprob = opt.SolverFactory(self.solver)
-        if self.solver =='gurobi':
-            optprob.set_options("Threads=" + str(self.threads) +
-                                " TimeLimit=" + str(self.timelimit))
+    # Predefine the number of clusters
+    def noClustersRule(M):
+        return sum(M.z[i,i] for i in M.i) == M.no_k
+    M.noClustersCon = pyomo.Constraint(rule=noClustersRule)
-        results = optprob.solve(M,tee=False)
-        # check that it does not fail
-        if self.solver=='gurobi' and results['Solver'][0]['Termination condition'].index == 11:
-            print(results['Solver'][0]['Termination message'])
-            return False
-        elif self.solver=='gurobi' and not results['Solver'][0]['Termination condition'].index in [2,7,8,9,10]: # optimal
-            raise ValueError(results['Solver'][0]['Termination message'])
+    # A candidate can only be assigned to a candidate that is itself chosen as cluster center
+    def clusterRelationRule(M,i,j):
+        return M.z[i,j] <= M.z[i,i]
+    M.clusterRelationCon = pyomo.Constraint(M.i,M.j, rule=clusterRelationRule)
+    return M
-        # Get results
-        r_x = np.array([[round(M.z[i,j].value) for i in range(length)]
-                        for j in range(length)])
+def _solve_given_pyomo_model(M, solver="glpk"):
+    """Solves a given pyomo clustering model and returns the clusters.
-        r_y = np.array([round(M.y[j].value) for j in range(length)])
+    Args:
+        M (pyomo.ConcreteModel): Concrete model instance that gets solved.
+        solver (str, optional): Solver used to solve the pyomo model. Defaults to "glpk".
-        r_obj = pyomo.value(M.obj)
+    Raises:
+        ValueError: If the solver does not terminate successfully.
+
+    Returns:
+        tuple: Cluster assignment matrix r_x, cluster center indicators r_y and the objective value r_obj.
+    """
+    # create optimization problem
+    optprob = opt.SolverFactory(solver)
+    results = optprob.solve(M,tee=False)
+    # check that it does not fail
+
+    # Get results
+    r_x = np.array([[round(M.z[i,j].value) for i in M.i]
+                    for j in M.j])
+
+    r_y = np.array([round(M.z[j,j].value) for j in M.j])
+
+    r_obj = pyomo.value(M.obj)
+
+    return (r_x, r_y, r_obj)
-        return (r_y, r_x.T, r_obj)

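Written out, the restructured model that _setup_k_medoids builds corresponds to the following integer program (a sketch in the patch's own symbols: d_ij is the distance matrix, k = no_k the number of clusters, and z_ij = 1 if candidate j is assigned to candidate i as its cluster center):

    \min \sum_{i}\sum_{j} d_{ij} \, z_{ij}                    % objRule
    \text{s.t.} \quad \sum_{i} z_{ij} = 1 \quad \forall j     % candToClusterCon
    \sum_{i} z_{ii} = k                                       % noClustersCon
    z_{ij} \le z_{ii} \quad \forall i, j                      % clusterRelationCon
    z_{ij} \in \{0, 1\}

The separate center variables M.y of the old formulation are gone; the diagonal entries z_ii now indicate whether candidate i is selected as a medoid, which is why noClustersRule and clusterRelationRule are expressed through M.z[i,i].
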
From 99fe11f0c7641f02e09a63e3f1e14c34c9fb25dc Mon Sep 17 00:00:00 2001
From: l-kotzur
Date: Fri, 17 Sep 2021 22:29:36 +0200
Subject: [PATCH 4/6] add k-medoids-contiguity with dynamic cut generation

---
 requirements.txt                   |   3 +-
 test/test_k_medoids_contiguity.py  |  77 +++++++++++++++++++
 tsam/utils/k_medoids_contiguity.py | 118 +++++++++++++++++++++++++++++
 3 files changed, 197 insertions(+), 1 deletion(-)
 create mode 100644 test/test_k_medoids_contiguity.py
 create mode 100644 tsam/utils/k_medoids_contiguity.py

diff --git a/requirements.txt b/requirements.txt
index dbbe8eb..ccb1572 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,5 @@
 sklearn>=0.0
 pandas>=0.18.1
 numpy>=1.11.0
-pyomo>=5.3
\ No newline at end of file
+pyomo>=5.3
+networkx
\ No newline at end of file

diff --git a/test/test_k_medoids_contiguity.py b/test/test_k_medoids_contiguity.py
new file mode 100644
index 0000000..6eb6760
--- /dev/null
+++ b/test/test_k_medoids_contiguity.py
@@ -0,0 +1,77 @@
+import os
+import time
+
+import pandas as pd
+import numpy as np
+
+from tsam.utils.k_medoids_exact import KMedoids
+from tsam.utils.k_medoids_contiguity import k_medoids_contiguity, _contiguity_to_graph
+
+# similarity between node 0, 1 and 2
+# 0===1
+#  \ /
+#   2
+DISTANCES = np.array([ [0,1,4],
+                       [1,0,5],
+                       [4,5,0],])
+
+# add the adjacency between node 0, 1 and 2
+# 0   1
+#  \ /
+#   2 -
+adjacency = np.array([ [1,0,1],
+                       [0,1,1],
+                       [1,1,1],])
+
+
+import networkx as nx
+
+def test_node_cuts():
+    """Tests the cutting of the nodes in the adjacency network
+    """
+    G = _contiguity_to_graph(adjacency)
+    cuts = list(nx.all_node_cuts(G))
+    # only one cut expected
+    assert len(cuts)==1
+
+
+
+def test_k_medoids_simple():
+    '''
+    Checks that k-medoids gives the expected results without the adjacency constraints
+    '''
+
+    n_clusters = 2
+    cluster_instance = KMedoids(n_clusters=n_clusters)
+    r_y, r_x, r_obj = cluster_instance._k_medoids_exact(DISTANCES, n_clusters)
+
+    labels_raw = r_x.argmax(axis=0)
+
+    # check that node 0 and node 1 are in the same cluster
+    assert labels_raw[0] == labels_raw[1]
+    # check that node 1 and node 2 are in a different cluster
+    assert labels_raw[1] != labels_raw[2]
+    # check that the error/objective is the distance between node 0 and 1
+    assert r_obj == 1
+
+
+
+def test_k_medoids_simple_contiguity():
+    '''
+    Checks if the adjacency constraint holds true
+    '''
+
+    n_clusters = 2
+    r_y, r_x, r_obj = k_medoids_contiguity(DISTANCES, n_clusters, adjacency)
+    labels_raw = r_x.argmax(axis=0)
+    # check that node 0 and node 2 are in the same cluster
+    assert labels_raw[0] == labels_raw[2]
+    # check that node 0 and node 1 are in a different cluster
+    assert labels_raw[0] != labels_raw[1]
+    # check that the error/objective is the distance between node 0 and 2
+    assert r_obj == 4
+
+
+if __name__ == "__main__":
+    test_k_medoids_simple()
+    test_k_medoids_simple_contiguity()
\ No newline at end of file

diff --git a/tsam/utils/k_medoids_contiguity.py b/tsam/utils/k_medoids_contiguity.py
new file mode 100644
index 0000000..0fbbca2
--- /dev/null
+++ b/tsam/utils/k_medoids_contiguity.py
@@ -0,0 +1,118 @@
+# -*- coding: utf-8 -*-
+
+import numpy as np
+import time
+import pyomo.environ as pyomo
+import pyomo.opt as opt
+import networkx as nx
+from tsam.utils.k_medoids_exact import _setup_k_medoids, KMedoids, _solve_given_pyomo_model
+
+
+#class KMedoids_contiguity(KMedoids):
+
+
+def k_medoids_contiguity(distances, n_clusters, adjacency, max_iter=500, solver='glpk'):
+    '''Declares a k-medoids model and iteratively adds cutting planes to enforce adjacency/contiguity.
+
+    The algorithm is based on: Oehrlein and Haunert (2017): A cutting-plane method for adjacency-constrained spatial aggregation
+    '''
+    # First transform the network to a networkx instance which is required for cut generation
+    G = _contiguity_to_graph(adjacency, distances=distances)
+
+    # check if inputs are correct
+    assert np.size(distances) == np.size(adjacency)
+
+    # and test for connectivity
+    if not nx.is_connected(G):
+        raise ValueError("The given adjacency matrix is not connected.")
+
+    # Initial setup of k medoids
+    M = _setup_k_medoids(distances, n_clusters)
+
+    M.adjacency = adjacency
+
+    # Add a constraint list for the cuts added later
+    M.cuts = pyomo.ConstraintList()
+
+
+    # Loop over the relaxed k-medoids problem and add cuts until every cluster is contiguous
+    _all_cluster_connected=False
+    _iter = 0
+    _cuts_added=[]
+    while not _all_cluster_connected and _iter < max_iter:
+        # first solve instance
+        t_presolve = time.time()
+        print(str(_iter) + " iteration: Solving instance")
+        r_x, r_y, obj = _solve_given_pyomo_model(M, solver="gurobi")
+        t_aftersolve = time.time()
+        print("Total distance: " + str(obj) + " with solving time: " + str(t_aftersolve - t_presolve))
+
+        candidates, labels = np.where(r_x == 1)
+        # assume that the resulting clusters are connected until proven otherwise
+        _all_cluster_connected=True
+        _new_cuts_added=[]
+        for label in np.unique(labels):
+            # extract the cluster
+            cluster = G.subgraph(np.where(labels == label)[0])
+            # Identify whether the cluster is contiguous, instead of validating the constraints as done by Validi and Oehrlein.
+            if not nx.is_connected(cluster):
+                _all_cluster_connected=False
+                # if not, add contiguity constraints based on c-v (Oehrlein) or a-b (Validi) separators
+                for candidate in cluster.nodes:
+                    # It is not clear in Validi and Oehrlein whether cuts shall be made between all cluster candidates or only between the center and the candidates. The latter does not converge for the test system, wherefore the first option is chosen.
+                    for node in cluster.nodes:
+                        # different to Validi et al. (2021) and Oehrlein and Haunert (2017), check first and only add contiguity constraints for the not connected candidates to increase performance
+                        if nx.node_connectivity(cluster, node, candidate) == 0:
+                            # check that the cut was not added so far for the cluster
+                            if (label, candidate, node) not in _cuts_added:
+                                # include the cut in the cut list
+                                _new_cuts_added.append((label,candidate,node))
+                                # Cuts to Separators - Appendix A Minimum-weight vertex separators (Oehrlein and Haunert, 2017)
+                                # Validi uses an own cut generator and Oehrlein falls back to a Java library, here we use simple max flow cutting
+                                # TODO: Check performance for large networks
+                                cut_set = nx.minimum_node_cut(G,node,candidate)
+                                # (Eq. 13 - Oehrlein and Haunert, 2017)
+                                M.cuts.add( sum( M.z[u, node] for u in cut_set) >= M.z[candidate, node] )
+                            else:
+                                raise ValueError("Minimal cluster/candidate separation (minimum cut) does not seem sufficient. Adding additional separators could help.")
+        # Total cuts
+        _cuts_added.extend(_new_cuts_added)
+        _iter += 1
+        t_afteradding = time.time()
+
+        print(str(len(_new_cuts_added)) + " contiguity constraints/cuts added, adding to a total number of " + str(len(_cuts_added)) + " cuts within time: " + str(t_afteradding - t_aftersolve))
+
+    labels = np.where(r_x == 1)
+
+    return (r_y, r_x.T, obj)
+
+
+
+
+def _contiguity_to_graph(adjacency, distances=None):
+    """Transforms an adjacency matrix to a networkx.Graph
+
+    Args:
+        adjacency (np.ndarray): 2-dimensional adjacency matrix
+        distances (np.ndarray, optional): If provided, delivers the distances between the nodes. Defaults to None.
+
+    Returns:
+        nx.Graph: Graph with every index as node name.
+    """
+    rows, cols = np.where(adjacency == 1)
+    G = nx.Graph()
+    if distances is None:
+        edges = zip(rows.tolist(), cols.tolist())
+        G.add_edges_from(edges)
+    else:
+        normed_distances=distances/np.max(distances)
+        weights=1-normed_distances
+        if np.any(weights<0) or np.any(weights>1):
+            raise ValueError("Weight calculation went wrong.")
+
+        edge_weights=weights[rows,cols]
+        edges = zip(rows.tolist(), cols.tolist(), edge_weights.tolist())
+        G.add_weighted_edges_from(edges)
+    return G
+
+

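To make the cut generation above more tangible, the following sketch walks through what the first iteration sees for the three-node example from test_k_medoids_contiguity.py (the cluster labels are assumed here; plain k-medoids pairs nodes 0 and 1, as test_k_medoids_simple asserts):

    import numpy as np
    import networkx as nx

    from tsam.utils.k_medoids_contiguity import _contiguity_to_graph

    adjacency = np.array([[1, 0, 1],
                          [0, 1, 1],
                          [1, 1, 1]])
    G = _contiguity_to_graph(adjacency)  # edges 0-2 and 1-2 (plus self-loops from the diagonal)

    # Without contiguity, nodes 0 and 1 form one cluster although they are not adjacent.
    labels = np.array([0, 0, 2])                    # assumed assignment from the relaxed solve
    cluster = G.subgraph(np.where(labels == 0)[0])  # nodes {0, 1} without a connecting edge
    print(nx.is_connected(cluster))                 # False -> a contiguity cut is generated
    print(nx.minimum_node_cut(G, 0, 1))             # {2}, the separator entering the cut
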
From 43d1074f8ba501571c41b10cabb7600e7766665f Mon Sep 17 00:00:00 2001
From: l-kotzur
Date: Fri, 17 Sep 2021 22:34:11 +0200
Subject: [PATCH 5/6] include solver

---
 tsam/utils/k_medoids_contiguity.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tsam/utils/k_medoids_contiguity.py b/tsam/utils/k_medoids_contiguity.py
index 0fbbca2..3d4b175 100644
--- a/tsam/utils/k_medoids_contiguity.py
+++ b/tsam/utils/k_medoids_contiguity.py
@@ -43,7 +43,7 @@ def k_medoids_contiguity(distances, n_clusters, adjacency, max_iter=500, solver=
         # first solve instance
         t_presolve = time.time()
         print(str(_iter) + " iteration: Solving instance")
-        r_x, r_y, obj = _solve_given_pyomo_model(M, solver="gurobi")
+        r_x, r_y, obj = _solve_given_pyomo_model(M, solver=solver)
         t_aftersolve = time.time()
         print("Total distance: " + str(obj) + " with solving time: " + str(t_aftersolve - t_presolve))

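With the solver argument now passed through to the solve call, the contiguity-constrained clustering can be used directly. A minimal usage sketch mirroring test_k_medoids_simple_contiguity (it assumes a MILP solver such as GLPK is installed and visible to pyomo):

    import numpy as np

    from tsam.utils.k_medoids_contiguity import k_medoids_contiguity

    distances = np.array([[0, 1, 4],
                          [1, 0, 5],
                          [4, 5, 0]])
    adjacency = np.array([[1, 0, 1],
                          [0, 1, 1],
                          [1, 1, 1]])

    r_y, r_x, r_obj = k_medoids_contiguity(distances, n_clusters=2, adjacency=adjacency, solver="glpk")
    labels = r_x.argmax(axis=0)   # cluster label per node
    print(labels, r_obj)          # nodes 0 and 2 end up in the same cluster, objective value 4
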
From 69eb9b24438f3f9aac32c92304b591efd960699a Mon Sep 17 00:00:00 2001
From: l-kotzur
Date: Fri, 17 Sep 2021 22:43:06 +0200
Subject: [PATCH 6/6] make a new version

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 21de831..02a2cb1 100644
--- a/setup.py
+++ b/setup.py
@@ -7,7 +7,7 @@
 setuptools.setup(
     name='tsam',
-    version='1.1.1',
+    version='1.1.2',
     author='Leander Kotzur, Maximilian Hoffmann',
     author_email='l.kotzur@fz-juelich.de, max.hoffmann@fz-juelich.de',
     description='Time series aggregation module (tsam) to create typical periods',