## Notebook contains:

* Compare bellman ford to BF moore
* attempts for distance cost from the actual path
* Add emergency points only where necessary (dilation)
* Agglomerative clustering
* Build graph from clustered pos2node
* Watershed transform

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import os
import rasterio
import networkx as nx
import time
from graph_tool.all import *
import pickle

In [None]:
from power_planner.utils.utils import get_half_donut

In [None]:
path = "../../data/instance_belgium/tif_layers/Buildingftp.tif"
with rasterio.open(path, 'r') as ds:
    arr = ds.read()[0]

In [None]:
np.unique(arr)

In [None]:
IOPATH = "../data/data_dump_5.dat"
with open(IOPATH, "rb") as infile:
    (instance, instance_corr, start_inds, dest_inds) = pickle.load(infile)

In [None]:
plt.hist(instance.flatten())
plt.show()

In [None]:
emerg = np.zeros((20,20))
max_dist = 1.5
for row in np.arange(1,20,max_dist).astype(int):
    emerg[row, np.arange(1,20,max_dist).astype(int)] = 1

### slow version dist arr

In [None]:
start_inds = np.asarray([163, 24])
dest_inds = np.asarray([94, 240])

In [None]:
dist_arr = np.zeros(instance.shape)
norm = np.linalg.norm(dest_inds-start_inds)
for i in range(len(dist_arr)):
    for j in range(len(dist_arr[0])):
        p3 = np.asarray([i,j])
        dist_arr[i,j] = np.abs(np.cross(dest_inds-start_inds, start_inds-p3)) / norm

In [None]:
plt.imshow(dist_arr)
plt.show()

### fast version for dist arr, but rotation padding problem

In [None]:
from scipy.ndimage import rotate

In [None]:
size = np.linalg.norm(dest_inds-start_inds)
size = int(size)
arr = np.zeros((size,size))
arr[:,:size//2] = np.stack([np.arange(size//2) for _ in range(size)])
arr[:,size//2:] = np.vstack([np.arange(size//2,0,-1).tolist() for _ in range(size)])
ang = 90 + 90 * angle([0,1], dest_inds-start_inds)/np.pi
w,h = instance.shape
dist_arr = np.pad(rotate(arr, ang), ((0, w-size), (0,h-size)))

## construct constraint

In [None]:
start_inds = [163, 24]
dest_inds = [94, 240]
start_dest_inds = np.array([start_inds, dest_inds])
inter_line = start_dest_inds[0]-start_dest_inds[1]
longer = np.argmin(np.abs(inter_line))

padding = [0,0]
percent_padding = 0.25
padding[longer] = abs(int(percent_padding*inter_line[longer]))

start_x, start_y = np.min(start_dest_inds, axis=0) - np.asarray(padding)
end_x, end_y = np.max(start_dest_inds, axis=0) + np.asarray(padding)

corr = np.zeros(instance.shape)
corr[start_x:end_x, start_y:end_y] = 1

In [None]:
hard_cons = corr * instance_corr # hard_constraints

In [None]:
max_dist = 10
w_inds = np.arange(start_x, end_x, max_dist).astype(int)
h_inds = np.arange(start_y, end_y, max_dist).astype(int)

# max_cost = np.max(costs)
for row in w_inds:
    hard_cons[row, h_inds] = 1
    # costs[row, h_inds] = max_cost


In [None]:
padding

In [None]:
start_x, start_y

In [None]:
plt.figure(figsize=(20,10))
plt.imshow(hard_cons) # possible)
plt.show()

## Test algorithms on instance

In [None]:
INP = "../../outputs/path_70055.weighted.edgelist"

In [None]:
g = nx.read_edgelist(
                INP,
                nodetype=float,
                create_using=nx.DiGraph,
                data=(('weight', float), )
            )

In [None]:
vec = np.linalg.norm([-173, 539])

In [None]:
cutoff = 4*vec/15
cutoff

In [None]:
from collections import deque


class SP():
    """Shortest-path experiments on a weighted graph.

    The wrapped ``graph`` is expected to offer the networkx graph interface
    used below: ``edges(data=True)``, ``is_directed()``, ``succ`` / ``adj``
    and membership tests. Edge data dicts carry the cost under ``"weight"``.
    """

    def __init__(self, graph):
        self.graph = graph

    def bellman_ford_my(self, source, target, cutoff):
        """
        Actual BF algorithm, not SPFA

        Relaxes every edge of the graph in up to ``int(cutoff)`` rounds and
        then walks the predecessor map back from ``target``.

        Parameters
        ----------
        source, target : node
            Start and end node of the requested path.
        cutoff : number
            Maximum number of relaxation rounds (truncated with ``int``).

        Returns
        -------
        list
            Nodes from ``source`` to ``target``; each node appears once.

        Raises
        ------
        KeyError
            If ``target`` was not reached within the given number of rounds.
        """
        inf = float('inf')
        pred = {}
        dist = {source: 0}

        for _ in range(int(cutoff)):
            updated = False
            for (u, v, w_dict) in self.graph.edges(data=True):
                w = w_dict["weight"]
                dist_u = dist.get(u, inf)
                if dist_u + w < dist.get(v, inf):
                    dist[v] = dist_u + w
                    pred[v] = u
                    updated = True
            # A round without any update means later rounds cannot change
            # anything either, so the result is already final.
            if not updated:
                break

        if target != source and target not in pred:
            raise KeyError(
                f"Target {target} not reached from source {source} "
                f"within {int(cutoff)} rounds"
            )

        # Walk back from the target. The loop itself appends the source as
        # its last step, so the source must not be appended a second time.
        path = [target]
        curr = target
        while curr != source:
            curr = pred[curr]
            path.append(curr)
        path.reverse()
        return path

    def bellman_ford_nx(
        self,
        source,
        target,
        pred=None,
        paths=None,
        dist=None,
    ):
        """Relaxation loop for Bellman–Ford algorithm.
        This is an implementation of the SPFA variant.
        See https://en.wikipedia.org/wiki/Shortest_Path_Faster_Algorithm
        SEE https://github.com/networkx/networkx/blob/02a1721276b3a84d3be8558e4
        79a9cb6b0715488/networkx/algorithms/shortest_paths/weighted.py#L1203

        Parameters
        ----------
        source : node
            Single start node; must be contained in the graph.
        target : node
            Node whose path is returned.
        pred : dict, optional
            Node -> list of predecessors; created for ``source`` if omitted.
        paths : dict, optional
            If given, it is filled with the reconstructed paths; otherwise a
            new dict is used.
        dist : dict, optional
            Node -> best known distance; created for ``source`` if omitted.

        Returns
        -------
        (list, list)
            The node path from ``source`` to ``target`` and ``it_list``,
            which holds for every dequeued vertex the number of outgoing
            edges that were inspected (0 when its relaxation was skipped).

        Raises
        ------
        nx.NodeNotFound
            If ``source`` is not in the graph.
        KeyError
            If ``target`` has no predecessor entry, i.e. was never reached.

        Notes
        -----
        The negative-cycle counter of the networkx original is not part of
        this copy, so the loop does not terminate on a negative cost cycle.
        """
        G = self.graph
        if source not in G:
            raise nx.NodeNotFound(f"Source {source} not in G")

        if pred is None:
            pred = {source: []}
        if dist is None:
            dist = {source: 0}
        if paths is None:
            paths = {}
        paths[source] = [source]

        G_succ = G.succ if G.is_directed() else G.adj
        print("directed?", G.is_directed())
        inf = float('inf')

        q = deque([source])
        in_q = {source}
        it_list = []
        while q:
            u = q.popleft()
            in_q.remove(u)
            it_ind = 0

            # Skip relaxations if any of the predecessors of u is in the
            # queue: u will be improved again anyway. This is the regular
            # SPFA shortcut, not an error condition.
            if all(pred_u not in in_q for pred_u in pred[u]):
                dist_u = dist[u]
                for v, e in G_succ[u].items():
                    dist_v = dist_u + e.get("weight", 1)
                    if dist_v < dist.get(v, inf):
                        if v not in in_q:
                            q.append(v)
                            in_q.add(v)
                        dist[v] = dist_v
                        pred[v] = [u]
                    elif dist.get(v) is not None and dist_v == dist.get(v):
                        # Equally good alternative predecessor.
                        pred[v].append(u)
                    it_ind += 1

            it_list.append(it_ind)

        print("number of iterations:", len(it_list))

        dsts = [target] if target is not None else pred
        for dst in dsts:
            if dst not in pred:
                raise KeyError(
                    f"Target {dst} is not reachable from source {source}"
                )
            # Follow the first recorded predecessor back to the source,
            # whose predecessor list is empty.
            path = [dst]
            cur = dst
            while pred[cur]:
                cur = pred[cur][0]
                path.append(cur)
            path.reverse()
            paths[dst] = path

        return paths[target], it_list


In [None]:
g_undir = g.to_undirected()

In [None]:
print("Start shortest path")
sp = SP(g)
# bellman_ford_nx returns (path, it_list); unpack both so that the
# sanity-check cells further down can use it_list.
out, it_list = sp.bellman_ford_nx(43104, 8447)

## Test results:

* graph with 5.6 mio edges, 50 000 nodes
* directed case: 50757 iterations, so slightly more than the number of nodes --> so there are cycles after all --> maybe because of the 180 degrees thing? (15 sec)
* undirected case: 59042 iterations -- why not more? Probably each iteration visits more neighbors, which is why it takes longer (26 sec)


* my algorithm: 10 minutes! (for 150 iterations) --> basically 150 times the runtime of the other ones
* with directed edges, the other algorithm needs 5 837 055 inner (neighbor) iterations in total; undirected it needs 12 753 072, but that takes just about twice the time


* because of Skip relaxations if any of the predecessors of u is in the queue?

sanity check was correct: inner iteration (over neighbors) is twice as much for undirected as for directed

In [None]:
len(g.edges)

In [None]:
len(g.nodes)

### total iterations for directed / undirected:

In [None]:
print(50757*115, 59042*216)

### Sanity check

In [None]:
np.mean(it_list)

In [None]:
plt.hist(it_list)
plt.show()

In [None]:
np.mean(it_list)

In [None]:
plt.hist(it_list)
plt.show()

## Random graph tests

In [None]:
comp = nx.complete_graph(100)

In [None]:
len(comp.edges())

In [None]:
sp = SP(comp)
out, its = sp.bellman_ford_nx(0, 5) # , it_list

In [None]:
sum(its)

In [None]:
g_new = nx.DiGraph()
e = []
# for i in range(20):
#    e.append([i,i+1,{"weight":i}])
for i in range(1,4):
    e.append([0,i,{"weight":i}])
for i in range(1,4):
    e.append([i,5, {"weight":i}])
g_new.add_edges_from(e)

In [None]:
sp = SP(g_new)
# SP has no get_shortest_path_nx; the SPFA implementation is bellman_ford_nx,
# which returns (path, it_list).
out, its = sp.bellman_ford_nx(0, 5)

In [None]:
nx.write_weighted_edgelist(
                g_new,'test.weighted.edgelist'
            )

In [None]:
in_g_new = nx.read_edgelist(
                'test.weighted.edgelist',
                nodetype=int,
    create_using=nx.DiGraph,
                data=(('weight', float), )
            )

In [None]:
in_g_new_dir = nx.DiGraph(in_g_new)

In [None]:
in_g_new.out_edges()

In [None]:
g_new.edges()

In [None]:
len(comp.nodes()), len(comp.edges())

In [None]:
g1 = Graph()
g1.add_vertex(20)
g1.add_edge_list([[i,i+1] for i in range(19)])

In [None]:
len(list(g1.vertices())), len(list(g1.edges()))

In [None]:
len(list(comp.nodes())), len(list(comp.edges()))

## subplots for paths

In [None]:
a = np.zeros((500,1000))
a[200:260, 100:500] = np.arange(24000).reshape(60,400)
a = np.swapaxes(a,1,0)

In [None]:
b = a[:, np.any(a>0, axis=0)]
b = b[np.any(b>0, axis=1), :]

In [None]:
b = np.arange(20).reshape(2,10)
b.shape

In [None]:
x

In [None]:
plt.figure(figsize=(25,15))
for i in range(3):
    plt.subplot(1,3,i+1)
    plt.imshow(b)
    plt.axis('scaled')
plt.tight_layout()
plt.show()

## Summarize points

In [None]:
inst = instance_corr * instance
test = inst[:, np.any(inst > 0, axis=0)]
test = test[np.any(test > 0, axis=1), :]

In [None]:
plt.imshow(test)
plt.show()

In [None]:
# can put same node in several places of pos2node
# summarize pixels: with similar value?
# from image compression
# put gaussian smoothing on top? --> then take similar ones together?
# problem clustering: what about different resistance classes
# distance constraint? take highest or lowest distance to cluster
# different resistance classes = colour channels
# graph representation: make edges strong between similar values, then find cuts
# take position and costs together and do kmeans clustering

In [None]:
from PIL import Image

In [None]:
def reduce(img, scale_factor):
    """Downsample a 2D array by block averaging and return it transposed.

    The image is cut into non-overlapping ``scale_factor`` x ``scale_factor``
    patches (leftover rows/columns at the end are dropped) and every patch is
    replaced by its mean. The result has its two axes swapped.
    """
    n_rows = img.shape[0] // scale_factor
    n_cols = img.shape[1] // scale_factor
    averaged = np.zeros((n_rows, n_cols))
    for r, c in np.ndindex(n_rows, n_cols):
        top = r * scale_factor
        left = c * scale_factor
        block = img[top:top + scale_factor, left:left + scale_factor]
        averaged[r, c] = np.mean(block)
    # Axes are swapped on return, so the output shape is (n_cols, n_rows).
    return np.swapaxes(averaged, 1, 0)

inst = reduce(test, 20)
# inst = (inst>0.4).astype(int)

In [None]:
from scipy.ndimage.filters import gaussian_filter

import matplotlib.pyplot as plt

from sklearn.feature_extraction.image import grid_to_graph
from sklearn.cluster import AgglomerativeClustering

In [None]:
smoothened_coins = inst # gaussian_filter(inst, sigma=1)

X = np.reshape(smoothened_coins, (-1, 1))

# #############################################################################
# Define the structure A of the data. Pixels connected to their neighbors.
connectivity = grid_to_graph(*smoothened_coins.shape)

# #############################################################################
# Compute clustering
print("Compute structured hierarchical clustering...")
st = time.time()
n_clusters = 20  # number of regions
ward = AgglomerativeClustering(n_clusters=None, linkage='complete', affinity ="l1", distance_threshold=0.2,
                               connectivity=connectivity) # n_clusters=n_clusters,
ward.fit(X)
label = np.reshape(ward.labels_, smoothened_coins.shape)

In [None]:
connectivity

In [None]:
plt.figure(figsize=(15, 5))
plt.subplot(1,2,1)
plt.imshow(inst)
plt.subplot(1,2,2)
plt.imshow(label, interpolation='nearest', cmap=plt.cm.nipy_spectral) # , cmap=plt.cm.gray)
# for l in range(n_clusters):
#     plt.contour(label == l,
#                 colors=[plt.cm.nipy_spectral(l / float(n_clusters)), ])
plt.colorbar()
plt.show()



In [None]:
def clustering(inst, n_clusters, plot=True):
    """Cluster the pixels of a 2D array into spatially connected regions.

    Every pixel is one sample whose only feature is its value. Agglomerative
    clustering with complete linkage and euclidean distance is run on these
    samples, restricted by the grid connectivity from ``grid_to_graph``.

    Parameters
    ----------
    inst : 2D array
        Image / cost surface to cluster.
    n_clusters : int
        Number of clusters requested from ``AgglomerativeClustering``.
    plot : bool
        If True, show the input next to the resulting label image.

    Returns
    -------
    2D array of cluster labels with the same shape as ``inst``.
    """
    X = np.reshape(inst, (-1, 1))

    # Define the structure A of the data. Pixels connected to their neighbors.
    connectivity = grid_to_graph(*inst.shape)

    shared_args = dict(
        n_clusters=n_clusters, linkage='complete', connectivity=connectivity
    )
    # Newer scikit-learn versions call the distance argument ``metric`` and
    # no longer accept ``affinity``; older ones only know ``affinity``.
    try:
        model = AgglomerativeClustering(metric="euclidean", **shared_args)
    except TypeError:
        model = AgglomerativeClustering(affinity="euclidean", **shared_args)
    model.fit(X)
    label = np.reshape(model.labels_, inst.shape)

    if plot:
        plt.figure(figsize=(15, 5))
        plt.subplot(1, 2, 1)
        plt.imshow(inst)
        plt.subplot(1, 2, 2)
        plt.imshow(label, interpolation='nearest', cmap=plt.cm.nipy_spectral)
        plt.colorbar()
        plt.show()
    return label

In [None]:
x,y = instance_corr.shape

In [None]:
# split the image into several parts, compute clusters for each of them

In [None]:
pos2node = label  # for each pixel, the cluster label it belongs to
# TODO: new cost instance = mean of each patch?

In [None]:
for i in range(x):
    for j in range(y):
        # NOTE(review): this cell was left unfinished -- the condition
        # compares a value with itself and the body was never written.
        if instance_corr[i,j] == instance_corr[i,j]:
            pass  # TODO: fill in what should happen per pixel

In [None]:
import math
import numpy

def split_to_shape(a, chunk_shape, start_axis=0):
    """Recursively split an array into a nested list of chunks.

    Along every axis, starting at ``start_axis``, the array is divided with
    ``numpy.array_split`` into ``ceil(axis_length / chunk_length)`` sections,
    so chunks are at most ``chunk_shape`` large but need not be exactly that
    size. The nesting depth of the returned lists equals the number of axes
    that were split; the innermost elements are arrays.

    Raises ``ValueError`` if ``chunk_shape`` does not have one entry per axis.
    """
    n_axes = len(a.shape)
    if len(chunk_shape) != n_axes:
        raise ValueError('chunk length does not match array number of axes')

    # All axes handled: this is a finished chunk.
    if start_axis == n_axes:
        return a

    n_parts = math.ceil(a.shape[start_axis] / chunk_shape[start_axis])
    nested = []
    for piece in numpy.array_split(a, n_parts, axis=start_axis):
        nested.append(split_to_shape(piece, chunk_shape, start_axis + 1))
    return nested

full_split = split_to_shape(test, (3,3))
print({i2.shape for i in full_split for i2 in i})

In [None]:
n_vals = len(np.unique(test))
compress = 2

# Cluster every chunk of full_split separately and write the labels back
# into an array of the original shape. labs_start shifts the labels so that
# clusters of different chunks do not share a label.
# NOTE(review): labels start at 0, which is also the value of untouched
# (all-zero) chunks -- confirm whether 0 should be reserved for "no node".
out = np.zeros(test.shape)
labs_start = 0
x_done = 0
for row_splits in full_split:
    y_done = 0
    for col_splits in row_splits:
        x, y = col_splits.shape
        if np.any(col_splits):
            out_labs = 2  # fixed number of clusters per chunk
            labs = clustering(col_splits, out_labs, plot=False)
            out[x_done:x_done + x, y_done:y_done + y] = labs + labs_start
            labs_start += np.max(labs) + 1
        # Advance the column offset for every chunk, also for all-zero ones;
        # otherwise later chunks are written into the wrong window.
        y_done += y
    # x is the height of the chunks of this row (set by the inner loop).
    x_done += x

In [None]:
# plt.imshow(test)
# plt.colorbar()
# plt.show()
plt.figure(figsize=(20,10))
plt.imshow(out[99:102, 63:66])
plt.colorbar()
plt.show()

In [None]:
from power_planner.utils.utils import *

In [None]:
shifts = get_half_donut(3,5,[1,1])
# pos2node = out

In [None]:
shifts[0]

In [None]:
hard_constraints = np.zeros(out.shape)
hard_constraints[5:-5, 5:-5] = 1

In [None]:
prev_vertices = out[hard_constraints>0]

In [None]:
prev_vertices[:10]

In [None]:
costs_shifted = shift_surface(test, shifts[0])

In [None]:
weights = (costs_shifted + test)/2

In [None]:
nodes_shifted = shift_surface(pos2node.copy(), shifts[0])[hard_constraints>0]

In [None]:
print(len(prev_vertices), len(nodes_shifted))

In [None]:
weights_list = weights[hard_constraints>0]

In [None]:
inds_arr = np.asarray([prev_vertices, nodes_shifted, weights_list])
inds_arr.shape
# inds_weights = np.concatenate((inds_arr, weights_arr), axis=0)

In [None]:
import pandas as pd

In [None]:
# remove duplicates
df = pd.DataFrame(np.swapaxes(inds_arr, 1, 0), columns=["1", "2", "3"])

In [None]:
# df.drop_duplicates()
df = df[df["3"]>0] # cost greater zero --> cannot be both forbidden nodes

In [None]:
df = df[df["1"]!=df["2"]] # not from one node to itself

In [None]:
a, b = np.where(out==195)

In [None]:
df = df.groupby(["1","2"], as_index=False).agg({"3":"mean"})

In [None]:
grouped = np.array(df)

In [None]:
125*243

# Build graph

In [None]:
from power_planner.utils.utils import get_half_donut, shift_surface
import pandas as pd

In [None]:
pos2node = out.copy()
hard_constraints = (test>0).astype(int)
shifts = get_half_donut(3,5,[1,1])
max_cost = np.max(instance)
print(max_cost)
d = 2
tic = time.time()
# Emergency points become new nodes, so their ids have to start after the
# largest existing label; starting at the maximum itself would merge the
# first emergency point with an existing cluster.
emerg_counter = np.max(pos2node) + 1
print(emerg_counter)
# Wherever the window around a pixel contains no feasible pixel yet, make
# the pixel itself feasible at maximum cost and give it its own node id.
# NOTE(review): the window [i-d, i+d) is not symmetric around i -- confirm
# whether i+d+1 was intended.
for i in range(d, len(hard_constraints)-d):
    for j in range(d, len(hard_constraints[0])-d):
        if not np.any(hard_constraints[i - d:i + d, j - d:j + d]):
            hard_constraints[i, j] = 1
            test[i, j] = max_cost
            pos2node[i,j] = emerg_counter
            emerg_counter+=1

In [None]:
plt.imshow(pos2node)
plt.show()

In [None]:
g = Graph(directed=True)

In [None]:
start_vertex_ind = pos2node[100,25]
dest_vertex_ind = pos2node[20,240]

In [None]:
n_nodes

In [None]:
n_nodes = len(np.unique(pos2node))
_ = g.add_vertex(n_nodes)

In [None]:
weight = g.new_edge_property("float")

In [None]:
inds = pos2node>0
x_inds, y_inds = np.where(inds)
print(np.mean(x_inds), np.mean(y_inds))

np.mean(np.vstack([x_inds, y_inds]), axis = 1)

In [None]:
prev_vertices = pos2node[hard_constraints>0]
for shift in shifts[:1]:
    costs_shifted = shift_surface(test, shift)
    weights = (costs_shifted + test)/2
    nodes_shifted = shift_surface(pos2node.copy(), shift)[hard_constraints>0]
    weights_list = weights[hard_constraints>0]
    inds_arr = np.asarray([prev_vertices, nodes_shifted, weights_list])
    # print(inds_arr.shape)
    df = pd.DataFrame(np.swapaxes(inds_arr, 1, 0), columns=["1", "2", "3"])
    print(shift)
    print(df.head(20))
    df = df[df["3"]>0]
    df = df[df["2"]>0]
    df = df[df["1"]>0]
    df = df[df["1"]!=df["2"]]
    df = df.groupby(["1","2"], as_index=False).agg({"3":"sum"})
    # problem: only summing up the ones going to the same pair --> 163 to 195 can have low weights, but there can be 1000 points in cluster 163
    edges = np.array(df)
    g.add_edge_list(edges, eprops=[weight])

In [None]:
vertices_path, _ = shortest_path(
                g,
                g.vertex(start_vertex_ind),
                g.vertex(dest_vertex_ind),
                weights=weight,
                negative_weights=True
            )

In [None]:
# Mark, for every vertex on the path, the cheapest pixel of its cluster.
# The marker value is the (1-based) position of the vertex along the path.
path_map = np.zeros(out.shape)
for col, v in enumerate(vertices_path, start=1):
    v_ind = g.vertex_index[v]
    rows, cols_of_node = np.where(pos2node == v_ind)
    if rows.size == 0:
        # No pixel carries this node id; nothing to mark.
        continue
    # argmin returns the first minimum. It also works when every pixel of
    # the node costs 1 or more, where a fixed starting minimum of 1 would
    # select nothing and reuse the previous vertex's position.
    cheapest = np.argmin(test[rows, cols_of_node])
    path_map[rows[cheapest], cols_of_node[cheapest]] = col

In [None]:
plt.figure(figsize=(20,10))
plt.imshow(path_map)
plt.show()

In [None]:
test[:10,:10]

### Testing

In [None]:
df.head()

In [None]:
df[df["1"]==143]

In [None]:
t = np.where(out==143)

In [None]:
new_inds = (t[0]-3, t[1]+3)

In [None]:
for (i,j) in zip(new_inds[0], new_inds[1]):
    o = out[i,j]
    if o==195:
        print(weights[i,j])

## fill random points for empty areas

In [None]:
example = (test<1).astype(int).copy()

In [None]:
# quick fix: only better emergency points for parcels:
plt.figure(figsize=(20,10))
plt.imshow(example)
plt.show()

In [None]:
# in proportion to the number of pixels in this part
from scipy.ndimage import binary_dilation

# The dilated mask is read inside the loop below, so it has to be computed
# first (from the mask as it is before any points are added).
ex2 = binary_dilation(example, iterations=5)

dist = 3
x,y = example.shape
# Add a point wherever the dilated mask is set but the window around the
# pixel does not contain any point yet.
for i in range(dist,x-dist):
    for j in range(dist,y-dist):
        if ex2[i,j]:
            if not np.any(example[i-dist:i+dist, j-dist:j+dist]):
                example[i,j] = 1

## watershed segmentation

In [None]:
from scipy import ndimage as ndi

from skimage.segmentation import watershed
from skimage.feature import peak_local_max
from skimage import data, util, filters, color

In [None]:
def watershed_transform(img, n_clusters, compact=0.01):
    """Segment ``img`` with a seeded watershed and relabel the segments.

    Seeds are placed on a regular grid. Because seeds on pixels with
    ``img <= 0`` are discarded, the number of grid points is scaled up by
    the inverse of the fraction of positive pixels. The Sobel-filtered image
    and the seeds are passed to ``watershed``.

    Parameters
    ----------
    img : 2D array
        Input surface; pixels with value <= 0 are masked out of the result.
    n_clusters : number
        Requested number of seeds on the positive part of the image.
    compact : float
        Forwarded to ``watershed`` as ``compactness``.

    Returns
    -------
    transformed : int array, same shape as ``img``
        Segment ids renumbered to consecutive integers in sorted label
        order. Masked-out pixels share the smallest label (id 0 whenever
        the image contains at least one such pixel).
    nr_members : int array, same shape as ``img``
        For every pixel, the number of pixels carrying the same id.

    Notes
    -----
    An image without any positive pixel makes ``ratio_zero`` zero and the
    seed count infinite; that case is not handled here.
    """
    greater_zero = (img > 0).astype(int)
    edges = filters.sobel(img)

    # Fraction of pixels that are > 0 (despite the variable name).
    ratio_zero = np.sum(greater_zero) / greater_zero.size
    n_seeds = n_clusters / ratio_zero

    grid = util.regular_grid(img.shape, n_points=n_seeds)
    seeds = np.zeros(img.shape, dtype=int)
    seeds[grid] = np.arange(seeds[grid].size).reshape(seeds[grid].shape) + 1
    seeds = seeds * greater_zero
    print("number seeds then", np.sum(seeds > 0))

    w1 = watershed(edges, seeds, compactness=compact)

    # Shift labels by one and zero out the masked pixels.
    w1_g_zero = (w1 + 1) * greater_zero

    # One pass instead of one full-image comparison per label: the inverse
    # index of np.unique is exactly the position of each pixel's label in
    # the sorted label list, and counts gives the segment sizes.
    _, inverse, counts = np.unique(
        w1_g_zero.ravel(), return_inverse=True, return_counts=True
    )
    inverse = inverse.ravel()
    transformed = inverse.reshape(w1.shape).astype(int)
    nr_members = counts[inverse].reshape(w1.shape).astype(int)

    return transformed, nr_members

In [None]:
def transform_cols(arr):
    """Map every distinct value of a 2D label array to a random RGB colour.

    Values are truncated to integers and used as indices into a lookup
    table, so ``arr`` is expected to hold non-negative integer-like labels.
    Colours are drawn with ``np.random.rand``; seed the global NumPy RNG for
    reproducible output.

    Returns a float array of shape ``(x, y, 3)``.
    """
    uni = np.unique(arr)
    # Lookup table: label value -> index of that label in ``uni``.
    transformed = np.zeros(int(np.max(uni) + 1))
    for i, u in enumerate(uni):
        transformed[int(u)] = i
    cols = np.random.rand(len(uni), 3)
    x, y = arr.shape  # only 2D input is supported
    # Vectorised form of the per-pixel lookup cols[transformed[arr[i, j]]].
    colour_index = transformed[arr.astype(int)].astype(int)
    return cols[colour_index]


In [None]:
inst = instance_corr * instance
test = inst[:, np.any(inst > 0, axis=0)]
test = test[np.any(test > 0, axis=1), :]

In [None]:
# take weighted sum of different cost surfaces --> this is what needs to be similar in the end?
out, members = watershed_transform(test,5000)
cols = transform_cols(out)

In [None]:
plt.imshow(out)
plt.show()

In [None]:
plt.imshow(cols[90:100, 60:70])
plt.show()
# print(w0[90:100, 60:70])
plt.imshow(test[90:100, 60:70])
plt.show()

In [None]:
np.max(out), len(np.unique(out))

# Compare standard deviation

In [None]:
uni, cou = np.unique(members, return_counts=True)

In [None]:
plt.plot(cou[:-1])

In [None]:
print("no nodes now", len(np.unique(out)), "previously <0", np.sum(test>0))

In [None]:
def reduce(img, scale_factor):
    """Downsample a 2D array by block statistics; return (means, stds).

    The image is cut into non-overlapping ``scale_factor`` x ``scale_factor``
    patches (leftover rows/columns at the end are dropped). For every patch
    the mean and the standard deviation are stored. Both result arrays are
    returned with their two axes swapped.
    """
    n_rows = img.shape[0] // scale_factor
    n_cols = img.shape[1] // scale_factor
    patch_means = np.zeros((n_rows, n_cols))
    patch_stds = np.zeros((n_rows, n_cols))
    for r, c in np.ndindex(n_rows, n_cols):
        top = r * scale_factor
        left = c * scale_factor
        block = img[top:top + scale_factor, left:left + scale_factor]
        patch_means[r, c] = np.mean(block)
        patch_stds[r, c] = np.std(block)
    # Axes are swapped on return, so the output shape is (n_cols, n_rows).
    return np.swapaxes(patch_means, 1, 0), np.swapaxes(patch_stds, 1, 0)


In [None]:
red, stds = reduce(test, 2)

In [None]:
print("actual reduce function number >0:", np.sum(red>0))

In [None]:
members[members>1000] = 0
print(np.mean(members[members>0]))

In [None]:
np.sum(members)/5.5685

In [None]:
stds_out = []
for i, val in enumerate(np.unique(out)):
    if val==0:
        continue
    vals = test[out==val]
    stds_out.append(np.std(vals))

In [None]:
print(len(stds_out))

In [None]:
actual_stds = stds[red>0]

In [None]:
np.mean(actual_stds)

In [None]:
np.mean(stds_out)

In [None]:
plt.hist(actual_stds, bins = np.arange(0,0.25,0.01))
plt.show()

In [None]:
plt.hist(stds_out, bins = np.arange(0,0.25,0.01))
plt.show()

# Time of watershed

In [None]:
from power_planner.utils.utils import normalize

In [None]:
path_layers = "tif_layers"
cost_arr = []
for f in os.listdir(path_layers):
    if f[-3:]=="tif":
        with rasterio.open(path_layers+"/"+f, 'r') as ds:
            arr = ds.read()
        if arr.shape==(1, 1313, 1511):
            cost_arr.append(normalize(arr[0]))
cost_arr = np.array(cost_arr)

In [None]:
surface = normalize(np.sum(cost_arr, axis=0))

### Compute different scales

In [None]:
scale = 2

In [None]:
red5, _ = reduce(surface, scale)

In [None]:
len(np.unique(red5)) # much more because taken mean when reducing

In [None]:
plt.imshow(surface)
plt.show()

In [None]:
tic = time.time()
out, members = watershed_transform(surface, 250000, compact=0.01)
print("Time for scale", scale,":", round(time.time() - tic,3), "(shape:", red5.shape, ")")

### Results:

Seeds: 5000

* Time for scale 5 : 1.185 (shape: (302, 262) )
* Time for scale 4 : 1.165
* Time for scale 2 : 6.658 (shape: (755, 656) )
* Time for full : 30.538 (shape: (1313, 1511) )

Seeds: 10000

Time for scale 2 : 14.146 (shape: (755, 656) )

Seeds: 20000

Time for scale 2 : 25.454 (shape: (755, 656) )  (ACTUALLY: would be 500 000 vertices)

Seeds: 31000

Time for scale 2 : 41.408 (shape: (755, 656) )

Seeds: 40607

Time for scale 1 : 207.016 (shape: (755, 656) ) --> already 3 min (ACTUALLY 1.9 mio vertices)

Seeds 220 000 --> 1/8 th of all vertices are kept:

Time for scale 1: 1138 sec = 19 min

--> Seems to scale linearly with the number of seeds

In [None]:
surface.shape

In [None]:
1313 * 1511

In [None]:
print(np.sum(test>0)/len(test.flatten()))

In [None]:
302*262

In [None]:
n_seeds = 3000
grid = util.regular_grid(test.shape, n_points=n_seeds)

In [None]:
seeds = np.zeros(test.shape, dtype=int)
seeds[grid] = np.arange(seeds[grid].size
                                ).reshape(seeds[grid].shape) + 1
plt.figure(figsize=(20,10))
plt.imshow(new)
plt.show()

In [None]:
cluster_scale = 5
lab = 0
x_len, y_len = test.shape
new = np.zeros(test.shape)
for i in np.arange(0,x_len, cluster_scale):
    for j in np.arange(0,y_len, cluster_scale):
        if test[i,j]:
            new[i,j] = lab
            lab += 1