In [1]:
from microagg1d.wilber import *
from microagg1d.wilber import _wilber

In [2]:
import numpy as np

In [3]:
from microagg1d.smawk_iter import _smawk_iter

In [4]:
@jitclass([
    ('cumsum', float64[:]),
    ('cumsum2', float64[:]),
    ('k', int64),
    ("F_vals", float64[:]),
    ("G", float64[:, :]),
    ("SMALL_VAL", float64),
    ("LARGE_VAL", float64),
])
class MicroaggWilberCalculator_edu:
    """Educational microagg calculator that logs every evaluated entry.

    Each call to ``calc`` prints its arguments and stores the value it
    returns in the square matrix ``G``; entries never evaluated keep
    their initial value of -1.
    """

    def __init__(self, cumsum, cumsum2, k, F_vals):
        """Store the inputs and derive the penalty constants.

        ``G`` gets one row and one column per ``len(cumsum) - 1``.
        ``SMALL_VAL`` is the objective over the range ``0 .. n-1`` plus
        one, and ``LARGE_VAL`` is ``SMALL_VAL * (1 + n)``.
        """
        size = len(cumsum) - 1
        self.cumsum = cumsum
        self.cumsum2 = cumsum2
        self.k = k
        self.F_vals = F_vals
        self.G = -np.ones((size, size))
        self.SMALL_VAL = calc_objective_upper_inclusive(cumsum, cumsum2, 0, size - 1) + 1
        self.LARGE_VAL = self.SMALL_VAL * (1 + size)

    def calc(self, j, i):  # i <-> j interchanged is not a bug!
        """Return the entry for column ``j`` / row ``i`` and record it in ``G[i, j]``.

        * ``j < i``: infinity.
        * span ``j + 1 - i`` below ``k``: ``LARGE_VAL + SMALL_VAL * i``.
        * span above ``2 * k - 1``: ``LARGE_VAL - SMALL_VAL * i``.
        * otherwise: the objective of the range ``i .. j`` plus ``F_vals[i]``.
        """
        print(j, i)
        if j < i:
            self.G[i, j] = np.inf
            return np.inf

        span = j + 1 - i
        if span < self.k:
            # Range too short: penalty grows with the row index.
            too_short = self.LARGE_VAL + self.SMALL_VAL * i
            print("A", i, j, too_short)
            self.G[i, j] = too_short
            return too_short
        if span > 2 * self.k - 1:
            # Range too long: penalty shrinks with the row index.
            too_long = self.LARGE_VAL - self.SMALL_VAL * i
            print("B", i, j)
            self.G[i, j] = too_long
            return too_long

        cost = calc_objective_upper_inclusive(self.cumsum, self.cumsum2, i, j) + self.F_vals[i]
        self.G[i, j] = cost
        return cost

In [5]:
from microagg1d.wilber import _calc_objective
@jitclass([
    ('cumsum', float64[:, :]),
    ('cumsum2', float64[:, :]),
    ('k', int64),
    ("F_vals", float64[:]),
    ("SMALL_VAL", float64),
    ("LARGE_VAL", float64),
    ("cell_size", int64),
    ("G", float64[:, :]),
])
class StableMicroaggWilberCalculator_edu:
    """Educational calculator for Wilber's method built on cell-wise cumulative sums.

    Every value produced by ``calc`` is also written to the ``n x n``
    matrix ``G`` (initialised to -1), so the evaluated entries can be
    inspected afterwards.
    """

    def __init__(self, x, k, F_vals, cell_size):
        """Precompute cell-wise cumulative sums of ``x`` and ``x**2`` and the penalty constants."""
        squares = np.square(x)
        count = len(x)
        self.cumsum = calc_cumsum_cell(x, cell_size)
        self.cumsum2 = calc_cumsum_cell(squares, cell_size)
        self.k = k
        self.F_vals = F_vals
        self.cell_size = cell_size
        self.SMALL_VAL = _calc_objective(np.sum(x), np.sum(squares), count)
        self.LARGE_VAL = self.SMALL_VAL * (1 + count)
        self.G = -np.ones((count, count))

    def calc(self, j, i):  # i <-> j interchanged is not a bug!
        """Return the entry for column ``j`` / row ``i`` and record it in ``G[i, j]``.

        Both indices must lie in ``[0, G.shape[0])``. ``j < i`` yields
        infinity, a span ``j + 1 - i`` below ``k`` or above ``2 * k - 1``
        yields a penalty built from ``LARGE_VAL`` and ``SMALL_VAL``, and
        otherwise the cell-based objective of ``i .. j`` plus
        ``F_vals[i]`` is returned.
        """
        bound = self.G.shape[0]
        assert i < bound
        assert i >= 0
        assert j < bound
        assert j >= 0
        if j < i:
            self.G[i, j] = np.inf
            return np.inf

        span = j + 1 - i
        if span < self.k:
            too_short = self.LARGE_VAL + self.SMALL_VAL * i
            self.G[i, j] = too_short
            print(i, j, self.G[i, j])
            return too_short
        if span > 2 * self.k - 1:
            too_long = self.LARGE_VAL - self.SMALL_VAL * i
            self.G[i, j] = too_long
            print(i, j, self.G[i, j])
            return too_long

        cost = calc_objective_cell(self.cumsum, self.cumsum2, self.cell_size, i, j) + self.F_vals[i]
        self.G[i, j] = cost
        print(i, j, self.G[i, j])
        return cost

In [6]:
#@njit()
def __galil_park(n, wil_calculator):
    """Experimental, heavily instrumented draft of a column-minima sweep.

    Repeatedly calls ``_smawk_iter`` on the columns ``[c, p)`` and the rows
    ``[r, c]``, writes the resulting costs into ``wil_calculator.F_vals``,
    prints intermediate state and returns the index array ``F``.

    NOTE(review): the original docstring (copied from the Wilber
    implementation) described this as an O(n) solver for univariate
    microaggregation following Wilber 1987. The function name suggests a
    Galil-Park style variant instead, and the body looks unfinished (see
    the review notes below) -- confirm before relying on it.

    Parameters
    ----------
    n : int
        Length of the index arrays allocated here.
    wil_calculator
        Object providing ``F_vals`` and ``calc(j, i)``.

    Returns
    -------
    numpy.ndarray
        The int32 array ``F`` filled in by ``_smawk_iter``.
    """
    F = -np.ones(n, dtype=np.int32)
    F_vals = wil_calculator.F_vals
    # E / E_vals are only written and printed in this function, never read
    # for a decision.
    E = -np.ones(n, dtype=np.int32)
    E_vals = -np.ones(n+1, dtype=np.float64)
    E_vals[0]=0
    # NOTE(review): c starts at 1 although F_vals[0] and F_vals[1] are not
    # set here -- presumably the caller is expected to provide them; confirm.
    c = 1 # columns [0,c] have correct F_vals
    r = 0 # rows [r,c] may contain column minima


    while c < n:
        # Look ahead to column p (exclusive), capped at n.
        p = min(2*c-r+1, n)
        print("p", p)
        print("F_input", r, c, c, p+1)
        _smawk_iter(np.arange(c, p), np.arange(r, c+1), wil_calculator, F)
        #print("F", F)
        # Turn the row indices found for columns [c, p) into costs.
        for j in range(c, p):
            F_vals[j+1] = wil_calculator.calc(j, F[j])

        E_vals[c] = F_vals[c]
        print(c)
        print("F", F)
        print("F_vals", F_vals)
        print("E_vals", E_vals)

        #print("H", c+1, p, c+1,p)
        #_smawk_iter(np.arange(c+1, p), np.arange(c+1, p), wil_calculator, H)
        #for j in range(c+1, p):
        #    H_vals[j+1] = wil_calculator.calc(j, H[j])

        # Pre-set j so it is defined even when the range below is empty.
        j=c+1
        for j in range(c+1, p):
            if wil_calculator.calc(j-1, j) < F_vals[j]:
                E_vals[j] = wil_calculator.calc(j-1, j)
                # Need to assign actual column of E
                break
            else:
                E[j] = F[j]
                # NOTE(review): F holds int32 indices, so this compares a cost
                # with an index (F_vals[p] may have been intended), and F[p]
                # is out of range whenever p == n -- confirm.
                if wil_calculator.calc(j-1, p) < F[p]:
                    # NOTE(review): N is never assigned in this function;
                    # unless a global N exists this line raises NameError.
                    N[j+1:p]=F[j+1:p]
                    print("Assigning", F[j+1:p])
                    break

        # NOTE(review): j is either c+1 or a value from range(c+1, p), so this
        # condition appears to always hold and the else branch looks
        # unreachable -- confirm the intended test.
        if j<=p+1: # we were right all along
            # F_vals up to p (inclusive) are correct
            c = j+1
            r = j-1
        else: # our guessing strategy failed
            #F_vals[j0] = H_vals[j0]
            c = p+1
            r = max(r, F[p])
        print()


    return F

In [7]:
import time

In [72]:
#@njit()
def __galil_park2(n, wil_calculator):
    """Second, instrumented draft of the column-minima sweep used by ``_galil_park``.

    Each pass of the outer loop calls ``_smawk_iter`` on the columns
    ``[c, p)`` and rows ``[r, c]``, stores the resulting costs in
    ``wil_calculator.F_vals``, overwrites parts of ``wil_calculator.G``
    with -2, prints the matrix several times and sleeps 0.3 seconds.

    NOTE(review): the original docstring (copied from the Wilber
    implementation) described this as an O(n) solver for univariate
    microaggregation following Wilber 1987; whether this draft achieves
    that is not established by the code here -- confirm.

    Parameters
    ----------
    n : int
        Length of the index arrays allocated here.
    wil_calculator
        Object providing ``F_vals``, the matrix ``G`` and ``calc(j, i)``.

    Returns
    -------
    numpy.ndarray
        The int32 index array ``F``.
    """
    # np.empty leaves F uninitialised; entries are filled by _smawk_iter
    # and by the fix-ups below.
    F = np.empty(n, dtype=np.int32)
    F_vals = wil_calculator.F_vals
    # NOTE(review): H and H_vals are allocated but only referenced from
    # commented-out code.
    H = np.empty(n, dtype=np.int32)
    H_vals = np.empty(n+1, dtype=np.float64)
    # N / N_vals carry index/cost candidates saved by an earlier pass
    # (written in the "B" branch below); N_vals starts at +inf so that the
    # first pass always prefers freshly computed values.
    N = np.empty(n, dtype=np.int32)
    N_vals =  np.inf * np.ones(n+1, dtype=np.float64)
    F_vals[0]=0
    c = 0 # columns [0,c] have correct F_vals
    r = 0 # rows [r,c] may contain column minima


    while c < n:
        # Look ahead to column p (exclusive), capped at n.
        p = min(2*c-r+1, n)
        assert r>=0
        assert c >=r
        assert p>c
        print("F_input r c+1, c, p", r, c+1, c, p)
        _smawk_iter(np.arange(c, p), np.arange(r, c+1), wil_calculator, F)
        #print("F", F)
        # Keep the smaller of the fresh value and the saved candidate.
        for j in range(c, p):
            val = wil_calculator.calc(j, F[j])
            if val < N_vals[j+1]: 
                F_vals[j+1] = val
            else:
                F_vals[j+1] = N_vals[j+1]
                F[j] = N[j]

        #print("H", c+1, p, c+1,p)
        #_smawk_iter(np.arange(c+1, p), np.arange(c+1, p), wil_calculator, H)
        # Overwrite entries of G with -2 around the chosen row of each
        # column -- presumably to visualise eliminated entries; confirm.
        for j in range(c+1, p+1):
            #print("H")
            wil_calculator.G[:F[j-1],j-1:]=-2
            #wil_calculator.G[:F[j-1]-1,j-1]=-2
            wil_calculator.G[F[j-1]+1:c+1,j-1]=-2
            #wil_calculator.G[j,j]=-1
            pass

        # NOTE(review): G is printed twice in a row with no live statement
        # in between (only commented-out code separates the two dumps).
        with np.printoptions(linewidth=200, precision=3, suppress=True):
            print(wil_calculator.G)
        #wil_calculator.G[:F[c],c:]=-2
            #wil_calculator.G[:F[j-1]-1,j-1]=-2
        #wil_calculator.G[F[c]+1:c+1,j-1]=-2
        with np.printoptions(linewidth=200, precision=3, suppress=True):
            print(wil_calculator.G)
        # j0 == p+1 after the loop means no branch below broke out early.
        j0=p+1
        for j in range(c+2, p+1):
            print("loop", j, wil_calculator.F_vals[j])
            #print(F)
            #wil_calculator.G[:F[j-1],j-1:]=-2
            #wil_calculator.G[:F[j-1]-1,j-1]=-2
            #wil_calculator.G[F[j-1]+1:c+1,j-1]=-2
            with np.printoptions(linewidth=200, precision=3, suppress=True):
                print(wil_calculator.G)
            print("first", j, j, F_vals[j], wil_calculator.calc(j-1, j-1))
            # First test: entry (row j-1, column j-1) against the current
            # F_vals[j].
            if wil_calculator.calc(j-1, j-1) < F_vals[j]:
                # the H value considered was smaller, may not continue
                print("Abreak")

                F[j-1] = j-1
                wil_calculator.G[0:F[j-1],j-1:]=-2
                j0 = j
                F_vals[j0] = wil_calculator.calc(j-1, j-1)
                r = c
                assert r>0
                c = j0
                break

            #print(j, j+1, wil_calculator.G[j,j])
            wil_calculator.G[j-1,j-1]=-2

            with np.printoptions(linewidth=200, precision=3, suppress=True):
                print(wil_calculator.G)
            #val = wil_calculator.calc(j, p-1)

            print("second", j-1, p-1, F_vals[p], wil_calculator.calc(p-1, j-1))
            with np.printoptions(linewidth=200, precision=3, suppress=True):
                print(wil_calculator.G)
            # Second test: entry (row j-1, column p-1) against F_vals[p].
            if F_vals[p] <= wil_calculator.calc(p-1, j-1):
                print("A")
                wil_calculator.G[j-1,j-1:p]=-2

                # we did just eliminate row j entries c:p
                # => may continue as usual
                pass
            else:
                print("B")
                # need to break because it is not guaranteed that the following 
                # F values, (F[j+1:]) are correct as well, they might lie in row j
                wil_calculator.G[:j-1,p-1]=-2
                j0=j
                #F[j-1]=j
                #F_vals[j-1] = wil_calculator.calc(p-1, j-1)
                #F_vals[j0] = H_vals[j0]
                # Save the current guesses so the next pass can compare
                # against them.
                N[j-1:p+1] = F[j-1:p+1]
                N_vals[j-1:p] = F_vals[j-1:p]
                r = c+1
                assert r>0
                c = j
                break
            with np.printoptions(linewidth=200, precision=3, suppress=True):
                print(wil_calculator.G)
            #elif wil_calculator.calc(p, j) < F_vals[p]:
            #    print("break2")
            #    j0 = j
            #    break
        with np.printoptions(linewidth=200, precision=3, suppress=True):
            print(wil_calculator.G)
        print("j0 p", j0, p)
        if j0==p+1: # we were right all along


            # F_vals up to p (inclusive) are correct
            r = max(r, F[p-1])
            c = p
        else: # our guessing strategy failed
            print("else")
            # r and c were already updated in the branch that broke out of
            # the loop above; only a sanity check remains here.
            #F_vals[j0] = H_vals[j0]
            #r = c
            assert r>0
            #c = j0

        print("rc", r, c)
        # Pause on every pass -- presumably to keep the printed trace
        # readable while it scrolls; confirm before using on larger inputs.
        time.sleep(0.3)
    return F

In [73]:
#@njit([(float64[:], int64, int64)])
def _galil_park(v, k, stable=1):
    """Run ``__galil_park2`` on ``v`` with an educational calculator and print its trace.

    ``stable == 1`` selects ``StableMicroaggWilberCalculator_edu`` with a
    cell size of ``2 * k``; ``stable == 0`` selects
    ``MicroaggWilberCalculator_edu`` on plain cumulative sums. Any other
    value raises ``NotImplementedError``. After the run the calculator's
    matrix ``G`` and the raw result are printed, and the result is
    returned after ``relabel_clusters_plus_one``.
    """
    size = len(v)
    if not (stable == 1 or stable == 0):
        raise NotImplementedError("Only stable in (0,1) supported")

    if stable == 1:
        start_vals = -np.ones(size + 1, dtype=np.float64)
        calculator = StableMicroaggWilberCalculator_edu(v, k, start_vals, 2 * k)
    else:
        sums = calc_cumsum(v)
        square_sums = calc_cumsum(np.square(v))
        start_vals = -np.ones(size + 1, dtype=np.float64)
        calculator = MicroaggWilberCalculator_edu(sums, square_sums, k, start_vals)

    result = __galil_park2(size, calculator)
    with np.printoptions(linewidth=200, precision=3, suppress=True):
        print(calculator.G)
    print(result)
    return relabel_clusters_plus_one(result)

In [74]:
# Small float64 sample inputs used by the cells below.
arr = np.array([1.0, 1.0, 1.0, 5.0, 5.0, 5.0])
arr2 = np.array([1.1, 1.2, 5.1, 5.2])
arr3 = np.arange(0.0, 6.0)
arr4 = np.array([
    1.14374817e-04,
    2.73875932e-02,
    9.23385948e-02,
    1.46755891e-01,
    1.86260211e-01,
    2.04452250e-01,
    3.02332573e-01,
    3.45560727e-01,
    3.96767474e-01,
    4.17022005e-01,
    4.19194514e-01,
    5.38816734e-01,
    6.85219500e-01,
    7.20324493e-01,
    8.78117436e-01,
])

In [77]:
# Exercise the instrumented driver on arr3 with k=4 and stable=1.
_galil_park(arr3, 4, stable=1)

F_input r c+1, c, p 0 1 0 1
0 0 122.5
[[122.5  -1.   -1.   -1.   -1.   -1. ]
 [ -1.   -1.   -1.   -1.   -1.   -1. ]
 [ -1.   -1.   -1.   -1.   -1.   -1. ]
 [ -1.   -1.   -1.   -1.   -1.   -1. ]
 [ -1.   -1.   -1.   -1.   -1.   -1. ]
 [ -1.   -1.   -1.   -1.   -1.   -1. ]]
[[122.5  -1.   -1.   -1.   -1.   -1. ]
 [ -1.   -1.   -1.   -1.   -1.   -1. ]
 [ -1.   -1.   -1.   -1.   -1.   -1. ]
 [ -1.   -1.   -1.   -1.   -1.   -1. ]
 [ -1.   -1.   -1.   -1.   -1.   -1. ]
 [ -1.   -1.   -1.   -1.   -1.   -1. ]]
[[122.5  -1.   -1.   -1.   -1.   -1. ]
 [ -1.   -1.   -1.   -1.   -1.   -1. ]
 [ -1.   -1.   -1.   -1.   -1.   -1. ]
 [ -1.   -1.   -1.   -1.   -1.   -1. ]
 [ -1.   -1.   -1.   -1.   -1.   -1. ]
 [ -1.   -1.   -1.   -1.   -1.   -1. ]]
j0 p 2 1
rc 0 1
F_input r c+1, c, p 0 2 1 3
0 2 122.5
1 2 140.0
0 1 122.5
0 2 122.5
[[122.5 122.5 122.5  -1.   -1.   -1. ]
 [ -1.   -2.   -2.   -1.   -1.   -1. ]
 [ -1.   -1.   -1.   -1.   -1.   -1. ]
 [ -1.   -1.   -1.   -1.   -1.   -1. ]
 [ -1.   -1.   -1

array([0, 0, 0, 0, 0, 0])

In [12]:
from microagg1d.wilber import __wilber
def _wilber(v, k, stable=1):
    """Notebook-local replacement for ``_wilber`` that exposes the evaluated matrix.

    ``stable == 0`` runs ``__wilber`` with ``MicroaggWilberCalculator_edu``
    and prints the calculator's matrix ``G``; ``stable == 1`` runs
    ``__galil_park`` with ``StableMicroaggWilberCalculator`` (cell size
    ``k``). Any other value raises ``NotImplementedError``. Both paths
    return the result after ``relabel_clusters_plus_one``.
    """
    size = len(v)

    if stable == 1:
        # NOTE(review): this branch calls __galil_park with the non-_edu
        # calculator, while the other branch calls __wilber -- confirm
        # this asymmetry is intended.
        stable_calc = StableMicroaggWilberCalculator(v, k, -np.ones(size + 1, dtype=np.float64), k)
        raw = __galil_park(size, stable_calc)
        return relabel_clusters_plus_one(raw)

    if stable == 0:
        sums = calc_cumsum(v)
        square_sums = calc_cumsum(np.square(v))
        edu_calc = MicroaggWilberCalculator_edu(sums, square_sums, k, -np.ones(size + 1, dtype=np.float64))
        raw = __wilber(size, edu_calc)
        with np.printoptions(linewidth=200, precision=3, suppress=True):
            print(edu_calc.G)
        return relabel_clusters_plus_one(raw)

    raise NotImplementedError("Only stable in (0,1) supported")
# Exercise the notebook-local _wilber on arr3 with k=2 and stable=0.
_wilber(arr3, 2, stable=0)

0 0
A 0 0 232.0
2 0
2 1
1 0
2 0
2 2
A 2 2 290.0
4 0
B 0 4
4 1
B 1 4
3 0
B 0 3
3 1
4 1
B 1 4
4 2
3 0
B 0 3
3 2
3 2
3 3
A 3 3 319.0
6 2
B 2 6
6 3
B 3 6
4 2
4 3
6 2
B 2 6
6 3
B 3 6
5 2
B 2 5
5 3
3 2
4 2
5 3
6 3
B 3 6
5 4
5 5
A 5 5 377.0
6 5
6 6
A 6 6 406.0
5 4
5 5
A 5 5 377.0
6 4
6 4
6 5
4 4
A 4 4 348.0
5 4
6 4
6 4
6 5
6 4
6 6
A 6 6 406.0
6 4
[[232.    0.5   2.  232.  232.   -1.   -1. ]
 [ -1.   -1.  232.5 234.  203.   -1.   -1. ]
 [ -1.   -1.  290.    1.    2.5 174.  174. ]
 [ -1.   -1.   -1.  319.    2.5   4.  145. ]
 [ -1.   -1.   -1.   -1.  348.    1.5   3. ]
 [ -1.   -1.   -1.   -1.   -1.  377.    3. ]
 [ -1.   -1.   -1.   -1.   -1.   -1.  406. ]]


array([0, 0, 1, 1, 2, 2, 2])

In [64]:
# Print the text "red" (including the double quotes) between the ANSI
# escape sequences ESC[31m (red foreground) and ESC[0m (reset attributes).
print("\x1b[31m\"red\"\x1b[0m")

[31m"red"[0m
