In [1]:
from time import perf_counter
from time import time

import numpy as np
import pandas as pd
import scipy.sparse as scs
from matplotlib import pyplot as plt
from sklearn.utils.extmath import randomized_svd

In [78]:
def random_matrix(n, m, density, random_state=None):
    """Return a dense ``n`` x ``m`` array with randomly placed non-zero entries.

    The matrix is drawn with ``scipy.sparse.random`` and converted to a dense
    array with ``toarray()``.

    Parameters
    ----------
    n, m : int
        Number of rows and columns.
    density : float
        Fraction of non-zero entries, forwarded to ``scipy.sparse.random``.
    random_state : optional
        Seed or generator forwarded to ``scipy.sparse.random``. The default
        ``None`` keeps the previous, unseeded behaviour; pass an integer to
        make an experiment reproducible.

    Returns
    -------
    numpy.ndarray
        Dense array of shape ``(n, m)``.
    """
    A = scs.random(n, m, density=density, random_state=random_state)
    return A.toarray()


Rekurencyjna kompresja

In [72]:
class Node:
    """One block of a recursively partitioned matrix.

    A node describes the sub-matrix with rows ``t_min:t_max`` and columns
    ``s_min:s_max``. It is either a leaf (optionally carrying the factors
    ``U``, ``S``, ``V`` of a truncated SVD) or an internal node with four
    children.

    Attributes
    ----------
    t_min, t_max : int
        Row range of the block (``t_max`` exclusive).
    s_min, s_max : int
        Column range of the block (``s_max`` exclusive).
    rank : int or None
        Set by the tree builder on leaves; stays ``None`` on split nodes.
    U, S, V : array or None
        SVD factors of a compressed leaf. The rest of the notebook stores and
        reads the right factor under the name ``V``.
    zeros : bool
        ``True`` when the block contains only zeros.
    children : list
        Sub-blocks in the order ``[A11, A12, A21, A22]``; empty for leaves.
    """

    def __init__(self, t_min, t_max, s_min, s_max):
        # Row extent of the block.
        self.t_min = t_min
        self.t_max = t_max

        # Column extent of the block.
        self.s_min = s_min
        self.s_max = s_max

        self.rank = None

        # SVD factors; `V` is the name the tree builder and decompress() use.
        self.U = None
        self.S = None
        self.V = None

        self.zeros = False

        # Layout of the children:
        # [A11, A12]
        # [A21, A22]
        self.children = []

    @property
    def VT(self):
        """Backward-compatible alias of ``V``."""
        return self.V

    @VT.setter
    def VT(self, value):
        self.V = value
        
        


In [27]:
def is_admissible(matrix, S, r, eps):
    """Decide whether a block may be stored as a leaf instead of being split.

    A block is accepted when its smaller dimension is at most ``r + 1``, or
    when the last entry of ``S`` is strictly below ``eps``. ``S`` is only
    inspected if the size test fails.

    Returns
    -------
    bool
    """
    small_enough = min(matrix.shape) <= r + 1
    return bool(small_enough or S[-1] < eps)

In [73]:
def create_tree(matrix, r, eps):
    """Build a quadtree of low-rank blocks that approximates ``matrix``.

    Each block is handled as follows:

    * an all-zero block becomes a leaf with ``rank = 0`` and ``zeros = True``;
    * otherwise a randomized SVD with ``r + 1`` components is computed; if
      ``is_admissible`` accepts the block it becomes a leaf holding the
      factors ``U``, ``S``, ``V``, truncated to the singular values that are
      ``>= eps``;
    * otherwise the block is split at the midpoints of its row and column
      ranges and the four quadrants are processed recursively.

    Parameters
    ----------
    matrix : 2-D array
        Matrix to compress.
    r : int
        Rank parameter; ``r + 1`` singular triplets are requested per block.
    eps : float
        Singular values below this threshold are treated as negligible.

    Returns
    -------
    Node
        Root of the tree. On compressed leaves ``rank`` is the number of
        singular triplets actually stored, i.e. ``len(leaf.S)``.
    """
    n, m = matrix.shape

    def create_tree_r(t_min, t_max, s_min, s_max):
        # Slice once; the same view is tested, factorised and checked.
        block = matrix[t_min:t_max, s_min:s_max]
        v = Node(t_min, t_max, s_min, s_max)

        if not np.any(block):
            v.rank = 0
            v.zeros = True
            return v

        U, S, V = randomized_svd(block, r + 1)

        if is_admissible(block, S, r, eps):
            # Drop singular triplets below eps. This assumes S is sorted in
            # descending order (as SVD routines conventionally return it), so
            # the retained values form a prefix. U holds one column per
            # singular value and V one row, hence the different slicing axes.
            kept = int(np.count_nonzero(S >= eps))
            v.rank = kept
            v.U = U[:, :kept]
            v.S = S[:kept]
            v.V = V[:kept, :]
        else:
            t_mid = (t_min + t_max) // 2
            s_mid = (s_min + s_max) // 2

            # Children order: [A11, A12, A21, A22].
            v.children = [
                create_tree_r(t_min, t_mid, s_min, s_mid),
                create_tree_r(t_min, t_mid, s_mid, s_max),
                create_tree_r(t_mid, t_max, s_min, s_mid),
                create_tree_r(t_mid, t_max, s_mid, s_max),
            ]
        return v

    return create_tree_r(0, n, 0, m)

In [74]:
def decompress(node):
    """Rebuild the dense matrix represented by ``node`` and its subtree.

    * A node whose ``rank`` is ``None`` is an internal node: its four
      children (ordered ``[A11, A12, A21, A22]``) are decompressed and
      stitched together.
    * A leaf flagged ``zeros``, or one without stored factors, yields a zero
      array of the block's shape.
    * Any other leaf yields ``U @ diag(S) @ V``.

    Whether a leaf is a zero block is decided from ``zeros`` and the presence
    of factors, not from ``rank``: a leaf may carry factors while reporting
    ``rank == 0`` and must still be reconstructed.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(t_max - t_min, s_max - s_min)``.
    """
    if node.rank is None:
        return np.block(
            [
                [decompress(node.children[0]), decompress(node.children[1])],
                [decompress(node.children[2]), decompress(node.children[3])],
            ]
        )

    if node.zeros or node.U is None:
        return np.zeros((node.t_max - node.t_min, node.s_max - node.s_min))

    # Scaling the columns of U by S equals U @ diag(S) without materialising
    # the dense diagonal matrix.
    return (node.U * node.S) @ node.V

In [85]:
# Demo input: a 256 x 256 dense array with a quarter of its entries non-zero.
s = 1 << 8
a = scs.random(s, s, density=0.25).toarray()
print(a)

[[0.35910112 0.         0.         ... 0.         0.         0.48851481]
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.90117764 0.8770269 ]
 ...
 [0.         0.         0.         ... 0.08280862 0.         0.        ]
 [0.         0.05504324 0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.89723959 0.         0.        ]]


In [86]:
# Compress the demo matrix with rank parameter 1 and threshold 10**(-8).
tree = create_tree(matrix=a, r=1, eps=10 ** (-8))

In [87]:
# Round-trip check: the decompressed tree should match the input matrix
# within np.allclose's default tolerances.
print(np.allclose(a, decompress(node=tree)))

True


Pomiary czasu

In [91]:
# Time create_tree for every combination of density, matrix size, rank
# parameter and threshold. The thresholds are the largest, middle and smallest
# singular value of each generated matrix.
sizes = [2**i for i in range(6, 8)]
densities = [0.01, 0.05, 0.1, 0.2]
r = [1, 4]

records = []
for density in densities:
    for size in sizes:
        matrix = random_matrix(size, size, density)

        # Only the singular values are needed, so skip computing U and V.
        S = np.linalg.svd(matrix, compute_uv=False)
        singular_values = [S[0], S[size // 2], S[-1]]

        for b in r:
            for singular_value in singular_values:
                # perf_counter is the high-resolution clock meant for
                # measuring short durations.
                start = perf_counter()
                tree = create_tree(matrix, b, singular_value)
                end = perf_counter()
                print(density, size, b, singular_value, end - start)

                records.append(
                    {
                        "density": density,
                        "size": size,
                        "rank": b,
                        "eps": singular_value,
                        "seconds": end - start,
                    }
                )

# Keep the measurements in a table so later cells can aggregate or plot them.
results = pd.DataFrame(records)
        

0.01 64 1 1.291273001369248 0.015003204345703125
0.01 64 1 1.2902409154772732e-16 0.019003629684448242
0.01 64 1 3.639342498754476e-48 0.0180051326751709
0.01 64 4 1.291273001369248 0.002000093460083008
0.01 64 4 1.2902409154772732e-16 0.01000213623046875
0.01 64 4 3.639342498754476e-48 0.012002229690551758
0.01 128 1 1.785934515846019 0.0010004043579101562
0.01 128 1 0.35892861166374884 0.07301640510559082
0.01 128 1 1.9648713888404396e-28 0.08902120590209961
0.01 128 4 1.785934515846019 0.0020003318786621094
0.01 128 4 0.35892861166374884 0.03200721740722656
0.01 128 4 1.9648713888404396e-28 0.04501008987426758
0.05 64 1 2.31173910516653 0.0010001659393310547
0.05 64 1 0.7708983786184911 0.03400754928588867
0.05 64 1 1.414117222403896e-16 0.08701920509338379
0.05 64 4 2.31173910516653 0.0009999275207519531
0.05 64 4 0.7708983786184911 0.016003847122192383
0.05 64 4 1.414117222403896e-16 0.030006885528564453
0.05 128 1 3.9996890008700188 0.0010001659393310547
0.05 128 1 1.077810506642