In [250]:
import numpy as np # type: ignore
import sys
import os

from scipy.linalg import qr # type: ignore
from scipy.linalg import svd # type: ignore
import numpy as np # type: ignore
import transforms # type: ignore

import time
import random

# Record which Python interpreter (and therefore which environment) the kernel is running.
print(sys.executable)

/opt/homebrew/Caskroom/miniconda/base/envs/rand_dec/bin/python


In [251]:
def my_matmul(A, B):
  """Multiply two 2-D arrays with an explicit triple loop.

  This is a deliberately naive reference implementation (pure-Python loops),
  kept as a slow counterpart to the `@` operator.

  Args:
    A: 2-D array of shape (n, m).
    B: 2-D array of shape (m, k).

  Returns:
    Array of shape (n, k) holding the product. The result dtype is at least
    float64 and becomes complex when either input is complex.

  Raises:
    ValueError: if the number of columns of A differs from the number of
      rows of B.
  """
  n, m = A.shape
  inner, k = B.shape
  if inner != m:
    # Without this check a B with extra rows would silently give a wrong product.
    raise ValueError(
      f'shape mismatch: A is {A.shape}, B is {B.shape}; '
      f'A.shape[1] must equal B.shape[0]'
    )
  # Promote so that complex inputs are not truncated by a real accumulator.
  C = np.zeros((n, k), dtype=np.result_type(A.dtype, B.dtype, np.float64))
  for i in range(n):
    for j in range(k):
      for l in range(m):
        C[i, j] += A[i, l] * B[l, j]
  return C

In [252]:
def matmul(A, B):
  """Return the matrix product of A and B using the `@` operator."""
  product = A @ B
  return product

In [253]:
def hadamard_projection(A, k, p=5):
    """Sketch A by transforming every row and keeping random columns.

    Each row of a private copy of A is passed through
    `transforms.fast_hadamard_transform`, then `k + p` distinct columns are
    sampled uniformly at random (capped at the number of columns of A).

    The input matrix is left untouched: the transform is applied to a copy,
    so repeated calls always start from the original A.

    NOTE(review): no random sign flip is applied before the transform, unlike
    the usual subsampled randomized Hadamard transform -- confirm whether that
    is intended.
    NOTE(review): the row length presumably has to be a power of two for
    `fast_hadamard_transform` -- confirm in the `transforms` module.

    Args:
        A: 2-D array of shape (m, n).
        k: target rank.
        p: oversampling; the sketch has min(k + p, n) columns.

    Returns:
        Array of shape (m, min(k + p, n)) with the selected transformed columns.
    """
    m, n = A.shape
    # Sampling without replacement cannot return more columns than exist.
    l = min(k + p, n)
    # astype() copies, so the caller's matrix is never modified; integer input
    # is promoted to floating point so transformed values are not truncated.
    Y = A.astype(np.result_type(A.dtype, np.float32))
    for i in range(m):
        Y[i, :] = transforms.fast_hadamard_transform(Y[i, :])
    random_cols = np.random.choice(n, l, replace=False)
    return Y[:, random_cols]

In [254]:
def randomized_subspace_iteration_with_hadamar(A, k, p=3, q=3):
    """Subspace iteration whose initial sketch comes from `hadamard_projection`.

    The sketch `hadamard_projection(A, k, p)` is orthonormalized with QR, then
    `q` rounds alternate multiplication by the conjugate transpose of A and by
    A, re-orthonormalizing with QR after each product.

    Args:
        A: 2-D array.
        k: target rank, forwarded to `hadamard_projection`.
        p: oversampling, forwarded to `hadamard_projection`.
        q: number of iteration rounds.

    Returns:
        The Q factor of the last QR factorization.
    """
    basis, _ = np.linalg.qr(hadamard_projection(A, k, p))
    A_adjoint = A.conj().T

    for _round in range(q):
        # Orthonormalize in the row space, then map back to the column space.
        row_basis, _ = np.linalg.qr(A_adjoint @ basis)
        basis, _ = np.linalg.qr(A @ row_basis)

    return basis

In [255]:
def randomized_subspace_iteration(A, k, p, q=3, is_custom_mul=False):
    """Subspace iteration started from a Gaussian sketch of A.

    A standard-normal test matrix with `k + p` columns is drawn from NumPy's
    global random state and multiplied by A; the product is orthonormalized
    with QR. Then `q` rounds alternate multiplication by the conjugate
    transpose of A and by A, with a QR factorization after each product.

    Args:
        A: 2-D array of shape (m, n).
        k: target rank.
        p: oversampling; the sketch has k + p columns.
        q: number of iteration rounds.
        is_custom_mul: when true, the initial sketch product is computed with
            `my_matmul` instead of the `@` operator.

    Returns:
        The Q factor of the last QR factorization.
    """
    _, n_cols = A.shape
    test_matrix = np.random.randn(n_cols, k + p)

    sketch = my_matmul(A, test_matrix) if is_custom_mul else A @ test_matrix
    basis, _ = np.linalg.qr(sketch)

    for _round in range(q):
        row_basis, _ = np.linalg.qr(A.conj().T @ basis)
        basis, _ = np.linalg.qr(A @ row_basis)

    return basis

In [256]:
def get_loss(A, Q):
  """Relative error of projecting A onto the column space spanned by Q.

  Computes ||Q Q^H A - A|| / ||A|| with `np.linalg.norm` defaults. The
  conjugate transpose is used so the projector is also correct for complex Q,
  matching the `.conj().T` used by the range finders in this notebook.

  Args:
    A: array to approximate.
    Q: array whose columns are assumed orthonormal (not checked here).

  Returns:
    The relative residual norm; 0.0 when A has zero norm (the residual is
    then zero as well, and 0/0 would otherwise yield nan).
  """
  norm_A = np.linalg.norm(A)
  if norm_A == 0:
    return 0.0
  residual = Q @ (Q.conj().T @ A) - A
  return np.linalg.norm(residual) / norm_A

In [257]:
# Default loss tolerance: used as the default `max_loss` of the check_loss_* helpers
# and of the rank-search functions defined in this notebook.
accuracy = 1e-8

In [258]:
def get_array_projections(A, seed=10):
    """Build unit-norm images of random Gaussian vectors under A.

    Despite its name, `seed` is the NUMBER of vectors to generate; it does not
    seed the random generator. Each vector is `A @ g` for a fresh
    standard-normal `g` drawn from NumPy's global random state, divided by its
    own norm.

    Args:
        A: 2-D array of shape (m, n).
        seed: how many normalized projections to produce.

    Returns:
        A list with `seed` arrays.
    """
    _, n_cols = A.shape

    def _unit_image():
        image = A @ np.random.randn(n_cols)
        return image / np.linalg.norm(image)

    return [_unit_image() for _ in range(seed)]

In [259]:
def get_random_loss(A, projections_arr, Q):
    """Largest residual norm left after projecting each probe vector onto Q.

    For every vector v in `projections_arr` the residual ||Q (Q^T v) - v|| is
    computed; the maximum over all vectors is returned. `A` is accepted but
    not used by this function.

    Args:
        A: unused.
        projections_arr: iterable of vectors to test.
        Q: matrix whose columns span the candidate subspace.

    Returns:
        The largest residual norm, or 0 when no residual exceeds 0.
    """
    worst = 0
    for vec in projections_arr:
        residual = np.linalg.norm(Q @ (Q.T @ vec) - vec)
        if residual > worst:
            worst = residual
    return worst
########## !!!!!!!!!!!!!!!!!!!!!!

In [260]:
def check_loss_rand(A, get_Q_func, projections_arr, k, p=3, q=3, max_loss = accuracy):
    """Tell whether a freshly built basis passes the random-projection test.

    Builds Q with `get_Q_func(A, k, p, q)` and compares the value returned by
    `get_random_loss` for `projections_arr` against `max_loss`.

    Returns:
        True when that loss is strictly below `max_loss`.
    """
    basis = get_Q_func(A, k, p, q)
    return get_random_loss(A, projections_arr, basis) < max_loss

In [261]:
def check_loss_k(A, get_Q_func, k, p=3, q=3, max_loss = accuracy):
    """Tell whether a freshly built basis reproduces A within tolerance.

    Builds Q with `get_Q_func(A, k, p, q)` and compares `get_loss(A, Q)`
    against `max_loss`.

    Returns:
        True when the loss is strictly below `max_loss`.
    """
    basis = get_Q_func(A, k, p, q)
    return get_loss(A, basis) < max_loss

In [262]:
def get_rank_binary_search(A, left, right, get_Q_func, p=5, q=3, max_loss = accuracy):
  """Bisect (left, right] for the smallest k accepted by `check_loss_k`.

  `right` is treated as a value that already passes and `left` as one that
  does not; the interval is halved until the two bounds are adjacent. Every
  probe builds a new basis through `check_loss_k`.

  Returns:
    The final upper bound of the interval.
  """
  lo, hi = left, right
  while hi - lo > 1:
    mid = (hi + lo) // 2
    passed = check_loss_k(A, get_Q_func, mid, p, q, max_loss)
    if passed:
      hi = mid
    else:
      lo = mid
  return hi

In [263]:
def get_rank(A, get_Q_func, p=5, q=3, debug=False, max_loss=accuracy):
  """Search for the smallest k whose basis reproduces A within `max_loss`.

  k is doubled (1, 2, 4, ...) until `get_loss(A, Q)` for
  `Q = get_Q_func(A, k, p, q)` drops below `max_loss`; the answer is then
  refined by `get_rank_binary_search` on the interval (k // 2, k].

  The basis is built once per step, so the loss printed in debug mode is the
  same one the stopping test uses.

  NOTE(review): the value returned is the parameter k, while the basis has
  k + p columns. In the recorded run of this notebook a matrix for which
  `np.linalg.matrix_rank` reports 45 yields 40 with p=5 -- confirm whether
  callers expect k or the numerical rank.

  Args:
    A: matrix to analyse.
    get_Q_func: callable `(A, k, p, q) -> Q`.
    p: oversampling passed to `get_Q_func`.
    q: iteration count passed to `get_Q_func`.
    debug: print k and the loss at every doubling step.
    max_loss: loss threshold.

  Returns:
    The k found by the binary search.
  """
  k = 1
  while True:
    Q = get_Q_func(A, k, p, q)
    loss = get_loss(A, Q)
    if debug:
      print(f'now k = {k}, now loss = {loss}')
    if loss < max_loss:
      return get_rank_binary_search(A, k // 2, k, get_Q_func, p, q, max_loss)
    k *= 2


In [264]:
def get_rand_rank(A, get_Q_func, p=5, q=3, seed=10, debug=False, max_loss=accuracy):
    """Rank search that judges a basis on a few random projections of A.

    `seed` normalized random projections are generated once with
    `get_array_projections`. k is doubled until `get_random_loss` for
    `Q = get_Q_func(A, k, p, q)` drops below `max_loss`, then the interval
    (k // 2, k] is bisected using the same random-projection criterion via
    `check_loss_rand`.

    The full `get_loss(A, Q)` is evaluated only in debug mode, so the normal
    path never forms the full residual of A.

    Args:
        A: matrix to analyse.
        get_Q_func: callable `(A, k, p, q) -> Q`.
        p: oversampling passed to `get_Q_func`.
        q: iteration count passed to `get_Q_func`.
        seed: number of random projections (forwarded to
            `get_array_projections`); not a random seed.
        debug: print k and the full loss at every doubling step.
        max_loss: loss threshold.

    Returns:
        The smallest k found by the bisection. As the basis has k + p columns,
        this is not necessarily the numerical rank of A.
    """
    projections_arr = get_array_projections(A, seed)
    k = 1
    while True:
        Q = get_Q_func(A, k, p, q)
        if debug:
            loss = get_loss(A, Q)
            print(f'now k = {k}, now loss = {loss}')
        if get_random_loss(A, projections_arr, Q) < max_loss:
            break
        k *= 2

    # Refine with the same cheap criterion that stopped the doubling phase.
    lo, hi = k // 2, k
    while hi - lo > 1:
        mid = (lo + hi) // 2
        if check_loss_rand(A, get_Q_func, projections_arr, mid, p, q, max_loss):
            hi = mid
        else:
            lo = mid
    return hi

In [265]:
# def get_rank(A, get_Q, p=5, q=3, max_loss = 1e-14):
#   k = 1
#   while True:
#     Q = get_Q(A, k, p, q)
#     loss = get_loss(A, Q)
#     print(f'now k = {k}, now loss = {loss}')
#     if loss > max_loss:
#       k *= 2
#     else:
#       return k


In [266]:
def random_rank_k_matrix(m, n, k, random_state=None):
    """Return an m x n matrix built as the product of two Gaussian factors.

    The result is `B @ C` with B of shape (m, k) and C of shape (k, n), both
    filled with standard-normal samples, so its rank is at most k.

    Args:
        m: number of rows.
        n: number of columns.
        k: inner dimension of the factorization.
        random_state: source of randomness. None uses NumPy's global random
            state. An object exposing `standard_normal` (for example a
            `np.random.Generator` or `np.random.RandomState`) is used
            directly. Anything else is passed to `np.random.default_rng` as
            a seed.

    Returns:
        ndarray of shape (m, n).
    """
    if random_state is None:
        sample = np.random.standard_normal
    elif hasattr(random_state, 'standard_normal'):
        sample = random_state.standard_normal
    else:
        sample = np.random.default_rng(random_state).standard_normal

    B = sample((m, k))
    C = sample((k, n))
    return B @ C

In [267]:
# Experiment parameters: k is the inner dimension of the synthetic test matrix.
# p (oversampling) and q (iteration rounds) equal the defaults of get_rank;
# no call visible in this notebook passes them explicitly.
k, p, q = 45, 5, 3

In [268]:
# Square test matrix, 2048 x 2048.
m = n = 2 ** 11

In [269]:
# Synthetic test matrix: product of an (m x k) and a (k x n) Gaussian factor.
A = random_rank_k_matrix(m=m, n=n, k=k)

In [270]:
def get_time(A, iteration):
    """Time one `get_rank(A, iteration)` call.

    Uses `time.perf_counter`, a monotonic high-resolution clock meant for
    measuring short intervals, instead of the wall clock.

    Args:
        A: matrix passed to `get_rank`.
        iteration: range-finder callable passed to `get_rank`.

    Returns:
        Tuple `(elapsed_seconds, rank)`.
    """
    time_start = time.perf_counter()
    rank = get_rank(A, iteration)
    time_finish = time.perf_counter()
    return (time_finish - time_start, rank)

In [271]:
def get_rand_time(A, iteration, seed=10):
    """Time one `get_rand_rank(A, iteration, seed=seed)` call.

    `seed` is forwarded by keyword: the third positional parameter of
    `get_rand_rank` is the oversampling `p`, so passing it positionally would
    silently change `p` instead of the number of random projections.

    Args:
        A: matrix passed to `get_rand_rank`.
        iteration: range-finder callable passed to `get_rand_rank`.
        seed: forwarded as `get_rand_rank`'s `seed` argument.

    Returns:
        Tuple `(elapsed_seconds, rank)`.
    """
    time_start = time.perf_counter()
    rank = get_rand_rank(A, iteration, seed=seed)
    time_finish = time.perf_counter()
    return (time_finish - time_start, rank)

In [None]:
def get_classic_time(A):
    """Time one `np.linalg.matrix_rank(A)` call.

    Uses `time.perf_counter`, a monotonic high-resolution clock meant for
    measuring short intervals.

    Args:
        A: matrix whose rank is computed.

    Returns:
        Tuple `(elapsed_seconds, rank)`.
    """
    time_start = time.perf_counter()
    rank = np.linalg.matrix_rank(A)
    time_finish = time.perf_counter()
    return (time_finish - time_start, rank)

In [272]:
# Time the rank search driven by the Hadamard-based range finder.
time_start = time.time()
rank = get_rank(A, randomized_subspace_iteration_with_hadamar)
time_finish = time.time()
randomized_subspace_iteration_with_hadamar_time = time_finish - time_start
print(rank)
print(f'time = {time_finish - time_start}')

40
time = 1.1720728874206543


In [273]:
# time_start = time.time()
# rank = get_rand_rank(A, randomized_subspace_iteration_with_hadamar)
# time_finish = time.time()
# print(rank)
# print(f'time = {time_finish - time_start}')
# randomized_subspace_iteration_with_hadamar_rand_time = time_finish - time_start

In [274]:
# Time the rank search driven by the Gaussian range finder (full-loss criterion).
time_start = time.time()
rank = get_rank(A, randomized_subspace_iteration)
time_finish = time.time()
randomized_subspace_iteration_time = time_finish - time_start
print(rank)
print(f'time = {time_finish - time_start}')

40
time = 1.0008320808410645


In [275]:
# Time the rank search driven by the Gaussian range finder, using get_rand_rank.
time_start = time.time()
rank = get_rand_rank(A, randomized_subspace_iteration)
time_finish = time.time()
randomized_subspace_iteration_rand_time = time_finish - time_start
print(rank)
print(f'time = {time_finish - time_start}')

40
time = 0.9147109985351562


In [276]:
# Baseline: time NumPy's matrix_rank on the same matrix.
time_start = time.time()
np_rank = np.linalg.matrix_rank(A)
time_finish = time.time()
classic_rank_time = time_finish - time_start
print(np_rank)
print(f'time = {time_finish - time_start}')

45
time = 1.0511832237243652


In [277]:
# Elapsed seconds of the four measurements above, one per line, in this order:
# Hadamard range finder, Gaussian range finder, Gaussian range finder with
# get_rand_rank, NumPy matrix_rank.
# print(randomized_subspace_iteration_with_hadamar_rand_time)
print(
    randomized_subspace_iteration_with_hadamar_time,
    randomized_subspace_iteration_time,
    randomized_subspace_iteration_rand_time,
    classic_rank_time,
    sep='\n',
)

1.1720728874206543
1.0008320808410645
0.9147109985351562
1.0511832237243652


In [278]:
from tqdm import tqdm

# Accumulate, over 100 repetitions, the elapsed time of NumPy's matrix_rank and
# of the random-projection rank search on the same matrix A.
# Both printed values are total seconds, even though the labels say "rank".
# NOTE(review): the traceback recorded for this cell reports matrix_rank()
# receiving 4 positional arguments, which the get_classic_time shown in this
# notebook cannot produce; its cell has no execution count, so the kernel
# probably still held an older definition. Re-run after Restart & Run All.
all_time_classic = 0
all_time_subspace_rand = 0
for i in tqdm(range(100), desc="running"):
    all_time_classic = all_time_classic + get_classic_time(A)[0]
    all_time_subspace_rand = all_time_subspace_rand + get_rand_time(A, randomized_subspace_iteration)[0]

print(f"classic rank = {all_time_classic}")
print (f"our fast rank = {all_time_subspace_rand}")


running:   0%|          | 0/100 [00:00<?, ?it/s]


TypeError: matrix_rank() takes from 1 to 3 positional arguments but 4 were given