In [224]:
import numpy as np # type: ignore
import sys
import os

from scipy.linalg import qr # type: ignore
from scipy.linalg import svd # type: ignore
import numpy as np # type: ignore
import transforms # type: ignore

import time
import random

print(sys.executable)

/opt/homebrew/Caskroom/miniconda/base/envs/rand_dec/bin/python


In [225]:
def my_matmul(A, B):
  n, m = A.shape
  _, k = B.shape
  C = np.zeros((n, k))
  for i in range(0, n):
    for j in range(0, k):
      for l in range(0, m):
        C[i][j] += A[i][l] * B[l][j]
  return C

In [226]:
def matmul(A, B):
  return A @ B

In [227]:
def hadamard_projection(A, k, p=5):
    m, n = A.shape
    power = 1
    l = k + p
    start = time.time()
    Y = A
    for i in range(0, m):
      Y[i, :] = transforms.fast_hadamard_transform(Y[i, :])
    random_cols = np.random.choice(n, l, replace = False)
    Y = Y[:, random_cols]
    return Y

In [228]:
def randomized_subspace_iteration_with_hadamar(A, k, p=3, q=3):
    _, n = A.shape

    l = k + p
    Y0 = hadamard_projection(A, k, p)

    Q0, _ = np.linalg.qr(Y0)
    Qj = Q0
    for j in range(q):
        Y_tilde_j = A.conj().T @ Qj
        Q_tilde_j, _ = np.linalg.qr(Y_tilde_j)

        Yj = A @ Q_tilde_j
        Qj, _ = np.linalg.qr(Yj)

    return Qj

In [229]:
def randomized_subspace_iteration(A, k, p, q=3, is_custom_mul=False):
    _, n = A.shape
    l = k + p
    Omega = np.random.randn(n, l)
    if is_custom_mul:
        Y0 = my_matmul(A, Omega)
    else:
        Y0 = A @ Omega

    Q0, _ = np.linalg.qr(Y0)
    Qj = Q0
    for j in range(q):
        Y_tilde_j = A.conj().T @ Qj
        Q_tilde_j, _ = np.linalg.qr(Y_tilde_j)

        Yj = A @ Q_tilde_j
        Qj, _ = np.linalg.qr(Yj)

    return Qj

In [230]:
def get_loss(A, Q):
  return np.linalg.norm(Q @ Q.T @ A - A) / np.linalg.norm(A)

In [231]:
accuracy = 1e-8

In [232]:
def check_loss_k(A, get_Q_func, k, p=3, q=3, max_loss = accuracy):
    Q = get_Q_func(A, k, p, q)
    loss = get_loss(A, Q)
    return loss < max_loss

In [233]:
def get_rank_binary_search(A, left, right, get_Q_func, p=5, q=3, max_loss = accuracy):
  while right - left > 1: 
    k = (right + left) // 2
    if (check_loss_k(A, get_Q_func, k, p, q, max_loss)):
      right = k
    else:
      left = k
  return right

In [234]:
def get_rank(A, get_Q_func, p=5, q=3, max_loss = accuracy):
  k = 1
  while True:
    Q = get_Q_func(A, k, p, q)
    loss = get_loss(A, Q)
    print(f'now k = {k}, now loss = {loss}')
    if (check_loss_k(A, get_Q_func, k, p, q, max_loss)):
      return get_rank_binary_search(A, k // 2, k, get_Q_func, p, q, max_loss)
    k *= 2


In [235]:
# def get_rank(A, get_Q, p=5, q=3, max_loss = 1e-14):
#   k = 1
#   while True:
#     Q = get_Q(A, k, p, q)
#     loss = get_loss(A, Q)
#     print(f'now k = {k}, now loss = {loss}')
#     if loss > max_loss:
#       k *= 2
#     else:
#       return k


In [236]:
def random_rank_k_matrix(m, n, k, random_state=None):
    B = np.random.randn(m, k)
    C = np.random.randn(k, n)
    A: np.matrix = B @ C
    return A

In [237]:
k = 45
p = 5
q = 3

In [238]:
m, n = 2**11, 2**11

In [239]:
A = random_rank_k_matrix(m, n, k)

In [240]:
time_start = time.time()
rank = get_rank(A, randomized_subspace_iteration_with_hadamar)
time_finish = time.time()
print(rank)
print(f'time = {time_finish - time_start}')

now k = 1, now loss = 0.9140430975796153
now k = 2, now loss = 0.8977581071648827
now k = 4, now loss = 0.8681318582890202
now k = 8, now loss = 0.8138608169991794
now k = 16, now loss = 0.6887606472490588
now k = 32, now loss = 0.3800735891648309
now k = 64, now loss = 1.9857671326476787e-15
40
time = 2.409019947052002


In [241]:
time_start = time.time()
rank = get_rank(A, randomized_subspace_iteration)
time_finish = time.time()
print(rank)
print(f'time = {time_finish - time_start}')

now k = 1, now loss = 0.9150830629424979
now k = 2, now loss = 0.8990238596556899
now k = 4, now loss = 0.8700437962558706
now k = 8, now loss = 0.813387360323234
now k = 16, now loss = 0.6858295502331584
now k = 32, now loss = 0.38032276545472093
now k = 64, now loss = 1.6890836938491187e-15
40
time = 2.4024012088775635


In [242]:
time_start = time.time()
np_rank = np.linalg.matrix_rank(A)
time_finish = time.time()
print(np_rank)
print(f'time = {time_finish - time_start}')

45
time = 1.0723912715911865
