In [6]:
import numpy as np
import TensorFrost as tf

# Initialize TensorFrost on the CPU backend. The second argument looks like a string of
# C++ compiler options (MSVC-style: optimize, fast floating point, OpenMP).
# NOTE(review): confirm the flag semantics against the TensorFrost docs -- relaxed
# floating-point math would contribute to the float32 error reported in the cells below.
tf.initialize(tf.cpu, "/O2 /fp:fast /openmp")

def modified_gram_schmidt(A):
    """
    Reduced QR decomposition of A using Modified Gram-Schmidt orthogonalization.

    Parameters
    ----------
    A : array_like, shape (m, n)
        Matrix to factor. It is copied and converted to float, so the
        caller's array is never modified.

    Returns
    -------
    Q : ndarray, shape (m, n)
        Orthogonalized columns; orthonormal when A has full column rank.
    R : ndarray, shape (n, n)
        Upper-triangular factor such that A = Q @ R.

    Notes
    -----
    A column whose remaining norm is exactly zero yields a zero column in Q
    and a zero row in R (instead of NaNs from a 0/0 division); the identity
    A = Q @ R still holds in that case. A matrix with zero columns returns
    empty Q and R.
    """
    A = np.asarray(A).astype(float)  # private float copy; updated in place below
    m, n = A.shape
    Q = np.zeros((m, n))
    R = np.zeros((n, n))

    # One uniform pass over all columns: for the last column the trailing
    # slices are empty, so no separate special case is required.
    for i in range(n):
        col_norm = np.linalg.norm(A[:, i])
        R[i, i] = col_norm
        if col_norm == 0.0:
            # Nothing left to normalize; keep Q[:, i] and R[i, i+1:] at zero.
            continue
        Q[:, i] = A[:, i] / col_norm
        # Project the remaining columns onto the new direction ...
        R[i, i+1:] = np.dot(Q[:, i], A[:, i+1:])
        # ... and remove that component immediately (the "modified" step).
        A[:, i+1:] -= np.outer(Q[:, i], R[i, i+1:])
    return Q, R

#dynamic size QR decomposition
def QRDecomposition():
    """
    TensorFrost program that builds a QR decomposition of a 2-D float32 input matrix.

    It follows the same steps as modified_gram_schmidt: for each column i (all but
    the last), store the column norm in R[i, i], write the normalized column into Q,
    fill the rest of row i of R with dot products against the remaining columns, and
    subtract that projection from those columns of A. The last column is only
    normalized. There is no guard against a zero column norm.

    Takes no Python arguments; the matrix is declared with tf.input and supplied
    when the compiled program is called.

    Returns:
        [Q, R], with Q created with shape [m, n] and R with shape [n, n],
        where m, n = A.shape.
    """
    # 2-D float32 input; -1 presumably marks a size known only at run time -- TODO confirm
    A = tf.input([-1, -1], tf.float32)

    m, n = A.shape
    Q = tf.zeros([m, n])
    R = tf.zeros([n, n])

    # presumably an index tensor over the m rows, used to address a whole column at once -- TODO confirm
    j = tf.index(0, [m])

    def loop_body(i):
        # Accumulate the sum of squares of column i
        # (tf.loop(body, 0, m, 1) presumably runs it = 0 .. m-1 -- TODO confirm).
        norm2 = tf.zeros([1], tf.float32)
        def loop_body1(it):
            norm2.set(norm2 + A[it, i] ** 2)
        tf.loop(loop_body1, 0, m, 1)
        R[i, i] = tf.sqrt(norm2)
        # Normalized column i of A becomes column i of Q.
        Q[j, i] = A[j, i] / R[i, i]
        
        # t appears to index the columns right of i (i+1 up to n) -- NOTE(review): confirm index_grid semantics
        t, = tf.index_grid([i+1], [n])
        # Dot product of Q[:, i] with each of those columns, accumulated over the rows.
        dotprod = tf.zeros(t.shape, tf.float32)
        def loop_body2(it):
            dotprod.set(dotprod + Q[it, i] * A[it, t])
        tf.loop(loop_body2, 0, m, 1)
        R[i, t] = dotprod
        
        # Remove the component along Q[:, i] from the remaining columns of A;
        # p, k appear to span rows 0..m and columns i+1..n -- TODO confirm.
        p, k = tf.index_grid([0, i+1], [m, n])
        A[p, k] -= Q[p, i] * R[i, k]

    # Process every column except the last one.
    tf.loop(loop_body, 0, n-1, 1)

    # Last column: no columns remain to project, so only compute its norm and normalize.
    norm2 = tf.zeros([1], tf.float32)
    def loop_body1(it):
        norm2.set(norm2 + A[it, n-1] ** 2)
    tf.loop(loop_body1, 0, m, 1)
    R[n-1, n-1] = tf.sqrt(norm2)
    Q[j, n-1] = A[j, n-1] / R[n-1, n-1]

    return [Q, R]

# Compile the TensorFrost program; `qr` is the callable used in the cells below,
# taking a tensor and returning the Q and R tensors.
qr = tf.compile(QRDecomposition)


QRDecomposition:
  Kernel count: 8
  Intermediate buffers: 0
  Lines of generated code: 578



In [7]:
# Random 5x5 test matrix
A = np.random.rand(5, 5)

# Reference factorization: NumPy Modified Gram-Schmidt
Q, R = modified_gram_schmidt(A)

# Same factorization through the compiled TensorFrost program,
# with the results pulled back as NumPy arrays
Atf = tf.tensor(A)
Qtf, Rtf = qr(Atf)
Qnp, Rnp = Qtf.numpy, Rtf.numpy

# Does Q @ R reproduce A (within np.allclose tolerances)?
recon_np = np.dot(Q, R)
print("QR decomposition is correct:", np.allclose(A, recon_np))
recon_tf = np.dot(Qnp, Rnp)
print("QR decomposition using TensorFrost is correct:", np.allclose(A, recon_tf))

# Norm of the reconstruction residual for each implementation
print("Error:", np.linalg.norm(A - recon_np))
print("Error using TensorFrost:", np.linalg.norm(A - recon_tf))

# Show the TensorFrost factors
print("Q:\n", Qnp)
print("R:\n", Rnp)


QR decomposition is correct: True
QR decomposition using TensorFrost is correct: True
Error: 2.4552312978479334e-16
Error using TensorFrost: 1.0842518350953723e-07
Q:
 [[ 0.47427282  0.36220378  0.457387   -0.564107    0.3412536 ]
 [ 0.7475643  -0.41951546 -0.158624   -0.063334   -0.48577937]
 [ 0.03641628  0.7128565  -0.5785942  -0.19003461 -0.345869  ]
 [ 0.23954163  0.4284115   0.45740804  0.70089716 -0.24208356]
 [ 0.39687106  0.03338427 -0.4707889   0.38781714  0.68508   ]]
R:
 [[ 1.2178518   1.1900272   0.7325122   0.97588885  0.80657625]
 [ 0.          0.86835593  0.90782845  0.36916694  0.52878106]
 [ 0.          0.          0.5208241  -0.03697312 -0.06439757]
 [ 0.          0.          0.          0.32581264 -0.18506089]
 [ 0.          0.          0.          0.          0.69983953]]


In [8]:
# Performance test: time the three QR implementations on a 1000x1000 random matrix.
# time.perf_counter() is used instead of time.time(): it is a monotonic,
# high-resolution clock intended for measuring elapsed intervals.
import time
A = np.random.rand(1000, 1000)

# naive NumPy QR decomposition
start = time.perf_counter()
Q, R = modified_gram_schmidt(A)
print("Time for naive NumPy QR decomposition:", time.perf_counter() - start)

# TensorFrost QR decomposition (the host-to-tensor conversion is not timed)
Atf = tf.tensor(A)
start = time.perf_counter()
Qtf, Rtf = qr(Atf)
print("Time for TensorFrost QR decomposition:", time.perf_counter() - start)

# built-in NumPy QR decomposition
start = time.perf_counter()
Q, R = np.linalg.qr(A)
print("Time for built-in NumPy QR decomposition:", time.perf_counter() - start)

# Q and R were just overwritten by np.linalg.qr, so the first line reports the
# built-in factorization's residual, not the naive implementation's.
print("Error:", np.linalg.norm(A - np.dot(Q, R)))
print("Error using TensorFrost:", np.linalg.norm(A - np.dot(Qtf.numpy, Rtf.numpy)))

Time for naive NumPy QR decomposition: 2.3672475814819336
Time for TensorFrost QR decomposition: 0.42299795150756836
Time for built-in NumPy QR decomposition: 0.13300085067749023
Error: 5.257813726875746e-13
Error using TensorFrost: 0.00024719972344781324
