In [19]:
import TensorFrost as tf
import numpy as np
import matplotlib.pyplot as plt
import time

# Initialize TensorFrost with its CPU backend (tf.cpu). The second argument is
# a plain string; it looks like a compiler command line (presumably MSVC-style
# flags for optimization, fast floating point and OpenMP) — TODO confirm
# against the TensorFrost documentation.
# NOTE(review): "H:/cl_compile.bat" is an absolute, machine-specific path, so
# this cell presumably only works on the author's machine as written.
tf.initialize(tf.cpu, "H:/cl_compile.bat /O2 /fp:fast /openmp")

def matmul():
    """Describe a matrix product as a TensorFrost kernel.

    Two float32 inputs are declared with shape [-1, -1] (presumably "two
    dimensions of unspecified size" — TODO confirm). The output is a
    zero-filled tensor into which every product lhs[r, t] * rhs[t, c] is
    accumulated at position [r, c] via scatterAdd.

    Returns:
        A one-element list containing the accumulated output tensor.
    """
    # Inputs are declared in this order; callers pass (first, second) to match.
    lhs = tf.input([-1, -1], tf.float32)
    rhs = tf.input([-1, -1], tf.float32)

    # Output extent comes from lhs dim 0 and rhs dim 1; the reduction extent
    # is taken from lhs dim 1.
    rows, inner = lhs.shape
    cols = rhs.shape[1]

    out = tf.zeros([rows, cols])

    # One index triple per (output row, output column, reduction step).
    r, c, t = tf.indices([rows, cols, inner])

    # Accumulate each partial product into its output element.
    tf.scatterAdd(out[r, c], lhs[r, t] * rhs[t, c])

    return [out]

# Wrap the kernel definition with tf.program. The resulting object is called
# in the following cells as mmul(A, B), and its result unpacks to one output.
mmul = tf.program(matmul)

In [20]:
# Small correctness check: run the TensorFrost kernel on 8x8 float32 matrices
# and compare the result against NumPy.
Anp = np.random.rand(8, 8).astype(np.float32)
Bnp = np.random.rand(8, 8).astype(np.float32)

# Both operands are uploaded transposed, and the NumPy reference below
# multiplies in swapped order (Bnp . Anp).
# NOTE(review): since A^T B^T == (B A)^T, this presumably compensates for an
# index-order difference between TensorFrost and NumPy — confirm.
A = tf.memory(np.transpose(Anp))
B = tf.memory(np.transpose(Bnp))
C, = mmul(A, B)

# Read the result back on the host (numpy is accessed as an attribute here).
Cnp = C.numpy

#compare to numpy
Cnp2 = np.dot(Bnp, Anp)

# Element-wise difference, kept for visual inspection.
print(Cnp - Cnp2)

# Fail loudly instead of relying on eyeballing the printed matrix. The
# tolerances allow for float32 rounding and a different summation order.
np.testing.assert_allclose(Cnp, Cnp2, rtol=1e-5, atol=1e-6)


[[ 0.0000000e+00  0.0000000e+00  0.0000000e+00  0.0000000e+00
   0.0000000e+00  2.3841858e-07  0.0000000e+00  0.0000000e+00]
 [ 0.0000000e+00  1.1920929e-07 -1.1920929e-07  0.0000000e+00
   1.1920929e-07  0.0000000e+00 -5.9604645e-08  0.0000000e+00]
 [ 0.0000000e+00  0.0000000e+00 -2.3841858e-07  0.0000000e+00
   0.0000000e+00  0.0000000e+00 -1.1920929e-07  0.0000000e+00]
 [ 0.0000000e+00  1.1920929e-07 -1.1920929e-07  1.1920929e-07
   2.3841858e-07  0.0000000e+00  1.1920929e-07 -1.1920929e-07]
 [ 2.3841858e-07  0.0000000e+00  0.0000000e+00  0.0000000e+00
  -1.1920929e-07  0.0000000e+00  0.0000000e+00  0.0000000e+00]
 [-2.3841858e-07  0.0000000e+00  1.1920929e-07  0.0000000e+00
  -1.1920929e-07  0.0000000e+00  5.9604645e-08  0.0000000e+00]
 [-4.7683716e-07  2.3841858e-07  2.3841858e-07  5.9604645e-08
   2.3841858e-07  2.3841858e-07  0.0000000e+00  1.1920929e-07]
 [ 0.0000000e+00  0.0000000e+00  0.0000000e+00 -5.9604645e-08
   0.0000000e+00  1.1920929e-07  5.9604645e-08  0.0000000e+00]]

In [21]:
# Benchmark: time repeated 1024x1024 float32 products in TensorFrost and in
# NumPy, then report the ratio of the two totals.
Anp = np.random.rand(1024, 1024).astype(np.float32)
Bnp = np.random.rand(1024, 1024).astype(np.float32)

# Same operand preparation as in the correctness cell (transposed uploads,
# swapped order in the NumPy reference).
A = tf.memory(np.transpose(Anp))
B = tf.memory(np.transpose(Bnp))

# Performance test
# Both loops use the same repetition count so the totals are comparable.
N_RUNS = 50

# perf_counter() is the clock intended for measuring short intervals; unlike
# time.time() it is monotonic and uses the highest available resolution.
# NOTE(review): this assumes mmul() returns only after the computation has
# finished; if execution is deferred, read the result back before stopping
# the clock — confirm.
t0 = time.perf_counter()
for i in range(N_RUNS):
    C, = mmul(A, B)
t1 = time.perf_counter()

tf_time = t1 - t0
print("TensorFrost: ", tf_time)

t0 = time.perf_counter()
for i in range(N_RUNS):
    Cnp2 = np.dot(Bnp, Anp)
t1 = time.perf_counter()

np_time = t1 - t0
print("Numpy: ", np_time)

# Values above 1 mean TensorFrost took longer than NumPy.
print("TF/NP: ", tf_time / np_time)

TensorFrost:  33.77503323554993
Numpy:  0.33500027656555176
TF/NP:  100.8209114983848
