<a href="https://colab.research.google.com/github/pflashgary/Numba/blob/master/Numba.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
!apt-get install nvidia-cuda-toolkit
!pip3 install numba

import os
os.environ['NUMBAPRO_LIBDEVICE'] = "/usr/lib/nvidia-cuda-toolkit/libdevice"
os.environ['NUMBAPRO_NVVM'] = "/usr/lib/x86_64-linux-gnu/libnvvm.so"


In [1]:
"""
Created on 4 Dec 2019
@author: Pegah Faegh
"""

from numba import guvectorize
import numpy as np
from timeit import default_timer as timer

# No target= argument is passed to guvectorize, so Numba's default target is
# used (documented as 'cpu'); nothing in this decorator selects CUDA.
@guvectorize(['void(int64[:,:], int64[:,:], int64[:,:])'],
             '(m,n),(n,p)->(m,p)')
def AstarBwith(A, B, C):
    """Matrix product C = A * B as a Numba generalized ufunc.

    The layout string '(m,n),(n,p)->(m,p)' declares that A is (m, n), B is
    (n, p) and the output C is (m, p); the only compiled signature is for
    int64 arrays.

    Parameters
    ----------
    A : int64[:, :]
        Left operand, shape (m, n).
    B : int64[:, :]
        Right operand, shape (n, p).
    C : int64[:, :]
        Output array, shape (m, p). Every element is overwritten in place;
        nothing is returned from the kernel body itself.
    """
    m, n = A.shape
    # The shared dimension n is already known from A (the layout ties it to
    # B's first axis), so only the column count is read from B.
    p = B.shape[1]
    for i in range(m):
        for j in range(p):
            # Accumulate in a local scalar and store once, instead of doing a
            # read-modify-write on C[i, j] for every k.
            acc = 0
            for k in range(n):
                acc += A[i, k] * B[k, j]
            C[i, j] = acc

# Side length of the square test matrices. The same value is reused as the
# exclusive upper bound of the random entries, so every entry lies in [0, n).
n = 10000
# Pin the dtype to int64 rather than relying on the platform's default integer
# type: the compiled kernel signature is int64, and each output element is a
# sum of n products below n*n (up to about 1e12 here), which does not fit in
# a 32-bit integer.
A = np.random.randint(n, size=(n, n), dtype=np.int64)
B = np.random.randint(n, size=(n, n), dtype=np.int64)

start = timer()
C = AstarBwith(A, B)
# The variable keeps its historical name gpu_time so later references still
# work, but AstarBwith is decorated without target='cuda', so this measures
# Numba's default target rather than a GPU run. The label below says so.
gpu_time = timer() - start
print("with Numba guvectorize took %f seconds" % gpu_time)


print("C = A*B")
print(":\n%s" % C)


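# Disabled pure-Python baseline of the same triple loop. The output array is
# allocated inside the function so that, if re-enabled, it does not depend on
# a C array left over from an earlier run.
# def AstarBwithout(A, B):
#     m, n = A.shape
#     n, p = B.shape
#     C = np.zeros((m, p), dtype=A.dtype)
#     for i in range(m):
#         for j in range(p):
#             C[i, j] = 0
#             for k in range(n):
#                 C[i, j] += A[i, k] * B[k, j]
#     return C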

# start = timer()
# C = AstarBwithout(A, B)
# without_gpu_time = timer() - start
# print("without GPU took %f seconds" % without_gpu_time)

# print("C = A*B")
# print(":\n%s" % C)


# C is about to be rebound to NumPy's result. Keep a reference to the product
# computed earlier in this cell so the two can be compared exactly instead of
# by eye from the truncated printouts. This is an alias, not a copy.
_reference_C = C

start = timer()
C = np.matmul(A,B)
numpy_time = timer() - start
print("numpy took %f seconds" % numpy_time)

# Integer arithmetic is exact, so both methods must agree element for element.
assert np.array_equal(_reference_C, C), \
    "np.matmul result differs from the previously computed product"
# Drop the extra reference so the earlier result can be garbage-collected.
del _reference_C

print("C = A*B")
print(":\n%s" % C)




with GPU took 4294.036430 seconds
C = A*B
:
[[247605807431 248508382753 248009470693 ... 250975160530 248207234271
  247488833557]
 [250580815975 251505204658 248107987977 ... 254484528248 250766315569
  250840045208]
 [250168268882 251150430529 247958021684 ... 254607243668 252481129224
  251693164815]
 ...
 [246214696672 248232600698 246712292138 ... 250704421501 248999823102
  245525831408]
 [248007767957 250187880446 248667159019 ... 251907080199 250796485279
  249099611772]
 [246566796766 248811335860 249224363613 ... 251553326327 248380733790
  249623250761]]
numpy took 4755.848899 seconds
C = A*B
:
[[247605807431 248508382753 248009470693 ... 250975160530 248207234271
  247488833557]
 [250580815975 251505204658 248107987977 ... 254484528248 250766315569
  250840045208]
 [250168268882 251150430529 247958021684 ... 254607243668 252481129224
  251693164815]
 ...
 [246214696672 248232600698 246712292138 ... 250704421501 248999823102
  245525831408]
 [248007767957 250187880446 248667