In [6]:
import numpy as np
import threading
import timeit
import time
from scipy.linalg import blas as FB

# Benchmark: matrix multiplication speed for two SIZE x SIZE random matrices.

SIZE = 100
A = np.random.rand(SIZE,SIZE)
B = np.random.rand(SIZE,SIZE)

# Baseline 1: pure-Python triple loop.
# The timed region also includes allocating the zero-filled result C.
start = timeit.default_timer()

C = np.zeros((SIZE,SIZE))
for i in range(SIZE):
  for j in range(SIZE):
    for k in range(SIZE):
      # Add the j-th term of the row-i / column-k dot product to C[i, k]
      C[i,k] += A[i,j]*B[j,k]

time_spent = timeit.default_timer() - start
print("Pure way in python")
print(time_spent)

# Baseline 2: a single ndarray.dot call, timed once (no repetitions).
start = timeit.default_timer()

C2 = A.dot(B)

time_spent = timeit.default_timer() - start
print("Using Numpy")
print(time_spent)

# My Way

# Counter that hands out the result-matrix positions still to be computed
class pos_count():
    """Cursor that steps through grid positions one cell at a time.

    The position is the pair ``(curr_col, curr_row)``. ``curr_row`` is the
    fast-moving index: it runs from 0 to ``rows - 1`` and then wraps to 0
    while ``curr_col`` advances by one. ``curr_col`` is never bounded by
    this class (``cols`` is stored but not consulted), so callers must
    detect the end themselves by comparing the returned column with their
    own limit.

    ``set_size`` must be called before ``increment``/``get_inc``; until
    then ``rows`` does not exist on the instance.
    """

    def __init__(self):
        # Start at the first cell.
        self.curr_row = 0
        self.curr_col = 0

    def set_size(self, cols, rows):
        """Store the grid dimensions; ``rows`` is the wrap-around length."""
        self.cols, self.rows = cols, rows

    def increment(self):
        """Move the cursor to the next position and return 1."""
        on_last_row = (self.curr_row + 1 == self.rows)
        if not on_last_row:
            # Still inside the current column: just step down one row.
            self.curr_row += 1
            return 1

        # Last row reached: move to the top of the next column.
        self.curr_col += 1
        self.curr_row = 0
        return 1

    def get_pos(self):
        """Return the current position as a ``(col, row)`` tuple."""
        return (self.curr_col, self.curr_row)

    def get_inc(self):
        """Return the current position, then advance the cursor."""
        current = self.get_pos()
        self.increment()
        return current


def multi_mat_mul(A, B, num_threads=16):
    """Multiply two 2-D matrices using a group of worker threads.

    A shared ``pos_count`` cursor hands out result cells one at a time;
    each ``MyThread`` worker claims a cell and fills it with the dot
    product of the matching row of ``A`` and column of ``B``.

    Args:
        A: Left operand, array-like of shape ``(n, m)``.
        B: Right operand, array-like of shape ``(m, p)``.
        num_threads: Number of worker threads to start (default 16).

    Returns:
        A new float array of shape ``(n, p)`` (created with ``np.zeros``)
        holding the product.

    Raises:
        ValueError: If either operand is not 2-D, if the inner dimensions
            differ, or if ``num_threads`` is smaller than 1.
    """
    A = np.asarray(A)
    B = np.asarray(B)

    # Validate up front: a shape error raised inside a worker thread would
    # only kill that thread and leave the caller with a zero-filled result.
    if A.ndim != 2 or B.ndim != 2:
        raise ValueError(
            "multi_mat_mul expects 2-D operands, got %d-D and %d-D"
            % (A.ndim, B.ndim)
        )
    if A.shape[1] != B.shape[0]:
        raise ValueError(
            "shape mismatch: %r cannot be multiplied with %r"
            % (A.shape, B.shape)
        )
    if num_threads < 1:
        raise ValueError("num_threads must be at least 1")

    out_rows = A.shape[0]
    out_cols = B.shape[1]

    # Result matrix, filled in place by the workers
    C = np.zeros((out_rows, out_cols))

    # Nothing to compute; also avoids workers indexing into an empty axis.
    if C.size == 0:
        return C

    # Counter object for distributing positions to calculate.
    # pos_count names its slow (first-axis) index "col" and its fast
    # (second-axis) index "row", hence the argument order.
    count = pos_count()
    count.set_size(out_rows, out_cols)

    threads = [
        MyThread("thread-" + str(i), i, count, A, B, C)
        for i in range(num_threads)
    ]
    for worker in threads:
        worker.start()

    # Wait until every cell has been written
    for worker in threads:
        worker.join()

    return C


# Thread definition
class MyThread(threading.Thread):
    """Worker thread that fills cells of a shared result matrix.

    Each worker repeatedly asks the shared counter for the next position
    ``(col, row)``, then stores ``dot(A[col, :], B[:, row])`` into
    ``C[col, row]``. It stops as soon as the returned ``col`` is no longer
    a valid first-axis index of ``C``.

    Args:
        threadID: Identifier stored on the instance (not otherwise used here).
        i: Index of this worker, stored on the instance.
        count: Shared counter exposing ``get_inc()``, which returns the next
            ``(col, row)`` position and advances.
        A: Left operand; indexed as ``A[col, :]``.
        B: Right operand; indexed as ``B[:, row]``.
        C: Result matrix, written in place.
    """

    # Prevent race conditions on the shared counter. This is a class
    # attribute, so one lock is shared by every MyThread instance.
    pos_lock = threading.Lock()

    def __init__(self, threadID, i, count, A, B, C):
        super(MyThread, self).__init__()
        self.threadID = threadID
        self.i = i
        self.count = count
        self.A = A
        self.B = B
        self.C = C

    def run(self):
        """Claim and compute result cells until the counter runs past C."""
        # The extent of C's first axis does not change while the worker runs
        limit = np.size(self.C, axis=0)

        while True:
            # Fetch the next position under the lock. The context manager
            # guarantees the lock is released even if get_inc() raises, so
            # a failing worker cannot deadlock the others.
            with self.pos_lock:
                col, row = self.count.get_inc()

            # End of matrix, thread is done
            if col >= limit:
                break

            # Calculate value for pos in result matrix
            self.C[col, row] = np.dot(self.A[col, :], self.B[:, row])


# Time the thread-based implementation once, using timeit.default_timer()
# like the pure-Python and NumPy measurements so the numbers are comparable.
start = timeit.default_timer()
multi_mat_mul(A, B)
end = timeit.default_timer()
print("Own threading way")
print(end - start)


Pure way in python
1.3647644000000128
Using Numpy
0.0006633000000419997
Own threading way
0.10854268074035645
