In [6]:
import numpy as np
import time
import polars as pl
import random

def dist(A, B, GramMatrix):
    return (
        np.diag(np.matmul(
            np.matmul(A, GramMatrix), 
            A.T)) +
        np.diag(np.matmul(
            np.matmul(B.T, GramMatrix), 
            B)).reshape(-1, 1) - 
        2 * (np.matmul(
            np.matmul(B.T, GramMatrix), 
            A.T))) 

In [7]:
def performance_test(n: int, k: int, m: int, iterations: int):
    A = np.random.rand(m, n)
    B = np.random.rand(n, k)
    
    # Gram matrix must be symmetric
    GramMatrix = np.random.rand(n, n)
    GramMatrix = (GramMatrix + GramMatrix.T) * 0.5
    
    assert A.shape[0] == m and B.shape[1] == k
    
    naive_time = 0
    new_time = 0
    for it in range(iterations):
        result1 = np.zeros((k, m))
        
        point0 = time.time()
        
        for j in range(m):
            for i in range(k):
                result1[i, j] = np.dot(np.dot(A[j] - B[:,i].T, GramMatrix), A[j].T - B[:,i]).item()
                
        point1 = time.time()   
          
        result2 = dist(A, B, GramMatrix)
        
        point2 = time.time()
        
        assert np.allclose(result1, result2)
        
        naive_time += (point1 - point0)
        new_time += (point2 - point1)
   
    return naive_time, new_time, iterations
        

In [8]:
print(performance_test(3, 4, 6, 10000))

(1.1890602111816406, 0.2111952304840088, 10000)


In [9]:
df1 = pl.DataFrame(schema=['n', 'k', 'm', 'naive_version_time', 'matrix_version_time', 'iterations_count'])
df1 = df1.cast({
    "n": pl.Int64,   
    "k" : pl.Int64,
    "m" : pl.Int64,
    "naive_version_time": pl.Float64,  
    "matrix_version_time": pl.Float64,
    "iterations_count" :  pl.Int64   
})


launches_count = 500
iterations = 10

k = random.randint(1, 1000)
m = random.randint(1, 1000)

import random
for n in range(1, launches_count):
    naive_version_time, matrix_version_time, iterations_count = performance_test(n, k, m, iterations)
    print(f'{n=}', f'{k=}', f'{m=}', f'{naive_version_time=}', f'{matrix_version_time=}', f'{iterations_count=}', sep="\n", end="\n\n")
    df1 = df1.vstack(pl.DataFrame({\
        'n' : n,
        'k' : k,
        'm' : m,
        'naive_version_time' : naive_version_time, 
        'matrix_version_time' : matrix_version_time, 
        'iterations_count' : iterations_count}))

df1.write_csv("different_n.csv")

n=1
k=676
m=150
naive_version_time=4.7676239013671875
matrix_version_time=0.01801776885986328
iterations_count=10

n=2
k=676
m=150
naive_version_time=4.769074201583862
matrix_version_time=0.015012025833129883
iterations_count=10

n=3
k=676
m=150
naive_version_time=4.946204900741577
matrix_version_time=0.022019386291503906
iterations_count=10

n=4
k=676
m=150
naive_version_time=4.856281995773315
matrix_version_time=0.021022319793701172
iterations_count=10

n=5
k=676
m=150
naive_version_time=4.766377210617065
matrix_version_time=0.022020816802978516
iterations_count=10

n=6
k=676
m=150
naive_version_time=4.924515962600708
matrix_version_time=0.0250244140625
iterations_count=10

n=7
k=676
m=150
naive_version_time=4.8151326179504395
matrix_version_time=0.027026891708374023
iterations_count=10

n=8
k=676
m=150
naive_version_time=4.854886531829834
matrix_version_time=0.02402472496032715
iterations_count=10

n=9
k=676
m=150
naive_version_time=4.835525989532471
matrix_version_time=0.0220222473

In [17]:
df2 = pl.DataFrame(schema=['n', 'k', 'm', 'naive_version_time', 'matrix_version_time', 'iterations_count'])
df2 = df2.cast({
    "n": pl.Int64,   
    "k" : pl.Int64,
    "m" : pl.Int64,
    "naive_version_time": pl.Float64,  
    "matrix_version_time": pl.Float64,
    "iterations_count" :  pl.Int64   
})    
 
launches_count = 1000
iterations = 10 
 
n = random.randint(1, 100)

file_path = "same_n.csv"
df2.write_csv(file_path)

for i in range(1, launches_count + 1):
    k = random.randint(1, 1000)
    m = random.randint(1, 1000)
    naive_version_time, matrix_version_time, iterations_count = performance_test(n, k, m, iterations)
    print(f'{i=}', f'{n=}', f'{k=}', f'{m=}', f'{naive_version_time=}', f'{matrix_version_time=}', f'{iterations_count=}', sep="\n")
    df2 = df2.vstack(pl.DataFrame({\
        'n' : n,
        'k' : k,
        'm' : m,
        'naive_version_time' : naive_version_time, 
        'matrix_version_time' : matrix_version_time, 
        'iterations_count' : iterations_count}))
    
    
    
    if i % 10 == 0:
        df2.write_csv(file_path)
            
    
    
    


i=1
n=99
k=252
m=801
naive_version_time=12.297613859176636
matrix_version_time=0.06005549430847168
iterations_count=10
i=2
n=99
k=618
m=634
naive_version_time=23.44107151031494
matrix_version_time=0.08107423782348633
iterations_count=10
i=3
n=99
k=33
m=662
naive_version_time=1.3084468841552734
matrix_version_time=0.024021625518798828
iterations_count=10
i=4
n=99
k=261
m=937
naive_version_time=14.546519756317139
matrix_version_time=0.07406878471374512
iterations_count=10
i=5
n=99
k=93
m=986
naive_version_time=5.566151142120361
matrix_version_time=0.05304908752441406
iterations_count=10
i=6
n=99
k=430
m=960
naive_version_time=24.81802797317505
matrix_version_time=0.10209155082702637
iterations_count=10
i=7
n=99
k=898
m=960
naive_version_time=51.6851532459259
matrix_version_time=0.1801614761352539
iterations_count=10
i=8
n=99
k=576
m=857
naive_version_time=29.639358043670654
matrix_version_time=0.10409140586853027
iterations_count=10
i=9
n=99
k=250
m=873
naive_version_time=12.897385358810