In [1]:
# Registers the %%cython cell magic used to compile the distance kernels below.
%load_ext Cython
# Registers the %memit magic used in the benchmark cells.
%load_ext memory_profiler

In [2]:
# Tiny hand-checkable fixtures: three "train" vectors and two "test" vectors,
# each with three dimensions, stored as float64 matrices (one vector per row).
import numpy as np

train_data = np.array(
    [
        [1.0, 2.0, 3.0],
        [5.0, 6.0, 7.0],
        [8.0, 9.0, 10.0],
    ]
)
test_data = np.array(
    [
        [5.0, 10.0, 15.0],
        [10.0, 20.0, 30.0],
    ]
)

# sep="" keeps the label and the array text joined exactly as string
# concatenation would.
print("Train Data: \n", train_data, sep="")
print("Test Data: \n", test_data, sep="")

Train Data: 
[[ 1.  2.  3.]
 [ 5.  6.  7.]
 [ 8.  9. 10.]]
Test Data: 
[[ 5. 10. 15.]
 [10. 20. 30.]]


## Manhattan Distance

In [3]:
%%cython -a
import cython
import numpy as np
cimport numpy as np

@cython.boundscheck(False) # turn off bounds-checking for entire function
@cython.wraparound(False)  # turn off negative index wrapping for entire function
def manhattan_dist(np.ndarray[double, ndim=2] vectors_a, np.ndarray[double, ndim=2] vectors_b):
    """Pairwise Manhattan (L1) distance between the rows of two float64 matrices.

    Parameters
    ----------
    vectors_a : 2-D float64 ndarray, one vector per row.
    vectors_b : 2-D float64 ndarray, one vector per row, with the same number
        of columns as ``vectors_a``.

    Returns
    -------
    2-D float64 ndarray of shape ``(len(vectors_b), len(vectors_a))`` where
    entry ``[i, j]`` is ``sum_k |vectors_a[j, k] - vectors_b[i, k]|``.

    Raises
    ------
    ValueError
        If the two inputs do not have the same number of columns.
    """
    # Py_ssize_t is the signed index type; it avoids truncating large shapes
    # to a C int and avoids signed/unsigned comparisons in the loop bounds.
    cdef Py_ssize_t numb_vectors_a = vectors_a.shape[0]
    cdef Py_ssize_t numb_vectors_b = vectors_b.shape[0]
    cdef Py_ssize_t numb_dims = vectors_a.shape[1]
    cdef Py_ssize_t i, j, k
    cdef double score

    # Bounds checking is disabled above, so a column mismatch would silently
    # read past the end of a row; reject it explicitly instead.
    if vectors_b.shape[1] != numb_dims:
        raise ValueError(
            "vectors_a and vectors_b must have the same number of columns "
            "(%d vs %d)" % (numb_dims, vectors_b.shape[1])
        )

    # np.float64 replaces the removed ``np.float`` alias.
    cdef np.ndarray[double, ndim=2] distance = np.zeros((numb_vectors_b, numb_vectors_a), dtype=np.float64)

    for i in range(numb_vectors_b):
        for j in range(numb_vectors_a):
            score = 0
            for k in range(numb_dims):
                score += abs(vectors_a[j, k] - vectors_b[i, k])

            distance[i, j] = score

    return distance

In [4]:
def manhattan(vectors_a, vectors_b):
    """Pairwise Manhattan (L1) distance via NumPy broadcasting.

    Entry ``[i, j]`` of the result is the sum over the last axis of
    ``|vectors_a[j] - vectors_b[i]|``. The inserted axis on ``vectors_b``
    makes the subtraction broadcast to one difference vector per (b, a) pair.
    """
    pairwise_diff = vectors_a - vectors_b[:, np.newaxis]
    absolute_diff = np.abs(pairwise_diff)
    return np.sum(absolute_diff, axis=2)

# Show both implementations on the same fixtures so the results can be compared.
print("Cython Implementation:\n", manhattan_dist(train_data, test_data), sep="")
print("Numpy Implementation:\n", manhattan(train_data, test_data), sep="")

Cython Implementation:
[[24. 12.  9.]
 [54. 42. 33.]]
Numpy Implementation:
[[24. 12.  9.]
 [54. 42. 33.]]


In [6]:
# Micro-benchmark on the small fixtures: 15 repeats (-r) of 100000 loops (-n)
# for the Cython kernel, then the same for the NumPy broadcast version.
%timeit -r 15 -n 100000 manhattan_dist(train_data, test_data)
%timeit -r 15 -n 100000 manhattan(train_data, test_data)

2.8 µs ± 122 ns per loop (mean ± std. dev. of 15 runs, 100000 loops each)
7.09 µs ± 197 ns per loop (mean ± std. dev. of 15 runs, 100000 loops each)


In [5]:
# Larger benchmark: 2500 x 10 standard-normal matrices for both operands.
# NOTE(review): no random seed is set, so the data differs between runs.
rand_train = np.random.randn(2500,10)
rand_test = np.random.randn(2500,10)

# Time and memory-profile the Cython kernel first, then the NumPy version.
# The NumPy version broadcasts to (2500, 2500, 10) float64 temporaries
# (about 477 MiB each), whereas the Cython kernel only allocates the
# (2500, 2500) result.
%timeit manhattan_dist(rand_train, rand_test)
%memit manhattan_dist(rand_train, rand_test)
%timeit manhattan(rand_train, rand_test)
%memit manhattan(rand_train, rand_test)

75.3 ms ± 3.25 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
peak memory: 111.36 MiB, increment: 0.04 MiB
709 ms ± 15.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
peak memory: 999.57 MiB, increment: 888.12 MiB


## Hamming Distance

In [7]:
%%cython -a
import cython
import numpy as np
cimport numpy as np

@cython.boundscheck(False) # turn off bounds-checking for entire function
@cython.wraparound(False)  # turn off negative index wrapping for entire function
def hamming_dist(np.ndarray[double, ndim=2] vectors_a, np.ndarray[double, ndim=2] vectors_b):
    """Pairwise Hamming distance between the rows of two float64 matrices.

    Parameters
    ----------
    vectors_a : 2-D float64 ndarray, one vector per row.
    vectors_b : 2-D float64 ndarray, one vector per row, with the same number
        of columns as ``vectors_a``.

    Returns
    -------
    2-D float64 ndarray of shape ``(len(vectors_b), len(vectors_a))`` where
    entry ``[i, j]`` is the number of positions ``k`` at which
    ``vectors_a[j, k] != vectors_b[i, k]``.

    Raises
    ------
    ValueError
        If the two inputs do not have the same number of columns.
    """
    # Py_ssize_t is the signed index type; it avoids truncating large shapes
    # to a C int and avoids signed/unsigned comparisons in the loop bounds.
    cdef Py_ssize_t numb_vectors_a = vectors_a.shape[0]
    cdef Py_ssize_t numb_vectors_b = vectors_b.shape[0]
    cdef Py_ssize_t numb_dims = vectors_a.shape[1]
    cdef Py_ssize_t i, j, k
    cdef int score

    # Bounds checking is disabled above, so a column mismatch would silently
    # read past the end of a row; reject it explicitly instead.
    if vectors_b.shape[1] != numb_dims:
        raise ValueError(
            "vectors_a and vectors_b must have the same number of columns "
            "(%d vs %d)" % (numb_dims, vectors_b.shape[1])
        )

    # np.float64 replaces the removed ``np.float`` alias.
    cdef np.ndarray[double, ndim=2] distance = np.zeros((numb_vectors_b, numb_vectors_a), dtype=np.float64)

    for i in range(numb_vectors_b):
        for j in range(numb_vectors_a):
            score = 0
            for k in range(numb_dims):
                # The comparison yields 0 or 1, so this counts mismatches.
                score += vectors_a[j, k] != vectors_b[i, k]

            distance[i, j] = score

    return distance

In [8]:
def hamming(vectors_a, vectors_b):
    """Pairwise Hamming distance via NumPy broadcasting.

    Entry ``[i, j]`` of the result is the number of positions along the last
    axis at which ``vectors_a[j]`` and ``vectors_b[i]`` differ.

    The element-wise ``!=`` already yields a boolean array, so it is summed
    directly; wrapping it in ``np.abs`` only produced an identical boolean
    copy of the full broadcast array.
    """
    mismatches = vectors_a != vectors_b[:, np.newaxis]
    return np.sum(mismatches, axis=2)

# Show both implementations on the same fixtures so the results can be compared.
print("Cython Implementation:\n", hamming_dist(train_data, test_data), sep="")
print("Numpy Implementation:\n", hamming(train_data, test_data), sep="")

Cython Implementation:
[[3. 2. 3.]
 [3. 3. 3.]]
Numpy Implementation:
[[3 2 3]
 [3 3 3]]


In [10]:
# Larger benchmark: fresh 2500 x 10 standard-normal matrices (these rebind the
# rand_train / rand_test names used by the previous benchmark).
# NOTE(review): no random seed is set, so the data differs between runs.
# NOTE(review): continuous random floats will almost never compare equal, so
# nearly every distance here equals the number of columns; that is fine for
# timing but does not exercise the "equal" case.
rand_train = np.random.randn(2500,10)
rand_test = np.random.randn(2500,10)

# Time and memory-profile the Cython kernel first, then the NumPy version.
%timeit hamming_dist(rand_train, rand_test)
%memit hamming_dist(rand_train, rand_test)
%timeit hamming(rand_train, rand_test)
%memit hamming(rand_train, rand_test)

67.7 ms ± 1.12 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
peak memory: 116.15 MiB, increment: 0.00 MiB
309 ms ± 6.28 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
peak memory: 235.43 MiB, increment: 0.00 MiB


## Chi-Squared

In [11]:
%%cython -a
import cython
import numpy as np
cimport numpy as np

@cython.boundscheck(False) # turn off bounds-checking for entire function
@cython.wraparound(False)  # turn off negative index wrapping for entire function
def chisqr_dist(np.ndarray[double, ndim=2] vectors_a, np.ndarray[double, ndim=2] vectors_b):
    """Pairwise chi-squared distance between the rows of two float64 matrices.

    Each row is first normalised by its own sum (relative frequencies). For a
    pair of rows ``a = vectors_a[j]`` and ``b = vectors_b[i]`` the result is::

        sqrt( sum_k (a[k]/sum(a) - b[k]/sum(b))**2 / (a[k] + b[k]) )

    where columns with ``a[k] + b[k] == 0`` are skipped.

    Parameters
    ----------
    vectors_a : 2-D float64 ndarray, one vector per row.
    vectors_b : 2-D float64 ndarray, one vector per row, with the same number
        of columns as ``vectors_a``.

    Returns
    -------
    2-D float64 ndarray of shape ``(len(vectors_b), len(vectors_a))``.

    Raises
    ------
    ValueError
        If the two inputs do not have the same number of columns.

    NOTE(review): a row whose elements sum to zero is divided by without a
    guard; confirm the desired behaviour for such rows.
    """
    # Py_ssize_t is the signed index type; it avoids truncating large shapes
    # to a C int and avoids signed/unsigned comparisons in the loop bounds.
    cdef Py_ssize_t numb_vectors_a = vectors_a.shape[0]
    cdef Py_ssize_t numb_vectors_b = vectors_b.shape[0]
    cdef Py_ssize_t numb_dims = vectors_a.shape[1]
    cdef Py_ssize_t i, j, k
    cdef double score
    cdef double col_sum

    # Bounds checking is disabled above, so a column mismatch would silently
    # read past the end of a row; reject it explicitly instead.
    if vectors_b.shape[1] != numb_dims:
        raise ValueError(
            "vectors_a and vectors_b must have the same number of columns "
            "(%d vs %d)" % (numb_dims, vectors_b.shape[1])
        )

    # np.float64 replaces the removed ``np.float`` alias.
    cdef np.ndarray[double, ndim=2] distance = np.zeros((numb_vectors_b, numb_vectors_a), dtype=np.float64)
    cdef np.ndarray[double, ndim=1] sum_a = np.zeros(numb_vectors_a, dtype=np.float64)
    cdef np.ndarray[double, ndim=1] sum_b = np.zeros(numb_vectors_b, dtype=np.float64)

    # Row totals, used to turn raw values into relative frequencies.
    for i in range(numb_vectors_a):
        for j in range(numb_dims):
            sum_a[i] += vectors_a[i, j]

    for i in range(numb_vectors_b):
        for j in range(numb_dims):
            sum_b[i] += vectors_b[i, j]

    for i in range(numb_vectors_b):
        for j in range(numb_vectors_a):
            score = 0
            for k in range(numb_dims):
                col_sum = vectors_a[j, k] + vectors_b[i, k]
                # A zero column total would divide by zero; such columns
                # contribute nothing to the distance.
                if col_sum == 0:
                    continue

                score += (1.0/col_sum) * ((vectors_a[j, k]/sum_a[j])-(vectors_b[i, k]/sum_b[i]))**2
            distance[i, j] = score**(0.5)

    return distance

In [12]:
def chisqr(vectors_a, vectors_b):
    """Pairwise chi-squared distance via NumPy broadcasting.

    Each row is normalised by its own sum (relative frequencies). Entry
    ``[i, j]`` of the result is::

        sqrt( sum_k (a[k]/sum(a) - b[k]/sum(b))**2 / (a[k] + b[k]) )

    with ``a = vectors_a[j]`` and ``b = vectors_b[i]``. Columns where
    ``a[k] + b[k] == 0`` contribute nothing.

    Parameters
    ----------
    vectors_a, vectors_b : array-like, 2-D, one vector per row, with the same
        number of columns. Inputs are converted to float64 so integer data is
        not subjected to integer reciprocal/division.

    Returns
    -------
    2-D float64 ndarray of shape ``(len(vectors_b), len(vectors_a))``. A row
    whose elements sum to zero has undefined relative frequencies, so every
    distance involving it is NaN.
    """
    vectors_a = np.asarray(vectors_a, dtype=np.float64)
    vectors_b = np.asarray(vectors_b, dtype=np.float64)

    all_col_sum = vectors_a + vectors_b[:, np.newaxis]
    # With `where=` and no `out=`, masked-out entries are left uninitialised
    # and would feed arbitrary values into the sum below. Start from zeros so
    # zero-total columns contribute exactly nothing.
    all_col_sum_recip = np.zeros_like(all_col_sum)
    np.reciprocal(all_col_sum, out=all_col_sum_recip, where=(all_col_sum != 0.0))

    vector_train_sum = np.sum(vectors_a, axis=1)[:, np.newaxis]
    vector_test_sum = np.sum(vectors_b, axis=1)[:, np.newaxis]

    # Relative frequencies; rows with a zero total stay NaN (undefined).
    rel_freq_train = np.divide(vectors_a, vector_train_sum,
                               out=np.full(vectors_a.shape, np.nan),
                               where=(vector_train_sum != 0))

    rel_freq_test = np.divide(vectors_b, vector_test_sum,
                              out=np.full(vectors_b.shape, np.nan),
                              where=(vector_test_sum != 0))

    diff_rel_freq_squared = np.square(rel_freq_train - rel_freq_test[:, np.newaxis])
    # Named `distance` so the local no longer shadows the function's own name.
    distance = np.sqrt(np.sum(all_col_sum_recip * diff_rel_freq_squared, axis=2))
    return distance

# Show both implementations on the same fixtures so the results can be compared.
print("Cython Implementation:\n", chisqr_dist(train_data, test_data), sep="")
print("Numpy Implementation:\n", chisqr(train_data, test_data), sep="")

Cython Implementation:
[[0.         0.04237612 0.04432558]
 [0.         0.03401047 0.03679188]]
Numpy Implementation:
[[0.         0.04237612 0.04432558]
 [0.         0.03401047 0.03679188]]


In [14]:
# Benchmark data: 1000 x 10 matrices of random integer counts in [0, 10],
# stored as float64 because the Cython kernel only accepts double buffers.
# np.float64 replaces the removed ``np.float`` alias.
rand_train = np.random.randint(11, size=(1000, 10)).astype(np.float64)
rand_test = np.random.randint(11, size=(1000, 10)).astype(np.float64)

# Time and memory-profile the Cython kernel first, then the NumPy version.
%timeit chisqr_dist(rand_train, rand_test)
%memit chisqr_dist(rand_train, rand_test)
%timeit chisqr(rand_train, rand_test)
%memit chisqr(rand_train, rand_test)

126 ms ± 2.88 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
peak memory: 4059.63 MiB, increment: 0.00 MiB
220 ms ± 3.08 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
peak memory: 4374.70 MiB, increment: 0.06 MiB
