Author: Aqib Javaid,
Reg. No. 2019-EE-307,
Course: Intro. to Machine Learning,
Course Instructor: Sir. Umar Rashid

In [61]:
import numpy as np
from math import sqrt

# Regular (but poor) programming using loops

In [62]:
def euc_loops(X,Y):
    Nx = X.shape[0]
    dx = X.shape[1]
    
    Ny = Y.shape[0]
    dy = Y.shape[1]
    
    #generating an empty matrix to store final values
    E = np.zeros((Nx,Ny))
    B = 0
    
    #computing euclidean Distances between matrices
    for i in range(Nx):
        for j in range(Ny):
            for k in range(dx):
                B += (X[i,k]-Y[j,k])**2
            E[i,j] = sqrt(B)
            B = 0
            
    return E        

# Vectorized code 

In [63]:
def euc_vec(X,Y):
    return np.round(np.sqrt(np.sum(X**2, axis = 1) [:, np.newaxis] + np.sum(Y**2, axis = 1) + -2*np.dot(X,Y.T)),3)

# Generating random sample and comparing running speed of both implementations

In [64]:
X = np.random.normal(size=(5,7))

%timeit euc_loops(X,X)
%timeit euc_vec(X,X)

S1 = euc_loops(X,X)
S2 = euc_vec(X,X)

print( np.trace(S1), np.trace(S2))
print("Computation using loops:")
print(S1)
print("Computation using vectorization:")
print(S2)

126 µs ± 5.8 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


  return np.round(np.sqrt(np.sum(X**2, axis = 1) [:, np.newaxis] + np.sum(Y**2, axis = 1) + -2*np.dot(X,Y.T)),3)


29.8 µs ± 790 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
0.0 nan
Computation using loops:
[[0.         3.69752382 2.61674959 3.47297239 4.60950226]
 [3.69752382 0.         4.23080991 5.21848596 2.80536939]
 [2.61674959 4.23080991 0.         2.63421457 3.94632403]
 [3.47297239 5.21848596 2.63421457 0.         5.20595263]
 [4.60950226 2.80536939 3.94632403 5.20595263 0.        ]]
Computation using vectorization:
[[0.    3.698 2.617 3.473 4.61 ]
 [3.698 0.    4.231 5.218 2.805]
 [2.617 4.231   nan 2.634 3.946]
 [3.473 5.218 2.634   nan 5.206]
 [4.61  2.805 3.946 5.206 0.   ]]


Summary:

1. Vectorized code is much more concise and easier to read.
2. Vectorization code is faster than loops (about 4-5 times faster, 126 µs vs 29.8 µs)