### Distance and angles between two vectors

The distance between two vectors is defined as the length (norm) of their difference.

In terms of the dot product, the distance is
$$ d(\boldsymbol{x},\boldsymbol{y}) = \lVert \boldsymbol{x} - \boldsymbol{y} \rVert = \sqrt{(\boldsymbol{x} - \boldsymbol{y})^T (\boldsymbol{x} - \boldsymbol{y})}, $$
and the angle $\theta$ between the two vectors is defined through the dot product by
$$ \boldsymbol{x}^T \boldsymbol{y} = \lVert \boldsymbol{x} \rVert \lVert \boldsymbol{y} \rVert \cos \theta. $$

As an example, I will use the MNIST dataset.

In [1]:
# imports
import numpy as np
from scipy.spatial import distance
import matplotlib.pyplot as plt
import sklearn
from sklearn.datasets import fetch_mldata

In [2]:
def distance(x, y):
    """Return the Euclidean distance between two vectors.

    Both inputs are converted to float arrays and flattened, so integer
    data (for example unsigned 8-bit pixel values) is subtracted without
    wrap-around and multi-dimensional inputs are treated as plain vectors.

    Args:
        x: Array-like of numbers.
        y: Array-like of numbers with the same number of elements as ``x``.

    Returns:
        The norm of ``x - y`` as a NumPy float.
    """
    # The ``np.float`` alias was removed from NumPy; the builtin ``float``
    # selects the same float64 dtype.
    x = np.array(x, dtype=float).ravel()
    y = np.array(y, dtype=float).ravel()
    return np.linalg.norm(x - y)

def angle(x, y):
    """Return the angle, in radians, between two vectors.

    The angle is obtained from the dot product identity
    ``x . y = |x| |y| cos(theta)``.  Inputs are converted to float arrays
    and flattened first, mirroring ``distance``, so that integer data does
    not overflow inside the dot products.

    Args:
        x: Array-like of numbers.
        y: Array-like of numbers with the same number of elements as ``x``.

    Returns:
        The angle as a NumPy float in ``[0, pi]``.  If either vector has
        zero length the cosine is a division by zero and the result is
        ``nan``.
    """
    x = np.asarray(x, dtype=float).ravel()
    y = np.asarray(y, dtype=float).ravel()
    cosine = np.dot(x, y) / np.sqrt(np.dot(x, x) * np.dot(y, y))
    # Rounding can push the cosine marginally outside [-1, 1] for parallel
    # or anti-parallel vectors, which would make arccos return nan.
    return np.arccos(np.clip(cosine, -1.0, 1.0))

def pairwise_distance_matrix(X, Y):
    """Return the matrix of Euclidean distances between rows of X and Y.

    Args:
        X: 2-D array-like of shape ``(N, D)``; each row is one vector.
        Y: 2-D array-like of shape ``(M, D)``; each row is one vector.

    Returns:
        A float array of shape ``(N, M)`` whose entry ``[i, j]`` is the
        distance between ``X[i]`` and ``Y[j]``.

    Raises:
        ValueError: If ``X`` or ``Y`` is not two-dimensional.
    """
    # Convert up front so integer inputs are subtracted as floats.  The
    # ``np.float`` alias was removed from NumPy; ``float`` is the same dtype.
    X = np.asarray(X, dtype=float)
    Y = np.asarray(Y, dtype=float)
    N, _ = X.shape
    M, _ = Y.shape
    distance_matrix = np.zeros((N, M), dtype=float)
    for i in range(N):
        # One row at a time: X[i] is broadcast against every row of Y, which
        # keeps the temporary at (M, D) instead of (N, M, D).
        distance_matrix[i] = np.linalg.norm(X[i] - Y, axis=1)
    return distance_matrix

In [3]:
# Sanity check on two orthogonal unit vectors: their distance is sqrt(2) and
# the angle between them is 90 degrees.
a = np.array([1, 0])
b = np.array([0, 1])
np.testing.assert_almost_equal(distance(a, b), np.sqrt(2))
# Compare with a tolerance instead of exact float equality.
np.testing.assert_almost_equal(np.degrees(angle(a, b)), 90.0)
print('correct')

correct


Next, I check how much the distance varies between images of the same digit, and how far apart the different digit classes are in MNIST.

In [5]:
# Load MNIST and collect the distance between every ordered pair among the
# first 500 images (500 * 500 values, row-major over the two image indices).
# NOTE(review): fetch_mldata is no longer shipped with recent scikit-learn
# releases; fetch_openml is the documented replacement -- confirm before
# upgrading.
MNIST = fetch_mldata('MNIST original', data_home='./MNIST')
first_images = MNIST.data[:500]
distances = [
    distance(left, right)
    for left in first_images
    for right in first_images
]

In [6]:
def similarity():
    """Return the index of the MNIST image closest to the first image.

    Every image except ``MNIST.data[0]`` itself is compared against
    ``MNIST.data[0]`` with ``distance``; on ties the earliest image wins.

    Returns:
        The position in ``MNIST.data`` of the nearest image (always >= 1).
    """
    reference = MNIST.data[0]
    # Start from image 1 so that a result is defined even when no later
    # image is closer.
    min_index = 1
    min_distance = distance(reference, MNIST.data[1])
    # start=2 keeps ``index`` aligned with positions in MNIST.data rather
    # than with positions in the sliced view.
    for index, image in enumerate(MNIST.data[2:], start=2):
        actual_distance = distance(image, reference)
        if actual_distance < min_distance:
            min_distance = actual_distance
            min_index = index

    return min_index

In [7]:
# Mean image per label: maps each distinct value in MNIST.target (cast to an
# integer) to the element-wise mean of the images carrying that label.
# The ``np.int`` alias was removed from NumPy; the builtin ``int`` replaces it.
means = {
    n: np.mean(MNIST.data[MNIST.target == n], axis=0)
    for n in np.unique(MNIST.target).astype(int)
}

In [8]:
# Compare the per-label mean images pairwise: MD[i, j] holds the distance
# and AG[i, j] the angle (as returned by ``angle``) between the means of
# labels i and j.  The labels themselves are used as matrix indices.
MD = np.zeros((10, 10))
AG = np.zeros((10, 10))
for i, mean_i in means.items():
    for j, mean_j in means.items():
        MD[i, j] = distance(mean_i, mean_j)
        AG[i, j] = angle(mean_i, mean_j)