In [1]:
import numpy as np

In [2]:
# THE FUNCTION CALCULATES THE EUCLIDEAN DISTANCE BETWEEN TWO N-DIMENSIONAL POINTS.

def Euclideandistance(x_1,x_2):
    # Input:  Vector n-dimensional x_1 (NumPy array or array-like of numbers).
    #         Vector n-dimensional x_2 (expected to have the same shape as x_1).
    # Output: Euclidean (L2) distance between input points x_1 and x_2, as a float.

    # Work in float64: with unsigned-integer inputs (e.g. uint8 pixel values) the
    # subtraction wraps around instead of going negative, and squaring narrow
    # integer dtypes overflows; both silently corrupt the distance.
    diff = np.asarray(x_1, dtype=np.float64) - np.asarray(x_2, dtype=np.float64)

    return np.sqrt(np.sum(np.square(diff)))

In [3]:
# THE FUNCTION CALCULATES THE MANHATTAN DISTANCE BETWEEN TWO N-DIMENSIONAL POINTS.

def Manhattandistance(x_1,x_2):
    # Input:  Vector n-dimensional x_1 (NumPy array or array-like of numbers).
    #         Vector n-dimensional x_2 (expected to have the same shape as x_1).
    # Output: Manhattan (L1) distance between input points x_1 and x_2, as a float.

    # Work in float64: subtracting unsigned-integer arrays (e.g. uint8) wraps
    # around instead of producing a negative difference, which would turn a
    # small gap into a huge one.
    diff = np.asarray(x_1, dtype=np.float64) - np.asarray(x_2, dtype=np.float64)

    return np.sum(np.absolute(diff))

In [4]:
# THE FUNCTION CALCULATES THE MINKOWSKI DISTANCE BETWEEN TWO N-DIMENSIONAL POINTS.

def Minkowskidistance(x_1,x_2,p):
    # Input:  Vector n-dimensional x_1 (NumPy array or array-like of numbers).
    #         Vector n-dimensional x_2 (expected to have the same shape as x_1).
    #         Order p of the distance; must be > 0 (p=1 is Manhattan, p=2 is
    #         Euclidean, p=inf is the Chebyshev / maximum-coordinate distance).
    # Output: Minkowski distance of order p between input points x_1 and x_2.
    # Raises: ValueError if p is not a positive number.

    # "not p > 0" also rejects NaN, for which every comparison is False.
    if not p > 0:
        raise ValueError("p must be a positive number, got {!r}".format(p))

    # float64 avoids unsigned wrap-around on subtraction and integer overflow
    # when raising the differences to the p-th power.
    gap = np.absolute(np.asarray(x_1, dtype=np.float64) - np.asarray(x_2, dtype=np.float64))

    if np.isinf(p):
        # Limit of the p-norm as p grows: the largest coordinate difference.
        return np.max(gap) if gap.size else np.float64(0.0)

    return np.power(np.sum(np.power(gap, p)), 1.0 / p)

In [5]:
# THE FUNCTION CALCULATES THE HAMMING DISTANCE BETWEEN TWO N-DIMENSIONAL POINTS.

def Hammingdistance(x_1,x_2):
    # Input:  Vector n-dimensional x_1 (NumPy array or array-like).
    #         Vector n-dimensional x_2 (expected to have the same shape as x_1).
    # Output: Hamming distance between x_1 and x_2, i.e. the number of
    #         positions at which the two vectors hold different values.

    # Convert first: for plain Python lists "!=" compares the lists as whole
    # objects and yields a single bool, so the count could only ever be 0 or 1.
    mismatches = np.asarray(x_1) != np.asarray(x_2)

    return np.sum(mismatches)

In [6]:
# THE FUNCTION CALCULATES THE JACCARD DISTANCE BETWEEN TWO N-DIMENSIONAL POINTS.

def Jaccarddistance(x_1,x_2):
    # Input:  Vector n-dimensional x_1.
    #         Vector n-dimensional x_2.
    # Output: Jaccard distance between x_1 and x_2, where each vector is
    #         treated as the set of its distinct values:
    #         1 - |intersection| / |union|.

    shared_count = np.intersect1d(x_1,x_2).size
    total_count = np.union1d(x_1,x_2).size
    similarity = shared_count/total_count

    return 1 - similarity

In [7]:
# THE FUNCTION CALCULATES THE COSINE DISTANCE BETWEEN TWO N-DIMENSIONAL POINTS.

def Cosinedistance(x_1,x_2):
    # Input:  Vector n-dimensional x_1 (NumPy array or array-like of numbers).
    #         Vector n-dimensional x_2 (expected to have the same shape as x_1).
    # Output: Cosine distance between x_1 and x_2: 1 - (x_1 . x_2) / (|x_1| * |x_2|).
    #         If either vector is all zeros the denominator is 0 and the
    #         result is nan (NumPy emits a RuntimeWarning).

    # float64 prevents the element-wise products and squares from overflowing
    # when the inputs use a narrow integer dtype (e.g. uint8), which would
    # otherwise give wrong - even negative - distances.
    u = np.asarray(x_1, dtype=np.float64)
    v = np.asarray(x_2, dtype=np.float64)

    dot_product = np.sum(np.multiply(u, v))
    norm_product = np.sqrt(np.sum(np.square(u))) * np.sqrt(np.sum(np.square(v)))

    return 1 - (dot_product / norm_product)

In [8]:
# THE FUNCTION COMPUTES THE DISTANCES BETWEEN A TEST EXAMPLE AND THE ENTIRE TRAINING SET.

def distancesCalculator(x_test,X_train,distance,p=3):
    # Input:  Single test example x_test.
    #         Matrix of the entire training set X_train (one example per column).
    #         Name of the distance to use: "Euclidean", "Manhattan", "Minkowski",
    #         "Hamming", "Jaccard" or "Cosine".
    #         Order p used only by the Minkowski distance (default 3, the value
    #         that was previously hard-coded).
    # Output: List of distances between the test example x_test and every
    #         column of the training set X_train, in column order.
    # Raises: ValueError if the distance name is not one of the supported ones.

    # Resolve the metric once, up front, so that an unsupported name fails with
    # a clear error instead of an UnboundLocalError inside the loop.
    if distance == "Euclidean":
        metric = Euclideandistance
    elif distance == "Manhattan":
        metric = Manhattandistance
    elif distance == "Minkowski":
        def metric(a, b):
            return Minkowskidistance(a, b, p=p)
    elif distance == "Hamming":
        metric = Hammingdistance
    elif distance == "Jaccard":
        metric = Jaccarddistance
    elif distance == "Cosine":
        metric = Cosinedistance
    else:
        raise ValueError(
            "Unknown distance {!r}; expected one of: Euclidean, Manhattan, "
            "Minkowski, Hamming, Jaccard, Cosine".format(distance)
        )

    return [metric(x_test, X_train[:, j]) for j in range(X_train.shape[1])]

In [9]:
# GIVEN THE DISTANCES BETWEEN A TEST EXAMPLE AND THE TRAINING EXAMPLES, THE FUNCTION ESTIMATES ITS LABEL.

def labelEstimator(distances,X_train,y_train,K):
    # Input:  List of distances between one test example and the entire training set X_train.
    #         Matrix of the entire training set X_train (kept for interface
    #         compatibility; the vote only needs the distances and the labels).
    #         Vector of labels for the training set y_train, either 1-D or a
    #         row vector of shape (1, m); for a 2-D input the first row is used.
    #         Hyper-parameter K (number of nearest neighbours that vote), K >= 1.
    # Output: Estimated label for one test example: the most frequent label
    #         among the K nearest training examples. On a tie in the vote the
    #         smallest label wins (np.unique returns the values sorted).
    # Raises: ValueError if K < 1 or if distances and labels differ in length.

    if K < 1:
        raise ValueError("K must be a positive integer, got {!r}".format(K))

    labels = np.asarray(y_train)
    if labels.ndim > 1:
        labels = labels[0]

    dist = np.asarray(distances)
    if dist.size != labels.size:
        raise ValueError(
            "Got {} distances for {} training labels".format(dist.size, labels.size)
        )

    # Sort only the distances rather than stacking labels, features and
    # distances into one matrix: that avoids copying the whole training set
    # for every query and keeps the labels in their own dtype.
    # A stable sort makes equal distances resolve by training-set order.
    nearest = np.argsort(dist, kind="stable")[:K]

    (values,counts) = np.unique(labels[nearest],return_counts=True)

    return values[counts.argmax()]

In [10]:
# THE FUNCTION CLASSIFIES THE TEST/VALIDATION EXAMPLES.

def KNNModel(X_train,y_train,X_test,K=3,distance="Euclidean"):
    # Input:  Matrix of the entire training set X_train.
    #         Vector of labels for the training set y_train.
    #         Matrix of the entire test set X_test (one example per column).
    #         Hyper-parameter K (number of neighbours that vote).
    #         Name of the distance used by the K-NN algorithm.
    # Output: List with one predicted integer label per column of X_test.

    def classify(column):
        # Distances from this test column to every training example, then a
        # majority vote among the K closest ones.
        query = X_test[:,column]
        to_training = distancesCalculator(query,X_train,distance)
        return int(labelEstimator(to_training,X_train,y_train,K))

    return [classify(column) for column in range(X_test.shape[1])]

In [11]:
# THE FUNCTION COMPUTES PRECISION, RECALL, F1 AND ACCURACY FOR BINARY (0/1) PREDICTIONS.

def evalModelKNN(y_predicted,y_gt):
    # Input:  Vector (or list) of K-NN predicted binary labels y_predicted (0/1).
    #         Vector (or list) of ground-truth binary labels y_gt (0/1).
    # Output: Precision of the results, in percent.
    #         Recall of the results, in percent.
    #         F1 of the results, in percent.
    #         Accuracy of the results, in percent.
    #         All four are rounded to 2 decimals; a metric whose denominator
    #         is zero (e.g. precision when nothing was predicted positive) is
    #         reported as 0.0 instead of nan.
    # Raises: ValueError if the two inputs do not hold the same number of labels.

    # Flatten both inputs so a list, a row vector and a column vector all line
    # up element by element instead of broadcasting against each other.
    predicted = np.asarray(y_predicted).ravel()
    actual = np.asarray(y_gt).ravel()
    if predicted.size != actual.size:
        raise ValueError(
            "y_predicted has {} labels but y_gt has {}".format(predicted.size, actual.size)
        )

    # Explicit comparisons instead of arithmetic on the labels: the
    # difference-based test would wrap around for unsigned label dtypes.
    TP = np.count_nonzero((predicted == 1) & (actual == 1))
    FP = np.count_nonzero((predicted == 1) & (actual == 0))
    TN = np.count_nonzero((predicted == 0) & (actual == 0))
    FN = np.count_nonzero((predicted == 0) & (actual == 1))

    def percentage(numerator, denominator):
        # Guard the 0/0 case so callers get a number rather than nan.
        if denominator == 0:
            return np.round(0.0, decimals=2)
        return np.round((numerator/denominator)*100, decimals=2)

    Precision = percentage(TP, TP+FP)
    Recall = percentage(TP, TP+FN)
    # F1 from the raw counts (harmonic mean of precision and recall), so it is
    # not affected by the rounding already applied to Precision and Recall.
    F1 = percentage(2*TP, 2*TP+FP+FN)
    Accuracy = percentage(TP+TN, TP+TN+FP+FN)

    return Precision, Recall, F1, Accuracy