## This is where I import the libraries I am going to use:

In [1]:
import pandas as pd
import numpy as np
import math

## Importing data:

In [2]:
## Load the train and test CSV files of the Sonar data
data = pd.read_csv('Sonar/sonar_train.csv')
test = pd.read_csv('Sonar/sonar_test.csv')

## Separate each frame into its 'Class' label column and the remaining feature columns
y_train = data['Class']
x_train = data.drop(columns=['Class'])

y_test = test['Class']
x_test = test.drop(columns=['Class'])




## Euclidean distance algorithm

In [3]:
def euclidean(point_one,point_two):
    '''Return the Euclidean (L2) distance between two points.

    The coordinates are paired up in positional order, so lists, tuples,
    NumPy arrays and pandas Series are all accepted, however a Series is
    labelled.

    Parameters:
        point_one: sized iterable of numeric coordinates.
        point_two: sized iterable of numeric coordinates, same length as
            point_one.

    Returns:
        The distance as a float (0.0 for two empty points).

    Raises:
        ValueError: if the two points do not have the same length.
    '''
    if len(point_one) != len(point_two):
        raise ValueError("point_one and point_two must have the same length")

    total = 0
    # Iterate over the values instead of indexing with [i]: on a pandas Series
    # an integer key is a label lookup, which fails (or relies on a deprecated
    # positional fallback) when the labels are not 0..n-1.
    for a, b in zip(point_one, point_two):
        total += math.pow(abs(a - b),2)
    return math.sqrt(total)


## Manhattan Distance

In [4]:
def manhattan(point_one,point_two):
    '''Return the Manhattan (L1) distance between two points.

    The coordinates are paired up in positional order, so lists, tuples,
    NumPy arrays and pandas Series are all accepted, however a Series is
    labelled.

    Parameters:
        point_one: sized iterable of numeric coordinates.
        point_two: sized iterable of numeric coordinates, same length as
            point_one.

    Returns:
        The sum of the absolute coordinate differences (0 for empty points).

    Raises:
        ValueError: if the two points do not have the same length.
    '''
    if len(point_one) != len(point_two):
        raise ValueError("point_one and point_two must have the same length")

    total = 0
    # Iterate over the values instead of indexing with [i]: on a pandas Series
    # an integer key is a label lookup, which fails (or relies on a deprecated
    # positional fallback) when the labels are not 0..n-1.
    for a, b in zip(point_one, point_two):
        total += abs(a - b)
    return total

## Minkowski

In [5]:
def minkowski(point_one, point_two, q=3):
    '''Return the Minkowski distance of order q between two points.

    q=1 gives the Manhattan distance and q=2 the Euclidean distance. The
    coordinates are paired up in positional order, so lists, tuples, NumPy
    arrays and pandas Series are all accepted, however a Series is labelled.

    Parameters:
        point_one: sized iterable of numeric coordinates.
        point_two: sized iterable of numeric coordinates, same length as
            point_one.
        q: order of the distance; must be greater than 0 (default 3).

    Returns:
        The distance as a float (0.0 for two empty points).

    Raises:
        ValueError: if q is not positive or the points differ in length.
    '''
    # q == 0 would divide by zero in the final root and a negative q does not
    # describe a distance, so reject both with a clear message.
    if q <= 0:
        raise ValueError("q must be greater than 0")
    if len(point_one) != len(point_two):
        raise ValueError("point_one and point_two must have the same length")

    total = 0
    # Iterate over the values instead of indexing with [i]: on a pandas Series
    # an integer key is a label lookup, which fails (or relies on a deprecated
    # positional fallback) when the labels are not 0..n-1.
    for a, b in zip(point_one, point_two):
        total += math.pow(abs(a - b),q)
    return math.pow(total,1/q)


In [6]:
def finder(arr):
    '''Return the row of arr that has the smallest value in column 1.

    arr is a 2-D array whose rows hold (index, distance). The result is a new
    length-2 float array holding columns 0 and 1 of the winning row. When
    several rows share the smallest distance, the first one wins.
    '''
    best_row = 0  # position of the smallest distance seen so far
    for row in range(1, arr.shape[0]):
        # Strict comparison keeps the earliest row on ties
        if arr[row][1] < arr[best_row][1]:
            best_row = row

    result = np.zeros(2)
    result[0] = arr[best_row][0]
    result[1] = arr[best_row][1]
    return result
            
    

In [7]:
def nearestpoint(test_point,train_x,train_y,algo):
    '''Return the label of the training row that lies closest to test_point.

    Parameters:
        test_point: one sample; passed unchanged to the distance function.
        train_x: training features; rows are read with train_x.iloc[i].
        train_y: training labels; read with train_y.iloc[...] at the position
            of the closest row.
        algo: sequence whose first item names the metric ("Manhattan",
            "Euclidean" or "Minkowski"). For "Minkowski" an optional second
            item gives the order q; when it is missing, minkowski's default
            order is used.

    Returns:
        The entry of train_y at the position of the nearest training row.

    Raises:
        ValueError: if the metric name is not recognised (previously every
            distance silently stayed 0 and the first label was returned), or
            if train_x has no rows.
    '''
    # Choose the distance function once instead of re-testing the name per row.
    metric = algo[0]
    if metric == "Manhattan":
        measure = manhattan
    elif metric == "Euclidean":
        measure = euclidean
    elif metric == "Minkowski":
        if len(algo) > 1:
            order = algo[1]
            # An explicit length check replaces the old bare `except:`, which
            # also swallowed real errors (e.g. an invalid q) and quietly
            # recomputed the distance with the default order.
            measure = lambda a, b: minkowski(a, b, order)
        else:
            measure = minkowski
    else:
        raise ValueError("Unknown distance metric: " + repr(metric))

    if len(train_x) == 0:
        raise ValueError("train_x must contain at least one row")

    # Column 0 holds the row position, column 1 the distance to test_point.
    distances = np.zeros((len(train_x),2))
    for i in range(len(train_x)):
        distances[i][0] = i
        distances[i][1] = measure(test_point,train_x.iloc[i])

    closest = finder(distances) #finding the smallest distance

    return train_y.iloc[int(closest[0])] #Returning the class
    
            ##Finish nearest point and finder

## Nearest Neighbour

In [8]:
def nn(train_x,train_y,test_x,algo):
    '''Predict a label for every row of test_x with 1-nearest-neighbour.

    Parameters:
        train_x: training features, forwarded to nearestpoint.
        train_y: training labels, forwarded to nearestpoint.
        test_x: samples to classify; rows are read with test_x.iloc[i].
        algo: sequence whose first item names the metric; for "Minkowski" an
            optional second item gives the order q.

    Returns:
        A NumPy array with one predicted label per row of test_x, in order.
        Labels are stored in full. The previous np.empty(..., dtype='str')
        buffer had dtype '<U1' and cut every label down to its first
        character.
    '''
    # Tell the user when Minkowski falls back to its default order.
    if(algo[0] == 'Minkowski' and len(algo) < 2):
        print("No q was specified therefore default of 3 is applied")

    predictions = [nearestpoint(test_x.iloc[i],train_x,train_y,algo)
                   for i in range(len(test_x))]

    if not predictions:
        # Keep the original empty string array for an empty test set.
        return np.empty(0,dtype='str')
    # Let NumPy pick a dtype wide enough for the longest label.
    return np.array(predictions)
    

In [9]:
def accuracy(actual,pred):
    '''Return the fraction of predictions that equal the actual label.

    Labels are paired up in positional order, so lists, NumPy arrays and
    pandas Series (with any index) can be mixed freely.

    Parameters:
        actual: sized iterable of true labels.
        pred: sized iterable of predicted labels, same length as actual.

    Returns:
        correct / total as a float, or 0.0 when there are no predictions
        (instead of raising ZeroDivisionError).

    Raises:
        ValueError: if actual and pred differ in length.
    '''
    if len(actual) != len(pred):
        raise ValueError("actual and pred must have the same length")
    if len(pred) == 0:
        return 0.0

    correct = 0
    # zip walks the values; actual[i] on a Series would be a label lookup and
    # breaks when the index is not 0..n-1 (e.g. after filtering or shuffling).
    for truth, guess in zip(actual, pred):
        if truth == guess:
            correct += 1
    return correct/len(pred)




In [10]:
def sensitivity(a_class,p_class,Class):
    '''Return the sensitivity (recall) for Class: TP / (TP + FN).

    Labels are paired up in positional order, so lists, NumPy arrays and
    pandas Series (with any index) can be mixed freely.

    Parameters:
        a_class: sized iterable of actual labels.
        p_class: sized iterable of predicted labels, same length as a_class.
        Class: the label treated as the positive class.

    Returns:
        The sensitivity as a float, or 0.0 when Class never occurs in a_class
        (instead of raising ZeroDivisionError).

    Raises:
        ValueError: if a_class and p_class differ in length.
    '''
    if len(a_class) != len(p_class):
        raise ValueError("a_class and p_class must have the same length")

    tp = 0 #True pos
    tpfn = 0 #true pos + false neg
    # zip walks the values; a_class[i] on a Series would be a label lookup and
    # breaks when the index is not 0..n-1.
    for truth, guess in zip(a_class, p_class):
        if truth == Class:
            tpfn += 1
            if guess == Class:
                tp += 1
    if tpfn == 0:
        return 0.0
    return tp / tpfn

In [11]:
def specifity(a_class,p_class,Class):
    '''Return the specificity for Class: TN / (TN + FP).

    A sample is negative when its actual label is not Class. It is a true
    negative when the prediction is not Class either. The old test required
    the prediction to equal the actual label, which gives the same answer for
    two classes but undercounts true negatives with three or more.

    Labels are paired up in positional order, so lists, NumPy arrays and
    pandas Series (with any index) can be mixed freely.

    Parameters:
        a_class: sized iterable of actual labels.
        p_class: sized iterable of predicted labels, same length as a_class.
        Class: the label treated as the positive class.

    Returns:
        The specificity as a float, or 0.0 when every actual label is Class
        (instead of raising ZeroDivisionError).

    Raises:
        ValueError: if a_class and p_class differ in length.
    '''
    if len(a_class) != len(p_class):
        raise ValueError("a_class and p_class must have the same length")

    tn = 0 #true negative
    tnfp = 0 #true negative + false positive
    # zip walks the values; a_class[i] on a Series would be a label lookup and
    # breaks when the index is not 0..n-1.
    for truth, guess in zip(a_class, p_class):
        if truth != Class:
            tnfp += 1
            if guess != Class:
                tn += 1
    if tnfp == 0:
        return 0.0
    return tn/tnfp

In [12]:
def precision(a_class,p_class,Class):
    '''Return the precision for Class: TP / (TP + FP).

    Labels are paired up in positional order, so lists, NumPy arrays and
    pandas Series (with any index) can be mixed freely.

    Parameters:
        a_class: sized iterable of actual labels.
        p_class: sized iterable of predicted labels, same length as a_class.
        Class: the label treated as the positive class.

    Returns:
        The precision as a float, or 0.0 when Class is never predicted
        (instead of raising ZeroDivisionError).

    Raises:
        ValueError: if a_class and p_class differ in length.
    '''
    if len(a_class) != len(p_class):
        raise ValueError("a_class and p_class must have the same length")

    tp = 0 #True positive
    tpfp = 0 #True pos + false pos
    # zip walks the values; a_class[i] on a Series would be a label lookup and
    # breaks when the index is not 0..n-1.
    for truth, guess in zip(a_class, p_class):
        if guess == Class:
            tpfp += 1
            if truth == Class:
                tp += 1
    if tpfp == 0:
        return 0.0
    return tp/tpfp

## Manhattan Distance

In [13]:
# Classify every test row by its nearest training row under the Manhattan distance
man = nn(train_x=x_train, train_y=y_train, test_x=x_test, algo=["Manhattan"])

In [14]:
# Score the Manhattan predictions against the true test labels, with 'M' as the positive class
print(f"The accuracy of manhattan distance is {accuracy(y_test, man)}.")
print(f"The sensitivity of manhattan distance is {sensitivity(y_test, man, 'M')}.")
print(f"The specificity of manhattan distance is {specifity(y_test, man, 'M')}.")
print(f"The precision of manhattan distance is {precision(y_test, man, 'M')}.")

The accuracy of manhattan distance is 0.8840579710144928.
The sensitivity of manhattan distance is 0.9459459459459459.
The specificity of manhattan distance is 0.8125.
The precision of manhattan distance is 0.8536585365853658.


## Euclidean Distance

In [15]:
# Classify every test row by its nearest training row under the Euclidean distance
euc = nn(train_x=x_train, train_y=y_train, test_x=x_test, algo=["Euclidean"])

In [16]:
# Score the Euclidean predictions against the true test labels, with 'M' as the positive class
print(f"The accuracy of euclidean distance is {accuracy(y_test, euc)}.")
print(f"The sensitivity of euclidean distance is {sensitivity(y_test, euc, 'M')}.")
print(f"The specificity of euclidean distance is {specifity(y_test, euc, 'M')}.")
print(f"The precision of euclidean distance is {precision(y_test, euc, 'M')}.")

The accuracy of euclidean distance is 0.8985507246376812.
The sensitivity of euclidean distance is 0.972972972972973.
The specificity of euclidean distance is 0.8125.
The precision of euclidean distance is 0.8571428571428571.


## Minkowski Distance (Q = 1)

In [17]:
# Minkowski distance of order 1
mink_1 = nn(train_x=x_train, train_y=y_train, test_x=x_test, algo=["Minkowski", 1])

In [18]:
# Score the order-1 Minkowski predictions against the true test labels, with 'M' as the positive class
print(f"The accuracy of Minkowski distance is {accuracy(y_test, mink_1)}.")
print(f"The sensitivity of Minkowski distance is {sensitivity(y_test, mink_1, 'M')}.")
print(f"The specificity of Minkowski distance is {specifity(y_test, mink_1, 'M')}.")
print(f"The precision of Minkowski distance is {precision(y_test, mink_1, 'M')}.")

The accuracy of Minkowski distance is 0.8840579710144928.
The sensitivity of Minkowski distance is 0.9459459459459459.
The specificity of Minkowski distance is 0.8125.
The precision of Minkowski distance is 0.8536585365853658.


## Minkowski Distance (Q = 2)

In [19]:
# Minkowski distance of order 2
mink_2 = nn(train_x=x_train, train_y=y_train, test_x=x_test, algo=["Minkowski", 2])

In [20]:
# Score the order-2 Minkowski predictions against the true test labels, with 'M' as the positive class
print(f"The accuracy of Minkowski distance is {accuracy(y_test, mink_2)}.")
print(f"The sensitivity of Minkowski distance is {sensitivity(y_test, mink_2, 'M')}.")
print(f"The specificity of Minkowski distance is {specifity(y_test, mink_2, 'M')}.")
print(f"The precision of Minkowski distance is {precision(y_test, mink_2, 'M')}.")

The accuracy of Minkowski distance is 0.8985507246376812.
The sensitivity of Minkowski distance is 0.972972972972973.
The specificity of Minkowski distance is 0.8125.
The precision of Minkowski distance is 0.8571428571428571.


### Testing


In [21]:
#import k-nn classifier
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
import operator


# Cross-check the hand-written 1-NN (Manhattan) results against scikit-learn.
knn=KNeighborsClassifier(n_neighbors=1, metric='manhattan')

# Fit on every column except the 'Class' label
knn.fit(data.loc[:,data.columns != 'Class'],data.loc[:,'Class'])

pred = knn.predict(test.loc[:,test.columns  != 'Class'])

# scikit-learn metrics take (y_true, y_pred). accuracy_score used to be called
# with the arguments swapped; the value is the same either way, but the order
# now matches the documented signature and the two calls below.
print(accuracy_score(test.loc[:,'Class'],pred))

print(precision_score(y_test,pred,average='binary',pos_label='M'))

print(recall_score(y_test,pred,average='binary',pos_label='M'))

0.8840579710144928
0.8536585365853658
0.9459459459459459
