In [1]:
import pandas as pd
import numpy as np
from sklearn import metrics
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier

#### Loading the dataset

In [2]:
# Input files are looked up relative to the notebook's working directory.
TRAIN_CSV = 'train.csv'
TEST_CSV = 'test.csv'

# `df` is the training DataFrame; `tf` is the test DataFrame (plain pandas
# data, not a TensorFlow alias).
df = pd.read_csv(TRAIN_CSV)
tf = pd.read_csv(TEST_CSV)

#### Splitting features and labels

In [3]:
# Positional split of the training frame: every column except the last one is
# treated as a feature, and the last column is treated as the label.
train_feature_frame = df.iloc[:, :-1]
train_label_series = df.iloc[:, -1]

# Downstream cells work on the raw array values rather than pandas objects.
X_train = train_feature_frame.values
y_train = train_label_series.values

In [4]:
# Positional split of the test frame: columns 1 .. second-to-last are the
# features, and the last column is the label.
# NOTE(review): unlike the training split, column 0 is skipped here —
# presumably test.csv carries an extra leading column (e.g. an id/index);
# confirm against the file so train and test features line up.
test_feature_frame = tf.iloc[:, 1:-1]
test_label_series = tf.iloc[:, -1]

X_test = test_feature_frame.values
y_test = test_label_series.values

### Manhattan Distance

In [5]:
# 3-nearest-neighbours classifier with p=1 (the Manhattan-distance case this
# section compares). `fit` returns the estimator itself, so construction and
# fitting are chained.
classifier = KNeighborsClassifier(p=1, n_neighbors=3).fit(X_train, y_train)

# Class predictions for the test features, plus the per-class probabilities
# that are printed below.
y_pred_manhattan = classifier.predict(X_test)
y_pred_manhattan_p = classifier.predict_proba(X_test)
print(y_pred_manhattan_p)

[[0.         1.        ]
 [0.66666667 0.33333333]
 [1.         0.        ]
 [0.         1.        ]
 [1.         0.        ]
 [0.         1.        ]
 [1.         0.        ]
 [1.         0.        ]
 [1.         0.        ]
 [1.         0.        ]
 [0.         1.        ]
 [1.         0.        ]
 [1.         0.        ]
 [1.         0.        ]
 [0.66666667 0.33333333]
 [0.         1.        ]
 [0.         1.        ]
 [1.         0.        ]
 [1.         0.        ]
 [1.         0.        ]]


In [6]:
# Report the confusion matrix first, then each score as a percentage rounded
# to two decimals. The three scores share one print format, so they are driven
# from a (label, scoring function) table.
print("Metrics for Manhattan Distance\n")
print("Confusion Matrix :\n", metrics.confusion_matrix(y_test, y_pred_manhattan), "\n")
for metric_label, metric_fn in (
    ("Accuracy : ", metrics.accuracy_score),
    ("Precision : ", metrics.precision_score),
    ("F Score : ", metrics.f1_score),
):
    print(metric_label, round(metric_fn(y_test, y_pred_manhattan) * 100, 2), "%\n")

Metrics for Manhattan Distance

Confusion Matrix :
 [[14  0]
 [ 0  6]] 

Accuracy :  100.0 %

Precision :  100.0 %

F Score :  100.0 %



### Euclidean Distance

In [7]:
# 3-nearest-neighbours classifier with p=2 (the Euclidean-distance case this
# section compares). `fit` returns the estimator itself, so construction and
# fitting are chained.
classifier = KNeighborsClassifier(p=2, n_neighbors=3).fit(X_train, y_train)

# Class predictions for the test features, plus the per-class probabilities
# that are printed below.
y_pred_Euclidian = classifier.predict(X_test)
y_pred_Euclidian_p = classifier.predict_proba(X_test)
print(y_pred_Euclidian_p)

[[0.         1.        ]
 [0.66666667 0.33333333]
 [1.         0.        ]
 [0.         1.        ]
 [1.         0.        ]
 [0.         1.        ]
 [1.         0.        ]
 [1.         0.        ]
 [1.         0.        ]
 [1.         0.        ]
 [0.         1.        ]
 [1.         0.        ]
 [1.         0.        ]
 [1.         0.        ]
 [0.33333333 0.66666667]
 [0.         1.        ]
 [0.         1.        ]
 [1.         0.        ]
 [1.         0.        ]
 [1.         0.        ]]


In [8]:
# Report the confusion matrix first, then each score as a percentage rounded
# to two decimals. The three scores share one print format, so they are driven
# from a (label, scoring function) table.
print("Metrics for Euclidean Distance\n")
print("Confusion Matrix :\n", metrics.confusion_matrix(y_test, y_pred_Euclidian), "\n")
for metric_label, metric_fn in (
    ("Accuracy : ", metrics.accuracy_score),
    ("Precision : ", metrics.precision_score),
    ("F Score : ", metrics.f1_score),
):
    print(metric_label, round(metric_fn(y_test, y_pred_Euclidian) * 100, 2), "%\n")

Metrics for Euclidean Distance

Confusion Matrix :
 [[13  1]
 [ 0  6]] 

Accuracy :  95.0 %

Precision :  85.71 %

F Score :  92.31 %



## From this output we can see that the two distance metrics give different results. On this dataset, Manhattan distance achieves 100% accuracy, precision and F score, whereas Euclidean distance achieves 95% accuracy, 85.71% precision and a 92.31% F score.

# But when we standardize the features with `StandardScaler`, both metrics give the same results. The accuracy, precision and F score below show that Manhattan and Euclidean distance yield the same performance once `StandardScaler` is applied.

In [9]:
# Fit the scaler on the training features only, then apply that same fitted
# scaler to both splits.
# NOTE: X_train and X_test are rebound to their scaled versions here, so every
# cell executed after this one (including a re-run of an earlier cell) sees
# scaled features rather than the raw values.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [10]:
# Same 3-NN / p=1 (Manhattan) experiment as before, now run on whatever
# X_train / X_test currently hold (the scaled features once the scaling cell
# has been executed).
classifier = KNeighborsClassifier(p=1, n_neighbors=3).fit(X_train, y_train)
y_pred_manhattan = classifier.predict(X_test)
y_pred_manhattan_p = classifier.predict_proba(X_test)

# Confusion matrix, then each score as a percentage rounded to two decimals.
print("Metrics for Manhattan Distance\n")
print("Confusion Matrix :\n", metrics.confusion_matrix(y_test, y_pred_manhattan), "\n")
for metric_label, metric_fn in (
    ("Accuracy : ", metrics.accuracy_score),
    ("Precision : ", metrics.precision_score),
    ("F Score : ", metrics.f1_score),
):
    print(metric_label, round(metric_fn(y_test, y_pred_manhattan) * 100, 2), "%\n")

Metrics for Manhattan Distance

Confusion Matrix :
 [[14  0]
 [ 0  6]] 

Accuracy :  100.0 %

Precision :  100.0 %

F Score :  100.0 %



In [11]:
# Same 3-NN / p=2 (Euclidean) experiment as before, now run on whatever
# X_train / X_test currently hold (the scaled features once the scaling cell
# has been executed).
classifier = KNeighborsClassifier(p=2, n_neighbors=3).fit(X_train, y_train)
y_pred_Euclidian = classifier.predict(X_test)
y_pred_Euclidian_p = classifier.predict_proba(X_test)

# Confusion matrix, then each score as a percentage rounded to two decimals.
print("Metrics for Euclidean Distance\n")
print("Confusion Matrix :\n", metrics.confusion_matrix(y_test, y_pred_Euclidian), "\n")
for metric_label, metric_fn in (
    ("Accuracy : ", metrics.accuracy_score),
    ("Precision : ", metrics.precision_score),
    ("F Score : ", metrics.f1_score),
):
    print(metric_label, round(metric_fn(y_test, y_pred_Euclidian) * 100, 2), "%\n")

Metrics for Euclidean Distance

Confusion Matrix :
 [[14  0]
 [ 0  6]] 

Accuracy :  100.0 %

Precision :  100.0 %

F Score :  100.0 %

