In [1]:
# Classification using KNN

import numpy as np
import matplotlib.pyplot as plt

# Training set kept as (height in cm, weight in kg, label) records so each
# feature row stays next to its label; split below into the feature matrix
# and the label list.
samples = [
    (158, 64, 'male'),
    (170, 86, 'male'),
    (183, 84, 'male'),
    (191, 80, 'male'),
    (155, 49, 'female'),
    (163, 59, 'female'),
    (180, 67, 'female'),
    (158, 54, 'female'),
    (170, 67, 'female'),
]

# Feature matrix: one [height, weight] row per sample.
X_train = np.array([[height, weight] for height, weight, _ in samples])

# Plain Python list of labels, in the same order as the rows of X_train.
y_train = [label for _, _, label in samples]

# Scatter plot of the training data: height on the x-axis, weight on the
# y-axis. Marker shape encodes the label ('x' for 'male', diamond otherwise).
fig, ax = plt.subplots()
ax.set_title('Human Heights and Weights by Sex')
ax.set_xlabel('Height in cm')
ax.set_ylabel('Weight in kg')

# One scatter call per sample so each point gets the marker for its own label.
for (height, weight), label in zip(X_train, y_train):
    marker = 'x' if label == 'male' else 'D'
    ax.scatter(height, weight, c='k', marker=marker)

ax.grid(True)
plt.show()

<Figure size 640x480 with 1 Axes>

In [2]:
# Euclidean distance from the query sample (155 cm, 70 kg) to every training
# sample; the smallest distances identify its nearest neighbors.

X_test = np.array([[155, 70]])
# Broadcasting subtracts the single query row from each training row; the
# norm along axis 1 then gives one distance per training sample.
offsets = X_train - X_test
distances = np.linalg.norm(offsets, axis=1)
print(distances)

[ 6.70820393 21.9317122  31.30495168 37.36308338 21.         13.60147051
 25.17935662 16.2788206  15.29705854]


In [4]:
# Rank the training samples by distance to the query point, keep the three
# closest, and look up the labels that belong to them.
nearest_neighbor_indices = np.argsort(distances)[:3]
nearest_neighbor_genders = np.asarray(y_train)[nearest_neighbor_indices]
print(nearest_neighbor_genders)

['male' 'female' 'female']


In [5]:
# Majority vote: tally the labels of the nearest neighbors and report the
# label that occurs most often.
from collections import Counter
b = Counter(nearest_neighbor_genders)
# most_common(1) returns a one-element list of (label, count) pairs.
top_label, _ = b.most_common(1)[0]
print(top_label)

female


In [6]:
# Encode the string labels as integers before fitting the scikit-learn KNN
# classifier; the fitted binarizer is kept so predictions can be decoded later.
from sklearn.preprocessing import LabelBinarizer
from sklearn.neighbors import KNeighborsClassifier

lb = LabelBinarizer().fit(y_train)
# transform() yields a single column holding one 0/1 code per training label.
y_train_binarized = lb.transform(y_train)
print(y_train_binarized)

[[1]
 [1]
 [1]
 [1]
 [0]
 [0]
 [0]
 [0]
 [0]]


In [7]:
# Fit a K-nearest-neighbors classifier on the binarized labels.
K = 3  # number of neighbors that vote on each prediction
clf = KNeighborsClassifier(n_neighbors=K)
# The binarizer output is a single column; flatten it to the 1-D label vector
# passed as the training target.
clf.fit(X_train, y_train_binarized.ravel())

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=3, p=2,
           weights='uniform')

In [12]:
# Classify the query sample with the fitted KNN model and decode the numeric
# prediction back to its original string label.
# Reuse X_test so the classifier is asked about exactly the same sample that
# was used for the manual distance computation.
predicted_codes = clf.predict(X_test)  # one numeric code per query row
y_pred_binarized = predicted_codes[0]
# inverse_transform() operates on arrays laid out like the output of
# fit_transform() (one column of codes), so pass the predictions in that shape
# rather than a bare scalar.
y_pred = lb.inverse_transform(predicted_codes.reshape(-1, 1))
print(y_pred[0])

female
