### k-Nearest Neighbors (kNN)

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neighbors import KNeighborsRegressor
from io import StringIO

### Make fake data

In [None]:
# Tiny hand-made data set: two features (x1, x2) and a binary label y.
# Columns are padded with spaces purely for readability.
data_string = """
x1,   x2, y
 .1, -.1, 0
-.3,  .2, 1
-.4,   0, 1
-.7,  .3, 0
 .1,  .7, 0
-.7,  .9, 1
-.8,  .8, 1
"""
# The separator is a regex that swallows the alignment spaces around each
# comma; the python parsing engine is requested explicitly for it.
df = pd.read_csv(
    StringIO(data_string),
    sep='\\s*,\\s*',
    engine='python',
)
X = df.loc[:, ['x1', 'x2']].to_numpy()  # feature matrix, one row per point
y = df.y.to_numpy()                     # label vector, one entry per point
print(f'df=\n{df},\nX=\n{X},\ny={y}')

### Plot data

In [None]:
# Scatter the two labeled classes and the unlabeled query point at the origin.
is_class0 = df.y == 0
is_class1 = df.y == 1
plt.plot(df.x1[is_class0], df.x2[is_class0], '^r', label='0')  # red triangles
plt.plot(df.x1[is_class1], df.x2[is_class1], 'sb', label='1')  # blue squares
plt.plot(0, 0, 'og', label='unknown')  # green dot
plt.text(x=0, y=.07, s='?', color='green', fontsize='x-large')  # green question mark

# Concentric circles around the query point, sized to contain 1, 3, and 5 points.
theta = np.linspace(start=0, stop=2*np.pi, num=100)
radius = [.25, .5, 1]
linestyle = ['solid', 'dashed', 'dashdot', 'dotted']
circle_color = ['red', 'blue', 'red', 'blue']

# Unit-circle coordinates are shared by every circle; only the scale changes.
unit_x, unit_y = np.cos(theta), np.sin(theta)
# zip stops at the shortest list (radius), so the spare fourth style/color
# entries are simply not used.
for r, style, color in zip(radius, linestyle, circle_color):
    plt.plot(r * unit_x, r * unit_y, linestyle=style, color=color)

plt.axis('square')
plt.legend(loc='lower right')
plt.savefig(fname='kNN.png')  # also writes the figure to the working directory
plt.show(block=False)

### kNN classifier

In [None]:
# Classify the green query point at the origin for several neighborhood sizes.
k_values = [1, 3, 5, 7]
for k in k_values:
    # fit() returns the estimator itself, so construction and fitting chain.
    knn = KNeighborsClassifier(n_neighbors=k, metric='euclidean').fit(X, y)
    green_label = knn.predict([[0, 0]])[0]
    print(f'For k={k}, predict green is {green_label}.')

### kNN regressor

In [None]:
# Same query point, but treat the 0/1 labels as numeric targets and regress.
k_values = [1, 3, 5, 7]
for k in k_values:
    # fit() returns the estimator itself, so construction and fitting chain.
    knn = KNeighborsRegressor(n_neighbors=k, metric='euclidean').fit(X, y)
    green_value = knn.predict([[0, 0]])[0]
    print(f'For k={k}, predict green is {green_value:.3}.')  # 3 significant digits

### Weighted kNN classifier
Recall that with the unweighted kNN classifier above, we saw "For k=3, predict green is 1."

In [None]:
# 3-NN again, but with weights='distance' instead of the default weighting.
k = 3
knn = KNeighborsClassifier(
    n_neighbors=k,
    weights='distance',
    metric='euclidean',
).fit(X, y)  # fit() returns the estimator itself
green_label = knn.predict([[0, 0]])[0]
print(f'For k={k}, predict green is {green_label}.')

Inspect the distances:

In [None]:
# Ask the fitted model for the distances to, and row indices of, the query
# point's k nearest neighbors.
distances, indices = knn.kneighbors([[0, 0]])
with np.printoptions(precision=1):  # precision applies inside this block only
    # The f-strings must be evaluated inside the context manager so the arrays
    # are formatted with the temporary precision.
    report_lines = [
        f'indices={indices}',
        f'y[indices]={y[indices]}',
        f'distances={distances}',
        f'1/distances={1/distances}',
    ]
    print('\n'.join(report_lines))

With unweighted 3-NN, we get 1 (blue). With weighted 3-NN, we get 0 (red) because each neighbor's vote is weighted by 1/distance, and the single red neighbor's weight is greater than the sum of the two blue neighbors' weights.

### A more natural kNN regression example


In [None]:
# Fit kNN regressors on a tiny 1-D data set and plot each fitted curve, one
# figure per value of k.
#
# The data gets its own names (x_train, y_train, X_train) so this cell does not
# overwrite the classification data X and y defined earlier in the notebook;
# earlier cells therefore still work if re-run after this one.
x_train = np.array([1, 2, 3, 5])
y_train = np.array([1, 3, 2, 4])
X_train = x_train.reshape(-1, 1)  # single feature as a column: shape (4, 1)

# Evaluation grid for drawing the fitted curve. It does not depend on k, so it
# is built once instead of on every loop iteration.
x_grid = np.linspace(start=0, stop=6)
X_grid = x_grid.reshape(-1, 1)

k_values = [1, 2, 3, 4]
for k in k_values:
    knn_reg = KNeighborsRegressor(n_neighbors=k, metric='euclidean')
    knn_reg.fit(X_train, y_train)
    y_grid = knn_reg.predict(X_grid)

    plt.plot(x_train, y_train, 'o')  # training points (drawn first, as before)
    plt.title(f'k={k}')
    plt.plot(x_grid, y_grid)         # fitted kNN regression curve
    plt.show(block=False)