# 15.1 Finding an Observation's Nearest Neighbors

In KNN classification, 

an observation's class is predicted from the classes of its 'k' nearest neighbors.

In [7]:
from sklearn import datasets
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import StandardScaler

# Load the iris dataset and keep only its feature matrix
iris = datasets.load_iris()
features = iris.data

# Standardize the features with a StandardScaler fitted on the full matrix
standardizer = StandardScaler()
features_standardized = standardizer.fit_transform(features)

# Build a two-nearest-neighbors model over the standardized features
nearest_neighbors = NearestNeighbors(n_neighbors=2)
nearest_neighbors.fit(features_standardized)

# Observation to look up; it is compared against the standardized features
new_observation = [1, 1, 1, 1]

# Query the model: distances to, and row indices of, the two closest
# training observations
distances, indices = nearest_neighbors.kneighbors([new_observation])

# Show the standardized feature values of those nearest neighbors
features_standardized[indices]

array([[[1.03800476, 0.55861082, 1.10378283, 1.18556721],
        [0.79566902, 0.32841405, 0.76275827, 1.05393502]]])

In [10]:
# Find two nearest neighbors based on euclidean distance
nearest_neighbors_euclidean = NearestNeighbors(
    n_neighbors=2, metric='euclidean').fit(features_standardized)

# Query the euclidean model itself, so the distances displayed below come
# from this model rather than from the variable left over by the earlier
# default-metric query
distances_euclidean, indices_euclidean = nearest_neighbors_euclidean.kneighbors(
    [new_observation])

# View distances (one row per query, one column per neighbor)
distances_euclidean

array([[0.49140089, 0.74294782]])

In [16]:
# Fit a three-nearest-neighbors model (euclidean distance) on the
# standardized features
nearest_neighbors_euclidean = NearestNeighbors(n_neighbors=3,
                                               metric='euclidean')
nearest_neighbors_euclidean.fit(features_standardized)

# Dense 0/1 matrix: row i flags the 3 nearest neighbors of observation i,
# which includes observation i itself
neighbor_graph = nearest_neighbors_euclidean.kneighbors_graph(
    features_standardized)
nearest_neighbors_with_self = neighbor_graph.toarray()

# Zero the diagonal so an observation is no longer flagged as its own
# neighbor
for row_index in range(len(nearest_neighbors_with_self)):
    nearest_neighbors_with_self[row_index, row_index] = 0

# View the first observation's two remaining nearest neighbors
nearest_neighbors_with_self[0]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

# 15.2 Creating a K-Nearest Neighbor Classifier

Given an observation of unknown class,

I need to predict its class based on the class of its neighbors.

In [26]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn import datasets

# Load the iris features (X) and class labels (y)
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Standardize the features
standardizer = StandardScaler()
X_std = standardizer.fit_transform(X)

# Build a 5-neighbor KNN classifier and train it on the standardized data
knn = KNeighborsClassifier(n_neighbors=5, n_jobs=-1)
knn.fit(X_std, y)

# Two observations to classify
new_observations = [[0.75, 0.75, 0.75, 0.75],
                    [1, 1, 1, 1]]

# Predicted class for each of the two observations
knn.predict(new_observations)

array([1, 2])

The class with the highest probability becomes the predicted class.

In [27]:
# Class-membership probabilities: one row per new observation,
# one column per class
class_probabilities = knn.predict_proba(new_observations)
class_probabilities

array([[0. , 0.6, 0.4],
       [0. , 0. , 1. ]])

In [28]:
# Predicted class labels for the same observations
predicted_classes = knn.predict(new_observations)
predicted_classes

array([1, 2])

# 15.3 Identifying the Best Neighborhood Size

I want to select the best value of k for a KNN classifier.

In [44]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.model_selection import GridSearchCV

# Load data
iris = datasets.load_iris()
features = iris.data
target = iris.target

# Create standardizer (fitted inside the pipeline, not up front)
standardizer = StandardScaler()

# Create a KNN classifier; n_neighbors is overridden by the grid search
knn = KNeighborsClassifier(n_neighbors=5, n_jobs=-1)

# Create a pipeline that standardizes and then classifies, so the scaler is
# re-fitted on the training portion of every cross-validation fold
pipe = Pipeline([('standardizer', standardizer), ('knn', knn)])

# Create space of candidate values for k
search_space = [{'knn__n_neighbors': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}]

# Create grid search and fit it on the RAW features: the pipeline already
# standardizes, so standardizing beforehand would only scale the data twice
classifier = GridSearchCV(pipe, search_space, cv=5, verbose=0).fit(
    features, target)

# Best neighborhood size (k)
classifier.best_params_['knn__n_neighbors']

6

# 15.4 Creating a Radius-Based Nearest Neighbor Classifier

Given an observation of unknown class, 

you need to predict its class based on the class of all observations 

within a certain distance.

In radius-based nearest neighbor (RNN) classification,

an observation's class is predicted from the classes of all observations within a given radius 'r'.

In [47]:
from sklearn.neighbors import RadiusNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn import datasets

# Load the iris features and class labels
iris = datasets.load_iris()
features = iris.data
target = iris.target

# Standardize the features
standardizer = StandardScaler()
features_standardized = standardizer.fit_transform(features)

# Build a radius-based neighbors classifier (radius 0.5) and train it
rnn = RadiusNeighborsClassifier(radius=0.5, n_jobs=-1)
rnn.fit(features_standardized, target)

# Create one observation to classify
new_observations = [[1, 1, 1, 1]]

# Predict the class of that observation
rnn.predict(new_observations)

array([2])