In [21]:
# K-NEAREST NEIGHBORS

from sklearn import datasets
from sklearn.neighbors import NearestNeighbors, KNeighborsClassifier
from sklearn.neighbors import RadiusNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.model_selection import GridSearchCV

In [6]:
# FINDING AN OBSERVATION'S NEAREST NEIGHBORS

# only the iris measurements are used here; no targets are involved
iris = datasets.load_iris()
features = iris.data

# rescale the features with StandardScaler before measuring distances
standardizer = StandardScaler()
features_standardized = standardizer.fit_transform(features)

# build the neighbor index over the standardized rows (2 neighbors per query)
nearest_neighbors = NearestNeighbors(n_neighbors=2)
nearest_neighbors.fit(features_standardized)

# query point; it is compared directly against the standardized rows
new_observation = [1, 1, 1, 1]

# the observation is wrapped in a list so a single-row batch is queried
distances, indices = nearest_neighbors.kneighbors([new_observation])

# display the standardized rows selected by the returned indices
features_standardized[indices]

array([[[1.03800476, 0.55861082, 1.10378283, 1.18556721],
        [0.79566902, 0.32841405, 0.76275827, 1.05393502]]])

In [8]:
# request the euclidean metric explicitly instead of the default (minkowski),
# then fit on the standardized features produced by the previous cell
nearestneighbors_euclidean = NearestNeighbors(
    n_neighbors=2, metric='euclidean'
).fit(features_standardized)

In [14]:
# CREATING A K-NEAREST NEIGHBOR CLASSIFIER

# iris feature matrix and class labels
iris = datasets.load_iris()
X, y = iris.data, iris.target

# standardize the features before fitting the classifier
standardizer = StandardScaler()
X_std = standardizer.fit_transform(X)

# 5-neighbor KNN classifier; n_jobs=-1 is passed through to scikit-learn
knn = KNeighborsClassifier(n_neighbors=5, n_jobs=-1)
knn.fit(X_std, y)

# two query points, passed to the model as-is (no further scaling applied)
new_observations = [
    [0.75, 0.75, 0.75, 0.75],
    [1, 1, 1, 1],
]

# predicted class label for each of the two observations
knn.predict(new_observations)

array([1, 2])

In [15]:
# per-class probability estimates for the same two observations
# (one row per observation, one column per class)
knn.predict_proba(new_observations)

array([[0. , 0.6, 0.4],
       [0. , 0. , 1. ]])

In [20]:
# IDENTIFYING THE BEST NEIGHBORHOOD SIZE

# Load data
iris = datasets.load_iris()
features = iris.data
target = iris.target

# create standardizer; it is deliberately NOT fitted here -- as a pipeline
# step it is re-fitted on the training portion of every cross-validation fold
standardizer = StandardScaler()

# create a KNN classifier
knn = KNeighborsClassifier(n_neighbors=5, n_jobs=-1)

# create a pipeline: standardize first, then classify
pipe = Pipeline([('standardizer', standardizer), ('knn', knn)])

# create space of candidate values (k = 1 .. 10)
search_space = [{'knn__n_neighbors': list(range(1, 11))}]

# create grid search
classifier = GridSearchCV(pipe,
                         search_space,
                         cv=5,
                         verbose=0)

# Fit on the RAW features. The pipeline already standardizes, so passing
# pre-standardized data would scale it twice and leak statistics from the
# held-out folds into training (the scaler would have seen the full dataset).
# NOTE: re-run this cell to refresh any previously recorded output.
classifier = classifier.fit(features, target)

# view best neighborhood size
classifier.best_params_['knn__n_neighbors']

6

In [23]:
# CREATE A RADIUS-BASED NEAREST NEIGHBOR CLASSIFIER

# iris feature matrix and class labels
iris = datasets.load_iris()
features, target = iris.data, iris.target

# standardize the features before fitting
standardizer = StandardScaler()
features_standardized = standardizer.fit_transform(features)

# radius-based classifier configured with radius=0.5
rnn = RadiusNeighborsClassifier(radius=0.5, n_jobs=-1)
rnn.fit(features_standardized, target)

# a single query observation (one row)
new_observations = [[1, 1, 1, 1]]

# predicted class for that observation
rnn.predict(new_observations)

array([2])