In [6]:
# 15.1 Finding an Observation’s Nearest Neighbors

from sklearn import datasets
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import StandardScaler

# Load the iris measurements and standardize every feature column
iris = datasets.load_iris()
features = iris.data
standardizer = StandardScaler()
features_standardized = standardizer.fit_transform(features)

# Build an unsupervised neighbor index that returns the two closest rows
nearest_neighbors = NearestNeighbors(n_neighbors=2)
nearest_neighbors.fit(features_standardized)

# A single query point; it is used as-is (not passed through the scaler),
# so it is treated as already being on the standardized scale
new_observations = [1, 1, 1, 1]

# kneighbors expects a 2-D batch, so the lone observation is wrapped in a list
distances, indices = nearest_neighbors.kneighbors([new_observations])

# Show the standardized feature rows of the two nearest neighbors
features_standardized[indices]

array([[[1.03800476, 0.55861082, 1.10378283, 1.18556721],
        [0.79566902, 0.32841405, 0.76275827, 1.05393502]]])

In [7]:
#  15.2 Creating a K-Nearest Neighbor Classifier
from sklearn.neighbors import KNeighborsClassifier
# Fit a 5-neighbor classifier on the standardized features (n_jobs=-1: all cores)
knn = KNeighborsClassifier(n_neighbors=5, n_jobs=-1)
knn.fit(features_standardized, iris.target)

# Two query rows, handed to the model as-is (no scaling is applied to them here)
new_observations = [
    [0.75, 0.75, 0.75, 0.75],
    [1, 1, 1, 1],
]

# Predict the class label of each query row
knn.predict(new_observations)

array([1, 2])

In [10]:
from sklearn.model_selection import GridSearchCV

# 15.3 Identifying the Best Neighborhood Size

from sklearn.pipeline import Pipeline, FeatureUnion

# Load data
iris = datasets.load_iris()
features = iris.data
target = iris.target

# Standardization is done ONLY inside the pipeline. During cross-validation the
# pipeline is re-fit on each training fold, so the scaler never sees the
# held-out fold. Pre-scaling the full dataset before the search (as this cell
# previously did) leaks hold-out statistics into training and then scales the
# data a second time inside the pipeline.
std_scaler = StandardScaler()
knn = KNeighborsClassifier(n_neighbors=5, n_jobs=-1)
pipe = Pipeline([('standardizer', std_scaler), ('knn', knn)])

# Candidate neighborhood sizes: k = 1..10
params_grid = {'knn__n_neighbors': list(range(1, 11))}

# 5-fold grid search over k, fitted on the RAW features (the pipeline scales them)
clf = GridSearchCV(pipe, param_grid=params_grid, cv=5, verbose=0).fit(features, target)

# Best neighborhood size (k)
# NOTE: any previously recorded output for this cell predates the leakage fix;
# re-run the cell to refresh it.
clf.best_params_["knn__n_neighbors"]

6

In [11]:
# 15.4 Creating a Radius-Based Nearest Neighbor Classifier
# Load libraries
from sklearn.neighbors import RadiusNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn import datasets
# Fetch the iris dataset and separate features from labels
iris = datasets.load_iris()
features, target = iris.data, iris.target

# Standardize the features
standardizer = StandardScaler()
features_standardized = standardizer.fit_transform(features)

# Train a radius-based neighbors classifier (radius 0.5, all cores)
rnn = RadiusNeighborsClassifier(radius=.5, n_jobs=-1)
rnn.fit(features_standardized, target)

# Create a single observation
new_observations = [[1, 1, 1, 1]]

# Predict the class of that observation
rnn.predict(new_observations)

array([2])