# KNN

In [1]:
#Setting the dataset
from sklearn.datasets import load_breast_cancer
import pandas as pd
pd.set_option('display.max_columns', None)
data = load_breast_cancer()
features = pd.DataFrame(data.data, columns=[data.feature_names])
label = pd.Series(data.target, name='Target')

In [2]:
label.head(100)

0     0
1     0
2     0
3     0
4     0
     ..
95    0
96    1
97    1
98    1
99    0
Name: Target, Length: 100, dtype: int32

In [15]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

#Normalizing predicting variables
minMaxNorm = MinMaxScaler(feature_range=(0,1))
features_norm = minMaxNorm.fit_transform(features)

#Splitting test and train data
features_train, features_test, label_train, label_test = train_test_split(features_norm, label, test_size=0.3, random_state=16)

#Setting the model and training it
model = KNeighborsClassifier(n_neighbors=5)
model.fit(features_train, label_train)

#Score
result = model.score(features_test, label_test)
print(f'Accuracy {result:>10.3f}')

Accuracy      0.965


### FINE TUNING THE PARAMETERS

In [19]:
import numpy as np
from sklearn.model_selection import GridSearchCV

#Defining the variables values to be used in the KNN
k_values = np.array([3,5,7,9,11])
distance_type_metric = ['minkowski','chebyshev']
p_values = np.array([1,2,3,4])
grid_values = {'n_neighbors': k_values, 'metric': distance_type_metric, 'p':p_values}

#Model setting
model2 = KNeighborsClassifier()

#Grid Creation
gridKNN = GridSearchCV(estimator=model2, param_grid=grid_values, cv=5)
gridKNN.fit(features_norm, label)

#Printing results
print(f'Best Accuracy: {gridKNN.best_score_:.3f}')
print(f'Best K: {gridKNN.best_estimator_.n_neighbors}')
print(f'Best Distance: {gridKNN.best_estimator_.metric}')
print(f'Best P Value: {gridKNN.best_estimator_.p}')

Best Accuracy: 0.970
Best K: 3
Best Distance: minkowski
Best P Value: 1
