In [77]:
import pandas 
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
import sklearn.metrics as metrics
# Dataset source: https://github.com/mwaskom/seaborn-data/blob/master/penguins.csv

In [78]:
# Load the penguins data and count the missing values in every column.
dataset = pandas.read_csv('penguins.csv')
missing_per_column = dataset.isna().sum()  # isna is the same check as isnull
missing_per_column

species               0
island                0
bill_length_mm        2
bill_depth_mm         2
flipper_length_mm     2
body_mass_g           2
sex                  11
dtype: int64

In [79]:
# Discard every row that has at least one missing value, then re-count the
# missing values per column to confirm none remain.
# NOTE(review): this also removes rows whose only missing field is `sex`,
# which is not among the feature columns selected below - confirm that is intended.
dataset = dataset.dropna(axis=0, how='any')
remaining_missing = dataset.isna().sum()
remaining_missing

species              0
island               0
bill_length_mm       0
bill_depth_mm        0
flipper_length_mm    0
body_mass_g          0
sex                  0
dtype: int64

In [80]:
# The two bill measurements are the model inputs; the species column is the target.
feature_columns = ['bill_length_mm', 'bill_depth_mm']
features = dataset[feature_columns]
labels = dataset['species']

In [81]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split
# the same on every run.
split_parts = train_test_split(features, labels, test_size=0.2, random_state=123)
train_features, test_features, train_labels, test_labels = split_parts

In [82]:
# Fit one k-NN classifier for every combination of neighbor count (1 to 10)
# and weighting scheme, scoring each on the held-out test set.
# accuracy_array collects all 20 scores, ordered by neighbor count and then by
# weighting scheme; the best and worst are read from it afterwards.
accuracy_array = []
for neighbors in range(1, 11):
    for weight_type in ('uniform', 'distance'):
        # fit() returns the fitted estimator itself, so it can be chained here
        knn_penguins = KNeighborsClassifier(
            n_neighbors=neighbors, weights=weight_type
        ).fit(train_features, train_labels)
        predicted_labels = knn_penguins.predict(test_features)
        score = metrics.accuracy_score(test_labels, predicted_labels)
        accuracy_array.append(score)

In [83]:
# Report the highest and lowest test accuracy across all fitted models.
best_accuracy = max(accuracy_array)
worst_accuracy = min(accuracy_array)
print('Best accuracy: {:.6f}'.format(best_accuracy))
print('Worst accuracy: {:.6f}'.format(worst_accuracy))

Best accuracy: 0.940299
Worst accuracy: 0.895522
