<a href="https://colab.research.google.com/github/Temerius/ML/blob/main/KNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [104]:
import torch
import numpy as np
import pandas as pd
import random

In [105]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

iris = load_iris()

In [111]:
X = iris['data']
y = iris['target']
names = iris['target_names']
feature_names = iris['feature_names']
X.shape, y.shape, names, feature_names

((150, 4),
 (150,),
 array(['setosa', 'versicolor', 'virginica'], dtype='<U10'),
 ['sepal length (cm)',
  'sepal width (cm)',
  'petal length (cm)',
  'petal width (cm)'])

In [112]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

In [113]:
d = {feature_names[i]: X_train[:,i] for i in range(4)}
iris_info = pd.DataFrame(d)
iris_info

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,4.9,3.1,1.5,0.1
1,5.9,3.2,4.8,1.8
2,5.8,2.8,5.1,2.4
3,4.6,3.6,1.0,0.2
4,5.5,2.4,3.8,1.1
...,...,...,...,...
115,5.2,3.5,1.5,0.2
116,5.0,2.0,3.5,1.0
117,6.7,2.5,5.8,1.8
118,7.1,3.0,5.9,2.1


In [114]:
target_names = [names[y_train[i]] for i in range(X_train.shape[0])]
iris_info['labels'] = y_train
iris_info['labels_name'] = target_names
iris_info

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),labels,labels_name
0,4.9,3.1,1.5,0.1,0,setosa
1,5.9,3.2,4.8,1.8,1,versicolor
2,5.8,2.8,5.1,2.4,2,virginica
3,4.6,3.6,1.0,0.2,0,setosa
4,5.5,2.4,3.8,1.1,1,versicolor
...,...,...,...,...,...,...
115,5.2,3.5,1.5,0.2,0,setosa
116,5.0,2.0,3.5,1.0,1,versicolor
117,6.7,2.5,5.8,1.8,2,virginica
118,7.1,3.0,5.9,2.1,2,virginica


In [115]:
iris_info.describe()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),labels
count,120.0,120.0,120.0,120.0,120.0
mean,5.859167,3.0575,3.813333,1.2225,1.016667
std,0.796262,0.412751,1.730154,0.757535,0.809433
min,4.4,2.0,1.0,0.1,0.0
25%,5.175,2.8,1.6,0.3,0.0
50%,5.8,3.0,4.4,1.3,1.0
75%,6.4,3.3,5.1,1.8,2.0
max,7.7,4.4,6.7,2.5,2.0


In [116]:
def knn_classifier(object_to_classify, k, plan_matrix, targets):
    distances = torch.sum((object_to_classify.unsqueeze(0) - plan_matrix) ** 2, dim=1)
    _, min_indices = torch.topk(distances, k=k, largest=False)
    k_labels = targets[min_indices]
    label, _ = torch.mode(torch.tensor(k_labels))
    #print(min_indices, k_labels, label, distances)
    return label

In [117]:
def fn_accuracy(true_labels, pred_labels):
    tmp1 = torch.eq(true_labels, pred_labels).sum()
    tmp2 = tmp1.item() / len(pred_labels)
    return tmp2 * 100

In [121]:
pred_labels = torch.ones(len(y_test))
for i , x in enumerate(zip(X_test, y_test)):
    object_to_classify, object_label = x
    pred_labels[i] = knn_classifier(torch.tensor(object_to_classify), 10, X_train, y_train)

In [122]:
fn_accuracy(torch.tensor(y_test), pred_labels)

96.66666666666667

In [120]:
for i in range(32):
    object_to_classify = torch.tensor([random.uniform(4.2,8),
                                       random.uniform(2,4.5),
                                       random.uniform(1,7),
                                       random.uniform(0,2.6)])
    print(f'object {i+1} features: {object_to_classify}')
    object_label = knn_classifier(object_to_classify, 10, X_train, y_train)
    print(f'object {i+1} label: {names[object_label]}', end='\n-----------------\n')

object 1 features: tensor([4.7480, 3.8368, 1.0272, 2.5683])
object 1 label: setosa
-----------------
object 2 features: tensor([6.1175, 3.4774, 4.1844, 0.4554])
object 2 label: versicolor
-----------------
object 3 features: tensor([5.7958, 3.0835, 1.1858, 1.6313])
object 3 label: setosa
-----------------
object 4 features: tensor([6.1910, 4.2392, 6.3958, 2.3592])
object 4 label: virginica
-----------------
object 5 features: tensor([4.5471, 3.0125, 5.7204, 1.0933])
object 5 label: versicolor
-----------------
object 6 features: tensor([7.5250, 3.9452, 2.9982, 1.0831])
object 6 label: versicolor
-----------------
object 7 features: tensor([6.7674, 2.8664, 4.2589, 2.5667])
object 7 label: virginica
-----------------
object 8 features: tensor([6.3659, 4.0473, 1.4018, 1.1021])
object 8 label: setosa
-----------------
object 9 features: tensor([5.2021, 2.3212, 6.8623, 1.6248])
object 9 label: virginica
-----------------
object 10 features: tensor([7.1692, 3.8623, 1.8031, 2.4865])
object 10