In [36]:
from collections import Counter
import pandas as pd
import numpy as np

In [37]:
# Load the iris measurements from a CSV resolved relative to the working directory.
# NOTE(review): the rendered frame shows an "Unnamed: 0" column, which looks like a
# row index written out by an earlier export -- consider index_col=0; confirm first.
df = pd.read_csv("iris_dataset.csv")
# Bare expression so the notebook renders the frame as this cell's output.
df

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2
146,6.3,2.5,5.0,1.9,2
147,6.5,3.0,5.2,2.0,2
148,6.2,3.4,5.4,2.3,2


In [38]:
# Columns used as classifier inputs; the class label is stored under "target".
all_features = [
    "sepal length (cm)",
    "sepal width (cm)",
    "petal length (cm)",
    "petal width (cm)",
]

# Pull plain NumPy arrays out of the frame: one row per sample in x, one label per row in y.
x = df[all_features].to_numpy()
y = df["target"].to_numpy()

In [39]:
# Hold out the final row as the single test sample; every other row is training data.
x_train, y_train = x[:-1], y[:-1]
x_test, y_test_actual = x[-1], y[-1]

In [40]:
# Show the held-out sample's measurements followed by its true label.
# x_test is one row of df[all_features], so both sequences have the same length.
for feature_name, measurement in zip(all_features, x_test):
    print(f"{feature_name}: {measurement:.2f}")
print(f"Real class: {y_test_actual}")

sepal length (cm): 5.90
sepal width (cm): 3.00
petal length (cm): 5.10
petal width (cm): 1.80
Real class: 2


In [44]:
def knn_predict(func_x_train, func_x_test, k, func_y_train=None):
    """Classify one sample by majority vote among its k nearest training rows.

    Distances are Euclidean. Neighbours are ranked by ascending distance; rows at
    the same distance keep their training order because the sort is stable. When
    the vote is tied, the class that appears first among the ranked neighbours wins.

    Parameters
    ----------
    func_x_train : indexable sequence of array-like
        Training samples, accessed by position ``0 .. len(func_x_train) - 1``.
    func_x_test : array-like
        The sample to classify; each training row is subtracted from it.
    k : int
        Number of neighbours that vote. Must be at least 1. A value larger than
        the number of training rows simply lets every row vote.
    func_y_train : indexable sequence, optional
        Label of each training row, aligned by position. When omitted, the
        notebook-level ``y_train`` is used so that existing three-argument calls
        keep working.

    Returns
    -------
    tuple
        ``(predicted_class, k_nearest)`` where ``k_nearest`` is a list of
        ``(distance, label)`` pairs ordered from closest to farthest.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1, the training set is empty, or fewer labels
        than training rows are available.
    """
    if func_y_train is None:
        # Backward-compatible fallback: earlier versions always read the labels
        # from the global ``y_train``.
        func_y_train = y_train

    # k == 0 used to fail with an opaque IndexError and a negative k silently
    # dropped the farthest rows from the vote; reject both explicitly.
    if k < 1:
        raise ValueError(f"k must be at least 1, got {k}")

    sample_count = len(func_x_train)
    if sample_count == 0:
        raise ValueError("cannot predict from an empty training set")
    if len(func_y_train) < sample_count:
        raise ValueError(
            f"need a label for every training row: got {len(func_y_train)} labels "
            f"for {sample_count} rows"
        )

    # One (distance, label) pair per training row.
    distances = [
        (np.sqrt(np.sum((func_x_test - func_x_train[index]) ** 2)), func_y_train[index])
        for index in range(sample_count)
    ]

    # Rank by distance only, so equally distant rows stay in training order.
    distances.sort(key=lambda pair: pair[0])
    k_nearest = distances[:k]

    vote_counts = Counter(label for _, label in k_nearest)
    predicted_class = vote_counts.most_common(1)[0][0]

    return predicted_class, k_nearest

# Classify the held-out sample on the raw (unscaled) features using three neighbours.
predicted_no_norm, neighbors_no_norm = knn_predict(
    func_x_train=x_train,
    func_x_test=x_test,
    k=3,
)

In [46]:
# List the voting neighbours from closest to farthest, numbered from 1.
print("Three nearest neighbors predictions:")
for rank, neighbor in enumerate(neighbors_no_norm, start=1):
    distance, label = neighbor
    print(f"{rank}. distance: {distance:.4f}, class: {label}")

Three nearest neighbors predictions:
1. distance: 0.2828, class: 2
2. distance: 0.3162, class: 2
3. distance: 0.3317, class: 2


In [49]:
# Compare the vote result with the held-out sample's true label.
print(f"prediction: {predicted_no_norm}")
print(f"real class: {y_test_actual}")
if predicted_no_norm == y_test_actual:
    print("correct")
else:
    print("wrong")

prediction: 2
real class: 2
correct


In [53]:
# Per-feature statistics over the training rows only (axis 0 runs across samples).
x_mean = x_train.mean(axis=0)
standard_deviation = x_train.std(axis=0)

In [57]:
# Report each feature's training mean and standard deviation, one blank line apart.
for feature_name, feature_mean, feature_std in zip(all_features, x_mean, standard_deviation):
    print(f"{feature_name}:")
    print(f"mean: {feature_mean:.4f}")
    print(f"standard deviation: {feature_std:.4f}")
    print("")

sepal length (cm):
mean: 5.8430
standard deviation: 0.8281

sepal width (cm):
mean: 3.0577
standard deviation: 0.4358

petal length (cm):
mean: 3.7490
standard deviation: 1.7618

petal width (cm):
mean: 1.1953
standard deviation: 0.7606

