markdown
# K-Nearest Neighbors (KNN) Algorithm

K-Nearest Neighbors (KNN) is a simple, non-parametric, supervised machine learning algorithm used for classification and regression tasks. It works by finding the "k" nearest data points to a given query point in a feature space and making predictions based on their majority class (for classification) or their average (for regression).

## Use Cases

### Classification:
- Medical Diagnosis: Classifying diseases based on patient symptoms.
- Image Recognition: Identifying objects or patterns in images.
- Spam Detection: Classifying emails as spam or non-spam.

### Regression:
- Stock Price Prediction: Predicting future stock prices based on historical data.
- Real Estate: Estimating house prices based on nearby properties features.

### Recommendation Systems:
- Finding similar users or items in collaborative filtering systems.

### Anomaly Detection:
- Detecting unusual patterns in fraud detection or network intrusion.

In [49]:
import numpy as np
import pandas as pd

In [55]:
data = pd.read_csv("iris.csv")
data.head()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


In [57]:
data.shape

(150, 6)

In [61]:
data['Species'].value_counts()

Species
Iris-setosa        50
Iris-versicolor    50
Iris-virginica     50
Name: count, dtype: int64

In [67]:
data.columns

Index(['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm',
       'Species'],
      dtype='object')

In [69]:
data.describe()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
count,150.0,150.0,150.0,150.0,150.0
mean,75.5,5.843333,3.054,3.758667,1.198667
std,43.445368,0.828066,0.433594,1.76442,0.763161
min,1.0,4.3,2.0,1.0,0.1
25%,38.25,5.1,2.8,1.6,0.3
50%,75.5,5.8,3.0,4.35,1.3
75%,112.75,6.4,3.3,5.1,1.8
max,150.0,7.9,4.4,6.9,2.5


In [71]:
data.head()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


In [79]:
x = data.iloc[: , 1:5]
x.head()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [83]:
y = data.iloc[: , -1]
y.head()

0    Iris-setosa
1    Iris-setosa
2    Iris-setosa
3    Iris-setosa
4    Iris-setosa
Name: Species, dtype: object

In [87]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

In [89]:
x = scaler.fit_transform(x)
x[0:5]

array([[-0.90068117,  1.03205722, -1.3412724 , -1.31297673],
       [-1.14301691, -0.1249576 , -1.3412724 , -1.31297673],
       [-1.38535265,  0.33784833, -1.39813811, -1.31297673],
       [-1.50652052,  0.10644536, -1.2844067 , -1.31297673],
       [-1.02184904,  1.26346019, -1.3412724 , -1.31297673]])

In [91]:
from sklearn.model_selection import train_test_split
x_train, x_test , y_train , y_test = train_test_split(x,y,test_size=0.2)

In [93]:
x_train.shape

(120, 4)

In [97]:
x_test.shape

(30, 4)

In [103]:
from sklearn.neighbors import KNeighborsClassifier
model = KNeighborsClassifier(n_neighbors = 1)
model.fit(x_train,y_train)

In [107]:
pred = model.predict(x_test)

In [109]:
pred[0:5]

array(['Iris-setosa', 'Iris-virginica', 'Iris-versicolor',
       'Iris-versicolor', 'Iris-setosa'], dtype=object)

In [111]:
y_test[0:5]

38         Iris-setosa
144     Iris-virginica
74     Iris-versicolor
87     Iris-versicolor
20         Iris-setosa
Name: Species, dtype: object

In [117]:
from sklearn.metrics import accuracy_score
accuracy = accuracy_score(y_test,pred)
accuracy

1.0

In [121]:
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test,pred)
cm

array([[11,  0,  0],
       [ 0,  9,  0],
       [ 0,  0, 10]], dtype=int64)

In [123]:
result = pd.DataFrame(data=[y_test.values,pred], index = ['y_test','pred'])
result.transpose()

Unnamed: 0,y_test,pred
0,Iris-setosa,Iris-setosa
1,Iris-virginica,Iris-virginica
2,Iris-versicolor,Iris-versicolor
3,Iris-versicolor,Iris-versicolor
4,Iris-setosa,Iris-setosa
5,Iris-setosa,Iris-setosa
6,Iris-setosa,Iris-setosa
7,Iris-setosa,Iris-setosa
8,Iris-setosa,Iris-setosa
9,Iris-versicolor,Iris-versicolor


In [127]:
correct_sum = []
for i in range(1,20):
    model = KNeighborsClassifier(n_neighbors = i)
    model.fit(x_train,y_train)
    pred = model.predict(x_test)
    correct = np.sum(pred == y_test)
    correct_sum.append(correct)

correct_sum

[30, 29, 29, 28, 29, 28, 29, 28, 29, 28, 30, 30, 30, 29, 29, 29, 29, 29, 29]

In [129]:
result = pd.DataFrame(data=correct_sum)
result.index = result.index+1
result.T

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,30,29,29,28,29,28,29,28,29,28,30,30,30,29,29,29,29,29,29
