# Steps for KNN Classification Model:
- Import Required Libraries
- Load and Preprocess the Data
- Split the Data into Training and Testing Sets
- Train the KNN Model
- Make Predictions
- Evaluate the Model

# 1. Import Required Libraries

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


# 2. Load and Preprocess the Data

In [12]:
# Load dataset
data = pd.read_csv('IRIS.csv')

# View first few rows
print(data.head())


   sepal_length  sepal_width  petal_length  petal_width      species
0           5.1          3.5           1.4          0.2  Iris-setosa
1           4.9          3.0           1.4          0.2  Iris-setosa
2           4.7          3.2           1.3          0.2  Iris-setosa
3           4.6          3.1           1.5          0.2  Iris-setosa
4           5.0          3.6           1.4          0.2  Iris-setosa


In [14]:
# Separate features (X) and target (y)
X = data.drop('species', axis=1)
y = data['species']

# Normalize feature data for better performance
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


# 3. Split the Data into Training and Testing Sets

In [16]:
# Split the data into training and testing sets (80% training, 20% testing)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)


# 4. Train the KNN Model

In [18]:
# Initialize the KNN classifier with k neighbors
knn = KNeighborsClassifier(n_neighbors=5)

# Train the model
knn.fit(X_train, y_train)


# 5. Make Predictions

In [20]:
# Make predictions on the test set
y_pred = knn.predict(X_test)


# 6. Evaluate the Model

In [22]:
# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

# Confusion Matrix
print('Confusion Matrix:')
print(confusion_matrix(y_test, y_pred))

# Classification Report (Precision, Recall, F1-Score)
print('Classification Report:')
print(classification_report(y_test, y_pred))


Accuracy: 100.00%
Confusion Matrix:
[[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]
Classification Report:
                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        10
Iris-versicolor       1.00      1.00      1.00         9
 Iris-virginica       1.00      1.00      1.00        11

       accuracy                           1.00        30
      macro avg       1.00      1.00      1.00        30
   weighted avg       1.00      1.00      1.00        30



# Hyperparameter Tuning for k

In [24]:
# Try different values of k and check performance
for k in range(1, 21):
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, y_train)
    y_pred = knn.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f'Accuracy with k={k}: {accuracy * 100:.2f}%')


Accuracy with k=1: 96.67%
Accuracy with k=2: 100.00%
Accuracy with k=3: 100.00%
Accuracy with k=4: 100.00%
Accuracy with k=5: 100.00%
Accuracy with k=6: 100.00%
Accuracy with k=7: 100.00%
Accuracy with k=8: 100.00%
Accuracy with k=9: 100.00%
Accuracy with k=10: 100.00%
Accuracy with k=11: 100.00%
Accuracy with k=12: 100.00%
Accuracy with k=13: 100.00%
Accuracy with k=14: 100.00%
Accuracy with k=15: 100.00%
Accuracy with k=16: 100.00%
Accuracy with k=17: 100.00%
Accuracy with k=18: 96.67%
Accuracy with k=19: 96.67%
Accuracy with k=20: 96.67%


# Optional: Cross-Validation for Robust Performance

In [26]:
from sklearn.model_selection import cross_val_score

# Perform 5-fold cross-validation
knn = KNeighborsClassifier(n_neighbors=5)
cv_scores = cross_val_score(knn, X_scaled, y, cv=5)

print(f'Cross-validation scores: {cv_scores}')
print(f'Mean cross-validation score: {np.mean(cv_scores) * 100:.2f}%')


Cross-validation scores: [0.96666667 0.96666667 0.93333333 0.93333333 1.        ]
Mean cross-validation score: 96.00%
