In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


In [3]:
# Fetch the Iris dataset that ships with scikit-learn
iris = datasets.load_iris()

# Build the pandas containers used by the later cells:
#   y -> species name per flower (integer target codes mapped through target_names)
#   X -> the measurement columns, labelled with the dataset's feature names
y = pd.Series(data=iris.target_names[iris.target], name='species')
X = pd.DataFrame(data=iris.data, columns=iris.feature_names)

# Preview the feature table, then show how many samples each species has
print("Features (X):")
display(X.head(n=5))
print("\nTarget labels (y) counts:")
print(y.value_counts())


Features (X):


Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2



Target labels (y) counts:
species
setosa        50
versicolor    50
virginica     50
Name: count, dtype: int64


In [4]:
# Hold out 20% of the samples for evaluation; the fixed random_state makes
# the shuffle (and therefore the split) repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# Report how many samples ended up in each partition
print(f"Training samples: {len(X_train)}")
print(f"Testing samples: {len(X_test)}")


Training samples: 120
Testing samples: 30


In [5]:
# Build a k-nearest-neighbours classifier that votes among the 3 closest
# training samples and fit it on the training split in one expression
# (fit() returns the estimator itself, so `model` is the fitted classifier).
model = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)

print("Model training complete.")


Model training complete.


In [6]:
# Predict a species label for every held-out sample
predictions = model.predict(X_test)

# Put truth and prediction side by side; because y_test is a Series, the
# table keeps the original row labels of the test samples as its index.
results_df = pd.DataFrame(data={'Actual': y_test, 'Predicted': predictions})
print("Comparison of first 5 predictions:")
display(results_df.head(n=5))


Comparison of first 5 predictions:


Unnamed: 0,Actual,Predicted
73,versicolor,versicolor
18,setosa,setosa
118,virginica,virginica
78,versicolor,versicolor
76,versicolor,versicolor


In [17]:
# Overall fraction of test samples classified correctly, shown as a percentage
accuracy = accuracy_score(y_true=y_test, y_pred=predictions)
print(f"Accuracy Score: {accuracy * 100:.2f}%")

# Per-class precision, recall, F1 and support
print("\nClassification Report:")
print(classification_report(y_true=y_test, y_pred=predictions))

# Confusion matrix: rows are the true species, columns the predicted species,
# so any off-diagonal count is a misclassification between those two classes
print("\nConfusion Matrix:")
print(confusion_matrix(y_true=y_test, y_pred=predictions))


Accuracy Score: 100.00%

Classification Report:
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        10
  versicolor       1.00      1.00      1.00         9
   virginica       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30


Confusion Matrix:
[[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]
