In [None]:
!pip install scikit-learn
!pip install pandas
!pip install matplotlib
!pip install seaborn

# Import libraries
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

# The Iris dataset is a classic and very easy multi-class classification
# dataset that has been widely used for machine learning testing and
# experimentation. It was introduced by the British statistician and
# biologist Ronald Fisher in 1936.
#
# Content: The dataset contains 150 observations of iris flowers.
# There are 50 observations for each of the three species of iris:
#    Iris setosa, Iris virginica, and Iris versicolor.
#
# Features: For each observation, the dataset includes four features:
#    Sepal length (in cm)
#    Sepal width (in cm)
#    Petal length (in cm)
#    Petal width (in cm)

# Load the Iris dataset
iris = load_iris()
X = iris.data  # Features
y = iris.target  # Target variable

# Split the dataset into training and testing sets (70% training, 30% testing).
# The fixed random_state keeps the split, and therefore the confusion matrix
# below, reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Initialize the KNN classifier with k=5 neighbors and fit it to the
# training data
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)

# Predict the labels of the test data
y_pred = knn.predict(X_test)

# Compute the confusion matrix (rows: actual class, columns: predicted class)
cm = confusion_matrix(y_test, y_pred)

# Print the confusion matrix itself under its header so the text output is
# complete even when the plot is not displayed (e.g. a non-GUI backend).
print("Confusion Matrix:")
print(cm)

# Plotting the confusion matrix using seaborn for better visualization
plt.figure(figsize=(8, 6))
sns.heatmap(
    cm,
    annot=True,  # write the count inside each cell
    fmt="d",  # counts are integers
    cmap="Blues",
    xticklabels=iris.target_names,
    yticklabels=iris.target_names,
)
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.title('Confusion Matrix')
plt.show()

# Matrix Shape: The matrix is 3x3, indicating that there are three distinct
# classes in the classification problem.

# Diagonal Elements (19, 13, 13): These numbers represent the True Positives
# for each class, i.e. the test samples whose predicted class matches the
# actual class. Together they account for all 45 test samples (30% of the
# 150 observations):
# The first row and column intersection [19] means that 19 instances of
# the first class (setosa) were correctly predicted as the first class.

# The second [13] in the middle of the matrix shows that 13 instances of
# the second class (versicolor) were correctly predicted as the second class.

# The third [13] indicates that 13 instances of the third class (virginica)
# were correctly identified as the third class.

# Off-Diagonal Elements (0s): All the off-diagonal elements are 0,
# indicating there were no misclassifications. In other words, no instance
# of one class was mistakenly predicted as another class.
