In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


In [None]:
# Fetch the bundled Iris dataset from scikit-learn.
iris = datasets.load_iris()

# Split the bunch into a feature table and a label series:
#   X -> one column per measurement, named after iris.feature_names
#   y -> species name per sample (integer targets mapped through target_names)
feature_columns = iris.feature_names
species_names = iris.target_names[iris.target]
X = pd.DataFrame(iris.data, columns=feature_columns)
y = pd.Series(species_names, name='species')

# Preview the feature table (first 5 rows) and the per-species sample counts.
print("Features (X):")
display(X.head(5))
print("\nTarget labels (y) counts:")
print(y.value_counts())


In [None]:
# Hold out 20% of the samples for testing.
# The split is stratified on the species label so that the class proportions
# in the train and test partitions mirror those of the full dataset; without
# it, a small test set can end up unevenly balanced across species.
# A fixed random_state keeps the split reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Sanity check: the two partitions together must account for every sample.
assert len(X_train) + len(X_test) == len(X)

print(f"Training samples: {len(X_train)}")
print(f"Testing samples: {len(X_test)}")


In [None]:
# Build a k-nearest-neighbours classifier (K=3) and fit it on the training
# split in a single expression; fit() returns the fitted estimator itself,
# so `model` is the trained classifier.
model = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)

print("Model training complete.")


In [None]:
# Predict a species for every held-out sample.
predictions = model.predict(X_test)

# Put the true labels next to the predicted ones. The frame takes its index
# from y_test; the predictions array is matched to it by position.
comparison_columns = {
    'Actual': y_test,
    'Predicted': predictions,
}
results_df = pd.DataFrame(comparison_columns)

print("Comparison of first 5 predictions:")
display(results_df.head(5))


In [None]:
# Overall accuracy: fraction of test samples whose predicted species matches
# the true species.
accuracy = accuracy_score(y_test, predictions)
print(f"Accuracy Score: {accuracy * 100:.2f}%")

# Per-class breakdown of the predictions against the true labels.
print("\nClassification Report:")
print(classification_report(y_test, predictions))

# Confusion matrix: rows are the actual species, columns the predicted species.
# The label order is pinned to the classes the model was trained on, so the
# matrix always has one row/column per species (even if one is absent from the
# test split), and it is wrapped in a labelled DataFrame so each cell can be
# read without knowing the implicit sort order of the raw array.
class_labels = model.classes_
confusion = confusion_matrix(y_test, predictions, labels=class_labels)
confusion_df = pd.DataFrame(
    confusion,
    index=pd.Index(class_labels, name='Actual'),
    columns=pd.Index(class_labels, name='Predicted'),
)
print("\nConfusion Matrix:")
display(confusion_df)
