In [None]:
# Cell 1: Setup, Import Libraries, and Load Data

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_iris

# 1. Load the dataset.
# `iris` is a dictionary-like object; this cell reads its `data`,
# `feature_names`, `target` and `target_names` attributes.
iris = load_iris()

# 2. Build a DataFrame with one column per feature measurement.
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)

# 3. Add the target twice: as the integer code and as a readable label.
df['species'] = iris.target
# Index the array of names with the whole array of codes in one vectorized
# step instead of calling a Python lambda once per row.
df['species_name'] = iris.target_names[iris.target]

# Quick sanity check: the first 5 rows and the number of rows per species.
print("--- Dataset Head ---")
print(df.head())
print("\n--- Species Distribution ---")
print(df['species_name'].value_counts())

In [None]:
# Cell 2: Exploratory Data Analysis (EDA)

# Scatter of petal length against petal width, one colour per species.
# The figure and its single Axes are created explicitly and every styling
# call goes through `ax` rather than the implicit pyplot "current axes".
fig, ax = plt.subplots(figsize=(9, 6))
sns.scatterplot(
    data=df,
    x='petal length (cm)',
    y='petal width (cm)',
    hue='species_name',
    palette='viridis',
    s=120,  # marker size
    ax=ax,
)
ax.set_title('Iris Data: Petal Dimensions by Species')
ax.set_xlabel('Petal Length (cm)')
ax.set_ylabel('Petal Width (cm)')
ax.legend(title='Species')
ax.grid(True, linestyle='--', alpha=0.6)
plt.show()

In [None]:
# Cell 3: Data Preprocessing and Splitting

from sklearn.model_selection import train_test_split

# X = features: every column of `df` except the two label columns.
X = df.drop(columns=['species', 'species_name'])

# y = target: the integer species code.
y = df['species']

# Hold out 20% of the rows for testing and train on the remaining 80%.
# `stratify=y` keeps the species proportions equal in both parts, so the
# small test set is not skewed towards one class by the shuffle;
# `random_state=42` makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Every row must end up in exactly one of the two parts.
assert len(X_train) + len(X_test) == len(X)

# Report the sizes of the resulting datasets.
print(f"Total samples: {len(X)}")
print(f"Training set size: {len(X_train)} samples")
print(f"Testing set size: {len(X_test)} samples")

In [None]:
# Cell 4: Model Training (K-Nearest Neighbors)

from sklearn.neighbors import KNeighborsClassifier

# Create a 3-nearest-neighbours classifier and fit it on the training split
# in a single expression. `fit` returns the estimator itself, so `knn_model`
# is bound to the fitted model.
knn_model = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)

print("K-Nearest Neighbors Model training complete!")

In [None]:
# Cell 5: Evaluation and Prediction

from sklearn.metrics import accuracy_score, confusion_matrix

# 1. Predict a species code for every row of the held-out test set.
y_pred = knn_model.predict(X_test)

# 2. Accuracy: the fraction of test rows whose prediction equals the true code.
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy on Test Data: {accuracy * 100:.2f}%")

# 3. Confusion matrix: rows are the true classes, columns the predicted ones.
cm = confusion_matrix(y_test, y_pred)
print("\n--- Confusion Matrix ---")
print(cm)

# 4. Demonstrate a single prediction (a new, unseen flower).
# Example measurements (Sepal L, Sepal W, Petal L, Petal W)
new_flower_data = np.array([[6.3, 3.3, 6.0, 2.5]])  # This is typically a Virginica

# The model was fitted on a DataFrame, so it remembers the feature names.
# Passing a bare NumPy array makes scikit-learn warn that the input has no
# valid feature names; giving the sample the training column labels avoids
# that and ties each value to the column it belongs to.
new_flower_df = pd.DataFrame(new_flower_data, columns=X_train.columns)

prediction_result = knn_model.predict(new_flower_df)
predicted_species = iris.target_names[prediction_result[0]]

print(f"\nPrediction for new flower (measurements: {new_flower_data[0]}):")
print(f"-> Predicted Species: {predicted_species}")