In [None]:
# Import Required Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [None]:
# Define the Problem Statement
# The Iris dataset contains 150 samples of iris flowers classified into three species: 
# - Setosa
# - Versicolor
# - Virginica
# The goal is to build a classification model that predicts the species based on four features: 
# - Sepal length
# - Sepal width
# - Petal length
# - Petal width

In [None]:
# Load the Dataset
# Load the Iris dataset through scikit-learn and unpack it into the feature
# matrix (X) and the integer-encoded class labels (y).
iris = datasets.load_iris()
X, y = iris.data, iris.target


In [None]:
# Build a labelled DataFrame for exploration: one column per feature plus a
# human-readable 'species' column.
df = pd.DataFrame(X, columns=iris.feature_names)
# Look the names up in iris.target_names (indexed by the integer labels in y)
# instead of a hand-written dict, so the labels always agree with the dataset
# and no unmapped label can silently turn into NaN.
df['species'] = iris.target_names[y]


In [None]:
# Exploratory Data Analysis (EDA)
# Quick structural overview of the data before modelling.
print(df.head())
print("\nDataset Information:")
# DataFrame.info() writes its report straight to stdout and returns None, so
# wrapping it in print() would append a stray "None" line to the output.
df.info()
print("\nSummary Statistics:")
print(df.describe())


In [None]:
# Scatter-matrix of every feature pair, coloured and marked per species, to
# show which feature combinations separate the classes.
sns.pairplot(
    data=df,
    hue='species',
    markers=['o', 's', 'D'],
)
plt.show()


In [None]:
# Annotated heatmap of the pairwise correlations between the feature columns;
# the 'species' label column is left out because it holds strings, not numbers.
plt.figure(figsize=(8, 6))
sns.heatmap(
    df.drop(columns='species').corr(),
    annot=True,
    cmap='coolwarm',
    linewidths=0.5,
)
plt.title("Feature Correlation Heatmap")
plt.show()


In [None]:
# Data Preprocessing
# Hold out 20% of the samples for testing. Stratifying on y keeps the class
# proportions the same in both splits; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)


In [None]:
# KNN is distance-based, so the features are standardised first. The scaler is
# fitted on the training split only and then reused for the test split, so no
# test-set statistics leak into preprocessing.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


In [None]:
# Train a Classification Model (KNN)
# Fit a k-nearest-neighbours classifier on the standardised training split.
knn = KNeighborsClassifier(
    n_neighbors=5,  # k: how many neighbours vote on each prediction
)
knn.fit(X_train, y_train)


In [None]:
# Model Evaluation
# Score the fitted model on the held-out test split.
y_pred = knn.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")
print("\nClassification Report:")
# Pass the species names so the report rows read 'setosa', 'versicolor',
# 'virginica' rather than the bare integer labels 0/1/2.
print(classification_report(y_test, y_pred, target_names=iris.target_names))


In [None]:
# Heatmap of the confusion matrix: rows are the true species, columns the
# predicted species, and each cell shows an integer count.
plt.figure(figsize=(5, 4))
sns.heatmap(
    confusion_matrix(y_test, y_pred),
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=iris.target_names,
    yticklabels=iris.target_names,
)
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix")
plt.show()


In [None]:
# Making Predictions
# Classify a single hand-entered measurement. The four values are assumed to
# follow the same feature order as the training data (confirm against
# iris.feature_names). The sample goes through the already-fitted scaler so it
# lives in the same standardised space the model was trained on.
sample = [[5.1, 3.5, 1.4, 0.2]]
sample_scaled = scaler.transform(sample)
prediction = knn.predict(sample_scaled)
print(f"Predicted species: {iris.target_names[prediction[0]]}")
