# Part 2 - Task 1: Classical ML with Scikit-learn (Iris Dataset)
**Goal:** Preprocess data, train a Decision Tree classifier to predict Iris species, and evaluate using accuracy, precision, and recall.


In [ ]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt

# Load the Iris dataset and wrap it in pandas objects
iris = load_iris()

# Feature matrix, with columns labelled by the dataset's own feature names
X = pd.DataFrame(data=iris.data, columns=iris.feature_names)

# Target as species names: each integer code is mapped to the name at that
# position in iris.target_names
y = pd.Series(iris.target).map(dict(enumerate(iris.target_names)))

# Preview the first rows and the number of samples per species
display(X.head())
print("Target samples distribution:\n", y.value_counts())


In [ ]:
# Preprocessing
# 1) Report how many missing entries each feature column contains
print("Missing values per column:\n", X.isna().sum())

# 2) Encode the species names as integer class labels; the fitted encoder is
#    kept in `le` so its classes_ can label the report and plot later on
le = LabelEncoder()
y_enc = le.fit_transform(y)


In [ ]:
# Train / Test split
# Hold out 20% of the rows for testing. Stratifying on the encoded labels
# keeps the class proportions alike in both parts, and the fixed seed makes
# the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_enc,
    test_size=0.2,
    stratify=y_enc,
    random_state=42,
)
print("Train shape:", X_train.shape, "Test shape:", X_test.shape)


In [ ]:
# Train Decision Tree
# A fixed random_state keeps the fitted tree reproducible between runs
clf = DecisionTreeClassifier(
    random_state=42,
)
# Left as the cell's final expression so the notebook shows the fitted estimator
clf.fit(X=X_train, y=y_train)


In [ ]:
# Predict & Evaluate
# Predict the held-out rows, then score the predictions against the true labels
y_pred = clf.predict(X_test)

acc = accuracy_score(y_true=y_test, y_pred=y_pred)
# 'macro' averaging: the unweighted mean of the per-class scores
prec = precision_score(y_true=y_test, y_pred=y_pred, average='macro')
rec = recall_score(y_true=y_test, y_pred=y_pred, average='macro')

# Headline metrics, one per line, rounded to four decimals
print(
    f"Accuracy: {acc:.4f}\n"
    f"Precision (macro): {prec:.4f}\n"
    f"Recall (macro): {rec:.4f}"
)

# Per-class breakdown, with rows labelled by the encoder's class names
print(
    "\nClassification Report:\n",
    classification_report(y_test, y_pred, target_names=le.classes_),
)


In [ ]:
# Confusion Matrix Visualization
# Rows of cm are true classes and columns are predicted classes
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
tick_marks = np.arange(len(le.classes_))

# Draw the matrix as a heatmap with a colour scale
plt.figure(figsize=(6, 5))
plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
plt.title('Confusion Matrix')
plt.colorbar()

# Label both axes with the class names known to the encoder
plt.xticks(tick_marks, le.classes_, rotation=45)
plt.yticks(tick_marks, le.classes_)
plt.xlabel('Predicted label')
plt.ylabel('True label')

# Write each count into its cell (x = column j, y = row i); counts above half
# of the largest cell value are drawn in white, the rest in black
for i, j in np.ndindex(*cm.shape):
    plt.text(
        j,
        i,
        cm[i, j],
        ha="center",
        va="center",
        color="white" if cm[i, j] > cm.max() / 2 else "black",
    )

plt.tight_layout()
plt.show()


**Screenshot instructions for report:**

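- Figure 1: Output of `display(X.head())`, showing the first five rows of the feature table.
- Figure 2: Printed accuracy, macro precision, and macro recall, together with the classification report.
- Figure 3: Confusion matrix plot.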
