# Task 1: Classical ML with Scikit-learn (Iris Dataset)

**Goal:**
- Preprocess the data (handle missing values, encode labels).
- Train a Decision Tree Classifier to predict iris species.
- Evaluate using accuracy, precision, and recall.

In [1]:
# 1. Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer

In [2]:
# 2. Load the Iris dataset
# The feature matrix becomes a DataFrame labelled with the dataset's own
# column names; the target vector becomes a Series named 'species'.
iris = load_iris()
X = pd.DataFrame(data=iris.data, columns=iris.feature_names)
y = pd.Series(data=iris.target, name='species')

In [3]:
# Encode the species names (setosa, versicolor, virginica) as integer labels.
# The encoder is fitted on the names rather than on the already-numeric
# target, so `label_encoder.classes_` holds the species names and predictions
# can be mapped back with `label_encoder.inverse_transform`.
# LabelEncoder sorts its classes, and the names are already in alphabetical
# order, so the resulting codes equal the original 0/1/2 target values.
species_names = iris.target_names[y.to_numpy()]
label_encoder = LabelEncoder()
y_labels = label_encoder.fit_transform(species_names)

In [4]:
# 3. Simulate missing data (for demo purposes)
# Blank out two individual cells, addressed by (row, column) position.
X.iat[0, 0] = np.nan
X.iat[10, 2] = np.nan

In [5]:
# 4. Handle missing values using mean imputation
# Learn the per-column means first, then use them to fill every NaN.
imputer = SimpleImputer(strategy='mean')
imputer.fit(X)
X_imputed = imputer.transform(X)

In [6]:
# 5. Train-test split (80% training, 20% testing)
# Split the raw feature table (still containing the NaNs) so that the
# imputation statistics can be learned from the training rows only.
# Computing the column means on the full dataset before splitting would let
# information from the test rows leak into preprocessing.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y_labels, test_size=0.2, random_state=42
)

# Fit the imputer on the training portion, then apply those same training
# means to the test portion. Both results are NumPy arrays.
X_train = imputer.fit_transform(X_train_raw)
X_test = imputer.transform(X_test_raw)

In [7]:
# 6. Initialize and train a Decision Tree Classifier
# The fixed random_state keeps the fitted tree reproducible between runs.
# The fit call stays as the last expression so the cell displays the estimator.
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X=X_train, y=y_train)

DecisionTreeClassifier(random_state=42)

In [8]:
# 7. Make predictions
# Predict a class label for every sample in the held-out test set.
y_pred = clf.predict(X=X_test)

In [9]:
# 8. Evaluate the model
# Precision and recall use macro averaging: the metric is computed per class
# and the per-class values are averaged with equal weight.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
precision = precision_score(y_true=y_test, y_pred=y_pred, average='macro')
recall = recall_score(y_true=y_test, y_pred=y_pred, average='macro')

In [10]:
# 9. Print evaluation results
# Metric names are left-aligned to a width of 9 so the colons line up;
# values are shown with two decimal places.
print("Model Evaluation Metrics:")
for metric_name, metric_value in (
    ("Accuracy", accuracy),
    ("Precision", precision),
    ("Recall", recall),
):
    print(f"{metric_name:<9}: {metric_value:.2f}")

Model Evaluation Metrics:
Accuracy : 1.00
Precision: 1.00
Recall   : 1.00
