In [1]:
# Iris Species Classification with Decision Tree
#
# This script loads the Iris dataset, preprocesses it (handles missing values and encodes labels),
# trains a Decision Tree Classifier, and evaluates it using accuracy, precision, and recall.

import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.impute import SimpleImputer
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    precision_score,
    recall_score,
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier

# 1. Load the Iris Dataset
# Features become a DataFrame labelled with the dataset's feature names; the
# integer class targets become a Series named "species".
iris = load_iris()
y = pd.Series(iris.target, name="species")
X = pd.DataFrame(data=iris.data, columns=iris.feature_names)

# The bundled dataset is complete, so blank out cells at random (each cell is
# hit when its uniform draw falls below 0.05) to give the imputation step
# something to do. The seed is fixed so the same cells are blanked on every run.
np.random.seed(42)
n_rows, n_cols = X.shape
missing_mask = np.random.rand(n_rows, n_cols) < 0.05
X = X.mask(missing_mask)

# 2. Preprocessing
#
# NOTE: the train/test split is done BEFORE imputation. The imputer learns
# column means, and fitting it on the full dataset would let statistics from
# the test rows leak into the training features, biasing the evaluation.

# 2.1 Encode labels (not needed for sklearn's decision tree, but for generality)
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# 3. Train/Test Split
# Split the raw features (NaNs still present). Stratifying on the labels keeps
# the class proportions the same in both splits.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
)

# 3.1 Handle missing values (mean imputation fitted on the training split only)
imputer = SimpleImputer(strategy='mean')
X_train = pd.DataFrame(
    imputer.fit_transform(X_train_raw),
    columns=X.columns,
    index=X_train_raw.index,
)
# The test rows are only transformed, using the means learned from training.
X_test = pd.DataFrame(
    imputer.transform(X_test_raw),
    columns=X.columns,
    index=X_test_raw.index,
)

# Full imputed feature table, kept under its original name for any later code
# that refers to it. It is filled with the training-split means as well.
X_imputed = pd.DataFrame(imputer.transform(X), columns=X.columns, index=X.index)

# 4. Train Decision Tree Classifier
# The seed is fixed so repeated runs use the same random state; fit() returns
# the estimator itself, so construction and training are chained.
clf = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# 5. Predict on Test Data
# Predicted class labels for the held-out rows, compared with y_test below.
y_pred = clf.predict(X_test)

# 6. Evaluation
# Precision and recall use macro averaging: the per-class scores are averaged
# with equal weight per class.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='macro')
recall = recall_score(y_test, y_pred, average='macro')

print("Decision Tree Classifier on Iris Dataset")
print("Accuracy : {:.4f}".format(accuracy))
print("Precision: {:.4f}".format(precision))
print("Recall   : {:.4f}".format(recall))

# Per-class breakdown, labelled with the species names from the dataset.
# The report is printed with its own print() call: passing it as a second
# argument after a string ending in "\n" makes print() insert a separator
# space at the start of the header row, shifting it out of alignment.
report = classification_report(y_test, y_pred, target_names=iris.target_names)
print("\nClassification Report:")
print(report)

# --- End of Script ---

# This script can be run as a standalone Python file.
# If you wish to use a Jupyter notebook, copy and paste the code into a cell.

Decision Tree Classifier on Iris Dataset
Accuracy : 0.9333
Precision: 0.9364
Recall   : 0.9333

Classification Report:
               precision    recall  f1-score   support

      setosa       1.00      0.90      0.95        10
  versicolor       0.90      0.90      0.90        10
   virginica       0.91      1.00      0.95        10

    accuracy                           0.93        30
   macro avg       0.94      0.93      0.93        30
weighted avg       0.94      0.93      0.93        30

