In [1]:
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, classification_report

print("--- Task 1: Scikit-learn Decision Tree ---")

# Step 1 - Data.
# Iris is bundled with scikit-learn; pull the feature matrix, the label
# vector and the human-readable names off the returned object.
iris = load_iris()
X, y = iris.data, iris.target
feature_names, target_names = iris.feature_names, iris.target_names

# Step 2 - Preprocessing.
# No cleaning or label encoding is performed here: the only preparation is
# a 70/30 train/test split, seeded so the partition is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

print(f"Training data shape: {X_train.shape}")
print(f"Testing data shape: {X_test.shape}")

# Step 3 - Fit.
# The tree is seeded as well, so repeated runs build the same model.
model = DecisionTreeClassifier(random_state=42)
print("\nTraining the model...")
model.fit(X=X_train, y=y_train)
print("Model training complete.")

# Step 4 - Predict on the held-out rows.
y_pred = model.predict(X=X_test)

# Step 5 - Score.
# Precision and recall are per-class quantities; with three classes they
# have to be collapsed into one number, here via the 'weighted' average.
recall = recall_score(y_true=y_test, y_pred=y_pred, average="weighted")
precision = precision_score(y_true=y_test, y_pred=y_pred, average="weighted")
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

print("\n--- Model Evaluation ---")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision (Weighted): {precision:.4f}")
print(f"Recall (Weighted): {recall:.4f}")

# Per-class breakdown, labelled with the species names instead of 0/1/2.
print("\nClassification Report:")
print(
    classification_report(
        y_true=y_test,
        y_pred=y_pred,
        target_names=target_names,
    )
)

print("------------------------------------------\n")

--- Task 1: Scikit-learn Decision Tree ---
Training data shape: (105, 4)
Testing data shape: (45, 4)

Training the model...
Model training complete.

--- Model Evaluation ---
Accuracy: 1.0000
Precision (Weighted): 1.0000
Recall (Weighted): 1.0000

Classification Report:
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        19
  versicolor       1.00      1.00      1.00        13
   virginica       1.00      1.00      1.00        13

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45

------------------------------------------

