In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer

In [2]:
# Load the Iris bunch and build a DataFrame from it: one column per
# measurement (named after iris.feature_names) plus a 'species' column that
# initially holds the integer class codes from iris.target.
iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names).assign(species=iris.target)

In [3]:
# Replace the numeric class codes in 'species' with the readable species names.
# The names are looked up from iris.target (never modified) rather than from
# the current contents of df['species'], so this cell is idempotent: running it
# a second time yields the same column instead of mapping already-converted
# strings through an int-keyed dict and filling the column with NaN.
df['species'] = iris.target_names[iris.target]

In [5]:
# Count the null entries in every column and report the per-column totals.
missing_per_column = df.isna().sum()
print("Missing values before imputation:\n", missing_per_column)

Missing values before imputation:
 sepal length (cm)    0
sepal width (cm)     0
petal length (cm)    0
petal width (cm)     0
species              0
dtype: int64


In [6]:
# Encode the species names as integer class labels.
# The encoder is fitted on the 'species' column first and then used to
# transform that same column; the fitted encoder is kept in `label_encoder`.
label_encoder = LabelEncoder().fit(df['species'])
df['species_encoded'] = label_encoder.transform(df['species'])

In [7]:
# Separate the model inputs from the label:
#   X -> the measurement columns listed in iris.feature_names
#   y -> the integer-encoded species column
X = df.loc[:, iris.feature_names]
y = df.loc[:, 'species_encoded']

In [8]:
# Hold out 20% of the rows for testing; the remaining 80% are used for training.
# random_state is pinned so the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [9]:
# Fit a decision tree on the training split.
# random_state is pinned so repeated runs build the same tree.
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X=X_train, y=y_train)

In [10]:
# Predict the encoded species label for every row of the held-out test set.
y_pred = clf.predict(X=X_test)

In [12]:
# Score the predictions against the true test labels.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
# Precision and recall use average='weighted' (accounts for label imbalance);
# both are computed with identical arguments, so they share one expression.
precision, recall = (
    scorer(y_true=y_test, y_pred=y_pred, average='weighted')
    for scorer in (precision_score, recall_score)
)

In [13]:
# Report the three scores, each rounded to two decimals, under a header line.
# The lines are assembled first and written with a single print call.
report_lines = [
    "\nModel Evaluation Metrics:",
    f"Accuracy:  {accuracy:.2f}",
    f"Precision: {precision:.2f}",
    f"Recall:    {recall:.2f}",
]
print("\n".join(report_lines))


Model Evaluation Metrics:
Accuracy:  1.00
Precision: 1.00
Recall:    1.00
