In [1]:
#Initial setup
import pandas as pd

In [5]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.preprocessing import LabelEncoder

# Load the Iris bunch and build one DataFrame holding the four measurement
# columns plus the integer class label in a 'species' column.
Iris = load_iris()
df = pd.DataFrame(Iris.data, columns=Iris.feature_names).assign(species=Iris.target)


In [7]:
# Optional: map target integers to species names (for readability).
# The lookup is driven by the untouched Iris.target array rather than by the
# current contents of df['species'], so re-running this cell is idempotent.
# Mapping the already-renamed column a second time would find no integer keys
# in species_map and turn every value into NaN.
species_map = dict(enumerate(Iris.target_names))
df['species'] = pd.Series(Iris.target, index=df.index).map(species_map)


In [8]:
# Optional demo: uncomment the next line to inject NaNs into rows 5-10 (inclusive) of one feature
# df.loc[5:10, 'sepal length (cm)'] = np.nan


In [9]:
# Handle missing values (if any): replace NaNs in each numeric column with
# that column's mean. The result is assigned back rather than using
# inplace=True, so the step is an explicit, chainable reassignment.
# NOTE(review): the means are computed over every row, i.e. before the
# train/test split performed later in the notebook; if real missing values are
# ever present, consider imputing with statistics from the training rows only.
df = df.fillna(df.mean(numeric_only=True))


In [10]:
# Encode the target: fit the encoder on the species column, then store the
# resulting integer codes in a new 'species_encoded' column.
label_encoder = LabelEncoder()
label_encoder.fit(df['species'])
df['species_encoded'] = label_encoder.transform(df['species'])


In [11]:
# Separate the feature matrix from the encoded target, then hold out 20% of
# the rows for testing (fixed seed so the split is repeatable).
X = df.loc[:, Iris.feature_names]
y = df.loc[:, 'species_encoded']
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)



In [12]:
# Fit a decision tree (fixed random_state) on the training split.
# The fit call stays as the cell's last expression so the notebook still
# displays the fitted estimator.
model = DecisionTreeClassifier(random_state=42)
model.fit(X=X_train, y=y_train)



In [13]:
# Score the held-out split: predict once, then derive all three metrics from
# the same predictions. Precision and recall are macro-averaged across classes.
y_pred = model.predict(X_test)

accuracy, precision, recall = (
    accuracy_score(y_true=y_test, y_pred=y_pred),
    precision_score(y_true=y_test, y_pred=y_pred, average='macro'),
    recall_score(y_true=y_test, y_pred=y_pred, average='macro'),
)


In [14]:
# Report the three metrics, one per line, rounded to two decimals.
print(
    f" Accuracy: {accuracy:.2f}",
    f" Precision: {precision:.2f}",
    f" Recall: {recall:.2f}",
    sep="\n",
)


 Accuracy: 1.00
 Precision: 1.00
 Recall: 1.00
