# 📘 Step 1: Import Libraries

In [2]:
# Import core libraries
import numpy as np
import pandas as pd

# Scikit-learn modules
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.impute import SimpleImputer

# 🌸 Step 2: Load and Explore the Iris Dataset

In [8]:
# Load the bundled Iris dataset. The returned object is read below through
# its .data, .target, .feature_names and .target_names attributes.
iris = load_iris()

In [9]:
# Feature matrix and class labels, unpacked in a single statement
X, y = iris.data, iris.target

In [10]:
# Build a labelled table for inspection: one column per feature plus a
# 'species' column holding the class label, then preview the first rows.
df = pd.DataFrame(X, columns=iris.feature_names).assign(species=y)
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),species
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [11]:
# Feature names; these are the labels used for the feature columns of df
iris.feature_names

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']

In [12]:
# Human-readable names of the target classes
iris.target_names

array(['setosa', 'versicolor', 'virginica'], dtype='<U10')

In [13]:
# (rows, columns) of the inspection table: the feature columns plus 'species'
df.shape

(150, 5)

# Simulate and Handle Missing Values

In [15]:
# Knock out a handful of randomly chosen cells so there is something to impute.
# Work on a copy so the original feature matrix X stays intact.
N_MISSING = 5

np.random.seed(42)  # fixed seed: the same cells are blanked on every run
X_missing = X.copy()
n_rows, n_cols = X_missing.shape

for _ in range(N_MISSING):
    # Draw the row first, then the column; a repeated (row, col) pair is
    # possible, in which case fewer than N_MISSING distinct cells end up NaN.
    row = np.random.randint(0, n_rows)
    col = np.random.randint(0, n_cols)
    X_missing[row, col] = np.nan


In [16]:
# Replace every NaN with the mean of its column.
# The imputer learns the column means from X_missing (all rows) and is then
# applied to that same matrix.
imputer = SimpleImputer(strategy='mean').fit(X_missing)
X_imputed = imputer.transform(X_missing)

In [19]:
# Check the dimensions of the imputed matrix
X_imputed.shape

(150, 4)

# 🔀 Step 3: Split Data into Train/Test Sets


In [20]:
# Split the data into training and testing sets.
# The split is made on the matrix that still contains NaNs so that the imputer
# can be fitted on the training rows only. Fitting it on every row would let
# test-set values influence the fill-in means (data leakage) and make the
# evaluation optimistic.
X_train_missing, X_test_missing, y_train, y_test = train_test_split(
    X_missing, y, test_size=0.2, random_state=42
)

# Learn the column means from the training rows, then reuse those same means
# to fill the gaps in the test rows.
imputer = SimpleImputer(strategy='mean')
X_train = imputer.fit_transform(X_train_missing)
X_test = imputer.transform(X_test_missing)

In [21]:
# 🌳 Step 4: Train Decision Tree Classifier

In [22]:
# Initialize and train the decision tree model.
# random_state is fixed so the run is repeatable.
clf = DecisionTreeClassifier(random_state=42)
# fit() is the last expression of the cell, so the fitted estimator is
# displayed as the cell output.
clf.fit(X_train, y_train)

0,1,2
,criterion,'gini'
,splitter,'best'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,
,random_state,42
,max_leaf_nodes,
,min_impurity_decrease,0.0


# 🔍 Step 4: Make Predictions

In [24]:
# Predict class labels for the held-out test rows with the fitted tree
y_pred = clf.predict(X_test)

In [25]:
# Display the predicted labels for the test set
y_pred

array([1, 0, 2, 1, 1, 0, 1, 2, 1, 1, 2, 0, 0, 0, 0, 1, 2, 1, 1, 2, 0, 2,
       0, 2, 2, 2, 2, 2, 0, 0])

In [26]:
# 📊 Step 5: Evaluate the Model

In [27]:
# Score the predictions against the true test labels.
# Precision and recall are macro-averaged, i.e. computed with average='macro'.
scores = {
    "Accuracy:": accuracy_score(y_test, y_pred),
    "Precision (macro):": precision_score(y_test, y_pred, average='macro'),
    "Recall (macro):": recall_score(y_test, y_pred, average='macro'),
}

# One "label value" line per metric, in the order defined above
for label, value in scores.items():
    print(label, value)

Accuracy: 1.0
Precision (macro): 1.0
Recall (macro): 1.0
