In [1]:
# Step 1: Import Libraries

In [2]:
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.impute import SimpleImputer

In [3]:
# Step 2: Load and explore data set

In [4]:

# Load the Iris dataset that ships with scikit-learn.
iris = load_iris()

# Feature matrix as a labelled DataFrame; class labels as an integer Series.
X = pd.DataFrame(data=iris.data, columns=iris.feature_names)
y = pd.Series(data=iris.target)

# Preview the first five rows of the features, then of the labels.
print(X.head(), y.head(), sep="\n")

   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
0                5.1               3.5                1.4               0.2
1                4.9               3.0                1.4               0.2
2                4.7               3.2                1.3               0.2
3                4.6               3.1                1.5               0.2
4                5.0               3.6                1.4               0.2
0    0
1    0
2    0
3    0
4    0
dtype: int64


In [5]:
# Step 3: Handle missing values

In [6]:
# Replace any missing feature value with the mean of its column.
# Iris is not expected to contain missing values, so this is presumably a
# no-op here and serves as a defensive preprocessing step.
#
# NOTE(review): the imputer is fit on the full feature matrix *before* the
# train/test split. On data that really has missing values this would leak
# test-set statistics into training; fit on the training split only in that case.
imputer = SimpleImputer(strategy='mean')

# fit_transform returns a bare array, so rebuild the DataFrame using X's own
# column labels and index. Keeping the index guarantees the rows stay aligned
# with y even if X does not carry a default RangeIndex.
X = pd.DataFrame(imputer.fit_transform(X), columns=X.columns, index=X.index)

In [7]:
# Step 4: Split data set

In [8]:

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Report how many rows ended up in each partition.
print(
    f"Training samples: {len(X_train)}",
    f"Testing samples: {len(X_test)}",
    sep="\n",
)

Training samples: 120
Testing samples: 30


In [9]:
# Step 5: Train decision tree classifier

In [10]:

# Decision tree with default hyperparameters; only the seed is pinned so that
# repeated runs build the same tree.
clf = DecisionTreeClassifier(random_state=42)

# Fit on the training partition only.
clf.fit(X=X_train, y=y_train)

In [11]:
# Step 6: Make predictions

In [12]:

# Predict a class label for every row of the held-out test partition.
y_pred = clf.predict(X=X_test)

# Show only the first five predictions as a quick sanity check.
print("Predictions:", y_pred[0:5])

Predictions: [1 0 2 1 1]


In [13]:
# Step 7: Evaluate model

In [14]:

# Compare the predictions with the true test labels.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

# Precision and recall are macro-averaged: computed per class, then averaged
# with equal weight for each class.
precision = precision_score(y_true=y_test, y_pred=y_pred, average='macro')
recall = recall_score(y_true=y_test, y_pred=y_pred, average='macro')

# Report each metric rounded to two decimal places, one per line.
print(
    f"Accuracy: {accuracy:.2f}",
    f"Precision: {precision:.2f}",
    f"Recall: {recall:.2f}",
    sep="\n",
)

Accuracy: 1.00
Precision: 1.00
Recall: 1.00


In [15]:
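# 🧠 Conclusion
# - The model achieved excellent performance, with perfect scores on accuracy, precision, and recall (likely due to the simplicity and balance of the dataset).
# - Caveat: the test set contains only 30 samples from a single split, so these perfect scores should not be read as a guarantee of generalization.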
