In [1]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

In [2]:
# Load the iris dataset
iris = load_iris()
X = iris.data  # Features: sepal length, sepal width, petal length, petal width
y = iris.target  # Target: flower species (0, 1, or 2)

In [3]:
# Create feature and target names for better understanding
feature_names = iris.feature_names
target_names = iris.target_names

In [4]:
# input_features = df['house_no', 'street', 'city', 'state', 'zip', 'bed', 'bath', 'sqft', 'price']

In [5]:
# output_target = df['house_price']

In [6]:
feature_names

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']

In [7]:
target_names

array(['setosa', 'versicolor', 'virginica'], dtype='<U10')

In [8]:
len(X)

150

In [9]:
# Split the data   # 20% testing data     # 80% training data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42000)

In [10]:
X_train.shape

(120, 4)

In [11]:
X_test.shape

(30, 4)

In [12]:
# Import the decision tree classifier
from sklearn.tree import DecisionTreeClassifier

# Create a decision tree classifier
# max_depth=3 limits the tree depth to prevent overfitting
tree_classifier = DecisionTreeClassifier(max_depth=3, random_state=42)

# Train the classifier on our training data
tree_classifier.fit(X_train, y_train)

print("Decision tree has been trained!")

Decision tree has been trained!


In [13]:
# Import visualization tools
from sklearn.tree import export_graphviz
import graphviz
from matplotlib import pyplot as plt

# Create a DOT file and visualize the tree
dot_data = export_graphviz(
    tree_classifier,
    out_file=None,
    feature_names=feature_names,
    class_names=target_names,
    filled=True,
    rounded=True,
    special_characters=True
)

# Display the tree
graph = graphviz.Source(dot_data)
graph.render("iris_decision_tree", format="png", cleanup=True)

# If you can't use graphviz, here's a simpler text representation
from sklearn.tree import export_text
tree_text = export_text(tree_classifier, feature_names=feature_names)
print(tree_text)

ExecutableNotFound: failed to execute WindowsPath('dot'), make sure the Graphviz executables are on your systems' PATH

In [None]:
%pip show graphviz

Note: you may need to restart the kernel to use updated packages.




In [16]:
# Make predictions on the test set
y_pred = tree_classifier.predict(X_test)

# Look at the first 5 predictions vs actual values
for i in range(5):
    print(f"Sample {i+1}:")
    print(f"  Features: {X_test[i]}")
    print(f"  Actual species: {target_names[y_test[i]]}")
    print(f"  Predicted species: {target_names[y_pred[i]]}")
    print()

Sample 1:
  Features: [6.7 3.  5.  1.7]
  Actual species: versicolor
  Predicted species: virginica

Sample 2:
  Features: [5.7 2.6 3.5 1. ]
  Actual species: versicolor
  Predicted species: versicolor

Sample 3:
  Features: [6.1 2.9 4.7 1.4]
  Actual species: versicolor
  Predicted species: versicolor

Sample 4:
  Features: [5.7 4.4 1.5 0.4]
  Actual species: setosa
  Predicted species: setosa

Sample 5:
  Features: [5.5 2.4 3.7 1. ]
  Actual species: versicolor
  Predicted species: versicolor



In [17]:
# Predict a new flower
new_flower = [[5.1, 3.5, 1.4, 0.2]]  # Measurements of a new iris flower
prediction = tree_classifier.predict(new_flower)
print(f"New flower prediction: {target_names[prediction[0]]}")

New flower prediction: setosa


In [20]:
# Import evaluation metrics
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
accuracy_percent = accuracy * 100
print(f"Accuracy: {accuracy_percent:.2f}%")

Accuracy: 90.00%


In [21]:
# More detailed evaluation
print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=target_names))


Classification Report:
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00         7
  versicolor       1.00      0.77      0.87        13
   virginica       0.77      1.00      0.87        10

    accuracy                           0.90        30
   macro avg       0.92      0.92      0.91        30
weighted avg       0.92      0.90      0.90        30



In [22]:
# Confusion matrix
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))


Confusion Matrix:
[[ 7  0  0]
 [ 0 10  3]
 [ 0  0 10]]
