In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

In [2]:
# Step 1: Load the Zoo dataset
# The raw file has no header row, so the column names are supplied explicitly.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/zoo/zoo.data"
columns = [
    "animal_name",
    "hair", "feathers", "eggs", "milk",
    "airborne", "aquatic", "predator", "toothed",
    "backbone", "breathes", "venomous", "fins",
    "legs", "tail", "domestic", "catsize",
    "class_type",
]

# Read the file and discard the animal name in one pass; the name is an
# identifier, not a feature for classification.
zoo_data = pd.read_csv(url, names=columns).drop(columns="animal_name")

# Feature matrix = every remaining column except the label; target = the label.
y = zoo_data["class_type"]
X = zoo_data.drop(columns="class_type")

# Encode the target variable as consecutive integer codes
encoder = LabelEncoder()
y_encoded = encoder.fit_transform(y)

In [3]:
# Step 2: Split the dataset (70% train / 30% test, fixed seed for repeatability)
# NOTE(review): the split is not stratified; with a dataset this small some
# classes may end up with very few test samples -- consider `stratify=y_encoded`.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.3,
    random_state=42,
)

# Step 3: Train the Decision Tree
# `fit` returns the fitted estimator itself, so construction and training
# can be expressed as a single chained expression.
clf = DecisionTreeClassifier(
    criterion='gini',
    max_depth=5,
    random_state=42,
).fit(X_train, y_train)

# Step 4: Make Predictions on the held-out test set
y_pred = clf.predict(X_test)

In [4]:
# Step 5: Evaluate the Decision Tree
# Precision, recall and F1 are averaged across classes, weighted by each
# class's support in the test set.
#
# `zero_division=0` is passed explicitly: when a class is never predicted its
# per-class precision is 0/0. scikit-learn's default ("warn") already scores
# that case as 0 but emits an UndefinedMetricWarning; stating the choice here
# keeps the same values while making the behaviour deliberate and the cell
# output clean.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)
f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)

# Rows are true classes, columns are predicted classes.
conf_matrix = confusion_matrix(y_test, y_pred)

# Print Metrics
print("Decision Tree Performance Metrics:")
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")
print("Confusion Matrix:")
print(conf_matrix)

Decision Tree Performance Metrics:
Accuracy: 0.94
Precision: 0.92
Recall: 0.94
F1 Score: 0.92
Confusion Matrix:
[[15  0  0  0  0  0  0]
 [ 0  3  0  0  0  0  0]
 [ 0  0  0  0  1  0  0]
 [ 0  0  0  2  0  0  0]
 [ 0  0  0  0  2  0  0]
 [ 0  0  0  0  0  4  1]
 [ 0  0  0  0  0  0  3]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
