In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [2]:
def entropy(y):
    """Return the Shannon entropy, in bits, of the labels in ``y``.

    Parameters
    ----------
    y : array-like
        Class labels; anything ``np.unique`` accepts.

    Returns
    -------
    float
        ``-sum(p * log2(p))`` over the observed label frequencies. A
        single-class input and an empty input both evaluate to zero.
    """
    _, counts = np.unique(y, return_counts=True)
    # np.unique only reports labels that occur at least once, so every
    # probability is strictly positive and log2 needs no epsilon guard.
    # Adding one would bias the result (pure nodes would come out negative).
    probabilities = counts / counts.sum()
    return -np.sum(probabilities * np.log2(probabilities))

def information_gain(X, y, feature_index, threshold):
    """Return the entropy reduction obtained by one threshold split.

    Rows with ``X[:, feature_index] <= threshold`` form the left partition;
    all remaining rows form the right partition.

    Parameters
    ----------
    X : np.ndarray
        Feature matrix, indexed as ``X[:, feature_index]``.
    y : np.ndarray
        Labels aligned with the rows of ``X``.
    feature_index : int
        Column of ``X`` to split on.
    threshold : scalar
        Split value compared against the selected column.

    Returns
    -------
    float
        Entropy of ``y`` minus the size-weighted entropy of the two
        partitions; ``0.0`` when ``y`` is empty.
    """
    n_samples = len(y)
    if n_samples == 0:
        # Nothing to split; also avoids dividing by zero below.
        return 0.0

    mask = X[:, feature_index] <= threshold
    # Vectorized count instead of the builtin sum(), which would loop over
    # the array element by element in Python.
    p_left = np.count_nonzero(mask) / n_samples
    p_right = 1 - p_left
    children_entropy = p_left * entropy(y[mask]) + p_right * entropy(y[~mask])
    return entropy(y) - children_entropy

def find_best_split(X, y):
    """Search every feature/threshold pair for the highest information gain.

    Parameters
    ----------
    X : np.ndarray
        Feature matrix with features along axis 1.
    y : np.ndarray
        Labels aligned with the rows of ``X``.

    Returns
    -------
    tuple
        ``(feature_index, threshold)`` of the best split, or ``(None, None)``
        when no candidate achieves strictly positive gain. On ties the first
        candidate encountered wins (features in column order, thresholds in
        ascending order).
    """
    num_features = X.shape[1]
    best_information_gain = 0
    best_feature_index = None
    best_threshold = None

    for feature_index in range(num_features):
        # Candidate thresholds are the distinct observed values. The largest
        # one is dropped: comparing against it leaves one side of the split
        # empty, so its gain is zero and it could never be selected.
        thresholds = np.unique(X[:, feature_index])[:-1]

        for threshold in thresholds:
            ig = information_gain(X, y, feature_index, threshold)

            # Strict comparison: a split must actually reduce entropy.
            if ig > best_information_gain:
                best_information_gain = ig
                best_feature_index = feature_index
                best_threshold = threshold

    return best_feature_index, best_threshold

def build_tree(X, y, depth, max_depth):
    """Recursively grow a decision tree represented as nested dicts.

    Parameters
    ----------
    X : np.ndarray
        Feature matrix, indexed as ``X[:, feature_index]``.
    y : np.ndarray
        Labels aligned with the rows of ``X``; must be non-empty.
    depth : int
        Depth of the node being built (the notebook passes 0 for the root).
    max_depth : int
        Depth at which growth stops and a leaf is emitted.

    Returns
    -------
    dict
        Either a leaf ``{'class': label}`` or an internal node with keys
        ``'feature_index'``, ``'threshold'``, ``'left'`` and ``'right'``,
        where ``'left'`` is built from the rows satisfying
        ``X[:, feature_index] <= threshold``.

    Raises
    ------
    ValueError
        If ``y`` is empty, since no class can be assigned to the node.
    """
    if len(y) == 0:
        raise ValueError("build_tree requires at least one sample")

    unique_classes, counts = np.unique(y, return_counts=True)
    # Majority label of this node. Ties resolve to the smallest label because
    # np.unique returns sorted labels and argmax picks the first maximum.
    majority_class = unique_classes[np.argmax(counts)]

    if len(unique_classes) == 1 or depth == max_depth:
        return {'class': majority_class}

    feature_index, threshold = find_best_split(X, y)
    if feature_index is None:
        # No split reduces entropy (e.g. rows with identical features but
        # different labels), so stop here instead of indexing with None.
        return {'class': majority_class}

    mask = X[:, feature_index] <= threshold

    left_subtree = build_tree(X[mask], y[mask], depth + 1, max_depth)
    right_subtree = build_tree(X[~mask], y[~mask], depth + 1, max_depth)

    return {'feature_index': feature_index, 'threshold': threshold,
            'left': left_subtree, 'right': right_subtree}

def predict_tree(node, sample):
    """Return the class stored in the leaf that ``sample`` reaches.

    Starting at ``node``, descend into ``'left'`` whenever
    ``sample[feature_index] <= threshold`` and into ``'right'`` otherwise,
    until a node holding a ``'class'`` key is found.
    """
    current = node
    while 'class' not in current:
        goes_left = sample[current['feature_index']] <= current['threshold']
        current = current['left'] if goes_left else current['right']
    return current['class']


In [6]:
# Fetch the Iris features and labels from scikit-learn
iris = datasets.load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=420
)

# Grow the tree on the training rows, starting from the root at depth 0
max_depth = 3
tree = build_tree(X_train, y_train, depth=0, max_depth=max_depth)

# Classify every held-out row by walking the fitted tree
y_pred = [predict_tree(tree, row) for row in X_test]

# Report the fraction of held-out rows that were classified correctly
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.97
