In [3]:
import numpy as np
import math

data = np.array([
    [3, 22000, 1, 1],
    [6, 75000, 0, 0],
    [2, 14000, 1, 1],
    [8, 85000, 0, 0],
    [4, 50000, 1, 1]
])

X = data[:, :-1]
y = data[:, -1]

In [4]:
# Step 1: Calculate Entropy
def calculate_entropy(y):
    p1 = np.sum(y == 1) / len(y)
    p0 = np.sum(y == 0) / len(y)
    if p1 == 0 or p0 == 0:
        return 0
    return - (p1 * math.log2(p1) + p0 * math.log2(p0))

# Initial entropy of the dataset
initial_entropy = calculate_entropy(y)
print(f"Initial Entropy: {initial_entropy:.4f}")


Initial Entropy: 0.9710


In [None]:
# Step 2: Calculate Information Gain for each feature
def calculate_information_gain(X, y, feature_index):
    unique_values = np.unique(X[:, feature_index])
    weighted_entropy = 0
    
    for value in unique_values:
        subset_y = y[X[:, feature_index] == value]
        subset_entropy = calculate_entropy(subset_y)
        weight = len(subset_y) / len(y)
        weighted_entropy += weight * subset_entropy
        
    info_gain = initial_entropy - weighted_entropy
    return info_gain

In [None]:
# Calculate IG for all features
info_gains = []
for i in range(X.shape[1]):
    ig = calculate_information_gain(X, y, i)
    info_gains.append(ig)
    print(f"Information Gain for Feature {i}: {ig:.4f}")

# Step 3: Choose Best Feature
best_feature_index = np.argmax(info_gains)
print(f"Best Feature Index: {best_feature_index}")

Information Gain for Feature 0: 0.9710
Information Gain for Feature 1: 0.9710
Information Gain for Feature 2: 0.9710
Best Feature Index: 0


In [None]:
# Step 4: Build Simple Decision Tree 
def predict(x):
    return 1 if x[best_feature_index] == 1 else 0

# Predict on training data
y_pred = np.array([predict(x) for x in X])

In [None]:
# Step 5: Calculate Confusion Matrix
TP = np.sum((y == 1) & (y_pred == 1))
FP = np.sum((y == 0) & (y_pred == 1))
TN = np.sum((y == 0) & (y_pred == 0))
FN = np.sum((y == 1) & (y_pred == 0))


In [None]:
# Step 6: Evaluation Metrics
accuracy = (TP + TN) / (TP + FP + TN + FN)
precision = TP / (TP + FP) if (TP + FP) > 0 else 0
recall = TP / (TP + FN) if (TP + FN) > 0 else 0
f1_score = (2 * precision * recall) / (precision + recall) if (precision + recall) > 0 else 0


In [None]:
print("\nConfusion Matrix:")
print(f"TP={TP}, FP={FP}, TN={TN}, FN={FN}")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1_score:.4f}")


Confusion Matrix:
TP=0, FP=0, TN=2, FN=3
Accuracy: 0.4000
Precision: 0.0000
Recall: 0.0000
F1-Score: 0.0000
