In [6]:
# Cell 1

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

df = pd.read_csv("drug_200.csv")
df.head()


Unnamed: 0,Age,Sex,BP,Cholesterol,Na_to_K,Drug
0,23,F,HIGH,HIGH,25.355,drugY
1,47,M,LOW,HIGH,13.093,drugC
2,47,M,LOW,HIGH,10.114,drugC
3,28,F,NORMAL,HIGH,7.798,drugX
4,61,F,LOW,HIGH,18.043,drugY


In [7]:
# Cell 2

# Encode target column "Drug"
le = LabelEncoder()
df["Drug"] = le.fit_transform(df["Drug"])

# Encode categorical features
for col in df.columns:
    if df[col].dtype == "object":
        df[col] = LabelEncoder().fit_transform(df[col])

# Features & Labels
X = df.drop(columns=["Drug"]).values
y = df["Drug"].values

# Normalize numeric features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Train-Test Split 80:20
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


In [8]:
# Cell 3

def entropy(y):
    classes, counts = np.unique(y, return_counts=True)
    p = counts / len(y)
    return -np.sum(p * np.log2(p + 1e-10))

class Node:
    def __init__(self):
        self.feature = None
        self.threshold = None
        self.left = None
        self.right = None
        self.label = None


In [9]:
# Cell 4

class DecisionTreeScratch:

    def fit(self, X, y):
        self.root = self._build(X, y)

    def _build(self, X, y):
        node = Node()

        # If pure node → leaf
        if len(np.unique(y)) == 1:
            node.label = y[0]
            return node

        best_gain = -1
        best_feature = None
        best_threshold = None

        n_features = X.shape[1]

        # Find best split
        for feature in range(n_features):
            values = np.unique(X[:, feature])

            for thresh in values:
                left_idx = X[:, feature] <= thresh
                right_idx = ~left_idx

                if left_idx.sum() == 0 or right_idx.sum() == 0:
                    continue

                parent_entropy = entropy(y)
                left_entropy = entropy(y[left_idx])
                right_entropy = entropy(y[right_idx])

                gain = parent_entropy - (
                    (left_idx.sum() / len(y)) * left_entropy +
                    (right_idx.sum() / len(y)) * right_entropy
                )

                if gain > best_gain:
                    best_gain = gain
                    best_feature = feature
                    best_threshold = thresh

        # If no split improved → leaf
        if best_gain == -1:
            node.label = np.bincount(y).argmax()
            return node

        # Set decision rule
        node.feature = best_feature
        node.threshold = best_threshold

        # Recurse
        left_idx = X[:, best_feature] <= best_threshold
        right_idx = ~left_idx

        node.left = self._build(X[left_idx], y[left_idx])
        node.right = self._build(X[right_idx], y[right_idx])

        return node

    def predict_one(self, x):
        node = self.root
        while node.label is None:
            if x[node.feature] <= node.threshold:
                node = node.left
            else:
                node = node.right
        return node.label

    def predict(self, X):
        return np.array([self.predict_one(x) for x in X])


In [10]:
# Cell 5

dt = DecisionTreeScratch()
dt.fit(X_train, y_train)

y_pred_dt = dt.predict(X_test)


In [12]:
# Cell 6

def accuracy(y_true, y_pred):
    return np.sum(y_true == y_pred) / len(y_true)

print("Decision Tree Accuracy =", accuracy(y_test, y_pred_dt))


Decision Tree Accuracy = 1.0


In [13]:
# Cell 7

def print_tree(node, depth=0):
    indent = "  " * depth
    
    # Leaf node
    if node.label is not None:
        print(f"{indent}Leaf → Class: {node.label}")
        return
    
    # Decision node
    print(f"{indent}Feature[{node.feature}] <= {node.threshold}")
    
    print(f"{indent}→ Left:")
    print_tree(node.left, depth + 1)
    
    print(f"{indent}→ Right:")
    print_tree(node.right, depth + 1)

print_tree(dt.root)


Feature[4] <= -0.20018185143783082
→ Left:
  Feature[2] <= -1.11016893790767
  → Left:
    Feature[0] <= 0.34448486659880945
    → Left:
      Leaf → Class: 0
    → Right:
      Leaf → Class: 1
  → Right:
    Feature[2] <= 0.10979692792493435
    → Left:
      Feature[3] <= -0.9704367948586523
      → Left:
        Leaf → Class: 2
      → Right:
        Leaf → Class: 3
    → Right:
      Leaf → Class: 3
→ Right:
  Leaf → Class: 4
