<a href="https://colab.research.google.com/github/Pragya1712/OnlineBookstore/blob/main/Decision_Tree.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter


In [None]:
# Names for the three feature columns (presumably in the same order as the
# columns of X_train -- confirm against the data source).
feature_names = ['Ear Shape', 'Face Shape', 'Whiskers']

# Each entry pairs one sample's binary feature row with its class label, so the
# row/label alignment is visible at a glance.
labeled_samples = [
    ((1, 1, 1), 1),
    ((0, 0, 1), 1),
    ((0, 1, 0), 0),
    ((1, 0, 1), 0),
    ((1, 1, 1), 1),
    ((1, 1, 0), 1),
    ((0, 0, 0), 0),
    ((1, 1, 0), 1),
    ((0, 1, 0), 0),
    ((0, 1, 0), 0),
]

# Feature matrix: one row per sample, one column per feature.
X_train = np.array([row for row, _ in labeled_samples])

# Label vector: entry i is the class of row i in X_train.
y_train = np.array([label for _, label in labeled_samples])


In [None]:
def entropy(y):
    """Return the Shannon entropy, in bits, of the labels in ``y``.

    Every distinct value in ``y`` is treated as one class whose probability is
    its share of ``len(y)``.

    Args:
        y: Sized iterable of hashable labels (a list or 1-D array).

    Returns:
        ``-sum(p * log2(p))`` over the classes that occur in ``y``. An empty
        ``y`` has no classes and yields the integer ``0``.
    """
    n_labels = len(y)
    class_shares = (n_in_class / n_labels for n_in_class in Counter(y).values())
    # Negate each term rather than the total so a pure set gives 0.0, not -0.0.
    return sum(-share * np.log2(share) for share in class_shares)

# Sanity check: the list holds a single label (0.5 is counted as a class value,
# not read as a probability), so the set is pure and the entropy is 0.
print(entropy([0.5]))



0.0


In [None]:
def split_dataset(X, y, feature_index):
    """Partition samples by the value of one binary feature column.

    Args:
        X: 2-D array of features, indexed as ``X[:, feature_index]``.
        y: Array of labels that accepts the same boolean row mask as ``X``.
        feature_index: Column of ``X`` to split on.

    Returns:
        A 4-tuple ``(X_left, y_left, X_right, y_right)``: "left" holds the rows
        where the feature equals 1, "right" the rows where it equals 0. Rows
        with any other value appear in neither side.
    """
    column = X[:, feature_index]
    parts = []
    # Left branch (value 1) first, then right branch (value 0).
    for branch_value in (1, 0):
        selected = column == branch_value
        parts.extend((X[selected], y[selected]))
    return tuple(parts)




In [None]:
def information_gain(X, y, feature_index):
    """Return the entropy reduction obtained by splitting on one feature.

    The gain is the entropy of ``y`` minus the size-weighted entropy of the two
    label subsets produced by ``split_dataset``.

    Args:
        X: 2-D feature array.
        y: Labels aligned row-by-row with ``X``.
        feature_index: Column of ``X`` to evaluate.

    Returns:
        The information gain in bits, or ``0`` when one side of the split is
        empty (the feature does not separate the samples).
    """
    parent_entropy = entropy(y)

    _, labels_on, _, labels_off = split_dataset(X, y, feature_index)
    n_on, n_off = len(labels_on), len(labels_off)

    # A one-sided split carries no information.
    if not (n_on and n_off):
        return 0

    # Each child's entropy is weighted by its share of the parent's samples.
    n_parent = len(y)
    children_entropy = (
        (n_on / n_parent) * entropy(labels_on)
        + (n_off / n_parent) * entropy(labels_off)
    )
    return parent_entropy - children_entropy


In [None]:
def best_split(X, y):
    """Find the feature column with the highest information gain.

    Args:
        X: 2-D feature array; every column is evaluated as a candidate.
        y: Labels aligned row-by-row with ``X``.

    Returns:
        A pair ``(best_index, gains)``: ``gains`` is the list of per-column
        information gains and ``best_index`` is ``np.argmax(gains)`` (the first
        column reaching the maximum).
    """
    gains = []
    for column_index in range(X.shape[1]):
        gains.append(information_gain(X, y, column_index))
    best_index = np.argmax(gains)
    return best_index, gains

In [None]:
class Node:
    """One node of the decision tree.

    ``build_tree`` creates two flavours of node: leaves, which carry only
    ``value`` (the predicted label), and internal nodes, which carry the index
    of the ``feature`` they split on plus a ``left`` and ``right`` child.
    Any field that is not supplied stays ``None``.
    """

    def __init__(self, feature=None, left=None, right=None, value=None):
        # Split description (internal nodes).
        self.feature = feature
        self.left, self.right = left, right
        # Prediction (leaf nodes).
        self.value = value

In [None]:
def build_tree(X, y, depth=0, max_depth=2, min_gain=1e-12):
    """Recursively grow a binary decision tree using information gain.

    At each node the feature with the largest information gain is chosen; the
    rows where that feature equals 1 go to the left child and the rows where it
    equals 0 go to the right child.

    Args:
        X: 2-D array of binary (0/1) features, one row per sample.
        y: Labels aligned row-by-row with ``X``; must not be empty.
        depth: Depth of the node being built (0 for the root).
        max_depth: Depth at which growth stops and a leaf is emitted.
        min_gain: A split is only made when its information gain exceeds this
            threshold. The tiny default treats floating-point noise around
            zero as "no gain".

    Returns:
        A ``Node``: a leaf holding the (majority) label, or an internal node
        holding the split feature index and its two subtrees.

    Raises:
        ValueError: If ``y`` is empty.
    """
    if len(y) == 0:
        raise ValueError("build_tree requires at least one training sample")

    # Pure node: every remaining sample has the same label.
    if len(set(y)) == 1:
        return Node(value=y[0])

    # Fallback prediction whenever this node cannot be split any further.
    majority_label = Counter(y).most_common(1)[0][0]

    # `>=` (not `==`) so a call that starts beyond the limit still terminates
    # here instead of growing until every leaf is pure.
    if depth >= max_depth:
        return Node(value=majority_label)

    feature, gains = best_split(X, y)

    # Gains are differences of floating-point entropies, so an uninformative
    # split can come out as +/-1e-16 instead of exactly 0; compare against a
    # small threshold rather than testing for equality.
    if gains[feature] <= min_gain:
        return Node(value=majority_label)

    X_left, y_left, X_right, y_right = split_dataset(X, y, feature)

    left_child = build_tree(X_left, y_left, depth + 1, max_depth, min_gain)
    right_child = build_tree(X_right, y_right, depth + 1, max_depth, min_gain)

    return Node(feature=feature, left=left_child, right=right_child)


In [None]:
# Fit the tree on the full training set; with max_depth=2 no path from the root
# contains more than two splits.
tree = build_tree(X_train,y_train,max_depth=2)