In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math

In [None]:
def z_normalize(x):
  """Standardize each column of a 2-D array to zero mean and unit variance.

  The computation is done on a floating-point copy, so the caller's array is
  left untouched and integer inputs are not truncated when the normalized
  values are stored.

  Args:
    x: 2-D array-like of shape (m, n); each column is normalized independently.

  Returns:
    A new float ndarray of shape (m, n) holding (x - column mean) / column std
    (population standard deviation, NumPy's default ddof=0). Columns whose
    standard deviation is 0 come back as all zeros instead of NaN/inf.

  Raises:
    ValueError: if ``x`` is not 2-dimensional.
  """
  data = np.array(x, dtype=float)  # float copy: no in-place mutation, no int truncation
  if data.ndim != 2:
    raise ValueError("z_normalize expects a 2-D array, got %d dimension(s)" % data.ndim)

  mean = data.mean(axis=0)
  std = data.std(axis=0)
  # A constant column has std == 0; dividing by 1 leaves its centered values (all 0).
  safe_std = np.where(std == 0, 1.0, std)
  return (data - mean) / safe_std

In [None]:
def entropy(x):
  """Return the base-2 binary entropy of the labels in ``x``.

  ``x`` must support boolean-mask indexing (``x[x == 1]``), e.g. a NumPy
  array. Entries equal to 1 count as the positive class; everything else is
  treated as the other class.

  Returns 0 for an empty ``x`` and for a node where the positive fraction is
  exactly 0 or 1; otherwise returns -p*log2(p) - (1-p)*log2(1-p).
  """
  total = len(x)
  # With no samples the positive fraction is taken to be 0.
  frac_pos = len(x[x == 1]) / total if total != 0 else 0

  # An empty or single-class node has no uncertainty.
  if frac_pos in (0, 1):
    return 0

  frac_neg = 1 - frac_pos
  return -frac_pos*math.log(frac_pos, 2) - frac_neg*math.log(frac_neg, 2)

In [None]:
def split_data(X, node_indices, feature):
    """Partition ``node_indices`` by the value of one feature.

    Args:
        X: row-indexable data; ``X[i][feature]`` is read for every index ``i``.
        node_indices: iterable of row indices belonging to the current node.
        feature: column position of the feature to split on.

    Returns:
        ``(left_indices, right_indices)`` as two lists, in the order the
        indices were visited: rows whose feature value equals 1 go left,
        all other rows go right.
    """
    left_indices, right_indices = [], []

    for row in node_indices:
        # Choose the destination list first, then append once.
        bucket = left_indices if X[row][feature] == 1 else right_indices
        bucket.append(row)

    return left_indices, right_indices

In [None]:
def information_gain(x, y, node_indices, feature):
    """Information gain obtained by splitting a node on one feature.

    Args:
        x: feature data, passed through to ``split_data``.
        y: label array; indexed with lists/arrays of row indices
           (``y[left_indices]``), so it must support that form of indexing.
        node_indices: sized collection of the row indices in the current node.
        feature: column position of the feature to evaluate.

    Returns:
        Entropy of the node minus the size-weighted entropy of the two
        children. Returns 0 when the split leaves either child empty (this
        also covers an empty node, so no division by zero occurs).
    """
    left_indices, right_indices = split_data(x, node_indices, feature)

    # A split that sends every row to one side separates nothing.
    if len(left_indices) == 0 or len(right_indices) == 0:
        return 0

    # Count the index collections directly; indexing x just to take len()
    # would copy whole rows of the feature matrix for no benefit.
    node_size = len(node_indices)
    w_left = len(left_indices) / node_size
    w_right = len(right_indices) / node_size

    weighted_child_entropy = (
        w_left * entropy(y[left_indices]) + w_right * entropy(y[right_indices])
    )

    return entropy(y[node_indices]) - weighted_child_entropy

In [None]:
def best_split(x, y, node_indices):
    """Pick the feature whose split yields the largest information gain.

    Every column of ``x`` (``x.shape[1]`` of them) is scored with
    ``information_gain``. A feature only replaces the current choice when its
    gain is strictly greater than the best seen so far, and the search starts
    from a best gain of 0. Consequently ties keep the lowest column index, and
    feature 0 is returned when no feature has a positive gain.

    Returns:
        The column index of the chosen feature.
    """
    gains = [
        information_gain(x, y, node_indices, col)
        for col in range(x.shape[1])
    ]

    chosen, top_gain = 0, 0
    for col, gain in enumerate(gains):
        if gain > top_gain:
            chosen, top_gain = col, gain

    return chosen

In [None]:
def build_tree_recursive(x, y, node_indices, branch_name, max_depth, current_depth):
    """Recursively split a node and print the resulting tree structure.

    Nothing is returned or stored; the tree exists only as printed output.

    Args:
        x: feature data, passed to ``best_split`` and ``split_data``.
        y: labels, passed to ``best_split``.
        node_indices: row indices that belong to this node.
        branch_name: label printed for this node ("Root", "Left", "Right").
        max_depth: depth at which nodes are printed as leaves.
        current_depth: depth of this node; the root call passes 0.

    NOTE(review): there is no purity or empty-node check, so splitting
    continues down to ``max_depth`` even for nodes that cannot be separated
    further (such nodes report feature 0, the fallback of ``best_split``).
    """
    # `>=` rather than `==`: if max_depth is below the starting depth (e.g. a
    # negative value), equality is never reached and the recursion would only
    # stop with a RecursionError.
    if current_depth >= max_depth:
        formatting = " "*current_depth + "-"*current_depth
        print(formatting, "%s leaf node with indices" % branch_name, node_indices)
        return

    best_feature = best_split(x, y, node_indices)

    formatting = "-"*current_depth
    print("%s Depth %d, %s: Split on feature: %d" % (formatting, current_depth, branch_name, best_feature))

    left_indices, right_indices = split_data(x, node_indices, best_feature)

    # Children are one level deeper; the left subtree is printed before the right.
    build_tree_recursive(x, y, left_indices, "Left", max_depth, current_depth+1)
    build_tree_recursive(x, y, right_indices, "Right", max_depth, current_depth+1)

In [None]:
def decision_tree(x,y,max_depth):
  """Print a decision tree built over every row of ``x``.

  The root node contains the indices 0 .. len(x)-1 and construction starts at
  depth 0 under the branch name "Root".
  """
  every_row = np.arange(len(x))
  build_tree_recursive(
      x,
      y,
      node_indices=every_row,
      branch_name="Root",
      max_depth=max_depth,
      current_depth=0,
  )

In [None]:
def decision_tree_data(path):
  """Load a CSV dataset and ask the user for the maximum tree depth.

  Args:
    path: location of the CSV file. Surrounding whitespace and single or
      double quotes are removed before the file is opened.

  Returns:
    A tuple ``(x, y, max_depth)`` where ``x`` holds every column except the
    first and the last, ``y`` holds the last column (both as NumPy arrays),
    and ``max_depth`` is the integer typed at the prompt.
    NOTE(review): the first column is skipped, presumably because it is an
    identifier/index column; confirm against the dataset.

  Raises:
    ValueError: if the entered depth is not an integer or is negative.
  """
  # Pasted paths often arrive padded with spaces or wrapped in either quote
  # style; strip all of that, not only single quotes.
  path = path.strip().strip("'\"")
  df = pd.read_csv(path)

  x = df[df.columns[1:-1]].to_numpy()
  y = df[df.columns[-1]].to_numpy()

  max_depth = int(input("Enter the maximum depth:"))
  # Reject a negative depth here, where the mistake was made, rather than
  # letting it surface later during tree construction.
  if max_depth < 0:
    raise ValueError("maximum depth must be a non-negative integer, got %d" % max_depth)

  return (x,y,max_depth)

In [None]:
# decision_tree_data requires the CSV location as its argument, so ask for it
# here (the function itself prompts for the maximum depth).
data_path = input("Enter the path of the dataset:")
x_decision, y_decision, max_depth = decision_tree_data(data_path)

In [None]:
# Print the tree for the loaded dataset, down to the depth the user entered.
decision_tree(x=x_decision, y=y_decision, max_depth=max_depth)