In [2]:
def entropy(a):
  """Return the Shannon entropy, in bits, of the class labels in ``a``.

  Args:
    a: Array-like of class labels (list, tuple, NumPy array, pandas
      Series, ...). It is converted with ``np.asarray`` and flattened, so
      plain Python sequences work as well as arrays.

  Returns:
    ``-sum(p * log2(p))`` over the distinct labels, where ``p`` is the
    relative frequency of each label. Returns 0 for empty input.
  """
  labels = np.asarray(a).ravel()
  total = labels.size
  if total == 0:
    return 0

  # One pass over the data yields every class together with its count.
  _, counts = np.unique(labels, return_counts=True)

  result = 0.0
  for count in counts.tolist():
    probability = count / total
    # Every counted class has probability > 0, so log2 is always defined.
    result -= probability * math.log2(probability)
  return result

In [None]:
class decisiontreeclassifier:
  """Decision tree for categorical features, grown by information gain.

  Every internal node splits on one feature and has one child per category
  of that feature seen in the node's training rows; a feature is used at
  most once along any root-to-leaf path.

  The fitted tree is stored in ``self.tree`` as nested dicts:

  * leaf: ``{'Class': label}``
  * internal node: ``{'split_feature': column index into the original
    feature matrix, 'majority_class': most frequent label at the node,
    'children': [(category, subtree), ...]}``
  """

  def __init__(self, max_depth=5):  # setting max_depth = 5 as default
    """Create an unfitted tree that grows at most ``max_depth`` levels."""
    self.max_depth = max_depth
    self.x_train = None
    self.y_train = None
    self.tree = None

  def best_split_feature(self, x_train, y_train):
    """Return the column of ``x_train`` with the highest information gain.

    Args:
      x_train: 2-D array of categorical feature values (rows are samples).
      y_train: Array of class labels, one per row of ``x_train``.

    Returns:
      The column index (relative to ``x_train``) whose split gives the
      largest information gain, or ``None`` when ``x_train`` has no rows
      or no columns.
    """
    if x_train.shape[0] == 0 or x_train.shape[1] == 0:
      return None

    parent_entropy = entropy(y_train)  # entropy before any split
    n_samples = len(y_train)
    gains = []
    for feature in range(x_train.shape[1]):
      column = x_train[:, feature]
      gain = parent_entropy
      for category in np.unique(column):
        mask = column == category
        # Subtract the child's entropy weighted by its share of the rows.
        gain -= entropy(y_train[mask]) * np.count_nonzero(mask) / n_samples
      gains.append(gain)

    return int(np.argmax(gains))

  def mode(self, arr):
    """Return the most frequent value in ``arr`` (``None`` if it is empty).

    Ties are resolved in favour of the smallest value, because
    ``np.unique`` returns the values sorted and ``np.argmax`` picks the
    first maximum.
    """
    if len(arr) == 0:
      return None
    values, counts = np.unique(arr, return_counts=True)
    return values[np.argmax(counts)]

  def make_split(self, x_train, y_train, depth, feature_indices=None):
    """Recursively build the subtree for the given training rows.

    Args:
      x_train: 2-D array holding the feature columns still available.
      y_train: Class labels for the rows of ``x_train``.
      depth: Depth of the node being built (the root is 0).
      feature_indices: For each column of ``x_train``, its column index in
        the original feature matrix. Defaults to ``0..n_columns-1``, which
        is correct for the root call.

    Returns:
      A leaf dict or an internal-node dict as described in the class
      docstring.
    """
    majority = self.mode(y_train)

    # Stop at the depth limit or when the labels are pure (or empty).
    if depth >= self.max_depth or len(np.unique(y_train)) <= 1:
      return {'Class': majority}

    best = self.best_split_feature(x_train, y_train)
    if best is None:
      # No rows or no features left to split on.
      return {'Class': majority}

    if feature_indices is None:
      feature_indices = np.arange(x_train.shape[1])

    column = x_train[:, best]
    # The chosen feature is removed for the whole subtree; the index map is
    # reduced in lockstep so nodes keep referring to original columns.
    remaining_x = np.delete(x_train, best, axis=1)
    remaining_features = np.delete(feature_indices, best)

    node = {
      'split_feature': int(feature_indices[best]),
      'majority_class': majority,
      'children': [],
    }
    # Iterate over the category values themselves so every child receives
    # at least one training row.
    for category in np.unique(column):
      mask = column == category
      child = self.make_split(
        remaining_x[mask], y_train[mask], depth + 1, remaining_features
      )
      node['children'].append((category, child))

    return node

  def train(self, x_train, y_train):
    """Fit the tree on categorical features ``x_train`` and labels ``y_train``.

    Both arguments are converted with ``np.asarray`` before use; the
    converted arrays are kept on the instance.
    """
    x_train = np.asarray(x_train)
    y_train = np.asarray(y_train)
    self.x_train = x_train
    self.y_train = y_train
    self.tree = self.make_split(x_train, y_train, 0)

  def _predict_row(self, row):
    """Walk the fitted tree for one sample and return its predicted class."""
    node = self.tree
    while node.get('children') is not None:
      value = row[node['split_feature']]
      for category, child in node['children']:
        if category == value:
          node = child
          break
      else:
        # Category not seen at this node during training: fall back to the
        # node's majority class instead of looping forever.
        return node.get('majority_class')
    return node['Class']

  def test(self, x_test):
    """Predict a class for every row of ``x_test``.

    Args:
      x_test: 2-D array-like with the same column layout as the training
        features. It is converted with ``np.asarray``.

    Returns:
      NumPy array with one predicted label per row.

    Raises:
      RuntimeError: If ``train`` has not been called yet.
    """
    if self.tree is None:
      raise RuntimeError("decisiontreeclassifier.test() called before train()")
    predictions = [self._predict_row(row) for row in np.asarray(x_test)]
    return np.array(predictions)

In [4]:
# function to convert continuous features to categorical features optimally.

def _entropy_of_counts(counts):
  """Return the base-2 Shannon entropy of every row of class counts."""
  counts = np.asarray(counts, dtype=float)
  totals = counts.sum(axis=1, keepdims=True)
  # Empty rows and zero-count classes contribute nothing; np.where masks the
  # 0/0 and 0*log2(0) values they would otherwise produce.
  with np.errstate(divide='ignore', invalid='ignore'):
    probabilities = np.where(totals > 0, counts / totals, 0.0)
    terms = np.where(
      probabilities > 0, probabilities * np.log2(probabilities), 0.0
    )
  return -terms.sum(axis=1)


def cont_to_cat(feature, target=None):
  """Binarise a continuous feature at the threshold with the best info gain.

  The samples are sorted by feature value and every boundary between two
  distinct consecutive values is scored by the information gain of the
  resulting two-way split of the labels. Values less than or equal to the
  best threshold become category 0, larger values category 1.

  Args:
    feature: 1-D array-like of continuous values.
    target: Class labels aligned with ``feature``. When omitted, the
      notebook-level ``df['species']`` column is used.

  Returns:
    Integer NumPy array of 0/1 categories in the same order as ``feature``.
    All zeros when there are fewer than two samples or every feature value
    is identical (no split is possible).

  Raises:
    ValueError: If ``feature`` and the labels differ in length.
  """
  values = np.asarray(feature).ravel()
  if target is None:
    target = df['species']
  labels = np.asarray(target).ravel()

  n_samples = values.size
  if labels.size != n_samples:
    raise ValueError("feature and target must have the same length")

  categories = np.zeros(n_samples, dtype=int)
  if n_samples < 2:
    return categories

  # Stable sort keeps equal feature values in a deterministic order.
  order = np.argsort(values, kind='stable')
  sorted_values = values[order]

  # Labels are encoded separately from the feature so that string labels
  # never change the dtype of the feature values.
  _, class_codes = np.unique(labels[order], return_inverse=True)
  class_codes = class_codes.reshape(-1)
  one_hot = np.zeros((n_samples, class_codes.max() + 1))
  one_hot[np.arange(n_samples), class_codes] = 1

  # left_counts[i] holds the class counts of the first i + 1 sorted samples.
  left_counts = np.cumsum(one_hot, axis=0)
  right_counts = left_counts[-1] - left_counts
  left_sizes = np.arange(1, n_samples + 1)
  right_sizes = n_samples - left_sizes

  parent_entropy = _entropy_of_counts(left_counts[-1:])[0]
  weighted_children = (
    left_sizes * _entropy_of_counts(left_counts)
    + right_sizes * _entropy_of_counts(right_counts)
  ) / n_samples
  gains = parent_entropy - weighted_children

  # Only split between different values so equal values share a category.
  boundaries = np.flatnonzero(sorted_values[:-1] < sorted_values[1:])
  if boundaries.size == 0:
    return categories

  best = boundaries[np.argmax(gains[boundaries])]
  threshold = sorted_values[best]
  return (values > threshold).astype(int)