# Abdul Wahid Awan 20i-2367 J Lab#8

# Libraries

In [144]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import math
import os

# Decision Tree

In [145]:
parent=r'C:\Users\Abdul Wahid Awan\Desktop'
os.chdir(parent)
df = pd.read_csv("play_tennis.csv")
df=df.drop(columns=["day"],axis=1)
play=df['play']
xtrain, xtest, ytrain, ytest = train_test_split(df, play, test_size=0.2)

# Entropy

In [146]:
def find_entropy(classs):
    sample = len(classs)
    if sample < 1:
        return 0
    n_classes = len(np.unique(classs))
    value=0
    for c in range(n_classes):
        p = len(classs[classs==c])/(sample)
        if p > 0:
            value =value + (-p*math.log2(p))
    return value

# Node Class

In [147]:
class Node:
    def __init__(self, feature_name=None, threshold=None, value=None, left=None, right=None):
        self.feature_name = feature_name
        self.threshold = threshold
        self.value = value
        self.left = left
        self.right = right
    
    def is_leaf(self):
        return self.value is not None

class DecisionTree:
    def __init__(self, max_depth=10):
        self.max_depth = max_depth
        self.root = None
    
    def fit(self, X, y):
        self.n_classes = len(np.unique(y))
        self.n_features = X.shape[1]
        self.feature_names = X.columns
        self.root = self.grow_tree(X, y)
    
    def grow_tree(self, X, y, depth=0):
        X.reset_index(drop=True, inplace=True)
        y.reset_index(drop=True, inplace=True)
        n_samples, n_features = X.shape
        n_classes = len(np.unique(y))
        if not len(y):
            return Node(value="Yes")
        if (depth >= self.max_depth or n_classes == 1 ):
            leaf_value = max(set(y), key=list(y).count)
            return Node(value=leaf_value)
        
        feature_idx, threshold = self.best_split(X, y)
        feature_name = self.feature_names[feature_idx]
        left_idx, right_idx = self.split(X[feature_name], threshold)
        left = self.grow_tree(X.iloc[left_idx], y.iloc[left_idx], depth+1)
        right = self.grow_tree(X.iloc[right_idx], y.iloc[right_idx], depth+1)
        return Node(feature_name, threshold, left=left, right=right)
    
    def best_split(self, X, y):
        best_gain = -1
        split_idx, split_threshold = None, None
        for feature_idx in range(self.n_features):
            X_column = X.iloc[:, feature_idx]
            thresholds = np.unique(X_column)
            for threshold in thresholds:
                gain = self.information_gain(y, X_column, threshold)
                if gain > best_gain:
                    best_gain = gain
                    split_idx = feature_idx
                    split_threshold = threshold
        return split_idx, split_threshold

   
    def split(self, X_column, split_threshold):
        left_idx = []
        right_idx = []
        for i in range(len(X_column)):
            if X_column[i] == split_threshold:
                left_idx.append(i)
            else:
                right_idx.append(i)
                
        return left_idx, right_idx
    
    def information_gain(self, y, X_column, split_threshold):
        parent_entropy = find_entropy(y)
        
        left_idx, right_idx = self.split(X_column, split_threshold)
        if len(left_idx) == 0 or len(right_idx) == 0:
            return 0
        
        n = len(y)
        n_l, n_r = len(left_idx), len(right_idx)
        e_l, e_r = find_entropy(y[left_idx]), find_entropy(y[right_idx])
        child_entropy = (n_l/n)*e_l + (n_r/n)*e_r
        
        ig = parent_entropy - child_entropy
        return ig
    
    def predict(self, X):
        return np.array([self._traverse_tree(x, self.root) for x in X])
    
    def traverse_tree(self, x, node):
        if node.is_leaf():
            return node.value
        feature_name = node.feature_name
        indx =  list(self.feature_names).index(feature_name)
        if x[indx] == node.threshold:
            return self.traverse_tree(x, node.left)
        return self.traverse_tree(x, node.right)
     

In [148]:
Tree = DecisionTree()
Tree.fit(xtrain,ytrain)

# Prediction

In [151]:
pred = tree.predict(xtest)
pred=pred[:3]
print("Decision Tree Accuracy:", accuracy_score(ytest, pred)*100)

Decision Tree Accuracy: 74.6
