In [28]:
import numpy as np
import pandas as pd


def entro(Y=None, pro=None):# calculating entropy

    if Y is not None:
        pro = sum(Y == 1) / len(Y)
    if pro == 0 or pro == 1:
        return 0
    return -pro*np.log2(pro) - (1-pro)*np.log2(1-pro)


def midval(X, Y, A):# calculating a value required for information gain value
 
    c = X[:, A]
    return sum(
        (sum(c == a) / len(X)) * entro(Y[c == a])
        for a in set(c)
    )


def finalgain(X, Y, A):# calculating information gain

    return entro(Y) - midval(X, Y, A)


def gainratio(X, Y, A):# calculating the final gain ratio which is further called in DecisionTreeClassifier

    c = X[:, A]
    loop = set(c)
    v = 0
    for i in loop:
        pro = sum(c == i) / len(X)
        if pro == 0 or pro == 1:
            return 0
        v -= pro*np.log2(pro)
    return finalgain(X, Y, A) / v

class DecisionTreeClassifier:

    class leaf:
        def __init__(self, p=None):
           
            self.p = p
            self.A = None
            self.c = {}
            self.posi = None
            self.negi = None

        def __repr__(self):
            return f"({self.posi}+, {self.negi}-)"
    
    def __init__(self,m_depth):
        self.m_depth=m_depth


    def fitter(self, X, Y, sample_weights=None):
        if sample_weights is None:
            sample_weights = np.ones(len(Y))/len(Y)
        self.mainr = self.leaf()
        self.btree(self.mainr, X, Y, set(), depth=1)    


    def prediction(self, x):
        assert hasattr(self, 'mainr'), "not trained yet"
        n = self.mainr
        while n.c:
            try:
                n = n.c[x[n.A]]
            except KeyError:
                break
        return int(n.posi > n.negi)

    def error(self, X, Y):
        e = np.array([self.prediction(x) for x in X])
        return print(sum(e != Y) / len(Y))

    def btree(self, mainr, X, Y, attribute, depth):
        X, Y, attribute = X.copy(), Y.copy(), attribute.copy()
        mainr.posi = sum(Y == 1)
        mainr.negi = len(Y) - mainr.posi
        if mainr.posi == 0 or mainr.negi == 0:
            return
        if depth <= self.m_depth:
            mainr.A = self.bestfeature(X, Y, attribute)
            attribute.add(mainr.A)
            if mainr.A is None:
                return
            for a in set(X[:, mainr.A]):
                mainr.c[a] = self.leaf(p=mainr)
                self.btree(mainr.c[a],
                                 X[X[:, mainr.A] == a],
                                 Y[X[:, mainr.A] == a],
                                 attribute,
                                 depth+1)

    def bestfeature(self, X, Y, zat):
        best_gain, best_feature = 0, None
        for A in range(X.shape[1]):
            if A in zat:
                continue
            gain = gainratio(X, Y, A)
            if gain > best_gain:
                best_gain = gain
                best_feature = A
        return best_feature


    def show_tree(self):
        self.treeprint(self.mainr, depth=0)

    def treeprint(self, mainr, depth):
        print(mainr)
        for k, v in mainr.c.items():
            print("\t"*(depth+1) + f"Node:[x{mainr.A+1} = {k}] ", end='')
            self.treeprint(v, depth+1)


In [29]:
class BoostingTreeClassifier:
   
    
    def __init__(self, n, m_depth):
        self.n = n
        self.m_depth = m_depth
        self.trees = [
            DecisionTreeClassifier(m_depth=m_depth)
            for i in range(n)
        ]
        self.alpha = np.ones(n)
    
    def fitter(self, X, Y):
        assert X.shape[0] == Y.shape[0]
        N = X.shape[0]
        w = np.ones(N) / N
        for i in range(self.n):
          
            self.trees[i].fitter(X, Y)

            var2 = np.array([1 if self.trees[i].prediction(x) else -1 for x in X])
            e = np.sum(var2 != Y) / len(Y)
            e = np.clip(e, 1e-10, 1-1e-10)
            self.alpha[i] = 0.5 * np.log((1-e)/e)
            
            w *= np.exp(-self.alpha[i] * Y * var2)
            w /= sum(w)
            indices = np.random.choice(np.arange(N), size=N, p=w)
            X, Y = X[indices], Y[indices]
            
    
    def prediction(self, x):
        var2 = np.array([1 if C.prediction(x) == 1 else -1 for C in self.trees])
        if np.dot(var2, self.alpha) > 0:
            return 1
        else:
            return -1
    
    def accuracy(self, X, Y):
        var2 = np.array([self.prediction(x) for x in X])
        return print((sum(var2 == Y) / len(Y))*100)
    
    def error(self, X, Y):
        var2 = np.array([self.prediction(x) for x in X])
        return sum(var2 != Y) / len(Y)   

In [30]:
Decisiontrain = pd.read_csv('tic-tac-toe_train.csv',header=None)
Xtrain = np.asarray(Decisiontrain)[:,:9]
Ytrain = (np.asarray(Decisiontrain)[:,-1] == 'win').astype(int)
Decisiontest =pd.read_csv('tic-tac-toe_test.csv',header=None)
Xtest = np.asarray(Decisiontest)[:,:9]
Ytest = (np.asarray(Decisiontest)[:,-1] == 'win').astype(int)


In [38]:
obj = BoostingTreeClassifier(10, m_depth=5)
obj.fitter(Xtrain, Ytrain)
print("Error for training data:",obj.error(Xtrain,Ytrain))
print("Error for test data:",obj.error(Xtest, Ytest))


Error for training data: 0.364
Error for test data: 0.46
