Mounting my Google Drive to access the saved data

In [1]:
# Disabled alternative: download the shared folder with gdown instead of mounting Drive.
# ! gdown https://drive.google.com/drive/folders/1eJPlwS6bd_3SUlm-rh_t7KglTaR4zrIQ?usp=sharing

# Mount Google Drive at /content/gdrive so files stored there can be read by path.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


Importing libraries

In [2]:
from math import exp, sqrt, log
import numpy as np
from numpy.random import seed
from itertools import combinations
import matplotlib.patches as patches
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics

Loading the data saved from the CNN model and normalizing the arrays

In [3]:
# Load the saved feature/label arrays from the mounted Drive.
# NOTE: allow_pickle=True lets np.load unpickle object arrays, which can execute
# arbitrary code -- only load files from a trusted source.
x_train = np.load("/content/gdrive/MyDrive/saved_data/x_train.npy", allow_pickle=True)
y_train = np.load("/content/gdrive/MyDrive/saved_data/y_train.npy", allow_pickle=True)
x_test = np.load("/content/gdrive/MyDrive/saved_data/x_test.npy", allow_pickle=True)
y_test = np.load("/content/gdrive/MyDrive/saved_data/y_test.npy", allow_pickle=True)

# Scale the feature arrays by 1/255.
x_train = x_train / 255.
x_test = x_test / 255.

# Encode labels as +1 / -1: +1 when the first entry of a label row equals 1, else -1.
y_train_demo = [1 if label[0] == 1 else -1 for label in y_train]
y_test_demo = [1 if label[0] == 1 else -1 for label in y_test]
y_train = np.array(y_train_demo)
y_test = np.array(y_test_demo)

# Flatten every sample to one dimension so each row is a single feature vector.
x_train_demo = [sample.flatten() for sample in x_train]
x_test_demo = [sample.flatten() for sample in x_test]
x_train = np.array(x_train_demo)
x_test = np.array(x_test_demo)

Building AdaBoost algorithm

In [4]:
class AdaBoost:
    """AdaBoost binary classifier that boosts depth-1 decision trees (stumps).

    Labels must be encoded as -1 / +1. After ``fit`` the instance holds:

    * ``DecTree``         -- object array with one fitted stump per epoch
    * ``DecTree_weights`` -- alpha (vote weight) of each stump
    * ``errors``          -- weighted training error measured for each stump
    * ``weights``         -- array of shape (epochs, n_samples); row ``t`` is the
                             sample distribution used to train stump ``t``
    * ``epochs``          -- number of boosting rounds
    """

    # Keeps the weighted error away from 0 and 1 so that log((1 - err) / err)
    # stays finite when a stump classifies every weighted sample correctly
    # (or incorrectly).
    _ERR_EPS = 1e-10

    def __init__(self):
        self.DecTree = None
        self.DecTree_weights = None
        self.errors = None
        self.weights = None
        self.epochs = None

    def _check_data(self, X, y):
        """Assert that the labels in ``y`` are exactly the set {-1, 1}.

        Both labels must be present. Returns ``X`` and ``y`` unchanged.
        Raises ``AssertionError`` otherwise.
        """
        assert set(y) == {-1, 1}, 'data must be ±1'
        return X, y

    def _initialize_weights(self, X, y):
        """Allocate the per-epoch buffers and start from uniform sample weights.

        The first weight row is set to ``1 / n_samples`` for every sample; the
        remaining rows are filled in during ``fit``. ``y`` is accepted for
        signature compatibility and is not used.
        """
        n_samples = X.shape[0]
        self.weights = np.zeros(shape=(self.epochs, n_samples))
        self.DecTree = np.zeros(shape=self.epochs, dtype=object)
        self.DecTree_weights = np.zeros(shape=self.epochs)
        self.errors = np.zeros(shape=self.epochs)

        self.weights[0] = np.ones(shape=n_samples) / n_samples

    @classmethod
    def _compute_alpha(cls, err):
        """Return the stump vote weight ``0.5 * log((1 - err) / err)``.

        ``err`` is clipped into ``[_ERR_EPS, 1 - _ERR_EPS]`` first, so a perfect
        stump (``err == 0``) gets a large finite alpha instead of ``inf``, and
        rounding overshoot above 1 cannot produce ``nan``.
        """
        err = np.clip(err, cls._ERR_EPS, 1.0 - cls._ERR_EPS)
        return 0.5 * np.log((1.0 - err) / err)

    def update_weights(self, epoch, alpha, y, pred):
        """Derive the (unnormalized) sample weights of ``epoch`` from ``epoch - 1``.

        With ±1 labels and predictions, ``y * pred`` is +1 for a correct
        prediction and -1 for a wrong one, so correct samples are scaled by
        ``exp(-alpha)`` and misclassified samples by ``exp(alpha)``.
        """
        self.weights[epoch] = (self.weights[epoch - 1] * np.exp(-alpha * y * pred))

    def normalize_weights(self, epoch):
        """Rescale the weights of ``epoch`` in place so that they sum to 1."""
        self.weights[epoch] /= self.weights[epoch].sum()

    def fit(self, X: np.ndarray, y: np.ndarray, epochs: int):
        """Fit ``epochs`` decision stumps with AdaBoost re-weighting.

        Each round trains a stump on the current sample weights, measures its
        weighted error, converts that error into the vote weight alpha and,
        unless it is the last round, computes and normalizes the weights for
        the next round.

        Parameters
        ----------
        X : feature array with one row per sample.
        y : labels, each either -1 or +1 (both must occur).
        epochs : number of boosting rounds / stumps.

        Returns
        -------
        self, so calls can be chained.
        """
        X, y = self._check_data(X, y)
        self.epochs = epochs

        self._initialize_weights(X, y)

        for epoch in range(epochs):
            cur_weights = self.weights[epoch]
            stump = DecisionTreeClassifier(max_depth=1, max_leaf_nodes=2)
            stump = stump.fit(X, y, sample_weight=cur_weights)

            stump_pred = stump.predict(X)
            # Weighted error: total weight of the samples the stump gets wrong.
            err = cur_weights[(stump_pred != y)].sum()
            # Clipped inside the helper so err == 0 does not yield alpha = inf,
            # which would turn the next weight row into NaN (0 / 0).
            alpha = self._compute_alpha(err)

            if epoch + 1 < epochs:
                self.update_weights(epoch + 1, alpha, y, stump_pred)
                self.normalize_weights(epoch + 1)

            self.DecTree[epoch] = stump
            self.DecTree_weights[epoch] = alpha
            # The raw (unclipped) error is what gets recorded.
            self.errors[epoch] = err

        return self

    def predict(self, X):
        """Predict labels for ``X`` with the fitted ensemble.

        Returns the sign of the alpha-weighted sum of the stump predictions:
        +1 or -1, and 0 for a sample whose weighted sum is exactly zero.
        """
        pred = np.array([decision.predict(X) for decision in self.DecTree])
        return np.sign(np.dot(self.DecTree_weights, pred))
    

Running AdaBoost algorithm and checking model accuracy

In [5]:
# Train a 10-round AdaBoost ensemble on the training set.
adaboost = AdaBoost()
model = adaboost.fit(x_train, y_train, epochs=10)

# Predict each split once. Note: `train_acc` holds the predicted training
# labels (not an accuracy value); the name is kept as-is.
train_acc = model.predict(x_train)
y_pred = model.predict(x_test)

# Reuse the training predictions instead of calling predict(x_train) again.
train_err = (train_acc != y_train).mean()

print("Train Accuracy: {:.2f}%".format(metrics.accuracy_score(y_train, train_acc) * 100))
print(f'Train error: {train_err:.1%}')
# The last line reports accuracy on the test split.
print("Accuracy: {:.2f}%".format(metrics.accuracy_score(y_test, y_pred) * 100))

Train Accuracy: 90.90%
Train error: 9.1%
Accuracy: 72.24%
