# Bagging

- Use each weak classifier to classify each sample.
- For each classifier, calculate its overall accuracy.
- Use mean or majority voting to decide the final class of each sample.
- Calculate the ensemble accuracy: it should be greater than the accuracy of the best weak classifier.

In [6]:
import numpy as np
import math

In [18]:
def bagging(X, y, classifiers, method='maj'):
    """Evaluate every weak classifier on X and print the combined vote.

    For each classifier this prints its name, its accuracy against ``y`` and
    the labels it assigned. Afterwards, for each sample, the votes of all
    classifiers are combined and the resulting class is printed:
    ``class 1``, ``class -1`` or ``class 0`` for a tie.

    Args:
        X: Sequence of samples; each sample is passed to ``classify``.
        y: Expected labels, indexed like ``X``.
        classifiers: Iterable of objects exposing a ``name`` attribute and a
            ``classify(x)`` method. Labels are assumed to be -1 or 1.
        method: ``'maj'`` compares the number of 1 votes with the number of
            -1 votes (and prints a ``sample`` header line); any other value
            uses the sign of the summed votes.

    Returns:
        None. All results are printed.
    """
    n_samples = len(X)
    classifications = []
    for classifier in classifiers:
        labels = [classifier.classify(x) for x in X]
        errors = sum(1 for i, label in enumerate(labels) if label != y[i])
        # Guard the division so that an empty sample set does not crash.
        if n_samples:
            accuracy = (n_samples - errors) / n_samples
        else:
            accuracy = float('nan')
        classifications.append(labels)
        print('------------------------------')
        print('classificator', classifier.name)
        print('accuracy:', accuracy)
        print('classifications:', labels)

    # Rows are classifiers, columns are samples. The explicit reshape keeps
    # the matrix two-dimensional even when there are no classifiers.
    votes = np.array(classifications).reshape(len(classifications), n_samples)

    for i, x in enumerate(X):
        # Column i holds every classifier's label for sample i.
        column = votes[:, i]
        if method == 'maj':
            print('sample', i, ':', x)
            # Count the votes directly instead of sorting and searching for
            # the first positive label: that approach fails when no
            # classifier votes 1 and reorders the vote matrix in place.
            positives = int((column == 1).sum())
            negatives = int((column == -1).sum())
            balance = positives - negatives
        else:
            balance = column.sum()

        if balance > 0:
            print('class 1')
        elif balance < 0:
            print('class -1')
        else:
            print('class 0')

## Examples

In [2]:
class c_1:
    """Weak classifier 'c1': labels a sample 1 when x[0] > -0.5, else -1."""

    def __init__(self):
        self.name = 'c1'

    def classify(self, x):
        """Return 1 if the first coordinate exceeds -0.5, otherwise -1."""
        return 1 if x[0] > -0.5 else -1
class c_2:
    """Weak classifier 'c2': labels a sample 1 when x[0] < -0.5, else -1."""

    def __init__(self):
        self.name = 'c2'

    def classify(self, x):
        """Return 1 if the first coordinate is below -0.5, otherwise -1."""
        return 1 if x[0] < -0.5 else -1
class c_3:
    """Weak classifier 'c3': labels a sample 1 when x[0] > 0.5, else -1."""

    def __init__(self):
        self.name = 'c3'

    def classify(self, x):
        """Return 1 if the first coordinate exceeds 0.5, otherwise -1."""
        return 1 if x[0] > 0.5 else -1
class c_4:
    """Weak classifier 'c4': labels a sample 1 when x[0] < 0.5, else -1."""

    def __init__(self):
        self.name = 'c4'

    def classify(self, x):
        """Return 1 if the first coordinate is below 0.5, otherwise -1."""
        return 1 if x[0] < 0.5 else -1
class c_5:
    """Weak classifier 'c5': labels a sample 1 when x[1] > -0.5, else -1."""

    def __init__(self):
        self.name = 'c5'

    def classify(self, x):
        """Return 1 if the second coordinate exceeds -0.5, otherwise -1."""
        return 1 if x[1] > -0.5 else -1
class c_6:
    """Weak classifier 'c6': labels a sample 1 when x[1] < -0.5, else -1."""

    def __init__(self):
        self.name = 'c6'

    def classify(self, x):
        """Return 1 if the second coordinate is below -0.5, otherwise -1."""
        return 1 if x[1] < -0.5 else -1
class c_7:
    """Weak classifier 'c7': labels a sample 1 when x[1] > 0.5, else -1."""

    def __init__(self):
        self.name = 'c7'

    def classify(self, x):
        """Return 1 if the second coordinate exceeds 0.5, otherwise -1."""
        return 1 if x[1] > 0.5 else -1
class c_8:
    """Weak classifier 'c8': labels a sample 1 when x[1] < 0.5, else -1."""

    def __init__(self):
        self.name = 'c8'

    def classify(self, x):
        """Return 1 if the second coordinate is below 0.5, otherwise -1."""
        return 1 if x[1] < 0.5 else -1

In [3]:
# Four 2-D samples: the two with x[1] == 0 are labeled 1, the other two -1.
X = np.array([[1, 0], [-1, 0], [0, 1], [0, -1]])
y = np.array([1, 1, -1, -1])

# One instance of each threshold classifier, collected in order c1..c8.
c1, c2, c3, c4 = c_1(), c_2(), c_3(), c_4()
c5, c6, c7, c8 = c_5(), c_6(), c_7(), c_8()
classifiers = [c1, c2, c3, c4, c5, c6, c7, c8]

In [21]:
# Evaluate all eight classifiers on X and combine their labels by majority vote.
bagging(X, y, classifiers, method='maj')

------------------------------
classificator c1
accuracy: 0.25
classifications: [1, -1, 1, 1]
------------------------------
classificator c2
accuracy: 0.75
classifications: [-1, 1, -1, -1]
------------------------------
classificator c3
accuracy: 0.75
classifications: [1, -1, -1, -1]
------------------------------
classificator c4
accuracy: 0.25
classifications: [-1, 1, 1, 1]
------------------------------
classificator c5
accuracy: 0.75
classifications: [1, 1, 1, -1]
------------------------------
classificator c6
accuracy: 0.25
classifications: [-1, -1, -1, 1]
------------------------------
classificator c7
accuracy: 0.25
classifications: [-1, -1, 1, -1]
------------------------------
classificator c8
accuracy: 0.75
classifications: [1, 1, -1, 1]
sample 0 : [1 0]
class 0
sample 1 : [-1  0]
class 0
sample 2 : [0 1]
class 0
sample 3 : [ 0 -1]
class 0
