# Bagging - Naïve Bayes

## Tools

In [1]:
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

import pandas as pd
import numpy as np

## Data

### Train

In [2]:
datatrain = pd.read_csv('TrainsetTugas4ML.csv')
print(datatrain.shape)
datatrain.head()

(298, 3)


Unnamed: 0,X1,X2,Class
0,0.75,15.6,2
1,0.85,17.45,2
2,2.9,21.15,2
3,3.1,21.85,2
4,3.4,19.7,2


### Test

In [3]:
datatest = pd.read_csv('TestsetTugas4ML.csv')
print(datatest.shape)
datatest.head()

(75, 3)


Unnamed: 0,X1,X2,Class
0,3.3,15.45,
1,3.9,21.85,
2,4.6,18.25,
3,5.2,23.4,
4,7.05,19.9,


## Bootstrap and Model

### Make Naïve Bayes Classifier Object

In [4]:
gnb = GaussianNB()

### Bootstraps and Models

In [5]:
models = []
number_bootstrap = 5

for i in range(number_bootstrap):
    bootstrap = []

    for j in range(150):
        bootstrap .append(datatrain.iloc[np.random.randint(0, datatrain.shape[0])])

    subtrain = pd.DataFrame(bootstrap )
    subtrain['Class'] = subtrain['Class'].astype('int64')

    model = gnb.fit(subtrain[['X1','X2']], subtrain['Class'])
    models.append(model)
  
    print('Bootstrap Ke-' + str(i+1))
    print('Length of Bootstrap: '+ str(subtrain.shape[0]))
    print(subtrain.head())

Bootstrap Ke-1
Length of Bootstrap: 150
       X1     X2  Class
168  24.5  14.70      2
117  20.4   7.00      1
274  38.5  14.25      1
200  29.4   4.85      1
241  35.9   9.35      1
Bootstrap Ke-2
Length of Bootstrap: 150
        X1     X2  Class
290  40.45  12.50      1
0     0.75  15.60      2
6     4.40  20.05      2
168  24.50  14.70      2
221  32.90   4.15      1
Bootstrap Ke-3
Length of Bootstrap: 150
        X1     X2  Class
187  27.15   3.15      1
263  37.70  10.80      1
136  22.30   7.70      1
208  30.80   3.95      1
156  23.65   7.20      1
Bootstrap Ke-4
Length of Bootstrap: 150
        X1     X2  Class
241  35.90   9.35      1
60   16.90  15.35      1
156  23.65   7.20      1
123  21.05   7.00      1
244  36.15   8.40      1
Bootstrap Ke-5
Length of Bootstrap: 150
       X1     X2  Class
206  30.6   3.40      1
294  40.7  16.45      1
260  37.5  13.40      1
273  38.5  12.20      1
72   17.3  11.20      1


## Bagging Process

In [6]:
def sign(value):
    if value > 5:
        return 2
    else:
        return 1

In [7]:
def bagging(data_train, models, test=True):
    X_train = data_train[data_train.columns[:-1]]
  
    prediksi = []
  
    i = 1
    for model in models:
        pred = model.predict(X_train)
        prediksi.append(pred)
        if test == False:
            print('Model Ke-' + str(i) + ' , akurasi: ' + str(accuracy_score(pred, data_train['Class'])*100))
            i += 1
  
    zipped_pred = zip(prediksi[0],prediksi[1],prediksi[2],prediksi[3],prediksi[4])
    sum_pred = [sum(item) for item in zipped_pred]
    y_prediction = list(map(sign, sum_pred))
  
    return y_prediction

In [8]:
print(models)
prediksi = bagging(datatrain, models, False)
print('Total Akurasi: '+ str(accuracy_score(prediksi, datatrain['Class'])*100))

[GaussianNB(priors=None, var_smoothing=1e-09), GaussianNB(priors=None, var_smoothing=1e-09), GaussianNB(priors=None, var_smoothing=1e-09), GaussianNB(priors=None, var_smoothing=1e-09), GaussianNB(priors=None, var_smoothing=1e-09)]
Model Ke-1 , akurasi: 94.29530201342283
Model Ke-2 , akurasi: 94.29530201342283
Model Ke-3 , akurasi: 94.29530201342283
Model Ke-4 , akurasi: 94.29530201342283
Model Ke-5 , akurasi: 94.29530201342283
Total Akurasi: 94.29530201342283


## Bagging for Test

In [9]:
y_prediksi = bagging(datatest, models)
datatest['Class'] = y_prediksi
datatest.head()

Unnamed: 0,X1,X2,Class
0,3.3,15.45,2
1,3.9,21.85,2
2,4.6,18.25,2
3,5.2,23.4,2
4,7.05,19.9,2


## Save It

save the prediction into `TebakanTugas4ML.csv`

In [10]:
datatest.to_csv('TebakanTugas4ML.csv')