# Bagging

In [1]:
import imblearn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
data = pd.read_csv("pca_data.csv")

In [3]:
data.drop(data.columns[0], axis = 1, inplace = True)

In [4]:
data.head()

Unnamed: 0,pca1,pca2,pca3,Attr5,Attr6,Attr9,Attr15,Attr17,Attr18,Attr20,...,Attr47,Attr54,Attr55,Attr57,Attr59,Attr60,Attr61,Attr63,Attr64,class
0,-0.736027,-0.348747,-0.05804,-0.007032,-0.00403,-0.055249,0.03247,-0.029024,-0.129252,-0.103675,...,-0.051149,-0.045553,-0.058091,-0.031549,-0.016588,-0.046548,-0.024825,-0.067129,-0.088498,0
1,-1.346851,3.477502,-0.117813,-0.007454,-0.235543,-0.05178,-0.036294,0.006887,1.031123,-0.183374,...,-0.066955,0.005472,-0.113787,0.099133,-0.028335,-0.048775,0.012525,-0.033076,-0.090512,0
2,-0.179147,-2.537603,-0.033529,-0.007016,0.001522,-0.024981,-0.041462,-0.033089,-0.759637,-0.183374,...,-0.066955,-0.042601,-0.111453,-0.187408,-0.022549,-0.049574,-0.026057,-0.09724,-0.081367,0
3,-1.168294,0.404212,-0.067943,0.000447,0.053798,-0.051257,-0.032966,-0.021344,0.12272,-0.061746,...,-0.04148,-0.020404,2.419677,0.003994,-0.028335,-0.048031,-0.017621,0.005353,-0.070075,0
4,-0.821225,1.156009,-0.089037,-0.012538,0.129818,-0.072841,-0.032875,-0.027827,0.322233,-0.046371,...,-0.027693,-0.043371,-0.113987,0.036589,-0.020736,-0.048348,-0.021583,-0.065824,-0.087455,0


In [5]:
from sklearn.model_selection import train_test_split
X = data.iloc[:, :-1]
y = data["class"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 24)

In [6]:
from sklearn.ensemble import BaggingClassifier
from imblearn.ensemble import BalancedBaggingClassifier
from imblearn.metrics import classification_report_imbalanced

In [7]:
bagging = BaggingClassifier(random_state = 54)
balanced_bagging = BalancedBaggingClassifier(random_state = 24)

In [8]:
bagging.fit(X_train, y_train)
balanced_bagging.fit(X_train, y_train)

BalancedBaggingClassifier(base_estimator=None, bootstrap=True,
                          bootstrap_features=False, max_features=1.0,
                          max_samples=1.0, n_estimators=10, n_jobs=None,
                          oob_score=False, random_state=24, replacement=False,
                          sampling_strategy='auto', verbose=0,
                          warm_start=False)

In [9]:
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.metrics import roc_auc_score

In [11]:
b_pred_y = bagging.predict(X_test)
bb_pred_y = balanced_bagging.predict(X_test)

In [13]:
print("Bagging F1 : %.3f" % f1_score(y_test, b_pred_y, average = 'micro'))
print("Bagging ROC AUC : %.3f" % roc_auc_score(y_test, b_pred_y))

Bagging F1 : 0.936
Bagging ROC AUC : 0.549


In [15]:
print("Balanced Bagging F1 : %.3f" % f1_score(y_test, bb_pred_y, average = 'micro'))
print("Balanced Bagging ROC AUC : %.3f" % roc_auc_score(y_test, bb_pred_y))

Balanced Bagging F1 : 0.801
Balanced Bagging ROC AUC : 0.717
