# **Practicum - 2**
*Boosting*

## Boosting with AdaBoost

### Import Library

In [1]:
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier # import DT
from sklearn.ensemble import AdaBoostClassifier # import AdaBoost
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder # encoding label

### Data Preparation 

In [2]:
# Load data
df = pd.read_csv('data/iris.csv')

df.head()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


In [3]:
# Check null column
df.isnull().sum()

Id               0
SepalLengthCm    0
SepalWidthCm     0
PetalLengthCm    0
PetalWidthCm     0
Species          0
dtype: int64

In [4]:
# Seleksi fitur
X = df.iloc[:,2:-1]
y = df['Species']

# encode label
ec = LabelEncoder()
y = ec.fit_transform(y)

# Cek jumlah fitur dan instance
print(X.shape)

# Cek label
print(y)

(150, 3)
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2]


### Split data training dan testing

In [5]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

### Training Desicion Tree

In [6]:
# By default, scikit-learn's DecisionTreeClassifier will use the "Gini" value for the criterion
# In this case we will use the default parameters
dt = DecisionTreeClassifier()

# Adjust dt to training set
dt.fit(X_train, y_train)

# Predicting test set labels
y_pred_dt = dt.predict(X_test)

#  calculate set accuracy
acc_dt = accuracy_score(y_test, y_pred_dt)
print("Test set accuracy: {:.2f}".format(acc_dt))
print(f"Test set accuracy: {acc_dt}")

Test set accuracy: 0.97
Test set accuracy: 0.9666666666666667


### Training AdaBoost

In [16]:
# In this case we will use the estimator on AdaBoost

# Instantiate AdaBoostClassifier with SAMME algorithm
ada = AdaBoostClassifier(n_estimators=2, algorithm='SAMME')

# Instantiate AdaBoostClassifier with SAMME.R algorithm
ada2 = AdaBoostClassifier(n_estimators=2, algorithm='SAMME.R')

# Adjust dt to training set
ada.fit(X_train, y_train)
ada2.fit(X_train, y_train)

# Predicting test set labels
y_pred_ada = ada.predict(X_test)
y_pred_ada2 = ada2.predict(X_test)

#  calculate set accuracy
acc_ada = accuracy_score(y_test, y_pred_ada)
acc_ada2 = accuracy_score(y_test, y_pred_ada2)

print("SAMME Algorithm")
print("Test set accuracy: {:.2f}".format(acc_ada))
print(f"Test set accuracy: {acc_ada}")
print("---------------------------------------")
print("SAMME.R Algorithm")
print("Test set accuracy: {:.2f}".format(acc_ada2))
print(f"Test set accuracy: {acc_ada2}")


SAMME Algorithm
Test set accuracy: 0.80
Test set accuracy: 0.8
---------------------------------------
SAMME.R Algorithm
Test set accuracy: 0.97
Test set accuracy: 0.9666666666666667


