# ENSEMBLE MODEL

## 1. Classification Case

In [1]:
import numpy as np
import pandas as pd
import sys
sys.path.append("../")

from src.tree import DecisionTreeClassifier
from src.ensemble import BaggingClassifier

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [2]:
def load_data(filename, sep=";"):
    """Read a delimited text file into a DataFrame.

    Parameters
    ----------
    filename : str, path-like or file-like
        Source handed straight to ``pd.read_csv``.
    sep : str, default ";"
        Field delimiter. The default keeps the original behaviour of this
        helper (semicolon-separated files); pass ``","`` or another
        delimiter to read differently formatted files.

    Returns
    -------
    pandas.DataFrame
        The parsed table.
    """
    return pd.read_csv(filename, sep=sep)

def split_input_output(data, target_column):
    """Separate a DataFrame into predictors and target.

    Parameters
    ----------
    data : pandas.DataFrame
        Table containing both the predictor columns and the target column.
    target_column : str
        Name of the column to use as the target.

    Returns
    -------
    tuple
        ``(X, y)`` where ``X`` is ``data`` without ``target_column`` and
        ``y`` is the ``target_column`` column. ``data`` itself is not
        modified.
    """
    target = data[target_column]
    features = data.drop(columns=[target_column])
    return features, target

def split_train_test(X, y, test_size, random_state=42):
    """Split predictors and target into two stratified partitions.

    Thin wrapper around ``train_test_split`` that always stratifies on
    ``y`` and uses a fixed seed by default.

    Parameters
    ----------
    X : predictors to split.
    y : target to split; also used as the stratification labels.
    test_size : forwarded to ``train_test_split`` as ``test_size``.
    random_state : int, default 42
        Seed forwarded to ``train_test_split``.

    Returns
    -------
    Whatever ``train_test_split(X, y, ...)`` returns; callers in this
    notebook unpack it as ``X_train, X_test, y_train, y_test``.
    """
    split_options = {
        "test_size": test_size,
        "stratify": y,
        "random_state": random_state,
    }
    return train_test_split(X, y, **split_options)

In [3]:
# PREPARE THE DATA
# -----------------
# Load data
filename = "../../data/raw/bank.csv"
data = load_data(filename)

# Filter data
# Take an explicit copy of the column subset so that the label re-encoding
# below writes to an independent frame instead of a selection of the loaded
# one (this is the pattern that triggers pandas' SettingWithCopyWarning).
data = data[["age", "balance", "duration", "day", "y"]].copy()

# Encode the target numerically: "no" -> -1, every other value -> 1
data["y"] = np.where(data["y"] == "no", -1, 1)

# Split data
# First hold out 20% as the test set, then hold out 20% of the remainder as
# the validation set (roughly 64% train / 16% valid / 20% test overall).
# Note that the second call rebinds X_train / y_train to the smaller split.
X, y = split_input_output(data, "y")
X_train, X_test, y_train, y_test = split_train_test(X, y, test_size=0.2)
X_train, X_valid, y_train, y_valid = split_train_test(X_train, y_train, test_size=0.2)

In [4]:
# TREE MODELING
# -------------
# Fit a single decision tree (default settings) on the training split
clf_tree = DecisionTreeClassifier()
clf_tree.fit(X_train, y_train)

# Accuracy on the data the tree was fitted on
y_pred_train_tree = clf_tree.predict(X_train)
acc_train_tree = accuracy_score(y_train, y_pred_train_tree)

# Accuracy on the held-out validation split
y_pred_valid_tree = clf_tree.predict(X_valid)
acc_valid_tree = accuracy_score(y_valid, y_pred_valid_tree)

# Report both accuracies as percentages, followed by a blank line
print(
    "Decision Tree",
    "-------------",
    f"acc. train  : {acc_train_tree*100:.2f}%",
    f"acc. valid  : {acc_valid_tree*100:.2f}%",
    "",
    sep="\n",
)

Decision Tree
-------------
acc. train  : 100.00%
acc. valid  : 82.73%



In [5]:
# BAGGING MODELING
# ----------------
# Fit a bagging ensemble of 10 decision trees with a fixed seed
clf_bagging = BaggingClassifier(
    estimator=DecisionTreeClassifier(),
    n_estimators=10,
    random_state=42,
)
clf_bagging.fit(X_train, y_train)

# Accuracy of the ensemble on the data it was fitted on
y_pred_train_bagging = clf_bagging.predict(X_train)
acc_train_bagging = accuracy_score(y_train, y_pred_train_bagging)

# Accuracy of the ensemble on the held-out validation split
y_pred_valid_bagging = clf_bagging.predict(X_valid)
acc_valid_bagging = accuracy_score(y_valid, y_pred_valid_bagging)

# Report both accuracies as percentages, followed by a blank line
print(
    "Bagging Tree",
    "-------------",
    f"acc. train  : {acc_train_bagging*100:.2f}%",
    f"acc. valid  : {acc_valid_bagging*100:.2f}%",
    "",
    sep="\n",
)

4 4
Bagging Tree
-------------
acc. train  : 98.55%
acc. valid  : 87.29%



## 2. Regression Case