In [1]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer
from sklearn import datasets
import pandas as pd
import numpy as np

seed = 42
np.random.seed(seed)


In [2]:
# STANDARD DECISION TREE CLASSIFIER EXAMPLE

cancer = datasets.load_breast_cancer()
X = cancer.data
y = cancer.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)

clf = DecisionTreeClassifier(random_state=seed)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")


Accuracy: 0.95


In [3]:
# BAGGING DECISION TREE CLASSIFIER EXAMPLE
from BaggingDT import create_bags, create_models, get_accuracy


data_raw = datasets.load_breast_cancer()
X = cancer.data
y = cancer.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

bags = create_bags(X_train, y_train, n_bags=10)
models = create_models(bags=bags, n_trees=10, seed=seed)
accuracy = get_accuracy(models=models, X=X_test, y=y_test)

print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.97


In [4]:
from BaggingSA import BaggingSA

data_raw = datasets.load_breast_cancer()
X = cancer.data
y = cancer.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)


bagging_sa = BaggingSA(X=X_train, y=y_train, X_test=X_test, y_test=y_test, 
                       T0=1.0, alpha=0.95, max_iterations=100, n_trees=10)

models = bagging_sa.run_simulated_annealing()

accuracy = get_accuracy(models=models, X=X_test, y=y_test)
print(f"Accuracy: {accuracy:.2f}")

Iteration: 0, Temperature: 1.000, Accuracy: 0.93, New Accuracy: 0.93
Iteration: 1, Temperature: 0.950, Accuracy: 0.93, New Accuracy: 0.89
Iteration: 2, Temperature: 0.902, Accuracy: 0.93, New Accuracy: 0.91
Iteration: 3, Temperature: 0.857, Accuracy: 0.93, New Accuracy: 0.90
Iteration: 4, Temperature: 0.815, Accuracy: 0.93, New Accuracy: 0.89
Iteration: 5, Temperature: 0.774, Accuracy: 0.93, New Accuracy: 0.92
Iteration: 6, Temperature: 0.735, Accuracy: 0.93, New Accuracy: 0.91
Iteration: 7, Temperature: 0.698, Accuracy: 0.93, New Accuracy: 0.90
Iteration: 8, Temperature: 0.663, Accuracy: 0.93, New Accuracy: 0.89
Iteration: 9, Temperature: 0.630, Accuracy: 0.93, New Accuracy: 0.92
Iteration: 10, Temperature: 0.599, Accuracy: 0.93, New Accuracy: 0.92
Iteration: 11, Temperature: 0.569, Accuracy: 0.93, New Accuracy: 0.92
Iteration: 12, Temperature: 0.540, Accuracy: 0.93, New Accuracy: 0.92
Iteration: 13, Temperature: 0.513, Accuracy: 0.93, New Accuracy: 0.89
Iteration: 14, Temperature: 0.