# Ensemble Learning

## 1. Gradient Boosting Tree

### 1.1. GBT Regressor

In [1]:
!pip3 install -U ucimlrepo

Collecting ucimlrepo
  Downloading ucimlrepo-0.0.7-py3-none-any.whl.metadata (5.5 kB)
Downloading ucimlrepo-0.0.7-py3-none-any.whl (8.0 kB)
Installing collected packages: ucimlrepo
Successfully installed ucimlrepo-0.0.7


In [2]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, accuracy_score, f1_score, roc_auc_score
from ucimlrepo import fetch_ucirepo
from sklearn.model_selection import train_test_split
import numpy as np

In [60]:
# Regression dataset (UCI id=186):
# https://archive.ics.uci.edu/dataset/186/wine+quality
dataset = fetch_ucirepo(id=186)
X = dataset.data.features
# Collapse the targets into a flat 1-D array
y = dataset.data.targets.to_numpy().ravel()

In [61]:
# Hold out 30% of the rows as the test set (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [62]:
# Gradient-boosted regression trees (squared-error loss) built on DecisionTreeRegressor
class MyGBTRegressor:
    """Gradient boosting regressor that stacks shallow regression trees.

    The model starts from the mean of the training targets and, on every
    boosting round, fits a ``DecisionTreeRegressor`` to the current residuals
    and adds that tree's output scaled by ``learning_rate``.

    Parameters
    ----------
    n_estimators : int
        Number of boosting rounds (one tree per round).
    learning_rate : float
        Factor applied to every tree's contribution.
    max_depth : int
        Maximum depth passed to each tree.
    random_state : int
        Seed of the generator that draws the per-tree ``random_state`` values.

    Attributes
    ----------
    initial_prediction : float or None
        Constant starting prediction (mean of the training targets);
        ``None`` until ``fit`` has been called.
    trees : list
        Fitted trees, in boosting order.
    """

    def __init__(self,
                 n_estimators: int = 100,
                 learning_rate: float = 0.1,
                 max_depth: int = 3,
                 random_state: int = 42):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        self.random_state = random_state

        self.initial_prediction = None
        self.trees = []

    def initialize_prediction(self, y):
        """Return the constant starting prediction: the mean of ``y``."""
        return np.mean(y)

    def compute_negative_gradient(self, y, y_pred):
        """Return the residual ``y - y_pred``.

        This is the negative gradient of ``0.5 * (y - y_pred) ** 2`` with
        respect to ``y_pred``.
        """
        return y - y_pred

    def fit(self, X, y):
        """Fit the ensemble on ``X`` / ``y`` and return ``self``.

        ``y`` may be any array-like; it is converted to a flat float array so
        that lists and single-column targets behave like a 1-D array.
        Calling ``fit`` again discards the previously fitted trees.
        """
        y = np.asarray(y, dtype=float).ravel()

        # Start from a clean ensemble: without this reset a second fit() would
        # keep the old trees and predict() would apply both generations.
        self.trees = []
        self.initial_prediction = self.initialize_prediction(y)
        y_pred = np.full(shape=y.shape[0], fill_value=self.initial_prediction)
        rng = np.random.RandomState(self.random_state)

        for _ in range(self.n_estimators):
            negative_gradient = self.compute_negative_gradient(y, y_pred)

            # Each tree gets its own seed, drawn from the model-level generator.
            tree = DecisionTreeRegressor(
                max_depth=self.max_depth,
                random_state=rng.randint(1, 10_000)
            )
            tree.fit(X, negative_gradient)

            # Move the running prediction a shrunken step towards the residuals.
            y_pred += self.learning_rate * tree.predict(X)
            self.trees.append(tree)

        return self

    def predict(self, X):
        """Return predictions for ``X``.

        Raises
        ------
        RuntimeError
            If the model has not been fitted yet.
        """
        if self.initial_prediction is None:
            raise RuntimeError("MyGBTRegressor must be fitted before calling predict().")

        y_pred = np.full(shape=X.shape[0], fill_value=self.initial_prediction)
        for tree in self.trees:
            y_pred += self.learning_rate * tree.predict(X)

        return y_pred

In [66]:
# Fit the hand-written regressor and report its mean squared error on the test set
reg = MyGBTRegressor().fit(X_train, y_train)  # fit() hands back the model itself
y_pred = reg.predict(X_test)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(mse)

0.45092609395715455


In [67]:
# Baseline: scikit-learn's GradientBoostingRegressor with the same hyperparameters
from sklearn.ensemble import GradientBoostingRegressor

sk_reg = GradientBoostingRegressor(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=3,
    random_state=42,
)
sk_reg.fit(X_train, y_train)
y_pred = sk_reg.predict(X_test)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(mse)

0.4513241201881356


In [68]:
# XGBoost Regressor
# The import lives in this cell because, read top to bottom, this is the first
# cell that uses `xgb`; the notebook's xgboost install/import cell only appears
# later (section 2), so a fresh "Restart & Run All" would otherwise stop here
# with a NameError. The xgboost package must already be installed.
import xgboost as xgb

xgb_reg = xgb.XGBRegressor(
    n_estimators=100,
    max_depth=3,
    learning_rate=0.1,
    random_state=42
)
xgb_reg.fit(X_train, y_train)
y_pred = xgb_reg.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(mse)

0.4515293538570404


### 1.2. GBT Classifier

In [70]:
# Classification dataset (UCI id=17):
# https://archive.ics.uci.edu/dataset/17/breast+cancer+wisconsin+diagnostic
dataset = fetch_ucirepo(id=17)
X = dataset.data.features
# Collapse the targets into a flat 1-D array
y = dataset.data.targets.to_numpy().ravel()
# Binary encoding: label 'M' becomes 1, every other label becomes 0
y = (y == 'M').astype(int)

In [71]:
# Hold out 30% of the rows as the test set (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)
# Cell output: shapes of the training features and labels
X_train.shape, y_train.shape

((398, 30), (398,))

In [72]:
# Gradient-boosted trees for binary classification, built on DecisionTreeRegressor
class MyGBTClassifier:
    """Binary gradient boosting classifier that boosts in log-odds space.

    The model starts from the log-odds of the positive class in the training
    labels. On every round it fits a ``DecisionTreeRegressor`` to the
    residuals ``y - p`` (``p`` being the current predicted probability) and
    adds the tree's raw output, scaled by ``learning_rate``, to the logit.

    Labels are expected to be encoded as 0 / 1.

    Parameters
    ----------
    n_estimators : int
        Number of boosting rounds (one tree per round).
    learning_rate : float
        Factor applied to every tree's contribution.
    max_depth : int
        Maximum depth passed to each tree.
    random_state : int
        Seed of the generator that draws the per-tree ``random_state`` values.

    Attributes
    ----------
    initial_logodds : float or None
        Constant starting logit; ``None`` until ``fit`` has been called.
    trees : list
        Fitted trees, in boosting order.
    """

    def __init__(self,
                 n_estimators: int = 100,
                 learning_rate: float = 0.1,
                 max_depth: int = 3,
                 random_state: int = 42
    ):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        self.random_state = random_state

        self.initial_logodds = None
        self.trees = []

    def initialize_prediction(self, y):
        """Return the starting logit: log(#positives / #negatives) of ``y``.

        When only one class is present the plain ratio would be 0 or a
        division by zero; in that case the class prior is clipped away from
        0 and 1 by machine epsilon so the result stays finite.

        Raises
        ------
        ValueError
            If ``y`` is empty.
        """
        y = np.asarray(y)
        n_samples = y.size
        if n_samples == 0:
            raise ValueError("Cannot initialise the log-odds from an empty target array.")

        count_positive = np.sum(y)
        count_negative = n_samples - count_positive

        if count_positive == 0 or count_negative == 0:
            # Single-class data: keep the logit finite instead of +/-inf.
            eps = np.finfo(float).eps
            prior = np.clip(count_positive / n_samples, eps, 1 - eps)
            return np.log(prior / (1 - prior))

        return np.log(count_positive / count_negative)

    def compute_negative_gradient(self, y, y_pred_proba):
        """Return the residual ``y - y_pred_proba``.

        This is the negative gradient of the binary log-loss with respect to
        the logit.
        """
        return y - y_pred_proba

    def _sigmoid(self, z):
        """Map logits ``z`` to probabilities with the logistic function."""
        # exp(-z) overflows to inf for very negative z; the quotient is then
        # exactly 0.0, which is the right limit, so only the warning is silenced.
        with np.errstate(over="ignore"):
            return 1 / (1 + np.exp(-z))

    def fit(self, X, y):
        """Fit the ensemble on ``X`` / ``y`` and return ``self``.

        ``y`` may be any array-like of 0 / 1 labels; it is converted to a flat
        float array so that lists and single-column targets behave like a 1-D
        array. Calling ``fit`` again discards the previously fitted trees.
        """
        y = np.asarray(y, dtype=float).ravel()

        # Start from a clean ensemble: without this reset a second fit() would
        # keep the old trees and predict_logit() would apply both generations.
        self.trees = []
        self.initial_logodds = self.initialize_prediction(y)
        rng = np.random.RandomState(self.random_state)
        y_pred_logit = np.full(shape=y.shape, fill_value=self.initial_logodds)
        y_pred_proba = self._sigmoid(y_pred_logit)

        for _ in range(self.n_estimators):
            negative_gradient = self.compute_negative_gradient(y, y_pred_proba)

            # Each tree gets its own seed, drawn from the model-level generator.
            tree = DecisionTreeRegressor(
                max_depth=self.max_depth,
                random_state=rng.randint(1, 10_000)
            )
            tree.fit(X, negative_gradient)

            # The tree output is applied directly as a (shrunken) logit update.
            update = tree.predict(X)
            y_pred_logit += self.learning_rate * update
            y_pred_proba = self._sigmoid(y_pred_logit)

            self.trees.append(tree)

        return self

    def predict_logit(self, X):
        """Return the accumulated logit for every row of ``X``.

        Raises
        ------
        RuntimeError
            If the model has not been fitted yet.
        """
        if self.initial_logodds is None:
            raise RuntimeError("MyGBTClassifier must be fitted before predicting.")

        y_pred_logit = np.full(X.shape[0], fill_value=self.initial_logodds)
        for tree in self.trees:
            y_pred_logit += self.learning_rate * tree.predict(X)
        return y_pred_logit

    def predict_proba(self, X):
        """Return an ``(n_samples, 2)`` array with columns ``[P(y=0), P(y=1)]``."""
        y_pred_logit = self.predict_logit(X)
        prob_1 = self._sigmoid(y_pred_logit)
        prob_0 = 1 - prob_1
        return np.column_stack((prob_0, prob_1))

    def predict(self, X):
        """Return hard 0 / 1 labels: 1 where ``P(y=1) >= 0.5``."""
        y_pred_proba = self.predict_proba(X)
        y_pred = (y_pred_proba[:, 1]>=0.5).astype(int)
        return y_pred

In [73]:
# Fit the hand-written classifier, then score its hard labels and its probabilities
clf = MyGBTClassifier().fit(X_train, y_train)  # fit() hands back the model itself
y_pred_proba = clf.predict_proba(X_test)
y_pred = clf.predict(X_test)

# Accuracy and F1 use the hard labels; AUC uses the positive-class probability column
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
f1 = f1_score(y_true=y_test, y_pred=y_pred)
auc = roc_auc_score(y_test, y_pred_proba[:, 1])
print(f"ACC: {acc}, F1: {f1}, AUC: {auc}")

ACC: 0.9649122807017544, F1: 0.953125, AUC: 0.9895649617871839


In [74]:
# Baseline: scikit-learn's GradientBoostingClassifier with the same hyperparameters
from sklearn.ensemble import GradientBoostingClassifier

sk_clf = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)
sk_clf.fit(X_train, y_train)

y_pred_proba = sk_clf.predict_proba(X_test)
y_pred = sk_clf.predict(X_test)

# Accuracy and F1 use the hard labels; AUC uses the positive-class probability column
sk_acc = accuracy_score(y_true=y_test, y_pred=y_pred)
sk_f1 = f1_score(y_true=y_test, y_pred=y_pred)
sk_auc = roc_auc_score(y_test, y_pred_proba[:, 1])
print(f"ACC: {sk_acc}, F1: {sk_f1}, AUC: {sk_auc}")

ACC: 0.9590643274853801, F1: 0.944, AUC: 0.9951499118165784


## 2. XGBoost

In [13]:
# TODO: Create XGBoost Regressor and Classifier models and compare their performance with your implementations
!pip install xgboost
import xgboost as xgb



In [76]:
# XGBoost classifier configured with the same hyperparameters as the models above
xgb_clf = xgb.XGBClassifier(n_estimators=100, max_depth=3, learning_rate=0.1, random_state=42)
xgb_clf.fit(X_train, y_train)

y_pred_proba = xgb_clf.predict_proba(X_test)
y_pred = xgb_clf.predict(X_test)

# Accuracy and F1 use the hard labels; AUC uses the positive-class probability column
xgb_acc = accuracy_score(y_true=y_test, y_pred=y_pred)
xgb_f1 = f1_score(y_true=y_test, y_pred=y_pred)
xgb_auc = roc_auc_score(y_test, y_pred_proba[:, 1])
print(f"ACC: {xgb_acc}, F1: {xgb_f1}, AUC: {xgb_auc}")

ACC: 0.9532163742690059, F1: 0.9354838709677419, AUC: 0.9948559670781894
