## **Gradient Boosting with large neural networks**

In [208]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score

In [209]:
X, y = make_classification(n_samples=1000, n_features=20, n_informative=2, n_redundant=10, random_state=42)

In [210]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [211]:
def create_keras_model():
    model = Sequential()
    model.add(Dense(12, input_dim=20, activation='relu', kernel_regularizer=keras.regularizers.L2(1e-5)))
    model.add(Dense(8, activation='relu', kernel_regularizer=keras.regularizers.L2(1e-5)))
    model.add(Dense(1, activation='linear'))

    model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=True), optimizer='adam', metrics=['accuracy'])
    return model

# Custom wrapper for Keras model to handle sample weights
class KerasSampleWeightWrapper(BaseEstimator, ClassifierMixin):
    def __init__(self, build_fn, epochs=10, batch_size=10, verbose=1):
        self.build_fn = build_fn
        self.epochs = epochs
        self.batch_size = batch_size
        self.verbose = verbose
        self.model_ = None
        self.classes_ = []
        self.n_classes_ = 0

    def fit(self, X, y, sample_weight=None):
        self.classes_ = np.unique(y)
        self.n_classes_ = len(self.classes_)
        self.model_ = self.build_fn()
        sample_weight = sample_weight.reshape(sample_weight.shape[0], -1)[:, 0]
        self.model_.fit(X, y, sample_weight=sample_weight.reshape(-1, 1), epochs=self.epochs, batch_size=self.batch_size, verbose=self.verbose)
        self.model_.evaluate(X_train, y_train)
        return self

    def predict(self, X):
        pred_probs = self.model_.predict(X).reshape(-1)
        #return pred_probs
        return (pred_probs > 0.5).astype('int32')

    def predict_proba(self, X):
        pred_probs = self.model_.predict(X, verbose = 0)
        return np.hstack([1 - pred_probs, pred_probs])

keras_model = KerasSampleWeightWrapper(create_keras_model, verbose= 0, epochs=50)

In [212]:
ada_model = AdaBoostClassifier(estimator=keras_model, n_estimators=5, learning_rate=1.0, algorithm='SAMME')

# Train the AdaBoost model
ada_model.fit(X_train, y_train)



In [218]:
y_pred = ada_model.predict(X_train)

accuracy = accuracy_score(y_train, y_pred)
print(f'Train Accuracy: {accuracy * 100:.2f}%')

Train Accuracy: 92.57%


In [219]:
y_pred = ada_model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

Accuracy: 87.67%


## **Comparison with large NN**

In [227]:
model = Sequential()
model.add(Dense(24, input_dim=20, activation='relu', kernel_regularizer=keras.regularizers.L2(1e-4)))  # Input dimension should match your dataset's features
model.add(Dense(16, activation='relu', kernel_regularizer=keras.regularizers.L2(1e-4)))
model.add(Dense(6, activation='relu', kernel_regularizer=keras.regularizers.L2(1e-4)))
model.add(Dense(1, activation='linear'))  # Binary classification

# Compile model
model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=True), optimizer='adam', metrics=['accuracy'])

In [228]:
model.fit(X_train, y_train, epochs=20, batch_size=10)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x2a89bbc5330>

In [230]:
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test Accuracy: {accuracy * 100:.2f}%')

Accuracy: 85.33%


Conclusion: Slight improvements. May not be worth the computational expenses for this particular dataset