In [1]:
import numpy as np

class Node:
    def __init__(self, feature_index=None, threshold=None, 
                 left=None, right=None, value=None):
        self.feature_index = feature_index  # Feature index for splitting
        self.threshold = threshold          # Threshold for splitting
        self.left = left                    # Left child node
        self.right = right                  # Right child node
        self.value = value                  # Prediction value if leaf node

class DecisionTree:
    def __init__(self, max_depth=None, min_samples_leaf=1):
        self.max_depth = max_depth
        self.min_samples_leaf = min_samples_leaf
        self.root = None

    def fit(self, X, y):
        self.root = self._build_tree(X, y, depth=0)

    def _build_tree(self, X, y, depth):
        n_samples, n_features = X.shape

        if n_samples < self.min_samples_leaf or depth >= self.max_depth:
            return Node(value=np.mean(y))  # Create leaf with average target

        best_split = self._find_best_split(X, y)

        if best_split is None:  # No informative split found
            return Node(value=np.mean(y))

        left_idx, right_idx = self._split_data(X, best_split)
        left_child = self._build_tree(X[left_idx], y[left_idx], depth + 1)
        right_child = self._build_tree(X[right_idx], y[right_idx], depth + 1)
        return Node(**best_split, left=left_child, right=right_child)

    def _find_best_split(self, X, y):
        # (Implementation using variance reduction; placeholder for brevity)
        pass 

    def _split_data(self, X, split):
        # (Implementation to split data based on split['feature_index'] and split['threshold'])
        pass 

    def predict(self, X):
        predictions = np.zeros(X.shape[0])
        for i, x in enumerate(X):
            node = self.root
            while node.value is None:  # Traverse until leaf node
                if x[node.feature_index] <= node.threshold:
                    node = node.left
                else:
                    node = node.right
            predictions[i] = node.value
        return predictions

class GBM:
    def __init__(self, n_estimators=100, learning_rate=0.1, max_depth=3, min_samples_leaf=1):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        self.min_samples_leaf = min_samples_leaf
        self.trees = []

    def fit(self, X, y):
        residuals = y.copy()
        for _ in range(self.n_estimators):
            tree = DecisionTree(max_depth=self.max_depth, min_samples_leaf=self.min_samples_leaf)
            tree.fit(X, residuals)
            self.trees.append(tree)
            residuals -= self.learning_rate * tree.predict(X)

    def predict(self, X):
        predictions = np.zeros_like(X[:, 0])
        for tree in self.trees:
            predictions += self.learning_rate * tree.predict(X)
        return predictions


In [3]:
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error


# Load dataset
boston = fetch_california_housing()
X = boston.data
y = boston.target

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and fit the GBM model
model = GBM(n_estimators=50, learning_rate=0.1, max_depth=3, min_samples_leaf=5)  
model.fit(X_train, y_train)

# Make predictions on test data
y_pred = model.predict(X_test)

# Evaluate performance
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)


Mean Squared Error: 1.3104482346395718


In [4]:
import numpy as np

class DecisionStump:
    def __init__(self):
        self.threshold = None
        self.feature_index = None
        self.alpha = None
        self.polarity = 1

    def predict(self, X):
        n_samples = X.shape[0]
        X_column = X[:, self.feature_index]
        predictions = np.ones(n_samples)
        if self.polarity == 1:
            predictions[X_column < self.threshold] = -1
        else:
            predictions[X_column > self.threshold] = -1
        return predictions
def exponential_loss(y_true, y_pred):
    return np.exp(-y_true * y_pred)
class GBM:
    def __init__(self, n_estimators=5, learning_rate=1.0):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.trees = []

    def fit(self, X, y):
        y_pred = np.zeros(np.shape(y))
        for _ in range(self.n_estimators):
            tree = DecisionStump()
            min_error = float('inf')
            for feature_i in range(X.shape[1]):
                X_column = X[:, feature_i]
                thresholds = np.unique(X_column)
                for threshold in thresholds:
                    p = 1
                    prediction = np.ones(np.shape(y))
                    prediction[X_column < threshold] = -1
                    error = sum(exponential_loss(y, prediction))
                    if error > 0.5:
                        error = 1 - error
                        p = -1
                    if error < min_error:
                        tree.polarity = p
                        tree.threshold = threshold
                        tree.feature_index = feature_i
                        min_error = error
            tree.alpha = 0.5 * np.log((1.0 - min_error) / (min_error + 1e-10))
            y_pred += tree.alpha * tree.predict(X)
            self.trees.append(tree)

    def predict(self, X):
        y_pred = np.array([tree.alpha * tree.predict(X) for tree in self.trees])
        y_pred = np.sum(y_pred, axis=0)
        y_pred = np.sign(y_pred)
        return y_pred
def accuracy(y_true, y_pred):
    return np.sum(y_true == y_pred) / len(y_true)
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer

def preprocess_data():
    data = load_iris()
    X = data.data
    y = data.target
    y = np.where(y == 0, -1, 1)  # Convert to binary

    return train_test_split(X, y, test_size=0.2, random_state=42)

X_train, X_test, y_train, y_test = preprocess_data()
gbm = GBM(n_estimators=10, learning_rate=0.5)
gbm.fit(X_train, y_train)
y_pred = gbm.predict(X_test)
print(f'Accuracy: {accuracy(y_test, y_pred)}')


Accuracy: 0.0


  tree.alpha = 0.5 * np.log((1.0 - min_error) / (min_error + 1e-10))
