In [2]:
import numpy as np
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from typing import Union, Literal

class Bagging:
    """
    A Bagging (bootstrap aggregating) ensemble of decision trees that can be
    used for both classification and regression.

    Each base estimator is trained on a bootstrap sample (drawn with
    replacement) of the training data. Predictions are combined by majority
    vote for classification and by averaging for regression.
    """
    def __init__(
        self,
        n_estimators: int = 10,
        max_samples: float = 0.8,
        task: Literal['classification', 'regression'] = 'classification',
        random_state: Union[int, None] = None
    ):
        """
        Initialize the Bagging ensemble.

        Args:
            n_estimators: Number of base estimators (must be >= 1)
            max_samples: Fraction of samples to use for each estimator
                (must be > 0; values above 1 oversample with replacement)
            task: Either 'classification' or 'regression'
            random_state: Random seed for reproducibility. When None, the
                global NumPy random state is used for bootstrap sampling.

        Raises:
            ValueError: If task, n_estimators or max_samples is invalid.
        """
        if task not in ('classification', 'regression'):
            raise ValueError(
                f"task must be 'classification' or 'regression', got {task!r}"
            )
        if n_estimators < 1:
            raise ValueError(f"n_estimators must be >= 1, got {n_estimators}")
        if max_samples <= 0:
            raise ValueError(f"max_samples must be > 0, got {max_samples}")

        self.n_estimators = n_estimators
        self.max_samples = max_samples
        self.task = task
        self.random_state = random_state
        self.estimators = []

        # Template estimator for the chosen task; fit() trains fresh copies.
        self.base_estimator = self._make_estimator()

    def _make_estimator(self):
        """Create a new, unfitted decision tree matching the configured task."""
        if self.task == 'classification':
            return DecisionTreeClassifier(random_state=self.random_state)
        return DecisionTreeRegressor(random_state=self.random_state)

    def _make_rng(self):
        """
        Return the random source used for bootstrap sampling.

        A dedicated RandomState is created when a seed was given, so the
        process-wide NumPy RNG is never reseeded as a side effect. Without a
        seed the global ``np.random`` module is used, which keeps any
        ``np.random.seed`` call made by the caller effective.
        """
        if self.random_state is None:
            return np.random
        return np.random.RandomState(self.random_state)

    def _bootstrap_sample(self, X: np.ndarray, y: np.ndarray, rng=None) -> tuple:
        """
        Generate a bootstrap sample of the data.

        Args:
            X: Feature array, indexed by sample along the first axis
            y: Target array aligned with X
            rng: Object exposing ``choice`` (e.g. a RandomState); a new one is
                derived from ``random_state`` when omitted

        Returns:
            Tuple ``(X_sample, y_sample)`` drawn with replacement.
        """
        if rng is None:
            rng = self._make_rng()
        n_total = X.shape[0]
        # Always draw at least one row so a tiny max_samples cannot produce
        # an empty training set.
        n_samples = max(1, int(n_total * self.max_samples))
        indices = rng.choice(n_total, size=n_samples, replace=True)
        return X[indices], y[indices]

    def fit(self, X: np.ndarray, y: np.ndarray) -> None:
        """
        Fit the bagging ensemble.

        Any previously fitted estimators are discarded. With a fixed
        ``random_state`` repeated calls on the same data yield the same
        ensemble.

        Args:
            X: Training features
            y: Training target

        Raises:
            ValueError: If X is empty or X and y differ in length.
        """
        # Accept lists / DataFrames as well; integer-array indexing below
        # needs ndarrays.
        X = np.asarray(X)
        y = np.asarray(y)
        if X.shape[0] == 0:
            raise ValueError("Cannot fit on an empty dataset")
        if X.shape[0] != y.shape[0]:
            raise ValueError(
                f"X and y have inconsistent lengths: {X.shape[0]} != {y.shape[0]}"
            )

        self.estimators = []
        # One RNG per fit: successive bootstrap samples differ from each
        # other, but the whole sequence is reproducible for a given seed.
        rng = self._make_rng()

        for _ in range(self.n_estimators):
            X_bootstrap, y_bootstrap = self._bootstrap_sample(X, y, rng)
            estimator = self._make_estimator()
            estimator.fit(X_bootstrap, y_bootstrap)
            self.estimators.append(estimator)

    def predict(self, X: np.ndarray) -> np.ndarray:
        """
        Make predictions using the bagging ensemble.

        Args:
            X: Features to predict

        Returns:
            Predictions (class labels for classification, mean predictions for regression)

        Raises:
            RuntimeError: If called before the ensemble has been fitted.
        """
        if not self.estimators:
            raise RuntimeError("Bagging ensemble is not fitted; call fit() first")

        # Shape: (n_estimators, n_samples)
        predictions = np.array([estimator.predict(X) for estimator in self.estimators])

        if self.task == 'classification':
            # Majority vote. Labels are first encoded as 0..k-1 so that
            # negative, float or string labels are counted correctly.
            classes, encoded = np.unique(predictions, return_inverse=True)
            encoded = encoded.reshape(predictions.shape)
            votes = np.apply_along_axis(
                np.bincount, 0, encoded, minlength=classes.size
            )
            # argmax returns the first maximum, so ties go to the smallest label.
            return classes[votes.argmax(axis=0)]

        # Mean for regression
        return np.mean(predictions, axis=0)

# Example usage with California Housing dataset
def demonstrate_bagging(n_samples: int = 5000, random_state: int = 42):
    """
    Train a bagging regressor on a subset of the California Housing data and
    print its test-set Mean Squared Error and R-squared score.

    Args:
        n_samples: Number of rows to subsample for faster execution; clamped
            to the size of the dataset
        random_state: Seed used for the subsample, the train/test split and
            the ensemble, so the printed metrics are reproducible
    """
    # Load and prepare data
    data = fetch_california_housing()
    X, y = data.data, data.target

    # Use a subset of data for faster execution. A local, seeded RNG is used
    # so the subset does not depend on the state of the global NumPy RNG.
    rng = np.random.RandomState(random_state)
    n_samples = min(n_samples, len(X))
    indices = rng.choice(len(X), n_samples, replace=False)
    X, y = X[indices], y[indices]

    # Split the data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=random_state
    )

    # Scale the features (statistics are fitted on the training split only)
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Create and train bagging regressor
    bagging = Bagging(
        n_estimators=10,
        max_samples=0.8,
        task='regression',
        random_state=random_state
    )
    bagging.fit(X_train_scaled, y_train)

    # Make predictions
    y_pred = bagging.predict(X_test_scaled)

    # Calculate MSE
    residuals = y_test - y_pred
    mse = np.mean(residuals ** 2)

    print(f"Mean Squared Error: {mse:.4f}")

    # Calculate R-squared: 1 - (residual sum of squares / total sum of squares)
    ss_tot = np.sum((y_test - np.mean(y_test)) ** 2)
    ss_res = np.sum(residuals ** 2)
    r2 = 1 - (ss_res / ss_tot)

    print(f"R-squared Score: {r2:.4f}")

# Run the demonstration only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demonstrate_bagging()

Mean Squared Error: 0.4026
R-squared Score: 0.7081
