In [3]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import accuracy_score

# Load the breast cancer dataset and separate the feature matrix from the labels
cancer = load_breast_cancer()
X_cancer, y_cancer = cancer.data, cancer.target

# Size of the ensemble: one Random Forest per member
n_models = 5

# Hold out 30% of the samples for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X_cancer,
    y_cancer,
    test_size=0.3,
    random_state=42,
)

# Fit the ensemble members; they differ only in their random seed (42, 43, ...)
models = []
for i in range(n_models):
    model = RandomForestClassifier(n_estimators=100, random_state=42 + i)
    # fit() returns the estimator itself, so the fitted model is what gets stored
    models.append(model.fit(X_train, y_train))

# One column of test-set predictions per model, kept as a float matrix
# of shape (number of test samples, n_models)
predictions = np.column_stack(
    [member.predict(X_test) for member in models]
).astype(float)

# Majority vote per test sample: count how often each label appears in the row
# and keep the most frequent one (argmax picks the smallest label on a tie)
final_predictions = np.array(
    [np.bincount(row).argmax() for row in predictions.astype(int)]
)

# Score the voted predictions against the held-out labels and report as a percentage
accuracy = accuracy_score(y_test, final_predictions)
print(f"Breast Cancer Dataset - Ensemble Random Forest Accuracy: {accuracy * 100:.2f}%")


Breast Cancer Dataset - Ensemble Random Forest Accuracy: 96.49%
