In [1]:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# --- Configuration ---------------------------------------------------------
# One seed drives data generation, both splits and every seeded estimator,
# so a fresh "Restart & Run All" reproduces the same accuracy.
SEED = 42
# Fraction of the full dataset reserved for the final evaluation.
TEST_SIZE = 0.2
# Fraction of the remaining training data reserved for fitting the meta-model.
HOLDOUT_SIZE = 0.2


def stack_positive_proba(fitted_models, features):
    """Collect each model's second ``predict_proba`` column side by side.

    Parameters
    ----------
    fitted_models : sequence of (name, estimator) pairs
        Estimators must already be fitted and expose ``predict_proba``.
        Must be non-empty.
    features : array-like of shape (n_samples, n_features)
        Rows to score.

    Returns
    -------
    numpy.ndarray of shape (n_samples, len(fitted_models))
        Column ``j`` holds ``fitted_models[j]``'s ``predict_proba(...)[:, 1]``,
        i.e. the probability of the second class (label 1 for the binary
        0/1 labels used in this notebook).
    """
    return np.column_stack(
        [model.predict_proba(features)[:, 1] for _, model in fitted_models]
    )


# Generate a synthetic classification dataset
X, y = make_classification(n_samples=1000, n_features=20, random_state=SEED)

# Split off the test set; it is touched only for the final evaluation
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=SEED
)

# Split the training data again for blending: base models learn from the
# *_base part, and the meta-model learns from their predictions on the holdout
# part, so it never sees predictions made on rows the base models were fit on.
X_train_base, X_holdout, y_train_base, y_holdout = train_test_split(
    X_train, y_train, test_size=HOLDOUT_SIZE, random_state=SEED
)

# Define base models (SVC needs probability=True to expose predict_proba)
base_models = [
    ('rf', RandomForestClassifier(n_estimators=100, random_state=SEED)),
    ('gb', GradientBoostingClassifier(n_estimators=100, random_state=SEED)),
    ('svc', SVC(probability=True, random_state=SEED)),
]

# Train every base model on the base split only
for _, model in base_models:
    model.fit(X_train_base, y_train_base)

# Base-model probabilities on the holdout set become the meta-model's features
holdout_predictions = stack_positive_proba(base_models, X_holdout)

# Train the meta-model on the holdout predictions
meta_model = LogisticRegression()
meta_model.fit(holdout_predictions, y_holdout)

# Build the same feature layout for the test set and let the meta-model decide
test_predictions = stack_positive_proba(base_models, X_test)
final_predictions = meta_model.predict(test_predictions)

# Evaluate the blended model on the untouched test set
accuracy = accuracy_score(y_test, final_predictions)
print(f"Model accuracy: {accuracy:.4f}")


Model accuracy: 0.8900
