# Model Ensemble & Weighted Voting
This notebook implements ensemble methods to combine multiple models for improved performance.

## Why Ensemble Methods?
- ✅ **Reduced Variance**: Different models make different errors
- ✅ **Improved Accuracy**: Wisdom of crowds effect
- ✅ **Better Generalization**: Less prone to overfitting
- ✅ **Robustness**: More reliable predictions
- ✅ **Confidence Estimation**: Agreement between models

## Ensemble Techniques Implemented:
1. **Simple Averaging**: Equal weight to all models
2. **Weighted Averaging**: Weight by validation performance
3. **Voting**: Majority vote for classification
4. **Stacking**: Meta-learner on top of base models
5. **K-Fold Ensemble**: Combine models from different folds

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import roc_auc_score, roc_curve, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
import tensorflow as tf
from tensorflow import keras

print(f"TensorFlow version: {tf.__version__}")
%matplotlib inline

## 1. Simple Averaging Ensemble

In [None]:
def simple_averaging_ensemble(models, X, threshold=0.5):
    """
    Combine models by taking the unweighted mean of their predictions

    Parameters:
    -----------
    models : list
        Trained models exposing ``predict(X, verbose=0)`` (e.g. Keras models)
    X : array
        Input data, passed unchanged to every model
    threshold : float
        Averaged scores strictly greater than this value are labelled 1

    Returns:
    --------
    y_pred_proba : array
        Element-wise mean of the flattened per-model predictions
    y_pred : array
        Integer 0/1 labels obtained by thresholding ``y_pred_proba``
    """
    # One flattened prediction vector per ensemble member.
    per_model = [member.predict(X, verbose=0).flatten() for member in models]

    # Collapse the model axis, then apply the decision threshold.
    mean_scores = np.mean(per_model, axis=0)
    labels = (mean_scores > threshold).astype(int)

    return mean_scores, labels

print("✅ Simple averaging ensemble defined")

## 2. Weighted Averaging Ensemble

In [None]:
def weighted_averaging_ensemble(models, X, weights=None, threshold=0.5):
    """
    Ensemble using weighted averaging of predictions

    Parameters:
    -----------
    models : list
        Trained models exposing ``predict(X, verbose=0)`` (e.g. Keras models)
    X : array
        Input data to predict on
    weights : array-like or None
        Relative weight of each model. The values are rescaled internally
        so they sum to 1, so they do not need to be normalized beforehand.
        If None, uses equal weights
    threshold : float
        Decision threshold; averaged scores strictly above it are labelled 1

    Returns:
    --------
    y_pred_proba : array
        Weighted averaged prediction probabilities
    y_pred : array
        Binary predictions

    Raises:
    -------
    ValueError
        If the supplied weights sum to zero or to a non-finite value, since
        they cannot be normalized into a meaningful average
    """
    # Rows are models, columns are samples.
    predictions = np.array(
        [model.predict(X, verbose=0).flatten() for model in models]
    )

    # Use equal weights if not provided
    if weights is None:
        weights = np.ones(len(models)) / len(models)
    else:
        weights = np.asarray(weights, dtype=float)
        total = weights.sum()
        # Dividing by a zero/NaN/inf total would turn every probability into
        # NaN and every label into 0 without any error being reported.
        if not np.isfinite(total) or total == 0:
            raise ValueError(
                "weights must sum to a finite, non-zero value; "
                f"got sum={total}"
            )
        weights = weights / total  # Normalize

    # Weighted average over the model axis
    y_pred_proba = np.average(predictions, axis=0, weights=weights)
    y_pred = (y_pred_proba > threshold).astype(int)

    return y_pred_proba, y_pred

print("✅ Weighted averaging ensemble defined")

## 3. Voting Ensemble

In [None]:
def voting_ensemble(models, X, threshold=0.5, voting_threshold=0.5):
    """
    Combine models by letting each one cast a binary vote per sample

    Parameters:
    -----------
    models : list
        Trained models exposing ``predict(X, verbose=0)`` (e.g. Keras models)
    X : array
        Input data, passed unchanged to every model
    threshold : float
        A model votes positive when its score is strictly above this value
    voting_threshold : float
        Minimum fraction of positive votes (inclusive) needed for the
        ensemble to predict the positive class

    Returns:
    --------
    y_pred : array
        Integer 0/1 labels decided by the vote
    vote_confidence : array
        Fraction of models that voted positive for each sample
    """
    # Rows are models, columns are samples; each entry is a 0/1 vote.
    ballots = np.array([
        (member.predict(X, verbose=0).flatten() > threshold).astype(int)
        for member in models
    ])

    # Share of positive votes per sample.
    positive_share = np.mean(ballots, axis=0)

    # ">=" means an exact tie at the voting threshold counts as positive.
    decided = (positive_share >= voting_threshold).astype(int)

    return decided, positive_share

print("✅ Voting ensemble defined")

## 4. Stacking Ensemble

In [None]:
class StackingEnsemble:
    """
    Stacking ensemble with meta-learner

    Uses base model predictions as features for a meta-learner. The base
    models are used as-is (never retrained here); only the meta-learner is
    fitted.
    """

    def __init__(self, base_models, meta_learner=None):
        """
        Parameters:
        -----------
        base_models : list
            Trained models exposing ``predict(X, verbose=0)`` (e.g. Keras models)
        meta_learner : sklearn classifier or None
            Meta-learner model. If None, uses LogisticRegression(max_iter=1000)
        """
        self.base_models = base_models
        # Compare against None explicitly. Truth-testing an estimator calls
        # its __len__ when it defines one (some scikit-learn ensembles and
        # pipelines do), which can raise on an unfitted estimator or cause a
        # user-supplied meta-learner to be silently replaced.
        if meta_learner is None:
            meta_learner = LogisticRegression(max_iter=1000)
        self.meta_learner = meta_learner

    def get_base_predictions(self, X):
        """
        Get predictions from all base models

        Returns an array with one row per sample and one column per base
        model, each column being that model's flattened prediction.
        """
        columns = [
            model.predict(X, verbose=0).flatten() for model in self.base_models
        ]
        return np.column_stack(columns)

    def fit(self, X_train, y_train):
        """
        Train meta-learner on base model predictions

        Note: the base models' predictions on ``X_train`` become the
        meta-learner's features, so data the base models were not trained on
        is preferable here. Returns ``self``.
        """
        # Get base model predictions
        base_predictions = self.get_base_predictions(X_train)

        # Train meta-learner
        self.meta_learner.fit(base_predictions, y_train)
        print(f"✅ Meta-learner trained on {len(self.base_models)} base model predictions")
        return self

    def predict_proba(self, X):
        """
        Predict probabilities using stacked ensemble

        Returns column 1 of the meta-learner's ``predict_proba`` output when
        that method exists; otherwise falls back to the meta-learner's
        ``predict`` output.
        """
        base_predictions = self.get_base_predictions(X)

        if hasattr(self.meta_learner, 'predict_proba'):
            return self.meta_learner.predict_proba(base_predictions)[:, 1]
        return self.meta_learner.predict(base_predictions)

    def predict(self, X, threshold=0.5):
        """
        Predict classes using stacked ensemble

        Scores strictly above ``threshold`` are labelled 1, all others 0.
        """
        proba = self.predict_proba(X)
        return (proba > threshold).astype(int)

print("✅ Stacking ensemble defined")

## 5. Calculate Optimal Weights

In [None]:
def calculate_optimal_weights(models, X_val, y_val, metric='accuracy'):
    """
    Derive ensemble weights proportional to each model's validation score

    Each model is scored on the validation data and the scores are divided
    by their sum, so a model's weight is its share of the total score.

    Parameters:
    -----------
    models : list
        Trained models exposing ``predict(X, verbose=0)``
    X_val, y_val : arrays
        Validation data
    metric : str
        Metric to use for weighting ('accuracy', 'f1', 'roc_auc').
        'accuracy' and 'f1' use labels thresholded at 0.5; 'roc_auc' uses
        the raw predicted scores

    Returns:
    --------
    weights : array
        Normalized weights for each model. Falls back to equal weights when
        the scores do not sum to a positive number
    scores : array
        Individual model scores

    Raises:
    -------
    ValueError
        If ``metric`` is not one of the supported names
    """
    scores = []

    for i, model in enumerate(models):
        y_pred_proba = model.predict(X_val, verbose=0).flatten()
        y_pred = (y_pred_proba > 0.5).astype(int)

        if metric == 'accuracy':
            score = accuracy_score(y_val, y_pred)
        elif metric == 'f1':
            score = f1_score(y_val, y_pred)
        elif metric == 'roc_auc':
            score = roc_auc_score(y_val, y_pred_proba)
        else:
            raise ValueError(f"Unknown metric: {metric}")

        scores.append(score)
        print(f"Model {i+1} {metric}: {score:.4f}")

    # Convert scores to weights by proportional normalization
    scores = np.array(scores)
    total = scores.sum()
    if total > 0:
        weights = scores / total
    else:
        # Every model scored 0 (possible with F1 when no model finds a true
        # positive) or the total is NaN. Dividing would give NaN weights, so
        # weight all models equally instead.
        print("   Scores do not sum to a positive value; using equal weights")
        weights = np.ones_like(scores) / max(scores.size, 1)

    print(f"\n✅ Calculated weights based on {metric}:")
    for i, (score, weight) in enumerate(zip(scores, weights)):
        print(f"   Model {i+1}: score={score:.4f}, weight={weight:.4f}")

    return weights, scores

print("✅ Weight calculation function defined")

## 6. Comprehensive Ensemble Evaluation

In [None]:
def evaluate_ensemble_methods(models, X_test, y_test, weights=None):
    """
    Compare different ensemble methods

    Scores every individual model, then the simple-averaging, weighted
    averaging (only when ``weights`` is given) and voting ensembles on the
    same test data, printing one summary line per method.

    Parameters:
    -----------
    models : list
        Trained models exposing ``predict(X, verbose=0)``
    X_test, y_test : arrays
        Test data
    weights : array or None
        Weights for weighted averaging. If None, the weighted ensemble is
        skipped and no 'weighted_avg' entry is produced

    Returns:
    --------
    results : dict
        Keys are 'model_<i>' (1-based), 'simple_avg', optionally
        'weighted_avg', and 'voting'. Each value is a dict with 'accuracy',
        'f1_score', 'roc_auc', 'y_pred' and 'y_pred_proba'. For 'voting',
        'y_pred_proba' holds the fraction of positive votes, which is also
        what its ROC-AUC is computed from
    """
    results = {}

    def _score(key, y_pred, y_score):
        """Store the metrics for one method under ``key``; return a summary."""
        acc = accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred)
        auc = roc_auc_score(y_test, y_score)
        results[key] = {
            'accuracy': acc,
            'f1_score': f1,
            'roc_auc': auc,
            'y_pred': y_pred,
            'y_pred_proba': y_score
        }
        return f"Acc={acc:.4f}, F1={f1:.4f}, AUC={auc:.4f}"

    print(f"\n{'='*70}")
    print(f"Evaluating Ensemble Methods on {len(models)} models")
    print(f"{'='*70}\n")

    # 1. Individual models (labels use a fixed 0.5 threshold)
    print("📊 Individual Model Performance:")
    for i, model in enumerate(models, start=1):
        y_pred_proba = model.predict(X_test, verbose=0).flatten()
        y_pred = (y_pred_proba > 0.5).astype(int)
        summary = _score(f'model_{i}', y_pred, y_pred_proba)
        print(f"   Model {i}: {summary}")

    # 2. Simple averaging
    print("\n📊 Simple Averaging Ensemble:")
    y_pred_proba, y_pred = simple_averaging_ensemble(models, X_test)
    print(f"   {_score('simple_avg', y_pred, y_pred_proba)}")

    # 3. Weighted averaging
    if weights is not None:
        print("\n📊 Weighted Averaging Ensemble:")
        y_pred_proba, y_pred = weighted_averaging_ensemble(models, X_test, weights)
        print(f"   {_score('weighted_avg', y_pred, y_pred_proba)}")

    # 4. Voting (returns labels first, then the positive-vote fraction)
    print("\n📊 Voting Ensemble:")
    y_pred, vote_conf = voting_ensemble(models, X_test)
    print(f"   {_score('voting', y_pred, vote_conf)}")

    print(f"\n{'='*70}\n")

    return results

print("✅ Ensemble evaluation function defined")

## 7. Visualization: Ensemble Comparison

In [None]:
def plot_ensemble_comparison(results, save_path=None):
    """
    Visualize comparison of ensemble methods

    Draws a grouped bar chart with three bars per method (accuracy, F1 and
    ROC-AUC), each annotated with its value.

    Parameters:
    -----------
    results : dict
        Maps a method name to a dict containing at least 'accuracy',
        'f1_score' and 'roc_auc'
    save_path : str or None
        If truthy, the figure is also written to this path at 300 dpi
    """
    # Prepare data; dict order determines the left-to-right bar order
    methods = [name.replace('_', ' ').title() for name in results]
    accuracies = [metrics['accuracy'] for metrics in results.values()]
    f1_scores = [metrics['f1_score'] for metrics in results.values()]
    aucs = [metrics['roc_auc'] for metrics in results.values()]

    # Create plot
    x = np.arange(len(methods))
    width = 0.25

    fig, ax = plt.subplots(figsize=(14, 7))

    # Three bars per method, offset by one bar width around each tick
    bars1 = ax.bar(x - width, accuracies, width, label='Accuracy', alpha=0.8)
    bars2 = ax.bar(x, f1_scores, width, label='F1-Score', alpha=0.8)
    bars3 = ax.bar(x + width, aucs, width, label='ROC-AUC', alpha=0.8)

    ax.set_xlabel('Method', fontsize=12, fontweight='bold')
    ax.set_ylabel('Score', fontsize=12, fontweight='bold')
    ax.set_title('Ensemble Methods Comparison', fontsize=14, fontweight='bold')
    ax.set_xticks(x)
    ax.set_xticklabels(methods, rotation=45, ha='right')
    ax.legend(fontsize=11)
    ax.set_ylim([0, 1.05])
    ax.grid(True, alpha=0.3, axis='y')

    # Add value labels
    def add_labels(bars):
        """Write each bar's height just above the bar."""
        for bar in bars:
            height = bar.get_height()
            ax.text(bar.get_x() + bar.get_width()/2., height,
                   f'{height:.3f}',
                   ha='center', va='bottom', fontsize=8)

    for bars in (bars1, bars2, bars3):
        add_labels(bars)

    plt.tight_layout()

    # Save before show(), while the figure is still the current one
    if save_path:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f"✅ Comparison plot saved to {save_path}")

    plt.show()

print("✅ Comparison plotting function defined")

## 8. ROC Curve Comparison

In [None]:
def plot_roc_curves_comparison(results, y_test, save_path=None):
    """
    Plot ROC curves for all ensemble methods

    Parameters:
    -----------
    results : dict
        Maps a method name to a dict containing at least 'y_pred_proba'
        (scores used to trace the curve) and 'roc_auc' (shown in the legend)
    y_test : array
        True labels the scores are compared against
    save_path : str or None
        If truthy, the figure is also written to this path at 300 dpi
    """
    plt.figure(figsize=(10, 8))

    # One colour per method, sampled evenly from the tab10 colormap
    colors = plt.cm.tab10(np.linspace(0, 1, len(results)))

    for (method, metrics), color in zip(results.items(), colors):
        fpr, tpr, _ = roc_curve(y_test, metrics['y_pred_proba'])
        # The legend shows the stored AUC rather than recomputing it
        auc_score = metrics['roc_auc']
        label = f'{method.replace("_", " ").title()} (AUC = {auc_score:.3f})'

        plt.plot(fpr, tpr, color=color, lw=2, label=label)

    # Diagonal reference line
    plt.plot([0, 1], [0, 1], 'k--', lw=2, label='Random Classifier')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate', fontsize=12, fontweight='bold')
    plt.ylabel('True Positive Rate', fontsize=12, fontweight='bold')
    plt.title('ROC Curves - Ensemble Methods Comparison', fontsize=14, fontweight='bold')
    plt.legend(loc='lower right', fontsize=10)
    plt.grid(True, alpha=0.3)

    # Save before show(), while the figure is still the current one
    if save_path:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f"✅ ROC curves saved to {save_path}")

    plt.show()

print("✅ ROC curve plotting function defined")

## 9. Example Usage Template

In [None]:
# Example usage (uncomment to use):
# 
# # Assuming you have trained models from K-Fold CV
# # models = [...]
# # X_val, y_val = ...
# # X_test, y_test = ...
# 
# # Calculate optimal weights
# weights, scores = calculate_optimal_weights(models, X_val, y_val, metric='roc_auc')
# 
# # Evaluate all ensemble methods
# results = evaluate_ensemble_methods(models, X_test, y_test, weights=weights)
# 
# # Visualize results
# plot_ensemble_comparison(results, save_path='ensemble_comparison.png')
# plot_roc_curves_comparison(results, y_test, save_path='ensemble_roc_curves.png')
# 
# # Use stacking ensemble (the meta-learner is fitted on the validation split)
# stacking = StackingEnsemble(models)
# stacking.fit(X_val, y_val)
# y_pred_stack = stacking.predict(X_test)
# print(f"Stacking accuracy: {accuracy_score(y_test, y_pred_stack):.4f}")

# Summary banner listing the helpers defined in this notebook.
print("\n" + "="*70)
print("✅ Model Ensemble utilities loaded successfully!")
print("="*70)
print("\nAvailable functions:")
print("  - simple_averaging_ensemble(models, X, threshold)")
print("  - weighted_averaging_ensemble(models, X, weights, threshold)")
print("  - voting_ensemble(models, X, threshold, voting_threshold)")
print("  - StackingEnsemble(base_models, meta_learner)")
print("  - calculate_optimal_weights(models, X_val, y_val, metric)")
print("  - evaluate_ensemble_methods(models, X_test, y_test, weights)")
print("  - plot_ensemble_comparison(results, save_path)")
print("  - plot_roc_curves_comparison(results, y_test, save_path)")
print("\n" + "="*70)