In [None]:
# A Voting Ensemble Classifier is an ensemble learning technique that combines multiple individual
# classifiers (base models) and aggregates their predictions to make a final prediction.
# Voting ensembles exist for both classification and regression tasks; in scikit-learn the
# classification variant is VotingClassifier and the regression variant is VotingRegressor.
# A voting ensemble can improve the overall prediction accuracy by leveraging the diversity of
# different models and their collective decision-making.





# There are two main types of Voting Ensemble Classifiers: Hard Voting and Soft Voting.

# Hard Voting: In hard voting, each base model in the ensemble predicts a class label,
# and the label predicted by the majority of the models is selected as the final prediction.

# Soft Voting: In soft voting, each base model predicts the class probabilities, and the average probabilities 
# across all models are calculated. The class label with the highest average probability is selected as the final prediction.



In [3]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import VotingClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier


In [4]:
from sklearn.datasets import make_blobs

# Synthetic two-class problem: 100,000 points drawn around two blob centers.
# The fixed seed keeps the generated points identical from run to run.
X, y = make_blobs(
    n_samples=100000,
    centers=2,
    random_state=42,
)

# Hold out 20% of the samples for the final evaluation; the split is seeded
# as well, so the train/test membership is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [5]:
# Base learners for the ensemble. The random forest is seeded (same seed as the
# data generation and split) so that repeated runs of the grid search score the
# candidates identically and select the same configuration.
model1 = RandomForestClassifier(random_state=42)
model2 = LogisticRegression()
model3 = KNeighborsClassifier()

# Hard voting: every base model predicts a class label and the label chosen by
# the majority of the models becomes the ensemble's prediction.
voting_classifier = VotingClassifier(
    estimators=[
        ('rf', model1),
        ('lr', model2),
        ('knn', model3),
    ],
    voting='hard',
)

# Search space. Each key is "<estimator name>__<parameter>", where the name is
# the label given in `estimators` above. 3 * 3 * 2 = 18 candidate combinations.
params = {
    'rf__n_estimators': [100, 200, 300],
    'knn__n_neighbors': [3, 5, 7],
    'knn__weights': ['uniform', 'distance'],
}

# 5-fold cross-validation over every combination (18 * 5 = 90 fits).
# n_jobs=-1 runs those fits on all available CPU cores; it only affects
# wall-clock time, not which candidate is selected.
grid_search = GridSearchCV(
    estimator=voting_classifier,
    param_grid=params,
    cv=5,
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

# Show the winning ensemble and the hyperparameter values that produced it.
best_model = grid_search.best_estimator_
print(best_model)

print(grid_search.best_params_)


VotingClassifier(estimators=[('rf', RandomForestClassifier()),
                             ('lr', LogisticRegression()),
                             ('knn', KNeighborsClassifier(n_neighbors=3))])
{'knn__n_neighbors': 3, 'knn__weights': 'uniform', 'rf__n_estimators': 100}


In [6]:
# Predict class labels for the held-out test split with the ensemble selected
# by the grid search; y_pred is consumed by the evaluation cell below.
y_pred = best_model.predict(X_test)


In [8]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Compute every evaluation summary first; each one compares the true test
# labels (first argument) against the ensemble's predictions (second argument).
accuracy = accuracy_score(y_test, y_pred)
confusion_mat = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

# Fraction of test samples whose predicted label matches the true label.
print("Accuracy:", accuracy)

# Counts of samples per (true label, predicted label) pair.
print("Confusion Matrix:")
print(confusion_mat)

# Per-class precision, recall, F1 score and support as a text table.
print("Classification Report:")
print(class_report)

Accuracy: 1.0
Confusion Matrix:
[[ 9920     0]
 [    0 10080]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      9920
           1       1.00      1.00      1.00     10080

    accuracy                           1.00     20000
   macro avg       1.00      1.00      1.00     20000
weighted avg       1.00      1.00      1.00     20000



In [None]:
        #              +-------------------+
        #              |                   |
        # +------------>    Base Models    |
        # |            |                   |
        # |            +--------+----------+
        # |                     |
        # |           +---------v--------+
        # |           |                  |
        # +----------->  Voting Ensemble |
        #             |    Classifier    |
        #             |                  |
        #             +------------------+


# Each base model makes its own prediction, and the Voting Ensemble
# Classifier combines those predictions to produce the final prediction.
