In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.ensemble import (
    AdaBoostClassifier,
    RandomForestClassifier,
    StackingClassifier,
    VotingClassifier,
)
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Location of the raw Pima Indians Diabetes CSV in the jbrownlee/Datasets repo.
# The file name is "pima-indians-diabetes.data.csv" -- note the hyphen between
# "indians" and "diabetes"; without it the request does not resolve to the file.
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"

In [None]:
# Column labels applied to the CSV, in file order; 'Outcome' is the last one
# and is used as the prediction target further down.
columns = [
    'Pregnancies',
    'Glucose',
    'BloodPressure',
    'SkinThickness',
    'Insulin',
    'BMI',
    'DiabetesPedigreeFunction',
    'Age',
    'Outcome',
]

# header=None spells out what passing `names` already implies: every line of
# the file is read as data and the labels come from `columns`.
# NOTE(review): presumably the remote file has no header row -- confirm.
data = pd.read_csv(url, header=None, names=columns)

In [None]:
# Target vector: the 'Outcome' column.
y = data['Outcome']
# Feature matrix: every remaining column (drop returns a new frame, `data` is untouched).
X = data.drop(columns=['Outcome'])

In [None]:
# Hold out 20% of the rows as a test set; the fixed random_state makes the
# shuffle -- and therefore the split -- repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Standardise the features. The scaler is fitted on the training split only,
# and that same fitted scaler is then applied to both splits, so nothing about
# the test rows influences the scaling parameters.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Random forest baseline: 100 trees, seeded for repeatability.
rf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Score on the held-out split.
rf_pred = rf.predict(X_test)
rf_accuracy = accuracy_score(y_true=y_test, y_pred=rf_pred)

In [None]:
# AdaBoost baseline: 100 boosting rounds, seeded for repeatability.
ab = AdaBoostClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Score on the held-out split.
ab_pred = ab.predict(X_test)
ab_accuracy = accuracy_score(y_true=y_test, y_pred=ab_pred)

In [None]:
# Majority-vote ensemble over the random forest and AdaBoost models.
# NOTE(review): with only two voters, hard voting can tie -- confirm the
# tie-breaking behaviour is acceptable, or consider soft voting / a third voter.
voting_clf = VotingClassifier(
    estimators=[('rf', rf), ('ab', ab)],
    voting='hard',
)

# fit() returns the estimator itself, so training and prediction can be chained.
voting_pred = voting_clf.fit(X_train, y_train).predict(X_test)
voting_accuracy = accuracy_score(y_true=y_test, y_pred=voting_pred)

In [None]:
# Base learners for the stack: the two ensembles above plus a default k-NN.
estimators = [
    ('rf', rf),
    ('ab', ab),
    ('knn', KNeighborsClassifier()),
]

# A default logistic regression serves as the final estimator of the stack.
stacking_clf = StackingClassifier(
    estimators=estimators,
    final_estimator=LogisticRegression(),
)

# fit() returns the estimator itself, so training and prediction can be chained.
stacking_pred = stacking_clf.fit(X_train, y_train).predict(X_test)
stacking_accuracy = accuracy_score(y_true=y_test, y_pred=stacking_pred)

In [None]:
# Report the test-set accuracy of each model, one per line, to 4 decimals.
print(
    f"Random Forest Accuracy: {rf_accuracy:.4f}",
    f"AdaBoost Accuracy: {ab_accuracy:.4f}",
    f"Voting Classifier Accuracy: {voting_accuracy:.4f}",
    f"Stacking Classifier Accuracy: {stacking_accuracy:.4f}",
    sep="\n",
)

In [None]:
# Labels and scores are kept in matching order so each bar gets the right name.
methods = ['Random Forest', 'AdaBoost', 'Voting', 'Stacking']
accuracies = [rf_accuracy, ab_accuracy, voting_accuracy, stacking_accuracy]

# Horizontal bar chart comparing the four test accuracies.
fig, ax = plt.subplots(figsize=(10, 6))
ax.barh(methods, accuracies, color='skyblue')
ax.set_xlabel('Accuracy')
ax.set_title('Comparison of Ensemble Methods on Pima Indians Diabetes Dataset')
plt.show()