In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, VotingClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report


In [2]:
# Download the Pima Indians diabetes CSV into a DataFrame.
# Column names are supplied through `names`, so every row of the file is
# read as data; `header=None` spells out that no header row is consumed.
# NOTE(review): zeros in physiological columns (e.g. Glucose, BloodPressure,
# BMI) may encode missing measurements in this dataset — confirm before
# relying on the raw values.
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
columns = [
    'Pregnancies',
    'Glucose',
    'BloodPressure',
    'SkinThickness',
    'Insulin',
    'BMI',
    'DiabetesPedigreeFunction',
    'Age',
    'Outcome',
]
data = pd.read_csv(url, header=None, names=columns)

In [3]:
# Separate the target column from the predictors.
# `y` is the 'Outcome' column; `X` is a copy of the frame without it
# (`data` itself is left untouched).
y = data['Outcome']
X = data.drop(columns='Outcome')

In [4]:
# Hold out 30% of the rows as a test partition.
# Stratifying on `y` and fixing the seed keep the split class-balanced
# and repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)


In [5]:
# Standardise the features.
# The scaler is fitted on the training partition only, and the same fitted
# transform is then applied to both partitions, so no test-set statistics
# are used when scaling.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [6]:
# Build the individual (unfitted) classifiers used by the ensemble cells.
# Every estimator that accepts a seed gets random_state=42.

# Tree-based ensembles, 100 estimators each.
rf_clf, ada_clf = (
    RandomForestClassifier(n_estimators=100, random_state=42),
    AdaBoostClassifier(n_estimators=100, random_state=42),
)

# Linear and kernel models; probability=True makes the SVC expose class
# probabilities (it is one of the members of the soft-voting ensemble).
lr_clf, svc_clf = (
    LogisticRegression(random_state=42),
    SVC(probability=True, random_state=42),
)

# Default k-nearest-neighbours and a single decision tree.
# dt_clf is not referenced by any other cell visible in this excerpt.
knn_clf, dt_clf = (
    KNeighborsClassifier(),
    DecisionTreeClassifier(random_state=42),
)

In [7]:
# Bagging: fit the Random Forest on the training partition, predict the
# test partition, then print accuracy and the per-class report.
y_pred_rf = rf_clf.fit(X_train, y_train).predict(X_test)
print(
    "Random Forest Accuracy:",
    accuracy_score(y_test, y_pred_rf),
)
print(
    "Random Forest Classification Report:\n",
    classification_report(y_test, y_pred_rf),
)


Random Forest Accuracy: 0.7532467532467533
Random Forest Classification Report:
               precision    recall  f1-score   support

           0       0.78      0.87      0.82       150
           1       0.69      0.54      0.61        81

    accuracy                           0.75       231
   macro avg       0.73      0.70      0.71       231
weighted avg       0.75      0.75      0.75       231



In [8]:
# Boosting: fit AdaBoost on the training partition, predict the test
# partition, then print accuracy and the per-class report.
y_pred_ada = ada_clf.fit(X_train, y_train).predict(X_test)
print(
    "AdaBoost Accuracy:",
    accuracy_score(y_test, y_pred_ada),
)
print(
    "AdaBoost Classification Report:\n",
    classification_report(y_test, y_pred_ada),
)


AdaBoost Accuracy: 0.7532467532467533
AdaBoost Classification Report:
               precision    recall  f1-score   support

           0       0.79      0.85      0.82       150
           1       0.68      0.57      0.62        81

    accuracy                           0.75       231
   macro avg       0.73      0.71      0.72       231
weighted avg       0.75      0.75      0.75       231



In [9]:
# Hard voting: combine logistic regression, random forest, SVC and KNN,
# with the ensemble configured as voting='hard'.
# The ensemble is fitted on the training partition and scored on the
# test partition.
voting_clf_hard = VotingClassifier(
    estimators=[
        ('lr', lr_clf),
        ('rf', rf_clf),
        ('svc', svc_clf),
        ('knn', knn_clf),
    ],
    voting='hard',
)
y_pred_voting_hard = voting_clf_hard.fit(X_train, y_train).predict(X_test)
print(
    "Voting Classifier (Hard Voting) Accuracy:",
    accuracy_score(y_test, y_pred_voting_hard),
)
print(
    "Voting Classifier (Hard Voting) Classification Report:\n",
    classification_report(y_test, y_pred_voting_hard),
)


Voting Classifier (Hard Voting) Accuracy: 0.7532467532467533
Voting Classifier (Hard Voting) Classification Report:
               precision    recall  f1-score   support

           0       0.77      0.88      0.82       150
           1       0.70      0.52      0.60        81

    accuracy                           0.75       231
   macro avg       0.74      0.70      0.71       231
weighted avg       0.75      0.75      0.74       231



In [10]:
# Soft voting: combine logistic regression, random forest and SVC,
# with the ensemble configured as voting='soft'.
# KNN is not part of this ensemble, unlike the hard-voting one.
voting_clf_soft = VotingClassifier(
    estimators=[
        ('lr', lr_clf),
        ('rf', rf_clf),
        ('svc', svc_clf),
    ],
    voting='soft',
)
y_pred_voting_soft = voting_clf_soft.fit(X_train, y_train).predict(X_test)
print(
    "Voting Classifier (Soft Voting) Accuracy:",
    accuracy_score(y_test, y_pred_voting_soft),
)
print(
    "Voting Classifier (Soft Voting) Classification Report:\n",
    classification_report(y_test, y_pred_voting_soft),
)


Voting Classifier (Soft Voting) Accuracy: 0.7489177489177489
Voting Classifier (Soft Voting) Classification Report:
               precision    recall  f1-score   support

           0       0.78      0.86      0.82       150
           1       0.68      0.54      0.60        81

    accuracy                           0.75       231
   macro avg       0.73      0.70      0.71       231
weighted avg       0.74      0.75      0.74       231



In [11]:
# Stacking: random forest, SVC and KNN act as base estimators, and a
# default LogisticRegression is the final estimator that combines them.
# The stack is fitted on the training partition and scored on the
# test partition.
stacking_clf = StackingClassifier(
    estimators=[
        ('rf', rf_clf),
        ('svc', svc_clf),
        ('knn', knn_clf),
    ],
    final_estimator=LogisticRegression(),
)
y_pred_stacking = stacking_clf.fit(X_train, y_train).predict(X_test)
print(
    "Stacking Classifier Accuracy:",
    accuracy_score(y_test, y_pred_stacking),
)
print(
    "Stacking Classifier Classification Report:\n",
    classification_report(y_test, y_pred_stacking),
)

Stacking Classifier Accuracy: 0.7575757575757576
Stacking Classifier Classification Report:
               precision    recall  f1-score   support

           0       0.78      0.87      0.82       150
           1       0.70      0.54      0.61        81

    accuracy                           0.76       231
   macro avg       0.74      0.71      0.72       231
weighted avg       0.75      0.76      0.75       231



In [12]:
# Collect the test-set accuracy of every ensemble into one mapping,
# keyed by display label, in the order the models were evaluated.
results = {
    label: accuracy_score(y_test, predictions)
    for label, predictions in [
        ("Random Forest", y_pred_rf),
        ("AdaBoost", y_pred_ada),
        ("Voting (Hard)", y_pred_voting_hard),
        ("Voting (Soft)", y_pred_voting_soft),
        ("Stacking", y_pred_stacking),
    ]
}

In [13]:
# Print one "<label>: <accuracy>" line per ensemble, four decimal places,
# in the insertion order of `results`.
print("\nAccuracy Comparison:")
for method in results:
    acc = results[method]
    print(f"{method}: {acc:.4f}")


Accuracy Comparison:
Random Forest: 0.7532
AdaBoost: 0.7532
Voting (Hard): 0.7532
Voting (Soft): 0.7489
Stacking: 0.7576
