# Implement Ensemble Learning Comparison on Diabetes Classification

In [3]:
import pandas as pd

df = pd.read_csv('diabetes.csv')

In [4]:
df.head(4)

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0


In [6]:
X = df.drop('Outcome', axis=1)
Y = df['Outcome']

In [8]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler(feature_range=(0, 1))
X_scaled = scaler.fit_transform(X)
X = pd.DataFrame(X_scaled)
X.head(4)

Unnamed: 0,0,1,2,3,4,5,6,7
0,0.352941,0.743719,0.590164,0.353535,0.0,0.500745,0.234415,0.483333
1,0.058824,0.427136,0.540984,0.292929,0.0,0.396423,0.116567,0.166667
2,0.470588,0.919598,0.52459,0.0,0.0,0.347243,0.253629,0.183333
3,0.058824,0.447236,0.540984,0.232323,0.111111,0.418778,0.038002,0.0


In [10]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.20, random_state=0)

In [11]:
from sklearn.ensemble import BaggingClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, precision_recall_fscore_support

clf = BaggingClassifier().fit(x_train, y_train)
y_pred = clf.predict(x_test)

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='macro')
recall = recall_score(y_test, y_pred, average='macro')
f1 = f1_score(y_test, y_pred, average='macro')

print('BAGGING RESULT =')
print('Accuracy : ', accuracy, '\nPrecision : ', precision, '\nRecall : ', recall, '\nF1-Score : ', f1)

BAGGING RESULT =
Accuracy :  0.7987012987012987 
Precision :  0.7626923076923078 
Recall :  0.7716245774507855 
F1-Score :  0.7667464699272


In [12]:
from sklearn.ensemble import AdaBoostClassifier

clf = AdaBoostClassifier().fit(x_train, y_train)
y_pred = clf.predict(x_test)

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='macro')
recall = recall_score(y_test, y_pred, average='macro')
f1 = f1_score(y_test, y_pred, average='macro')

print('ADABOOST RESULT =')
print('Accuracy : ', accuracy, '\nPrecision : ', precision, '\nRecall : ', recall, '\nF1-Score : ', f1)

ADABOOST RESULT =
Accuracy :  0.7857142857142857 
Precision :  0.7485119047619048 
Recall :  0.7324517796778683 
F1-Score :  0.7392642758196091


In [13]:
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier

estimators = [
    ('svm', SVC(probability=True)),
    ('bayes', GaussianNB()),
    ('knn', KNeighborsClassifier()),
    ('cart', DecisionTreeClassifier())
]

meta_model = LogisticRegression()

stacking_clf = StackingClassifier(
    estimators=estimators,
    final_estimator=meta_model,
    cv=5
).fit(x_train, y_train)

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='macro')
recall = recall_score(y_test, y_pred, average='macro')
f1 = f1_score(y_test, y_pred, average='macro')

print('STACKING RESULT =')
print('Accuracy : ', accuracy, '\nPrecision : ', precision, '\nRecall : ', recall, '\nF1-Score : ', f1)

STACKING RESULT =
Accuracy :  0.7857142857142857 
Precision :  0.7485119047619048 
Recall :  0.7324517796778683 
F1-Score :  0.7392642758196091
