In [8]:
from sklearn.ensemble import RandomForestClassifier, StackingClassifier, AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
import numpy as np
import pandas as pd

In [2]:
# Load data
df = pd.read_csv('dataset/iris.csv')

df.head()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


In [3]:
# Cek kolom null
df.isnull().sum()

Id               0
SepalLengthCm    0
SepalWidthCm     0
PetalLengthCm    0
PetalWidthCm     0
Species          0
dtype: int64

In [4]:
# Seleksi fitur
X = df.iloc[:,2:-1]
y = df['Species']

# encode label
ec = LabelEncoder()
y = ec.fit_transform(y)

# Cek jumlah fitur dan instance
print(X.shape)

# Cek label
print(y)

(150, 3)
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2]


In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)

In [7]:
rf = RandomForestClassifier(n_estimators=10, random_state=42)
rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)
acc_rf = accuracy_score(y_test, y_pred_rf)
print("Random Forest Test set accuracy:", acc_rf)

Random Forest Test set accuracy: 0.9473684210526315


In [9]:
# Model AdaBoost
ada = AdaBoostClassifier(n_estimators=50, random_state=42)
ada.fit(X_train, y_train)
y_pred_ada = ada.predict(X_test)
acc_ada = accuracy_score(y_test, y_pred_ada)
print("AdaBoost Test set accuracy:", acc_ada)

AdaBoost Test set accuracy: 0.9473684210526315




In [10]:
layer_one_estimators = [
    ('rf_1', RandomForestClassifier(n_estimators=10, random_state=42)),
    ('knn_1', KNeighborsClassifier(n_neighbors=5))
]

layer_two_estimators = [
    ('dt_2', DecisionTreeClassifier()),
    ('rf_2', RandomForestClassifier(n_estimators=50, random_state=42)),
]

# Buat stacking untuk layer kedua
layer_two = StackingClassifier(estimators=layer_two_estimators, final_estimator=LogisticRegression())

# Buat stacking untuk layer pertama
clf = StackingClassifier(estimators=layer_one_estimators, final_estimator=layer_two)

In [11]:
clf.fit(X_train, y_train)
accuracy = clf.score(X_test, y_test)
print(f"Stacking Classifier Accuracy: {accuracy:.2f}")

Stacking Classifier Accuracy: 0.89


In [12]:
print(f"Random Forest Accuracy: {acc_rf:.2f}")
print(f"AdaBoost Accuracy: {acc_ada:.2f}")
print(f"Stacking Classifier Accuracy: {accuracy:.2f}")

Random Forest Accuracy: 0.95
AdaBoost Accuracy: 0.95
Stacking Classifier Accuracy: 0.89
