In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, StratifiedKFold, RandomizedSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.compose import ColumnTransformer
from sklearn.feature_selection import SelectKBest, f_classif
from scipy.stats import randint, uniform

In [None]:
# Load the iris dataset and separate the feature matrix from the label vector.
iris = load_iris()
X = iris.data
y = iris.target

# Hold out 20% of the samples as a test set. Stratifying on y keeps the class
# proportions equal in both splits; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [None]:
# Preprocessing + model pipeline. The step names double as the prefixes used
# to address step parameters from outside (e.g. 'pca__n_components').
pipeline = Pipeline(steps=[
    # Standardize the features.
    ('scaler', StandardScaler()),
    # Keep the 2 features that score best under f_classif.
    ('feature_selection', SelectKBest(score_func=f_classif, k=2)),
    # Reduce the selected features to 2 principal components.
    ('pca', PCA(n_components=2)),
    # Default model.
    ('classifier', RandomForestClassifier(random_state=42)),
])

In [None]:
# 5-fold stratified cross-validation, shuffled with a fixed seed so the folds
# are the same on every run.
cv = StratifiedKFold(
    n_splits=5,
    shuffle=True,
    random_state=42,
)

In [None]:
# Search space for RandomizedSearchCV, expressed as a list of dicts.
# RandomizedSearchCV first picks one dict uniformly at random and then samples
# every entry of that dict, so each dict must be valid on its own.
#
# Two constraints drive this layout:
#   * Hyper-parameters are classifier-specific (C exists only on SVC,
#     n_estimators only on RandomForestClassifier, max_depth only on the
#     tree-based models). Pairing a classifier with a parameter it does not
#     define makes set_params raise "Invalid parameter", so each classifier
#     gets its own sub-space instead of sharing one flat dict. (A conditional
#     evaluated while building the dict cannot do this: it runs once, before
#     any candidate classifier has been sampled.)
#   * PCA runs on the output of SelectKBest, so n_components must not exceed
#     the number of selected features k. Each k therefore gets its own dict
#     that lists only the valid n_components values (1..k).
_classifier_spaces = [
    {
        'classifier': [RandomForestClassifier(random_state=42)],
        'classifier__n_estimators': randint(50, 200),  # integers 50..199
        'classifier__max_depth': randint(3, 20),       # integers 3..19
    },
    {
        'classifier': [SVC(probability=True, random_state=42)],
        # scipy's uniform(loc, scale) spans [loc, loc + scale] = [0.1, 10.1].
        'classifier__C': uniform(0.1, 10),
    },
    {
        'classifier': [DecisionTreeClassifier(random_state=42)],
        'classifier__max_depth': randint(3, 20),       # integers 3..19
    },
]

param_dist = [
    {
        'scaler': [StandardScaler(), MinMaxScaler()],  # try both scalers
        'feature_selection__k': [k],                   # keep the k best features
        'pca__n_components': list(range(1, k + 1)),    # 1..k components
        **clf_space,
    }
    for k in range(1, 4)                               # k in 1..3
    for clf_space in _classifier_spaces
]

In [None]:
# Randomized hyper-parameter search over param_dist: 20 sampled candidates,
# each scored by accuracy under the cv splitter, with n_jobs=-1 for parallelism.
random_search = RandomizedSearchCV(
    estimator=pipeline,
    param_distributions=param_dist,
    n_iter=20,
    scoring='accuracy',
    cv=cv,
    n_jobs=-1,
    random_state=42,
)

In [None]:
# Run the hyper-parameter search on the training split only.
random_search.fit(X=X_train, y=y_train)

# Report the best parameter combination and its cross-validated score.
print("Best Params:", random_search.best_params_)
print("Best Score:", random_search.best_score_)

In [None]:
# Base estimators for the stacking ensemble, as (name, estimator) pairs.
base_learners = [
    (
        'rf',
        RandomForestClassifier(random_state=42, n_estimators=100),
    ),
    (
        'svm',
        SVC(random_state=42, probability=True),
    ),
    (
        'dt',
        DecisionTreeClassifier(random_state=42, max_depth=5),
    ),
]

In [None]:
# Stacking ensemble with logistic regression as the meta-model on top of the
# base learners; the shared cv splitter is passed through as `cv`.
stacking_model = StackingClassifier(
    cv=cv,
    estimators=base_learners,
    final_estimator=LogisticRegression(),
)

In [None]:
# Train the stacking ensemble on the training split.
stacking_model.fit(X=X_train, y=y_train)

# Evaluate on the held-out test split.
print("Stacking Model Score:", stacking_model.score(X=X_test, y=y_test))