In [3]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier

In [4]:
# Iris feature matrix (X) and class labels (y).
X, y = load_iris(return_X_y=True)

# Split the data into 5 shuffled folds, keeping the class proportions
# balanced in every fold; the fixed seed makes the split repeatable.
cv = StratifiedKFold(shuffle=True, random_state=42, n_splits=5)

# Display label -> preprocessing step to compare.
# None marks the baseline without any scaling step.
scalers = dict(
    (
        ("No Scaler", None),
        ("MinMax", MinMaxScaler()),
        ("Standard", StandardScaler()),
        ("Robust", RobustScaler()),
    )
)

In [5]:
def evaluate_model(clf):
    """Cross-validate ``clf`` once per entry of the module-level ``scalers``.

    For each (label, scaler) pair a pipeline is assembled: the scaler step
    is included only when the scaler is not ``None``, followed by the
    classifier. The pipeline is scored with ``cross_val_score`` on the
    module-level ``X``/``y`` using the module-level ``cv`` splitter and the
    accuracy metric.

    Parameters
    ----------
    clf : estimator
        Classifier placed as the final ``"clf"`` step of every pipeline.

    Returns
    -------
    dict
        Maps each scaler label to a ``(mean, std)`` tuple computed over the
        per-fold accuracy scores.
    """
    summary = {}
    for label, scaler in scalers.items():
        # A None scaler means "no preprocessing": the pipeline is just the model.
        preprocessing = [] if scaler is None else [("scaler", scaler)]
        pipeline = Pipeline(preprocessing + [("clf", clf)])
        fold_scores = cross_val_score(
            pipeline,
            X,
            y,
            cv=cv,
            scoring="accuracy",
            n_jobs=-1,
        )
        summary[label] = (fold_scores.mean(), fold_scores.std())
    return summary


In [6]:
# Run the KNN and RandomForest models: for each one, print a line per
# scaler with the mean accuracy ± standard deviation across the folds.
print("KNN results:")
knn_results = evaluate_model(KNeighborsClassifier(n_neighbors=5))
for k in knn_results:
    m, s = knn_results[k]
    print(f"{k}: {m:.4f} ± {s:.4f}")

print("\nRandomForest results:")
rf_results = evaluate_model(
    RandomForestClassifier(random_state=42, n_estimators=100)
)
for k in rf_results:
    m, s = rf_results[k]
    print(f"{k}: {m:.4f} ± {s:.4f}")

KNN results:
No Scaler: 0.9667 ± 0.0298
MinMax: 0.9600 ± 0.0389
Standard: 0.9733 ± 0.0249
Robust: 0.9200 ± 0.0618

RandomForest results:
No Scaler: 0.9467 ± 0.0267
MinMax: 0.9467 ± 0.0267
Standard: 0.9467 ± 0.0267
Robust: 0.9467 ± 0.0267
