In [2]:
from pathlib import Path

import joblib
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

# Train several baseline classifiers on a pre-split dataset, report each
# one's accuracy on the test split, and persist the highest-scoring model.

# Fixed seed so the randomized estimators (decision tree, random forest)
# give the same scores -- and therefore the same winner -- on every run.
RANDOM_STATE = 42

DATA_PATH = Path("../split_data.npz")
MODEL_PATH = Path("../models/best_model.pkl")

# 1. Load the data.
# np.load on an .npz returns a lazily-read archive that holds the file open;
# the context manager closes it once the four arrays have been read.
with np.load(DATA_PATH) as data:
    X_train, X_test = data["X_train"], data["X_test"]
    y_train, y_test = data["y_train"], data["y_test"]

# 2. Declare the candidate models (library defaults unless noted).
models = {
    "Logistic Regression": LogisticRegression(max_iter=500),
    "Decision Tree": DecisionTreeClassifier(random_state=RANDOM_STATE),
    "Random Forest": RandomForestClassifier(random_state=RANDOM_STATE),
    "SVM": SVC(),
    "KNN": KNeighborsClassifier(),
}

# Maps model display name -> accuracy on the test split.
results = {}

# 3. Train and evaluate every candidate.
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    results[name] = acc
    print(f"{name}: Accuracy = {acc:.3f}")

# 4. Save the best model.
# NOTE(review): the winner is chosen by its test-set accuracy, so the score
# printed for it is an optimistic estimate; consider selecting on a
# validation split or via cross-validation on the training data instead.
# On a tie, max() keeps the first model in declaration order.
best_model_name = max(results, key=results.get)
best_model = models[best_model_name]

# Create the output directory if needed so the dump cannot fail on a fresh
# checkout where ../models does not exist yet.
MODEL_PATH.parent.mkdir(parents=True, exist_ok=True)
joblib.dump(best_model, MODEL_PATH)

print(f" Mô hình tốt nhất: {best_model_name} (Accuracy={results[best_model_name]:.3f})")


Logistic Regression: Accuracy = 0.897
Decision Tree: Accuracy = 0.894
Random Forest: Accuracy = 0.928
SVM: Accuracy = 0.909
KNN: Accuracy = 0.896
 Mô hình tốt nhất: Random Forest (Accuracy=0.928)
