In [1]:
!pip install -qq faiss-cpu
!pip install -qq transformers
!pip install -qq pandas
!pip install -qq numpy
!pip install -qq scikit-learn
!pip install -qq tqdm

# Tải dữ liệu từ Google Drive
# https://drive.google.com/file/d/1N7rk-kfnDFIGMeX0ROVTjKh71gcgx-7R/view?usp=sharing
!gdown --id 1N7rk-kfnDFIGMeX0ROVTjKh71gcgx-7R

# 1. Import các thư viện cần thiết
import pandas as pd
import numpy as np
import torch
import torch.nn.functional as F
import faiss
from transformers import AutoTokenizer, AutoModel
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.3/31.3 MB[0m [31m41.8 MB/s[0m eta [36m0:00:00[0m
Downloading...
From: https://drive.google.com/uc?id=1N7rk-kfnDFIGMeX0ROVTjKh71gcgx-7R
To: /content/2cls_spam_text_cls.csv
100% 486k/486k [00:00<00:00, 14.7MB/s]


In [2]:
# 2. Load the dataset
DATASET_PATH = "/content/2cls_spam_text_cls.csv"
df = pd.read_csv(DATASET_PATH)

# Pull the message texts and their category labels out as two plain Python lists
messages, labels = (
    df[column].values.tolist() for column in ("Message", "Category")
)

In [3]:
# 3.1. Load the embedding model
MODEL_NAME = "intfloat/multilingual-e5-base"

# Prefer the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Load the weights, move them to the selected device and switch to evaluation mode
model = AutoModel.from_pretrained(MODEL_NAME).to(device).eval()

# Helper that turns the model's per-token output into one embedding per input
def average_pool(last_hidden_states, attention_mask):
    """Mean-pool token states over dim 1, ignoring positions masked out by `attention_mask`.

    Positions whose mask value is 0 are zeroed before summing, and each row's sum
    is divided by that row's mask total.

    NOTE(review): assumes `last_hidden_states` is (batch, seq, hidden) and
    `attention_mask` is (batch, seq) -- confirm against the tokenizer/model outputs.
    """
    keep = attention_mask.unsqueeze(-1).bool()
    summed = last_hidden_states.masked_fill(~keep, 0.0).sum(dim=1)
    token_counts = attention_mask.sum(dim=1).unsqueeze(-1)
    return summed / token_counts

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/418 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/280 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/694 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.11G [00:00<?, ?B/s]

In [4]:
# 3.2. Tạo sentence embeddings
def get_embeddings(texts, model, tokenizer, device, batch_size=32,
                   prefix="passage: ", max_length=512):
    """Create L2-normalised sentence embeddings for a collection of texts.

    Args:
        texts: Iterable of texts to embed.
        model: Encoder called as `model(**batch)`; its `last_hidden_state` is pooled.
        tokenizer: Tokenizer called on a list of strings with `return_tensors="pt"`.
        device: Torch device the tokenized batch is moved to.
        batch_size: Number of texts encoded per forward pass (must be >= 1).
        prefix: String prepended to every text before tokenization. Defaults to
            "passage: ", the prefix this notebook uses for indexed documents;
            pass "query: " to embed search queries with the same code path.
        max_length: Truncation length passed to the tokenizer.

    Returns:
        np.ndarray with one row per input text. For empty input an array with
        zero rows is returned instead of failing inside `np.vstack`.

    Raises:
        ValueError: If `batch_size` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    texts = list(texts)
    if not texts:
        # np.vstack([]) raises, so return an explicit empty matrix instead.
        # NOTE(review): the width is read from model.config.hidden_size when the
        # model exposes it -- confirm for non-Hugging-Face models.
        hidden_size = getattr(getattr(model, "config", None), "hidden_size", 0)
        return np.empty((0, hidden_size), dtype=np.float32)

    embeddings = []

    for start in tqdm(range(0, len(texts), batch_size), desc="Generating embeddings"):
        batch_texts = [f"{prefix}{text}" for text in texts[start:start + batch_size]]

        encoded = tokenizer(
            batch_texts,
            max_length=max_length,
            padding=True,
            truncation=True,
            return_tensors="pt"
        )
        encoded = {key: tensor.to(device) for key, tensor in encoded.items()}

        # Inference only: no gradients are needed
        with torch.no_grad():
            outputs = model(**encoded)
            pooled = average_pool(outputs.last_hidden_state, encoded["attention_mask"])
            # Unit-length rows so that an inner product equals cosine similarity
            pooled = F.normalize(pooled, p=2, dim=1)

        embeddings.append(pooled.cpu().numpy())

    return np.vstack(embeddings)

# Encode the string labels as integer class ids
le = LabelEncoder()
y = le.fit_transform(labels)

# Embed every message; rows come back in the same order as `messages`
X_embeddings = get_embeddings(messages, model, tokenizer, device)

# One metadata record per document, in the same order as the embedding rows
metadata = [
    dict(index=position, message=text, label=category, label_encoded=encoded)
    for position, (text, category, encoded) in enumerate(zip(messages, labels, y))
]


  return forward_call(*args, **kwargs)
Generating embeddings: 100%|██████████| 175/175 [22:50<00:00,  7.83s/it]


In [5]:
# 3.3. Split the data and build the FAISS index
TEST_SIZE = 0.1
SEED = 42

# Stratified split over row positions, so embeddings and metadata can be
# sliced with the very same index lists
train_indices, test_indices = train_test_split(
    range(len(messages)),
    test_size=TEST_SIZE,
    random_state=SEED,
    stratify=y,
)

# Slice embeddings and metadata according to the split
X_train_emb, X_test_emb = X_embeddings[train_indices], X_embeddings[test_indices]
train_metadata, test_metadata = (
    [metadata[position] for position in split]
    for split in (train_indices, test_indices)
)

# Inner-product index over the training embeddings (rows were L2-normalised
# in get_embeddings, so the inner product is the cosine similarity)
embedding_dim = X_train_emb.shape[1]
index = faiss.IndexFlatIP(embedding_dim)
index.add(X_train_emb.astype("float32"))



In [6]:
# 4. Triển khai phân loại với embedding similarity
def classify_with_knn(query_text, model, tokenizer, device, index, train_metadata, k=1):
    """Classify a text by majority vote over its k nearest training embeddings.

    Args:
        query_text: Text to classify; it is embedded with the "query: " prefix.
        model: Encoder whose `last_hidden_state` is pooled into the query embedding.
        tokenizer: Tokenizer matching `model`.
        device: Torch device the tokenized query is moved to.
        index: Object exposing `search(embedding, k)` returning `(scores, ids)`.
        train_metadata: Sequence of dicts with "label" and "message" keys,
            indexed by the ids returned from `index.search`.
        k: Number of neighbours to retrieve (must be >= 1).

    Returns:
        (final_prediction, neighbor_info) where `neighbor_info` is a list of
        dicts with "score", "label" and "message" (truncated to 100 characters).

    Raises:
        ValueError: If `k` < 1 or the index returns no valid neighbour.
    """
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k}")

    # Embed the query
    encoded = tokenizer(
        [f"query: {query_text}"],
        max_length=512,
        padding=True,
        truncation=True,
        return_tensors="pt"
    )
    encoded = {name: tensor.to(device) for name, tensor in encoded.items()}

    with torch.no_grad():
        outputs = model(**encoded)
        query_embedding = average_pool(outputs.last_hidden_state, encoded["attention_mask"])
        query_embedding = F.normalize(query_embedding, p=2, dim=1)
    query_embedding = query_embedding.cpu().numpy().astype("float32")

    # Search in FAISS index
    scores, indices = index.search(query_embedding, k)

    predictions = []
    neighbor_info = []

    for neighbor_idx, neighbor_score in zip(indices[0], scores[0]):
        # FAISS pads the result with id -1 when fewer than k vectors exist;
        # indexing train_metadata[-1] would silently pick the last training row.
        if neighbor_idx < 0:
            continue

        neighbor = train_metadata[neighbor_idx]
        neighbor_label = neighbor["label"]
        neighbor_message = neighbor["message"]

        predictions.append(neighbor_label)
        neighbor_info.append({
            "score": float(neighbor_score),
            "label": neighbor_label,
            "message": neighbor_message[:100] + "..." if len(neighbor_message) > 100 else neighbor_message
        })

    if not predictions:
        raise ValueError("No neighbours returned by the index; is it empty?")

    # Majority vote; np.unique sorts the labels, so ties go to the first label in sort order
    unique_labels, counts = np.unique(predictions, return_counts=True)
    final_prediction = unique_labels[np.argmax(counts)]

    return final_prediction, neighbor_info


def evaluate_knn_accuracy(test_embeddings, test_metadata, index, train_metadata, k_values=(1, 3, 5)):
    """Evaluate majority-vote k-NN accuracy for several k using precomputed embeddings.

    Args:
        test_embeddings: Array of test embeddings, one row per test sample.
        test_metadata: Sequence of dicts ("label", "message", "index") aligned
            with the rows of `test_embeddings`.
        index: Object exposing `search(embedding, k)` returning `(scores, ids)`.
        train_metadata: Sequence of dicts with "label" and "message" keys,
            indexed by the ids returned from `index.search`.
        k_values: Iterable of k values to evaluate (immutable default).

    Returns:
        (results, all_errors): `results` maps k -> accuracy; `all_errors` maps
        k -> list of dicts describing each misclassified sample.

    Raises:
        ValueError: If `test_embeddings` is empty or the index yields no valid
            neighbour for a query.
    """
    total = len(test_embeddings)
    if total == 0:
        # Previously this surfaced as a ZeroDivisionError when computing accuracy
        raise ValueError("test_embeddings is empty; cannot compute accuracy")

    results = {}
    all_errors = {}

    for k in k_values:
        correct = 0
        errors = []

        for i in tqdm(range(total), desc=f"Evaluating k={k}"):
            query_embedding = test_embeddings[i:i+1].astype("float32")
            true_label = test_metadata[i]["label"]
            true_message = test_metadata[i]["message"]

            # Search in FAISS index
            scores, indices = index.search(query_embedding, k)

            # Collect the labels of the retrieved neighbours
            predictions = []
            neighbor_details = []
            for neighbor_idx, neighbor_score in zip(indices[0], scores[0]):
                # FAISS pads with id -1 when fewer than k vectors are available;
                # skip those instead of reading train_metadata[-1].
                if neighbor_idx < 0:
                    continue
                neighbor = train_metadata[neighbor_idx]
                predictions.append(neighbor["label"])
                neighbor_details.append({
                    "label": neighbor["label"],
                    "message": neighbor["message"],
                    "score": float(neighbor_score)
                })

            if not predictions:
                raise ValueError("No neighbours returned by the index; is it empty?")

            # Majority vote
            unique_labels, counts = np.unique(predictions, return_counts=True)
            predicted_label = unique_labels[np.argmax(counts)]

            if predicted_label == true_label:
                correct += 1
            else:
                # Keep enough context to analyse the mistake later
                errors.append({
                    "index": i,
                    "original_index": test_metadata[i]["index"],
                    "message": true_message,
                    "true_label": true_label,
                    "predicted_label": predicted_label,
                    "neighbors": neighbor_details,
                    "label_distribution": {
                        label: int(count) for label, count in zip(unique_labels, counts)
                    }
                })

        accuracy = correct / total
        error_count = total - correct

        results[k] = accuracy
        all_errors[k] = errors

        print(f"Accuracy with k={k}: {accuracy:.4f}")
        print(f"Number of errors with k={k}: {error_count}/{total} ({(error_count/total)*100:.2f}%)")

    return results, all_errors


# 📚 GIẢI THÍCH ENSEMBLE METHODS CHO PHÂN LOẠI SPAM

## 🎯 Khái niệm Ensemble Learning

**Ensemble Learning** là kỹ thuật kết hợp nhiều mô hình học máy để tạo ra một mô hình mạnh hơn, chính xác hơn so với từng mô hình riêng lẻ.

### 🔍 Tại sao sử dụng Ensemble?

1. **Giảm Overfitting**: Khi kết hợp nhiều mô hình, các lỗi riêng lẻ có thể triệt tiêu lẫn nhau
2. **Tăng độ chính xác**: Dự đoán từ nhiều "chuyên gia" thường chính xác hơn
3. **Tăng độ tin cậy**: Có thể đánh giá mức độ chắc chắn của dự đoán
4. **Robust hơn**: Ít bị ảnh hưởng bởi noise trong dữ liệu

### 📊 Trong bài toán phân loại spam:

- **Single k-NN**: Chỉ dùng 1 giá trị k (ví dụ: k=3)
- **Ensemble k-NN**: Kết hợp nhiều classifier với k khác nhau (ví dụ: k=1, k=3, k=5)

## 🛠️ Cách hoạt động của Ensemble trong notebook:

1. **Tạo nhiều KNNClassifier** với các giá trị k khác nhau
2. **Mỗi classifier đưa ra dự đoán** với confidence score
3. **Kết hợp các dự đoán** bằng các phương pháp khác nhau
4. **Đưa ra kết quả cuối cùng** với confidence tổng hợp

# 🏗️ GIẢI THÍCH CHI TIẾT: KNNClassifier Class

## 📋 Mục đích của KNNClassifier:

Class `KNNClassifier` là wrapper cho k-NN classifier, được thiết kế đặc biệt cho ensemble methods:

### 🔧 Các thành phần chính:

1. **`__init__`**: Khởi tạo với FAISS index và metadata
2. **`predict_with_confidence`**: Dự đoán KÈM confidence score

### 🎯 Hàm `predict_with_confidence` hoạt động như sau:

```python
def predict_with_confidence(self, query_embedding, k=3):
    # 1. Tìm k nearest neighbors trong FAISS index
    scores, indices = self.index.search(query_embedding, k)
    
    # 2. Lấy predictions từ top-k neighbors
    for i in range(k):
        neighbor_label = self.train_metadata[neighbor_idx]["label"]
        predictions.append(neighbor_label)
    
    # 3. Majority vote để có prediction chính
    predicted_label = most_common_label(predictions)
    
    # 4. Tính confidence score (QUAN TRỌNG!)
    vote_confidence = số_vote_nhiều_nhất / k
    avg_similarity = trung_bình_similarity_score_của_các_neighbor_có_nhãn_trùng_predicted_label
    final_confidence = (vote_confidence * 0.6) + (avg_similarity * 0.4)
```

### 💡 Tại sao cần confidence score?

- **Đánh giá độ tin cậy**: Biết mô hình chắc chắn đến mức nào
- **Kết hợp trong ensemble**: Các dự đoán có confidence cao được ưu tiên
- **Phát hiện trường hợp khó**: Confidence thấp → cần xem xét thêm

In [None]:
class KNNClassifier:
    """k-NN classifier wrapper that also reports a confidence score, for ensemble use.

    Args:
        index: Object exposing `search(embedding, k)` returning `(scores, ids)`.
        train_metadata: Sequence of dicts with "label" and "message" keys,
            indexed by the ids returned from `index.search`.
        name: Display name used when reporting ensemble results.
        vote_weight: Weight of the majority-vote ratio in the confidence score.
        similarity_weight: Weight of the mean similarity in the confidence score.
    """

    def __init__(self, index, train_metadata, name="KNN", vote_weight=0.6, similarity_weight=0.4):
        self.index = index
        self.train_metadata = train_metadata
        self.name = name
        self.vote_weight = vote_weight
        self.similarity_weight = similarity_weight

    def predict_with_confidence(self, query_embedding, k=3):
        """Predict a label by majority vote over k neighbours, with a confidence score.

        The confidence is
        `vote_weight * (winning votes / neighbours found)
        + similarity_weight * (mean score of the neighbours voting for the winner)`.

        Args:
            query_embedding: Array passed to `index.search` after casting to float32.
            k: Number of neighbours to retrieve (must be >= 1).

        Returns:
            (predicted_label, final_confidence, neighbor_details), where
            `final_confidence` is a built-in float and `neighbor_details` is a list
            of dicts with "label", "message" (truncated to 50 characters) and "score".

        Raises:
            ValueError: If `k` < 1 or the index returns no valid neighbour.
        """
        if k < 1:
            raise ValueError(f"k must be a positive integer, got {k}")

        # Search in FAISS index
        scores, indices = self.index.search(query_embedding.astype("float32"), k)

        predictions = []
        raw_scores = []
        neighbor_details = []

        for neighbor_idx, raw_score in zip(indices[0], scores[0]):
            # FAISS pads with id -1 when fewer than k vectors are available;
            # train_metadata[-1] would silently vote with the last training row.
            if neighbor_idx < 0:
                continue

            neighbor = self.train_metadata[neighbor_idx]
            neighbor_label = neighbor["label"]
            neighbor_message = neighbor["message"]

            predictions.append(neighbor_label)
            raw_scores.append(raw_score)
            neighbor_details.append({
                "label": neighbor_label,
                "message": neighbor_message[:50] + "..." if len(neighbor_message) > 50 else neighbor_message,
                "score": float(raw_score)
            })

        if not predictions:
            raise ValueError("No neighbours returned by the index; is it empty?")

        # Majority vote; np.unique sorts the labels, so ties go to the first label in sort order
        unique_labels, counts = np.unique(predictions, return_counts=True)
        winner = int(np.argmax(counts))
        predicted_label = unique_labels[winner]

        # Share of the retrieved neighbours that voted for the winner
        vote_confidence = counts[winner] / len(predictions)

        # Mean similarity of the neighbours that agree with the final prediction
        matching_scores = [
            score for label, score in zip(predictions, raw_scores) if label == predicted_label
        ]
        avg_similarity = np.mean(matching_scores)

        # Combined confidence: vote strength + similarity strength
        final_confidence = float(
            (vote_confidence * self.vote_weight) + (avg_similarity * self.similarity_weight)
        )

        return predicted_label, final_confidence, neighbor_details

def ensemble_knn_prediction(query_embedding, classifiers, k_values, method="weighted_voting"):
    """Combine the predictions of several k-NN classifiers into one prediction.

    Args:
        query_embedding: Query embedding passed to each classifier.
        classifiers: Sequence of objects exposing `name` and
            `predict_with_confidence(query_embedding, k=...)`.
        k_values: Sequence of k values, one per classifier (same length).
        method: Ensemble strategy:
            - "weighted_voting": sum confidences per label, pick the largest sum;
              confidence is the winner's share of the total.
            - "max_confidence": take the prediction of the most confident classifier.
            - "average_confidence": majority vote; confidence is the mean
              confidence of the classifiers that voted for the winner.

    Returns:
        (final_prediction, final_confidence, detailed_results), where
        `detailed_results` has one dict per classifier with "classifier",
        "prediction", "confidence" and "neighbors".

    Raises:
        ValueError: If `method` is unknown, no classifier is given, or
            `classifiers` and `k_values` differ in length.
    """
    # Validate up front so that no classifier runs for a request that cannot succeed
    supported_methods = ("weighted_voting", "max_confidence", "average_confidence")
    if method not in supported_methods:
        raise ValueError(f"Unknown ensemble method: {method}")

    classifiers = list(classifiers)
    k_values = list(k_values)
    if not classifiers:
        raise ValueError("At least one classifier is required for an ensemble")
    if len(classifiers) != len(k_values):
        # zip() would silently drop the surplus entries
        raise ValueError(
            f"classifiers and k_values must have the same length, "
            f"got {len(classifiers)} and {len(k_values)}"
        )

    all_predictions = []
    all_confidences = []
    detailed_results = []

    # Get predictions from all classifiers
    for classifier, k in zip(classifiers, k_values):
        pred, conf, neighbors = classifier.predict_with_confidence(query_embedding, k=k)
        all_predictions.append(pred)
        all_confidences.append(conf)
        detailed_results.append({
            "classifier": f"{classifier.name}_k{k}",
            "prediction": pred,
            "confidence": conf,
            "neighbors": neighbors
        })

    if method == "weighted_voting":
        # Each vote counts with the weight of its confidence
        label_votes = {}
        for pred, conf in zip(all_predictions, all_confidences):
            label_votes[pred] = label_votes.get(pred, 0) + conf

        final_prediction = max(label_votes.items(), key=lambda item: item[1])[0]
        total_weight = sum(label_votes.values())
        # Guard against a zero total (e.g. all confidences are 0)
        final_confidence = label_votes[final_prediction] / total_weight if total_weight else 0.0

    elif method == "max_confidence":
        # Trust the single most confident classifier
        max_conf_idx = int(np.argmax(all_confidences))
        final_prediction = all_predictions[max_conf_idx]
        final_confidence = all_confidences[max_conf_idx]

    else:  # "average_confidence"
        # Majority vote; np.unique sorts the labels, so ties go to the first label in sort order
        unique_labels, counts = np.unique(all_predictions, return_counts=True)
        final_prediction = unique_labels[np.argmax(counts)]

        # Average confidence of classifiers that predicted the final label
        matching_confidences = [
            conf for pred, conf in zip(all_predictions, all_confidences)
            if pred == final_prediction
        ]
        final_confidence = np.mean(matching_confidences)

    return final_prediction, final_confidence, detailed_results

# 🎯 GIẢI THÍCH: Hàm `ensemble_knn_prediction`

## 🚀 Đây là trái tim của ensemble system!

### 📥 Input parameters:

- **`query_embedding`**: Vector embedding của văn bản cần phân loại
- **`classifiers`**: Danh sách các KNNClassifier
- **`k_values`**: Danh sách giá trị k tương ứng [1, 3, 5]
- **`method`**: Phương pháp kết hợp dự đoán

### 🔄 Quy trình hoạt động:

```python
# Bước 1: Lấy dự đoán từ tất cả classifiers
for classifier, k in zip(classifiers, k_values):
    pred, conf, neighbors = classifier.predict_with_confidence(query_embedding, k=k)
    all_predictions.append(pred)      # ["spam", "ham", "spam"]
    all_confidences.append(conf)      # [0.8, 0.6, 0.9]

# Bước 2: Kết hợp dự đoán theo method được chọn
if method == "weighted_voting":
    # Mỗi vote có trọng số = confidence
    
elif method == "max_confidence":
    # Chọn dự đoán có confidence cao nhất
    
elif method == "average_confidence":
    # Majority vote + average confidence
```

## 🎛️ BA PHƯƠNG PHÁP ENSEMBLE:

### 1. **Weighted Voting** (Bỏ phiếu có trọng số)
```python
# Ví dụ: 
# Classifier 1 (k=1): "spam" với confidence 0.8
# Classifier 2 (k=3): "ham"  với confidence 0.6  
# Classifier 3 (k=5): "spam" với confidence 0.9

label_votes = {
    "spam": 0.8 + 0.9,  # = 1.7
    "ham":  0.6,        # = 0.6
}
# → Kết quả: "spam" (vì 1.7 > 0.6)
```

### 2. **Max Confidence** (Tin theo người chắc chắn nhất)
```python
# Chọn dự đoán từ classifier có confidence cao nhất
max_conf_idx = argmax([0.8, 0.6, 0.9])        # = 2
final_prediction = predictions[max_conf_idx]  # = "spam"
```

### 3. **Average Confidence** (Đa số + tin cậy trung bình)
```python
# Bước 1: Majority vote → "spam" (2/3 votes)
# Bước 2: Average confidence của các vote "spam"
avg_conf = (0.8 + 0.9) / 2 = 0.85
```

In [None]:
def evaluate_ensemble_accuracy(test_embeddings, test_metadata, classifiers, k_configurations,
                              ensemble_methods=("weighted_voting", "max_confidence", "average_confidence")):
    """Evaluate every (k-configuration, ensemble method) pair on the test set.

    Args:
        test_embeddings: Array of test embeddings, one row per test sample.
        test_metadata: Sequence of dicts ("label", "message") aligned with
            the rows of `test_embeddings`.
        classifiers: List of KNNClassifier objects handed to
            `ensemble_knn_prediction`.
        k_configurations: List of k-value configurations for the ensemble,
            e.g. [[1,3,5], [3,5,7], [1,5,9]].
        ensemble_methods: Iterable of ensemble method names to evaluate
            (immutable default).

    Returns:
        (results, all_errors): both are dicts keyed by configuration name and
        then by method. `results` holds accuracy, error rate, counts, confidence
        statistics and the per-sample predictions; `all_errors` holds the
        misclassified samples.

    Raises:
        ValueError: If `test_embeddings` is empty.
    """
    total_samples = len(test_embeddings)
    if total_samples == 0:
        # Previously this surfaced as a ZeroDivisionError when computing accuracy
        raise ValueError("test_embeddings is empty; cannot compute accuracy")

    results = {}
    all_errors = {}

    for config_idx, k_values in enumerate(k_configurations):
        config_name = f"config_{config_idx+1}_k{k_values}"
        results[config_name] = {}
        all_errors[config_name] = {}

        print(f"\n=== Evaluating Configuration: k_values = {k_values} ===")

        for method in ensemble_methods:
            print(f"\nEvaluating ensemble method: {method}")

            correct = 0
            errors = []
            predictions_details = []

            for i in tqdm(range(total_samples), desc=f"Testing {method}"):
                query_embedding = test_embeddings[i:i+1]
                true_label = test_metadata[i]["label"]
                true_message = test_metadata[i]["message"]

                # Get ensemble prediction
                pred, conf, details = ensemble_knn_prediction(
                    query_embedding, classifiers, k_values, method=method
                )

                # Keep the full record of this prediction
                prediction_info = {
                    "index": i,
                    "true_label": true_label,
                    "predicted_label": pred,
                    "confidence": conf,
                    "message": true_message,
                    "ensemble_details": details
                }
                predictions_details.append(prediction_info)

                if pred == true_label:
                    correct += 1
                else:
                    # Shallow copy so the extra key does not leak into predictions_details
                    error_info = prediction_info.copy()
                    error_info["error_type"] = "misclassification"
                    errors.append(error_info)

            # Calculate metrics
            accuracy = correct / total_samples
            error_rate = len(errors) / total_samples

            # Calculate confidence statistics
            confidences = [p["confidence"] for p in predictions_details]
            avg_confidence = np.mean(confidences)
            confidence_std = np.std(confidences)

            # Store results
            results[config_name][method] = {
                "accuracy": accuracy,
                "error_rate": error_rate,
                "correct_predictions": correct,
                "total_predictions": total_samples,
                "avg_confidence": avg_confidence,
                "confidence_std": confidence_std,
                "predictions": predictions_details
            }
            all_errors[config_name][method] = errors

            print(f"  Accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)")
            print(f"  Average Confidence: {avg_confidence:.4f} ± {confidence_std:.4f}")
            print(f"  Errors: {len(errors)}/{total_samples}")

    return results, all_errors

def compare_ensemble_vs_single(test_embeddings, test_metadata, index, train_metadata):
    """Compare ensemble k-NN methods against single k-NN classifiers and print a report.

    Args:
        test_embeddings: Array of test embeddings, one row per test sample.
        test_metadata: Sequence of metadata dicts aligned with `test_embeddings`.
        index: Search index shared by all classifiers.
        train_metadata: Metadata of the indexed training samples.

    Returns:
        dict with "ensemble_results", "ensemble_errors", "single_knn_results",
        "single_knn_errors", "best_single" ((k, accuracy) tuple), "best_ensemble"
        ((config_name, method, method_results) tuple, or None when nothing was
        evaluated) and "improvement" (best ensemble accuracy minus best single accuracy).
    """

    def _k_label(config_name):
        # Config names look like "config_<n>_k[1, 3, 5]"; recover the k list for display
        return config_name.split('_k')[1] if '_k' in config_name else "unknown"

    print("=== COMPREHENSIVE COMPARISON: ENSEMBLE vs SINGLE k-NN ===\n")

    # Define k-value configurations for ensemble
    k_configurations = [
        [1, 3, 5],    # Conservative: low k values
        [3, 5, 7],    # Moderate: medium k values
        [5, 7, 9],    # Liberal: higher k values
        [1, 5, 9],    # Mixed: diverse k values
    ]

    # One classifier per ensemble slot, derived from the configurations so that
    # no k value is silently ignored if a configuration is changed
    n_classifiers = max(len(k_values) for k_values in k_configurations)
    classifiers = [KNNClassifier(index, train_metadata, f"KNN_{i+1}") for i in range(n_classifiers)]

    # 1. Evaluate ensemble methods
    print("1. ENSEMBLE EVALUATION")
    print("-" * 50)
    ensemble_results, ensemble_errors = evaluate_ensemble_accuracy(
        test_embeddings, test_metadata, classifiers, k_configurations
    )

    # 2. Evaluate single k-NN for comparison
    print("\n2. SINGLE k-NN BASELINE EVALUATION")
    print("-" * 50)
    single_knn_results, single_knn_errors = evaluate_knn_accuracy(
        test_embeddings, test_metadata, index, train_metadata,
        k_values=[1, 3, 5, 7, 9]
    )

    # 3. Find best performing methods
    print("\n3. PERFORMANCE COMPARISON")
    print("=" * 70)

    # Best single k-NN
    best_single_k = max(single_knn_results.items(), key=lambda x: x[1])
    print(f"Best Single k-NN: k={best_single_k[0]}, Accuracy={best_single_k[1]:.4f}")

    # Flatten the nested results once; reused for the best pick and the ranking
    flat_results = [
        (config_name, method, method_results)
        for config_name, config_results in ensemble_results.items()
        for method, method_results in config_results.items()
    ]

    # max() returns the first maximum, and unlike a "> 0" scan it still selects
    # an entry when every accuracy is 0.0
    best_ensemble = max(flat_results, key=lambda item: item[2]["accuracy"]) if flat_results else None
    best_ensemble_acc = best_ensemble[2]["accuracy"] if best_ensemble else 0

    if best_ensemble:
        config_name, method, best_metrics = best_ensemble
        print(f"Best Ensemble: {method} with k_values={_k_label(config_name)}")
        print(f"  Accuracy: {best_metrics['accuracy']:.4f}")
        print(f"  Confidence: {best_metrics['avg_confidence']:.4f} ± {best_metrics['confidence_std']:.4f}")

    # Performance improvement
    improvement = best_ensemble_acc - best_single_k[1]
    print(f"\nEnsemble Improvement: {improvement:+.4f} ({improvement*100:+.2f}%)")

    # 4. Detailed analysis
    print("\n4. DETAILED ANALYSIS")
    print("-" * 50)

    print("Single k-NN Results:")
    for k, acc in sorted(single_knn_results.items()):
        print(f"  k={k}: {acc:.4f} ({acc*100:.2f}%)")

    print("\nTop Ensemble Results:")
    # Stable sort by accuracy, best first
    ranked = sorted(flat_results, key=lambda item: item[2]["accuracy"], reverse=True)

    for i, (config, method, method_results) in enumerate(ranked[:5]):
        acc = method_results["accuracy"]
        conf = method_results["avg_confidence"]
        print(f"  {i+1}. {method} (k={_k_label(config)}): {acc:.4f} (conf: {conf:.3f})")

    return {
        "ensemble_results": ensemble_results,
        "ensemble_errors": ensemble_errors,
        "single_knn_results": single_knn_results,
        "single_knn_errors": single_knn_errors,
        "best_single": best_single_k,
        "best_ensemble": best_ensemble,
        "improvement": improvement
    }

# 📊 ĐÁNH GIÁ VÀ SO SÁNH PERFORMANCE

## 🏆 Mục tiêu đánh giá:

Chúng ta muốn trả lời câu hỏi: **"Ensemble có thật sự tốt hơn Single k-NN không?"**

## 🔬 Phương pháp thử nghiệm:

### 1. **Multiple K-Configurations**
```python
k_configurations = [
    [1, 3, 5],    # Conservative: k nhỏ
    [3, 5, 7],    # Moderate: k trung bình
    [5, 7, 9],    # Liberal: k lớn
    [1, 5, 9],    # Mixed: k đa dạng
]
```

### 2. **Multiple Ensemble Methods**
- `weighted_voting`: Bỏ phiếu có trọng số
- `max_confidence`: Chọn người tự tin nhất
- `average_confidence`: Đa số + tin cậy trung bình

### 3. **Comprehensive Comparison**
```python
# So sánh với Single k-NN baseline
single_knn_results = {
    1: 0.92,    # k=1 → 92% accuracy
    3: 0.94,    # k=3 → 94% accuracy  
    5: 0.93,    # k=5 → 93% accuracy
}

# So sánh với Ensemble
ensemble_results = {
    "weighted_voting + [1,3,5]": 0.95,    # 95% accuracy
    "max_confidence + [3,5,7]": 0.94,     # 94% accuracy
}
```

## 📈 Metrics được đo:

1. **Accuracy**: Tỷ lệ dự đoán đúng
2. **Confidence**: Mức độ tin cậy trung bình
3. **Error Analysis**: Phân tích lỗi chi tiết
4. **Improvement**: Cải thiện so với baseline

## 💡 Kỳ vọng kết quả:

- **Ensemble tốt hơn**: Accuracy cao hơn + có confidence score
- **Single k-NN tốt hơn**: Đơn giản, nhanh, ít tài nguyên
- **Tương đương**: Ensemble cung cấp thêm thông tin confidence

In [None]:
# 5. Đánh giá accuracy trên test set - Cả Single k-NN và Ensemble
%%time
print("="*70)
print("COMPREHENSIVE EVALUATION: SINGLE k-NN vs ENSEMBLE METHODS")
print("="*70)

# Đánh giá chi tiết với comparison
comparison_results = compare_ensemble_vs_single(
    X_test_emb, test_metadata, index, train_metadata
)

# Lưu kết quả phân tích
import json
from datetime import datetime

# Tạo comprehensive analysis report
comprehensive_analysis = {
    "timestamp": datetime.now().isoformat(),
    "model": MODEL_NAME,
    "test_size": len(X_test_emb),
    "evaluation_summary": {
        "best_single_knn": {
            "k": comparison_results["best_single"][0],
            "accuracy": comparison_results["best_single"][1]
        },
        "best_ensemble": {
            "config": comparison_results["best_ensemble"][0] if comparison_results["best_ensemble"] else None,
            "method": comparison_results["best_ensemble"][1] if comparison_results["best_ensemble"] else None,
            "accuracy": comparison_results["best_ensemble"][2]["accuracy"] if comparison_results["best_ensemble"] else None,
            "confidence": comparison_results["best_ensemble"][2]["avg_confidence"] if comparison_results["best_ensemble"] else None
        },
        "improvement": comparison_results["improvement"]
    },
    "single_knn_results": comparison_results["single_knn_results"],
    "ensemble_results": comparison_results["ensemble_results"],
    "detailed_errors": {
        "single_knn": comparison_results["single_knn_errors"],
        "ensemble": comparison_results["ensemble_errors"]
    }
}

# Lưu file phân tích toàn diện
comprehensive_file = "comprehensive_evaluation_analysis.json"
with open(comprehensive_file, "w", encoding="utf-8") as f:
    json.dump(comprehensive_analysis, f, ensure_ascii=False, indent=2)

print(f"\n***Comprehensive analysis saved to: {comprehensive_file}***")

# Summary thống kê
print("\n" + "="*70)
print("FINAL SUMMARY")
print("="*70)
print(f"Dataset: {len(X_test_emb)} test samples")
print(f"Best Single k-NN: k={comparison_results['best_single'][0]} → {comparison_results['best_single'][1]:.4f}")
if comparison_results['best_ensemble']:
    best_ens = comparison_results['best_ensemble']
    config_k = best_ens[0].split('_k')[1] if '_k' in best_ens[0] else "unknown"
    print(f"Best Ensemble: {best_ens[1]} (k={config_k}) → {best_ens[2]['accuracy']:.4f}")
    print(f"Improvement: {comparison_results['improvement']:+.4f} ({comparison_results['improvement']*100:+.2f}%)")
else:
    print("No ensemble method evaluated")
print("="*70)

Evaluating accuracy on test set...


Evaluating k=1: 100%|██████████| 558/558 [00:00<00:00, 878.71it/s]


Accuracy with k=1: 0.9857
Number of errors with k=1: 8/558 (1.43%)


Evaluating k=3: 100%|██████████| 558/558 [00:00<00:00, 691.05it/s]


Accuracy with k=3: 0.9928
Number of errors with k=3: 4/558 (0.72%)


Evaluating k=5: 100%|██████████| 558/558 [00:00<00:00, 838.23it/s]

Accuracy with k=5: 0.9910
Number of errors with k=5: 5/558 (0.90%)

ACCURACY RESULTS
Top-1 accuracy: 0.9857 (98.57%)
Top-3 accuracy: 0.9928 (99.28%)
Top-5 accuracy: 0.9910 (99.10%)

***Error analysis saved to: error_analysis.json***

***Summary:
 k=1: 8 errors out of 558 samples
 k=3: 4 errors out of 558 samples
 k=5: 5 errors out of 558 samples
CPU times: user 2.11 s, sys: 20 ms, total: 2.13 s
Wall time: 2.12 s





In [None]:
# 6. Enhanced Pipeline classification cho user input
def _embed_query(text):
    """Embed one query text with the notebook-level model, tokenizer and device.

    Returns the L2-normalised embedding as a NumPy array (one row for the query).
    """
    encoded = tokenizer(
        [f"query: {text}"],
        max_length=512,
        padding=True,
        truncation=True,
        return_tensors="pt"
    )
    encoded = {name: tensor.to(device) for name, tensor in encoded.items()}

    with torch.no_grad():
        outputs = model(**encoded)
        embedding = average_pool(outputs.last_hidden_state, encoded["attention_mask"])
        embedding = F.normalize(embedding, p=2, dim=1)
    return embedding.cpu().numpy()


def spam_classifier_pipeline(user_input, mode="single", k=3, ensemble_config=None):
    """Classify one text as spam/ham with a single k-NN or an ensemble of k-NNs.

    Uses the notebook-level `model`, `tokenizer`, `device`, `index` and
    `train_metadata`, and prints a human-readable report while running.

    Args:
        user_input (str): Text to classify.
        mode (str): "single" for single k-NN or "ensemble" for ensemble prediction.
        k (int): Number of nearest neighbors (single mode only).
        ensemble_config (dict): Optional ensemble settings, e.g.
            {"k_values": [1,3,5], "method": "weighted_voting"}. Missing keys
            fall back to these defaults, so a partial dict is accepted.

    Returns:
        dict: Classification result. Both modes include "mode", "prediction",
        "confidence" (None in single mode) and "label_distribution"; single mode
        adds "neighbors" and "k_used", ensemble mode adds "ensemble_method",
        "k_values", "individual_results" and "all_neighbors".

    Raises:
        ValueError: If `mode` is neither "single" nor "ensemble".
    """
    # Fail before printing or embedding anything when the mode is invalid
    if mode not in ("single", "ensemble"):
        raise ValueError(f"Unknown mode: {mode}. Use 'single' or 'ensemble'")

    print()
    print(f"***Classifying: \"{user_input}\"")
    print(f"***Mode: {mode.upper()}")
    print()

    if mode == "single":
        print(f"***Using single k-NN with k={k}")

        # Get prediction and neighbors using existing function
        prediction, neighbors = classify_with_knn(
            user_input, model, tokenizer, device, index, train_metadata, k=k
        )

        print(f"***Prediction: {prediction.upper()}")
        print()
        print("***Top neighbors:")
        for i, neighbor in enumerate(neighbors, 1):
            print(f"{i}. Label: {neighbor['label']} | Score: {neighbor['score']:.4f}")
            print(f"    Message: {neighbor['message']}")
            print()

        # Count label distribution among the neighbours
        neighbor_labels = [n["label"] for n in neighbors]
        label_counts = {label: neighbor_labels.count(label) for label in set(neighbor_labels)}

        return {
            "mode": "single",
            "prediction": prediction,
            "confidence": None,  # Single mode doesn't calculate confidence
            "neighbors": neighbors,
            "label_distribution": label_counts,
            "k_used": k
        }

    # mode == "ensemble"
    # Merge the caller's settings over the defaults so a partial dict does not raise KeyError
    config = {"k_values": [1, 3, 5], "method": "weighted_voting"}
    config.update(ensemble_config or {})
    k_values = config["k_values"]
    method = config["method"]

    print(f"***Using ensemble with k_values={k_values}, method='{method}'")

    query_embedding = _embed_query(user_input)

    # One classifier per k value
    classifiers = [KNNClassifier(index, train_metadata, f"KNN_{i+1}") for i in range(len(k_values))]

    # Get ensemble prediction
    prediction, confidence, detailed_results = ensemble_knn_prediction(
        query_embedding, classifiers, k_values, method=method
    )

    print(f"***Ensemble Prediction: {prediction.upper()}")
    print(f"***Ensemble Confidence: {confidence:.4f}")
    print()

    print("***Individual Classifier Results:")
    for i, result in enumerate(detailed_results, 1):
        print(f"{i}. {result['classifier']}: {result['prediction']} (conf: {result['confidence']:.4f})")
        print(f"    Top neighbor: {result['neighbors'][0]['message']}")
        print()

    # Aggregate all neighbors from all classifiers
    all_neighbors = []
    for result in detailed_results:
        all_neighbors.extend(result['neighbors'])

    # Count predictions from all classifiers
    all_predictions = [r['prediction'] for r in detailed_results]
    label_counts = {label: all_predictions.count(label) for label in set(all_predictions)}

    return {
        "mode": "ensemble",
        "prediction": prediction,
        "confidence": confidence,
        "ensemble_method": method,
        "k_values": k_values,
        "individual_results": detailed_results,
        "label_distribution": label_counts,
        "all_neighbors": all_neighbors
    }

def interactive_comparison(user_input, k_single=3, ensemble_config=None):
    """
    Classify one text with both pipeline modes and print a side-by-side report.

    Args:
        user_input: Text forwarded to `spam_classifier_pipeline`.
        k_single: Value passed as `k` to the single-mode call.
        ensemble_config: Dict forwarded to the ensemble-mode call; when None,
            {"k_values": [1, 3, 5], "method": "weighted_voting"} is used.

    Returns:
        Dict with "single_result", "ensemble_result" and "agreement"
        (True when both results carry the same 'prediction').
    """
    config = ensemble_config
    if config is None:
        config = {"k_values": [1, 3, 5], "method": "weighted_voting"}

    heavy_rule = "=" * 80
    light_rule = "-" * 80

    print(heavy_rule)
    print("SINGLE vs ENSEMBLE COMPARISON")
    print(heavy_rule)

    # Baseline first: one k-NN vote with a fixed k
    print("\n[SINGLE k-NN PREDICTION]")
    single_result = spam_classifier_pipeline(user_input, mode="single", k=k_single)

    print("\n" + light_rule)

    # Then the ensemble on the very same text
    print("\n[ENSEMBLE PREDICTION]")
    ensemble_result = spam_classifier_pipeline(user_input, mode="ensemble", ensemble_config=config)

    print("\n" + heavy_rule)
    print("COMPARISON SUMMARY")
    print(heavy_rule)

    single_label = single_result['prediction']
    print(f"Single k-NN (k={k_single}):     {single_label.upper()}")

    ensemble_label = ensemble_result['prediction']
    print(f"Ensemble ({config['method']}): {ensemble_label.upper()} (conf: {ensemble_result['confidence']:.4f})")

    same_label = single_label == ensemble_label
    if same_label:
        agreement = "✓ AGREE"
    else:
        agreement = "✗ DISAGREE"
    print(f"Agreement: {agreement}")
    print(heavy_rule)

    return {
        "single_result": single_result,
        "ensemble_result": ensemble_result,
        "agreement": same_label
    }

# 🚀 PIPELINE THỰC TẾ: Từ Text đến Kết quả

## 🎯 Hàm `spam_classifier_pipeline`:

Đây là **complete end-to-end pipeline** để phân loại spam với hỗ trợ ensemble:

### 📥 Input:
- **`user_input`**: Text cần phân loại
- **`mode`**: "single" hoặc "ensemble"  
- **`k`**: Số neighbors (cho single mode)
- **`ensemble_config`**: Cấu hình ensemble

### 🔄 Quy trình hoạt động:

```python
# Bước 1: Chuyển text thành embedding
user_input = "FREE! Win $1000 NOW!"
query_embedding = model.encode(f"query: {user_input}")

# Bước 2: Phân loại
if mode == "single":
    # Dùng 1 classifier với k cố định
    prediction = classify_with_knn(user_input, k=3)
    
elif mode == "ensemble":
    # Dùng nhiều classifiers với k khác nhau
    classifiers = [KNN_1, KNN_2, KNN_3]
    k_values = [1, 3, 5]
    prediction, confidence = ensemble_knn_prediction(...)
```

## 🆚 So sánh Mode:

### **Single Mode** (Truyền thống):
✅ **Ưu điểm**: Nhanh, đơn giản, ít tài nguyên  
❌ **Nhược điểm**: Không có confidence, có thể kém chính xác

### **Ensemble Mode** (Hiện đại):
✅ **Ưu điểm**: Chính xác hơn, có confidence score, robust  
❌ **Nhược điểm**: Chậm hơn, phức tạp hơn, tốn tài nguyên

## 🎭 Use Cases thực tế:

### 1. **Email Spam Filter**
```python
# High confidence → Auto action
if confidence > 0.8:
    move_to_spam_folder()
    
# Medium confidence → Flag for review  
elif confidence > 0.5:
    mark_as_suspicious()
    
# Low confidence → Keep in inbox
else:
    keep_in_inbox()
```

### 2. **SMS Spam Detection**
```python
# Compare methods for critical decision
single_result = spam_classifier_pipeline(sms, mode="single", k=3)
ensemble_result = spam_classifier_pipeline(sms, mode="ensemble",
                                           ensemble_config={"k_values": [1, 3, 5], "method": "weighted_voting"})

if single_result['prediction'] == ensemble_result['prediction']:
    # Both agree → High confidence
    return ensemble_result
else:
    # Disagree → Manual review needed
    return {"status": "manual_review_needed", "confidence": "low"}
```

## 📊 Kết quả Pipeline trả về:

```python
{
    "mode": "ensemble",
    "prediction": "spam",
    "confidence": 0.87,
    "ensemble_method": "weighted_voting", 
    "k_values": [1, 3, 5],
    "individual_results": [
        {"classifier": "KNN_1", "prediction": "spam", "confidence": 0.9},
        {"classifier": "KNN_2", "prediction": "spam", "confidence": 0.8},
        {"classifier": "KNN_3", "prediction": "ham",  "confidence": 0.6}
    ],
    "label_distribution": {"spam": 2, "ham": 1},
    "all_neighbors": [...]  # neighbors gộp từ tất cả classifiers
}
```

In [None]:
# 7. Comprehensive Testing - Single vs Ensemble
# Sample messages pushed through both pipeline modes.
test_examples = [
    "I am actually thinking a way of doing something useful",
    "FREE!! Click here to win $1000 NOW! Limited time offer!",
    "Hello, how are you doing today?",
    "URGENT: Your account will be suspended! Click here immediately!",
    "Machine learning is a fascinating field of study",
]

# Ensemble setups that are compared against the single k-NN run
ensemble_configs = [
    dict(k_values=[1, 3, 5], method="weighted_voting"),
    dict(k_values=[3, 5, 7], method="max_confidence"),
    dict(k_values=[1, 5, 9], method="average_confidence"),
]

wide_rule = "=" * 90
print(wide_rule)
print("COMPREHENSIVE TESTING: SINGLE k-NN vs ENSEMBLE METHODS")
print(wide_rule)

for i, example in enumerate(test_examples, 1):
    side_rule = "=" * 20
    print(f"\n{side_rule} EXAMPLE {i} {side_rule}")
    print(f'Text: "{example}"')
    print("=" * 60)

    # Baseline: one k-NN vote with k=3
    print("\n🔸 SINGLE k-NN (k=3):")
    single_result = spam_classifier_pipeline(example, mode="single", k=3)

    # Same message through every ensemble setup, results kept in config order
    print("\n🔹 ENSEMBLE PREDICTIONS:")
    ensemble_results = []
    for j, config in enumerate(ensemble_configs, 1):
        k_list, method_name = config['k_values'], config['method']
        print(f"\n  Config {j}: k={k_list}, method='{method_name}'")
        ensemble_result = spam_classifier_pipeline(example, mode="ensemble", ensemble_config=config)
        ensemble_results.append(ensemble_result)

    # Per-example recap: each ensemble result is paired with the config that produced it
    print(f"\n📊 SUMMARY FOR EXAMPLE {i}:")
    print(f"  Single k-NN:    {single_result['prediction'].upper()}")
    for j, (config, result) in enumerate(zip(ensemble_configs, ensemble_results), 1):
        print(f"  Ensemble {j}:     {result['prediction'].upper()} (conf: {result['confidence']:.3f}) - {config['method']}")

print("\n" + wide_rule)
print("INTERACTIVE TESTING SECTION")
print(wide_rule)

# One detailed single-vs-ensemble comparison on a fixed message
print("\n🧪 DETAILED COMPARISON TEST:")
user_text = "Win a free iPhone! Click here now!"
comparison_result = interactive_comparison(
    user_text,
    k_single=5,
    ensemble_config=dict(k_values=[1, 3, 5, 7], method="weighted_voting"),
)

print("\n🎯 FINAL RECOMMENDATION:")
if not comparison_result["agreement"]:
    print("⚠️  Methods disagree - Consider manual review or use ensemble confidence score")
    print(f"   Ensemble confidence: {comparison_result['ensemble_result']['confidence']:.4f}")
else:
    print("✅ Both methods agree - High confidence in prediction")

  return forward_call(*args, **kwargs)



--- Example 1: "I am actually thinking a way of doing something useful" ---

***Classifying: "I am actually thinking a way of doing something useful"

***Using top-3 nearest neighbors

***Prediction: HAM

***Top neighbors:
1. Label: ham | Score: 0.8424
    Message: yeah, that's what I was thinking

2. Label: ham | Score: 0.8412
    Message: that would be good … I'll phone you tomo lunchtime, shall I, to organise something?

3. Label: ham | Score: 0.8344
    Message: See? I thought it all through


--- Example 2: "FREE!! Click here to win $1000 NOW! Limited time offer!" ---

***Classifying: "FREE!! Click here to win $1000 NOW! Limited time offer!"

***Using top-3 nearest neighbors

***Prediction: SPAM

***Top neighbors:
1. Label: spam | Score: 0.8567
    Message: FREE MESSAGE Activate your 500 FREE Text Messages by replying to this message with the word FREE For...

2. Label: spam | Score: 0.8567
    Message: FREE MESSAGE Activate your 500 FREE Text Messages by replying to this message

In [None]:
# 8. Detailed Performance Analysis and Recommendations

def analyze_ensemble_performance(comprehensive_results):
    """
    Print a detailed comparison of single k-NN and ensemble evaluation results.

    Args:
        comprehensive_results: dict with two entries:
            - "single_knn_results": mapping k -> accuracy.
            - "ensemble_results": mapping config name -> {method name -> stats},
              where every stats dict provides "accuracy" and "avg_confidence";
              "confidence_std" is additionally read for the best ensemble run.

    Returns:
        dict with:
            - "best_single": (k, accuracy) of the most accurate single k-NN,
              or None when there are no single k-NN results.
            - "best_ensemble": (config_name, method, stats) of the most
              accurate ensemble run, or None when there are no ensemble results.
            - "method_performance": method name -> list of accuracies.
            - "all_rankings": up to 10 (type, configuration, accuracy,
              confidence) tuples sorted by accuracy, best first.

    Raises:
        KeyError: if a required key is missing from the input.
    """
    print("="*80)
    print("DETAILED ENSEMBLE PERFORMANCE ANALYSIS")
    print("="*80)

    single_results = comprehensive_results["single_knn_results"]
    ensemble_results = comprehensive_results["ensemble_results"]

    # Flatten the nested {config -> {method -> stats}} mapping once; every
    # section below iterates over the same (config_name, method, stats) runs.
    ensemble_runs = [
        (config_name, method, results)
        for config_name, config_data in ensemble_results.items()
        for method, results in config_data.items()
    ]

    # 1. Best configurations analysis
    print("\n1. 📈 BEST CONFIGURATIONS RANKING:")
    print("-" * 50)

    all_methods = [("Single k-NN", f"k={k}", acc, None) for k, acc in single_results.items()]
    all_methods += [
        ("Ensemble", f"{method} (k={_k_values_label(config_name)})",
         results["accuracy"], results["avg_confidence"])
        for config_name, method, results in ensemble_runs
    ]

    # Stable sort: entries with equal accuracy keep their insertion order
    all_methods.sort(key=lambda x: x[2], reverse=True)

    print("Rank | Method      | Configuration                    | Accuracy | Confidence")
    print("-" * 75)
    for i, (method_type, config, acc, conf) in enumerate(all_methods[:10], 1):
        conf_str = f"{conf:.3f}" if conf is not None else "  N/A"
        print(f"{i:4d} | {method_type:11} | {config:30} | {acc:.4f}  | {conf_str}")

    # 2. Ensemble vs Single comparison
    print("\n2. 🔄 ENSEMBLE vs SINGLE k-NN COMPARISON:")
    print("-" * 50)

    # max() raises on an empty iterable, so empty inputs are mapped to None.
    # max() keeps the first of several equal maxima, and a run with accuracy
    # 0.0 is still a valid candidate.
    best_single = max(single_results.items(), key=lambda x: x[1]) if single_results else None
    best_ensemble = max(ensemble_runs, key=lambda run: run[2]["accuracy"]) if ensemble_runs else None

    if best_single is not None:
        print(f"Best Single k-NN:  k={best_single[0]:<3} → {best_single[1]:.4f} accuracy")
    else:
        print("Best Single k-NN:  no single k-NN results available")

    if best_ensemble:
        config_name, method, results = best_ensemble
        k_vals = _k_values_label(config_name)
        print(f"Best Ensemble:     {method} (k={k_vals}) → {results['accuracy']:.4f} accuracy")
        print(f"                   Confidence: {results['avg_confidence']:.4f} ± {results['confidence_std']:.4f}")

        # The improvement is only defined when there is a single k-NN baseline
        if best_single is not None:
            improvement = results['accuracy'] - best_single[1]
            print(f"Improvement:       {improvement:+.4f} ({improvement*100:+.2f}%)")

            if improvement > 0:
                print("✅ Ensemble provides better accuracy!")
            elif improvement > -0.01:  # Within 1% is considered similar
                print("⚖️ Similar performance, but ensemble provides confidence scores")
            else:
                print("❌ Single k-NN performs better")

    # 3. Method-specific analysis
    print("\n3. 🔍 ENSEMBLE METHOD ANALYSIS:")
    print("-" * 50)

    method_performance = {}
    for _, method, results in ensemble_runs:
        method_performance.setdefault(method, []).append(results['accuracy'])

    for method, accuracies in method_performance.items():
        avg_acc = np.mean(accuracies)
        std_acc = np.std(accuracies)
        max_acc = np.max(accuracies)
        print(f"{method:18}: avg={avg_acc:.4f} ± {std_acc:.4f}, max={max_acc:.4f}")

    # 4. Recommendations
    print("\n4. 💡 RECOMMENDATIONS:")
    print("-" * 50)

    ensemble_wins = best_ensemble is not None and (
        best_single is None or best_ensemble[2]['accuracy'] > best_single[1]
    )
    if ensemble_wins:
        config_name, method, results = best_ensemble
        k_vals = _k_values_label(config_name)
        print(f"✅ USE ENSEMBLE: {method} with k_values={k_vals}")
        print(f"   - Higher accuracy: {results['accuracy']:.4f}")
        print(f"   - Confidence scores available: {results['avg_confidence']:.3f}")
        print("   - More robust predictions")

        if results['avg_confidence'] > 0.7:
            print(f"   - High confidence predictions (>{results['avg_confidence']:.3f})")
        elif results['avg_confidence'] > 0.5:
            print("   - Moderate confidence predictions")
        else:
            print("   - Lower confidence - consider tuning")

    elif best_single is not None:
        print(f"✅ USE SINGLE k-NN: k={best_single[0]}")
        print("   - Simpler and faster")
        print(f"   - Good accuracy: {best_single[1]:.4f}")
        print("   - Less computational overhead")
    else:
        print("⚠️ No evaluation results available - nothing to recommend")

    print("\n5. 🎛️ TUNING SUGGESTIONS:")
    print("-" * 50)
    print("For better ensemble performance, consider:")
    print("• Testing k-values with larger gaps: [1, 5, 15] or [1, 7, 21]")
    print("• Using different similarity metrics or distance functions")
    print("• Implementing confidence-based thresholding")
    print("• Adding more diverse base classifiers")

    return {
        "best_single": best_single,
        "best_ensemble": best_ensemble,
        "method_performance": method_performance,
        "all_rankings": all_methods[:10]
    }


def _k_values_label(config_name):
    """Return the text following the first '_k' in a config name, or "unknown" if absent."""
    return config_name.split('_k')[1] if '_k' in config_name else "unknown"

# Run the detailed analysis, but only when the evaluation cell has already
# produced `comparison_results` in this namespace; otherwise just warn.
if 'comparison_results' not in locals():
    print("⚠️ Run the evaluation cell first to get comparison_results")
else:
    analysis_summary = analyze_ensemble_performance(comparison_results)

# 🎯 TỔNG KẾT VÀ KHUYẾN NGHỊ

## 🏆 Kết luận về Ensemble Methods:

### 📊 **Khi nào nên dùng Ensemble?**

✅ **SỬ DỤNG ENSEMBLE khi:**
- Cần độ chính xác cao nhất có thể
- Cần confidence score để đánh giá độ tin cậy
- Có đủ tài nguyên tính toán
- Dữ liệu phức tạp, có noise
- Quyết định có tác động lớn (critical applications)

✅ **SỬ DỤNG SINGLE k-NN khi:**
- Cần tốc độ nhanh, real-time
- Tài nguyên hạn chế
- Dữ liệu đơn giản, ít noise
- Độ chính xác hiện tại đã đủ tốt
- Prototype hoặc testing nhanh

## 🔬 **Insights từ thử nghiệm:**

### 1. **Best Ensemble Configuration:**
```python
# Thường hoạt động tốt nhất:
config = {
    "k_values": [1, 3, 5],           # Đa dạng k values
    "method": "weighted_voting",     # Tận dụng confidence
    "classifiers": 3                 # Đủ để có consensus
}
```

### 2. **Performance Improvement:**
- **Accuracy gain**: Thường cải thiện 1-3%
- **Confidence info**: Giá trị lớn cho decision making
- **Robustness**: Ít bị ảnh hưởng bởi outliers

### 3. **Trade-offs:**
- **Time**: Ensemble chậm hơn ~3x
- **Memory**: Cần lưu nhiều models
- **Complexity**: Code phức tạp hơn

## 💡 **Recommendations cho Production:**

### 🎛️ **Hybrid Approach** (Khuyến nghị):
```python
def smart_spam_classifier(text):
    # Quick single k-NN check first
    quick_result = single_knn_predict(text, k=3)
    
    # Use ensemble for uncertain cases
    if quick_result['similarity_score'] < threshold:
        return ensemble_predict(text, config=best_config)
    else:
        return quick_result
```

### 🔧 **Tuning Tips:**

1. **K-values selection:**
   - Dùng k lẻ để tránh hòa phiếu (tie) khi vote giữa 2 lớp
   - Test k với khoảng cách lớn: [1, 5, 15]
   - Avoid k quá lớn (>10% dataset size)

2. **Ensemble method selection:**
   - `weighted_voting`: Tốt nhất cho most cases
   - `max_confidence`: Khi có 1 classifier rất mạnh
   - `average_confidence`: Khi muốn conservative

3. **Performance optimization:**
   - Cache embeddings để tránh tính lại
   - Use batch processing cho multiple queries
   - Consider quantization cho FAISS index

## 📈 **Next Steps:**

1. **Advanced Ensemble:**
   - Stacking với meta-learner
   - Boosting methods
   - Different similarity metrics

2. **Production Monitoring:**
   - Track confidence distribution
   - Monitor accuracy over time  
   - A/B test ensemble vs single

3. **Domain Adaptation:**
   - Fine-tune cho specific spam types
   - Update model với new data
   - Handle concept drift