In [1]:
import os
import pickle
import gzip
import pandas as pd
import numpy as np
import torch
from time import time
from tqdm import tqdm

from src.datasets.caltech256 import Caltech256DataModule
from src.featuring.rgb_histogram import RGBHistogram
from src.storage.VectorDBStore import VectorDBStore
from src.retrieval.KNN import KNNRetrieval
from src.pipeline import CBIR
from src.metrics import average_precision, recall, hit_rate


In [2]:
os.makedirs('out', exist_ok=True)

# INDEXING

In [18]:
TRAIN_SIZE = 24607  # Số lượng ảnh để index
TEST_SIZE = 6000   # Số lượng ảnh để test
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Khởi tạo dataset
import os
root_path = os.path.abspath('data/caltech-256/256_ObjectCategories')
print(f"🔍 Dataset root: {root_path}")
print(f"🔍 Exists: {os.path.exists(root_path)}")
data_module = Caltech256DataModule(batch_size=32, root=root_path)
data_module.setup()
train_loader = data_module.train_dataloader()
test_loader = data_module.test_dataloader()

# Hàm để test một metric
def test_metric(metric_name):
    print(f"\n🔬 Testing with {metric_name.upper()} metric...")
    
    # Khởi tạo CBIR pipeline cho metric này
    feature_extractor = RGBHistogram(n_bin=8, h_type="global")
    retrieval = KNNRetrieval(metric=metric_name)
    storage = VectorDBStore(retrieval)
    cbir = CBIR(feature_extractor, storage)
    
    # Indexing
    print(f"Indexing {TRAIN_SIZE} images...")
    start = time()
    indexed = 0
    
    for images, labels, _ in tqdm(train_loader, desc=f"Indexing ({metric_name})"):
        if indexed >= TRAIN_SIZE:
            break
    
        if device.type == "cuda":
            images = images.to(device)
    
        count = min(len(images), TRAIN_SIZE - indexed)
        images = images[:count]
        images = (images.cpu().numpy().transpose(0, 2, 3, 1) * 255).astype(np.uint8)
    
        cbir.add_images(images)
        indexed += count
    
    indexing_time = time() - start
    print(f"Indexed {indexed} images in {indexing_time:.2f}s")
    
    # Save model
    model_path = f'out/caltech256_model_{metric_name}.pkl.gz'
    with gzip.open(model_path, 'wb') as f:
        pickle.dump(cbir, f)
    
    file_size = os.path.getsize(model_path) / 1024 / 1024
    print(f"💾 Model saved: {file_size:.2f} MB")
    
    return cbir, indexing_time, file_size, indexed

# Test cả 2 metrics
results_comparison = {}
models = {}

for metric in ["cosine", "euclidean"]:
    models[metric], indexing_time, file_size, indexed = test_metric(metric)
    results_comparison[metric] = {
        'indexing_time': indexing_time,
        'file_size': file_size,
        'indexed_images': indexed
    }

🔍 Dataset root: D:\AI\Food-CBIR\src\evaluation\data\caltech-256\256_ObjectCategories
🔍 Exists: True
📂 Found 256 valid categories
📊 Loaded 29780 total images from 256 classes
📋 Train: 23824 images
📂 Found 256 valid categories
📊 Loaded 29780 total images from 256 classes
📋 Test: 5956 images
Train: 23824, Test: 5956

🔬 Testing with COSINE metric...
Indexing 24607 images...


Indexing (cosine): 100%|██████████| 745/745 [05:21<00:00,  2.31it/s]


Indexed 23824 images in 321.99s
💾 Model saved: 32.44 MB

🔬 Testing with EUCLIDEAN metric...
Indexing 24607 images...


Indexing (euclidean): 100%|██████████| 745/745 [02:15<00:00,  5.49it/s]


Indexed 23824 images in 135.78s
💾 Model saved: 32.44 MB


# Evaluation

In [19]:
# Hàm để evaluate một model
def evaluate_model(cbir, metric_name):
    print(f"\n📊 Evaluating {metric_name.upper()} model...")
    start = time()
    results = []
    ground_truth = []
    tested = 0

    # Get dataset targets for evaluation
    dataset_targets = []
    for images, labels, _ in train_loader:
        if len(dataset_targets) >= indexed:
            break
        count = min(len(labels), indexed - len(dataset_targets))
        dataset_targets.extend(labels[:count].numpy())
    dataset_targets = np.array(dataset_targets)

    # Query với k=100 để có đủ data cho tất cả metrics
    MAX_K = 100

    for images, labels, _ in tqdm(test_loader, desc=f"Testing ({metric_name})"):
        if tested >= TEST_SIZE:
            break

        if device.type == "cuda":
            images = images.to(device)

        count = min(len(images), TEST_SIZE - tested)
        images = images[:count]
        labels = labels[:count]

        images = (images.cpu().numpy().transpose(0, 2, 3, 1) * 255).astype(np.uint8)

        for image in images:
            if tested >= TEST_SIZE:
                break
            # Query với k=100 để tính được MAP@100, Recall@100
            result = cbir.query_similar_images(image, k=MAX_K)
            results.append(result)
            tested += 1

        ground_truth.extend(labels.numpy())

    retrieval_time = time() - start
    print(f"Tested {tested} images in {retrieval_time:.2f}s")

    # Calculate metrics cho k=5, k=10, k=50
    k_values = [5, 10, 50]
    metrics_data = {}

    print(f"📈 Calculating metrics for k={k_values}...")

    for k in k_values:
        map_k, recall_k, hit_k = [], [], []

        for r, gt in zip(results, ground_truth):
            # Lấy top-k results
            top_k_results = r[:k]
            indices = [item.index for item in top_k_results]
            preds = np.take(dataset_targets, indices)
            relevant = np.where(dataset_targets == gt)[0]

            map_k.append(average_precision(preds.tolist(), [gt], k))
            recall_k.append(recall(indices, relevant, k))
            hit_k.append(hit_rate(preds.tolist(), [gt], k))

        # Store metrics
        metrics_data[f'mAP@{k}'] = np.mean(map_k)
        metrics_data[f'Recall@{k}'] = np.mean(recall_k)
        metrics_data[f'HitRate@{k}'] = np.mean(hit_k)

        print(f"   k={k}: mAP={np.mean(map_k):.4f}, Recall={np.mean(recall_k):.4f}, HR={np.mean(hit_k):.4f}")

    # Add timing metrics từ results_comparison
    metrics_data['indexing_time'] = results_comparison[metric_name]['indexing_time']
    metrics_data['retrieval_time'] = retrieval_time
    metrics_data['file_size_mb'] = results_comparison[metric_name]['file_size']
    metrics_data['indexed_images'] = results_comparison[metric_name]['indexed_images']
    metrics_data['tested_images'] = tested
    metrics_data['metric'] = metric_name

    return metrics_data

# Evaluate cả 2 models
print("\n🚀 Starting evaluation of both models...")
all_results = []

for metric in ["cosine", "euclidean"]:
    cbir = models[metric]
    metrics = evaluate_model(cbir, metric)
    all_results.append(metrics)
    results_comparison[metric].update(metrics)

# So sánh kết quả
print(f"\n📊 COMPARISON RESULTS")
print("=" * 60)
print(f"{'Metric':<12} {'mAP@5':<8} {'mAP@10':<8} {'mAP@50':<8} {'Recall@10':<10} {'HR@5':<8}")
print("-" * 60)

for metric in ["cosine", "euclidean"]:
    data = results_comparison[metric]
    print(f"{metric:<12} {data['mAP@5']:<8.4f} {data['mAP@10']:<8.4f} {data['mAP@50']:<8.4f} {data['Recall@10']:<10.4f} {data['HitRate@5']:<8.4f}")

# Tìm winner
cosine_map5 = results_comparison['cosine']['mAP@5']
euclidean_map5 = results_comparison['euclidean']['mAP@5']

winner = "cosine" if cosine_map5 > euclidean_map5 else "euclidean"
print(f"\n🏆 WINNER: {winner.upper()} (mAP@5: {results_comparison[winner]['mAP@5']:.4f})")

# Save detailed results
results_df = pd.DataFrame(all_results)
results_df.to_csv('out/caltech256_metrics_comparison.csv', index=False)
print("✅ Detailed results saved to out/caltech256_metrics_comparison.csv")

# Performance comparison
print(f"\n⏱️  Performance Comparison:")
for metric in ["cosine", "euclidean"]:
    data = results_comparison[metric]
    print(f"{metric.capitalize()}: Index={data['indexing_time']:.1f}s, Query={data['retrieval_time']:.1f}s, Size={data['file_size_mb']:.1f}MB")

if device.type == "cuda":
    torch.cuda.empty_cache()


🚀 Starting evaluation of both models...

📊 Evaluating COSINE model...


Testing (cosine): 100%|██████████| 187/187 [04:58<00:00,  1.60s/it]


Tested 5956 images in 379.55s
📈 Calculating metrics for k=[5, 10, 50]...
   k=5: mAP=0.0054, Recall=0.0002, HR=0.0049
   k=10: mAP=0.0072, Recall=0.0004, HR=0.0052
   k=50: mAP=0.0107, Recall=0.0022, HR=0.0055

📊 Evaluating EUCLIDEAN model...


Testing (euclidean): 100%|██████████| 187/187 [02:22<00:00,  1.31it/s]


Tested 5956 images in 221.13s
📈 Calculating metrics for k=[5, 10, 50]...
   k=5: mAP=0.0083, Recall=0.0003, HR=0.0068
   k=10: mAP=0.0099, Recall=0.0005, HR=0.0061
   k=50: mAP=0.0131, Recall=0.0022, HR=0.0056

📊 COMPARISON RESULTS
Metric       mAP@5    mAP@10   mAP@50   Recall@10  HR@5    
------------------------------------------------------------
cosine       0.0054   0.0072   0.0107   0.0004     0.0049  
euclidean    0.0083   0.0099   0.0131   0.0005     0.0068  

🏆 WINNER: EUCLIDEAN (mAP@5: 0.0083)
✅ Detailed results saved to out/caltech256_metrics_comparison.csv

⏱️  Performance Comparison:
Cosine: Index=322.0s, Query=379.6s, Size=32.4MB
Euclidean: Index=135.8s, Query=221.1s, Size=32.4MB
