In [None]:
import os
import pickle
import gzip
import json
import numpy as np
import cv2
import matplotlib.pyplot as plt
from pathlib import Path
import glob
from PIL import Image

# Project-local imports from src
from src.datasets.caltech256 import Caltech256DataModule
from src.featuring.rgb_histogram import RGBHistogram
from src.storage.VectorDBStore import VectorDBStore
from src.retrieval.KNN import KNNRetrieval
from src.pipeline import CBIR


In [None]:
# Artifacts written by the evaluation step; each path is defined once so the
# load and the size report cannot drift apart.
best_config_path = 'out/best_color_config.json'
best_model_path = 'out/best_color.pkl.gz'

# Load best model config
try:
    with open(best_config_path, 'r', encoding='utf-8') as f:
        best_config = json.load(f)
except FileNotFoundError:
    print("‚ùå Best config file not found! Please run evaluation first.")
    # Re-raise so the notebook stops at the real cause; later cells read
    # best_config and would otherwise fail with an unrelated NameError.
    raise

print("üèÜ Best Color Model Configuration:")
print(f"   Config: {best_config['config_name']}")
print(f"   n_bin: {best_config['n_bin']}")
print(f"   h_type: {best_config['h_type']}")
print(f"   metric: {best_config['metric']}")
print(f"   Average mAP: {best_config['avg_mAP']:.4f}")
print(f"   mAP@1: {best_config['mAP@1']:.4f}")
print(f"   mAP@5: {best_config['mAP@5']:.4f}")
print(f"   mAP@10: {best_config['mAP@10']:.4f}")

# Load best model
# SECURITY: pickle.load can execute arbitrary code embedded in the file. Only
# load archives produced by your own evaluation run, never untrusted ones.
try:
    with gzip.open(best_model_path, 'rb') as f:
        best_cbir = pickle.load(f)
except FileNotFoundError:
    print("‚ùå Best model file not found! Please run evaluation first.")
    # Same reasoning as above: best_cbir and file_size are needed downstream.
    raise

# Size of the compressed archive on disk, in MiB
file_size = os.path.getsize(best_model_path) / 1024 / 1024
print(f"\nüíæ Model loaded successfully ({file_size:.2f} MB)")
print(f"üìä Indexed images: {len(best_cbir.feature_store.vectors)}")


In [None]:
# Build the dataset module so indexed vectors can be mapped back to image files
root_path = os.path.abspath('data/caltech-256/256_ObjectCategories')
data_module = Caltech256DataModule(batch_size=1, root=root_path)
data_module.setup()

# Keep only as many train entries as there are vectors in the feature store.
# NOTE(review): this assumes the store was filled in train-dataset order, so that
# position i in the store corresponds to train image i — confirm against the indexing code.
train_dataset = data_module.train_dataset
indexed_count = len(best_cbir.feature_store.vectors)
image_paths = train_dataset.image_paths[:indexed_count]
labels = train_dataset.labels[:indexed_count]
class_names = train_dataset.classes

# Short sanity report of what was loaded
print(
    f"üìÇ Loaded {len(image_paths)} indexed image paths",
    f"üè∑Ô∏è  Available classes: {len(class_names)}",
    f"üìã Sample classes: {class_names[:5]}...",
    sep="\n",
)


In [None]:
def load_and_display_image(image_path, title="Image", ax=None):
    """Read an image from disk and draw it on a matplotlib axis.

    Args:
        image_path: Location of the image file (string or path-like).
        title: Text shown above the image.
        ax: Axis to draw on. When omitted, a new single-axis 4x4 figure is created.

    Returns:
        The image as read by OpenCV, converted from BGR to RGB channel order.

    Raises:
        FileNotFoundError: If OpenCV could not read the file.
    """
    # cv2.imread signals failure by returning None rather than raising; check it
    # here so the caller gets a clear message instead of an opaque cvtColor error.
    image = cv2.imread(str(image_path))
    if image is None:
        raise FileNotFoundError(f"Cannot read image: {image_path}")

    # Create the figure only after the read succeeded, so a bad path does not
    # leave an empty figure behind.
    if ax is None:
        _, ax = plt.subplots(1, 1, figsize=(4, 4))

    # OpenCV loads BGR while matplotlib expects RGB
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    ax.imshow(image)
    ax.set_title(title, fontsize=10)
    ax.axis('off')

    return image

def _query_class_from_path(query_image_path):
    """Derive the class name of a query image from where it sits on disk."""
    query_path = Path(query_image_path)
    if 'caltech-256' not in str(query_path):
        return "external"
    # Expected layout: .../256_ObjectCategories/XXX.class_name/image.jpg, so the
    # class folder is the image's immediate parent. Looking only at the parent
    # avoids being fooled by '..' or other dotted components earlier in the path.
    _, dot, class_name = query_path.parent.name.partition('.')
    return class_name if dot else "unknown"


def query_similar_images(query_image_path, k=5, show_results=True):
    """Retrieve the images most similar to a query image and optionally plot them.

    Args:
        query_image_path: Location of the query image file.
        k: Number of neighbours requested from the retrieval pipeline.
        show_results: When False, skip plotting and the per-result printout.

    Returns:
        The object returned by ``best_cbir.query_similar_images``, or None when
        the query image could not be read.
    """
    print(f"üîç Querying for similar images to: {Path(query_image_path).name}")

    # cv2.imread returns None for unreadable files instead of raising
    query_image = cv2.imread(str(query_image_path))
    if query_image is None:
        print("‚ùå Cannot load query image!")
        return

    results = best_cbir.query_similar_images(query_image, k=k)

    if not show_results:
        return results

    # One panel for the query plus one per result, three panels per row. The
    # grid grows with the result count, so nothing is dropped for k > 5.
    shown = list(results)
    n_cols = 3
    n_rows = -(-(1 + len(shown)) // n_cols)  # ceiling division
    _, axes = plt.subplots(n_rows, n_cols, figsize=(15, 5 * n_rows), squeeze=False)
    panels = axes.ravel()

    query_class = _query_class_from_path(query_image_path)
    load_and_display_image(query_image_path, f"üéØ Query Image\nClass: {query_class}", panels[0])

    print(f"\nüìä Top-{k} Similar Images:")
    print("-" * 50)

    for rank, (result, ax) in enumerate(zip(shown, panels[1:]), start=1):
        # Map the store index back to the file and class it was built from
        img_path = image_paths[result.index]
        img_class = class_names[labels[result.index]]

        load_and_display_image(img_path, f"#{rank} (Score: {result.score:.3f})\n{img_class}", ax)

        match_emoji = "‚úÖ" if img_class == query_class else "‚ùå"
        print(f"{match_emoji} #{rank}: {img_class} (Score: {result.score:.3f})")
        print(f"     Path: {Path(img_path).name}")

    # Hide panels that received no image so they do not show empty frames
    for ax in panels[1 + len(shown):]:
        ax.axis('off')

    plt.tight_layout()
    plt.show()

    return results


In [None]:
# Draw one test-set image at random and use it as the query.
# Intentionally unseeded: every run of this cell picks a different image.
test_dataset = data_module.test_dataset
random_idx = np.random.randint(len(test_dataset))
random_image_path = test_dataset.image_paths[random_idx]
random_class = class_names[test_dataset.labels[random_idx]]

print(
    "üé≤ Random query from test set:",
    f"   Image: {Path(random_image_path).name}",
    f"   True class: {random_class}",
    sep="\n",
)

# Retrieve and plot the five nearest indexed images
results = query_similar_images(random_image_path, k=5)


In [None]:
# List some interesting classes
interesting_classes = ['car-side-101', 'faces-easy-101', 'airplanes-101', 
                      'motorbikes-101', 'sunflower-101', 'butterfly', 
                      'dolphin', 'elephant', 'guitar', 'piano']

# Keep only the candidates that actually exist in the loaded dataset
available_interesting = [cls for cls in interesting_classes if cls in class_names]
print(f"üéØ Available interesting classes to try:")
for i, cls in enumerate(available_interesting[:10]):
    # Number of indexed images carrying this class label
    count = sum(1 for label in labels if class_names[label] == cls)
    print(f"   {i+1:2d}. {cls} ({count} images)")

# Query specific class
target_class = "car-side-101"  # <- change this to query a different class
if target_class in class_names:
    class_idx = class_names.index(target_class)
    # Collect the indexed images of this class; the first one is used as the query
    class_images = [path for path, label in zip(image_paths, labels) if label == class_idx]
    
    if class_images:
        # A real newline here: the doubled backslash used to print a literal "\n"
        print(f"\nüéØ Querying {target_class} (found {len(class_images)} images)")
        sample_image = class_images[0]
        results = query_similar_images(sample_image, k=5)
    else:
        print(f"‚ùå No {target_class} images found in indexed data")
else:
    print(f"‚ùå Class '{target_class}' not available")
    print(f"Available: {available_interesting[:5]}...")


In [None]:
# Recap of the loaded model: configuration, retrieval scores and index statistics.
# NOTE(review): "total features" is reported as n_bin**3, which presumes a joint
# three-channel histogram — confirm against RGBHistogram for the chosen h_type.
summary_lines = [
    "üìä Best Color Model Summary:",
    "=" * 40,
    f"Configuration: {best_config['config_name']}",
    "Feature Extractor: RGB Histogram",
    f"   - n_bin: {best_config['n_bin']} (total features: {best_config['n_bin']**3})",
    f"   - h_type: {best_config['h_type']}",
    f"Distance Metric: {best_config['metric']}",
    "\nPerformance:",
    f"   - Average mAP: {best_config['avg_mAP']:.4f}",
    f"   - mAP@1: {best_config['mAP@1']:.4f}",
    f"   - mAP@5: {best_config['mAP@5']:.4f}",
    f"   - mAP@10: {best_config['mAP@10']:.4f}",
    "\nDataset: Caltech-256",
    f"   - Indexed images: {len(best_cbir.feature_store.vectors)}",
    f"   - Classes: {len(class_names)}",
    f"   - Model size: {file_size:.2f} MB",
]
print(*summary_lines, sep="\n")

# Short how-to pointing at the query cells of this notebook
usage_lines = [
    "\nüéØ Usage Instructions:",
    "1. Run random query cell ƒë·ªÉ xem v√≠ d·ª•",
    "2. Thay ƒë·ªïi 'target_class' trong manual query cell",
    f"3. C√°c class th√∫ v·ªã: {', '.join(available_interesting[:5])}",
    "4. ƒê·ªÉ query ·∫£nh external, s·ª≠a ƒë∆∞·ªùng d·∫´n trong custom query cell",
]
print(*usage_lines, sep="\n")
