In [None]:
!pip install faiss-cpu

Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl (30.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/30.7 MB[0m [31m34.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.10.0


In [None]:
!pip install ftfy regex tqdm
!pip install git+https://github.com/openai/CLIP.git

Collecting ftfy
  Downloading ftfy-6.3.1-py3-none-any.whl.metadata (7.3 kB)
Downloading ftfy-6.3.1-py3-none-any.whl (44 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/44.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.8/44.8 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: ftfy
Successfully installed ftfy-6.3.1
Collecting git+https://github.com/openai/CLIP.git
  Cloning https://github.com/openai/CLIP.git to /tmp/pip-req-build-t0euuavn
  Running command git clone --filter=blob:none --quiet https://github.com/openai/CLIP.git /tmp/pip-req-build-t0euuavn
  Resolved https://github.com/openai/CLIP.git to commit dcba3cb2e2827b402d2701e7e1c7d9fed8a20ef1
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch->clip==1.0)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting

In [None]:
import torch
import clip
import torchvision.datasets as datasets
import numpy as np
import faiss
from sklearn.neighbors import NearestNeighbors
from torch.utils.data import DataLoader
from sklearn.preprocessing import normalize
from torchvision import transforms

In [None]:
# Load the CLIP ViT-B/32 model on the GPU when torch reports one, otherwise on the CPU.
# clip.load also returns `preprocess`, the image transform paired with this model.
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)
# Switch to inference mode; the trailing semicolon stops the notebook from echoing
# the full module repr as cell output.
model.eval();

100%|███████████████████████████████████████| 338M/338M [00:38<00:00, 9.11MiB/s]


CLIP(
  (visual): VisionTransformer(
    (conv1): Conv2d(3, 768, kernel_size=(32, 32), stride=(32, 32), bias=False)
    (ln_pre): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
    (transformer): Transformer(
      (resblocks): Sequential(
        (0): ResidualAttentionBlock(
          (attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
          )
          (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (mlp): Sequential(
            (c_fc): Linear(in_features=768, out_features=3072, bias=True)
            (gelu): QuickGELU()
            (c_proj): Linear(in_features=3072, out_features=768, bias=True)
          )
          (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        )
        (1): ResidualAttentionBlock(
          (attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
          

In [None]:
# Load CIFAR-100 dataset with CLIP preprocessing
def load_cifar100(root="./data", train=True, batch_size=128, num_workers=2, transform=None):
    """Build a CIFAR-100 dataset and a sequential DataLoader over it.

    Calling ``load_cifar100()`` with no arguments still loads the training
    split from ``./data`` in batches of 128 with 2 workers.

    Args:
        root: Directory the dataset is downloaded to and read from.
        train: True for the training split, False for the test split.
        batch_size: Number of images per batch.
        num_workers: Number of DataLoader worker processes.
        transform: Image transform applied to every sample. When None, a
            CLIP-style transform is used: bicubic resize to 224x224, tensor
            conversion, and normalization with CLIP's published channel
            statistics.

    Returns:
        A ``(dataset, dataloader)`` tuple. The loader does not shuffle, so
        batches follow dataset order and row ``i`` of anything computed from
        the loader corresponds to ``dataset[i]``.
    """
    if transform is None:
        transform = transforms.Compose([
            # CLIP's own preprocessing resizes with bicubic interpolation.
            transforms.Resize((224, 224), interpolation=transforms.InterpolationMode.BICUBIC),
            transforms.ToTensor(),
            # Full-precision channel mean / std from CLIP's reference preprocessing.
            transforms.Normalize(
                (0.48145466, 0.4578275, 0.40821073),
                (0.26862954, 0.26130258, 0.27577711),
            ),
        ])

    dataset = datasets.CIFAR100(root=root, train=train, download=True, transform=transform)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    return dataset, dataloader


In [None]:
# Extract image features using CLIP
def extract_features(model, dataloader, device="cuda"):
    """Encode every batch of a dataloader with ``model.encode_image``.

    Args:
        model: Object exposing ``to(device)`` and ``encode_image(images)``.
        dataloader: Iterable yielding ``(images, targets)`` tensor pairs.
        device: Device the model and each image batch are moved to.

    Returns:
        ``(features, labels)`` as NumPy arrays: the per-batch embeddings
        stacked row-wise as float32, and the per-batch targets concatenated
        in the same order.

    Raises:
        ValueError: If the dataloader yields no batches.
    """
    model = model.to(device)
    feature_batches, label_batches = [], []

    with torch.no_grad():
        for images, targets in dataloader:
            embeddings = model.encode_image(images.to(device))
            # A half-precision model returns float16 embeddings; store float32 so
            # downstream indexes and distance computations get a uniform dtype.
            feature_batches.append(embeddings.float().cpu().numpy())
            label_batches.append(targets.numpy())

    if not feature_batches:
        raise ValueError("dataloader yielded no batches; cannot extract features")

    return np.vstack(feature_batches), np.hstack(label_batches)

In [None]:
# Build FAISS index (IndexFlatL2: flat index searched by L2 distance)
def build_faiss_index(features):
    """Create a ``faiss.IndexFlatL2`` holding every row of ``features``.

    Args:
        features: 2-D array-like of shape ``(n_vectors, dim)``.

    Returns:
        The populated index; row ``i`` of ``features`` is stored under id ``i``.
    """
    # FAISS works on C-contiguous float32 matrices; coerce explicitly so float16 /
    # float64 or sliced inputs do not depend on the wrapper converting them.
    features = np.ascontiguousarray(features, dtype=np.float32)
    d = features.shape[1]  # Feature dimension
    index = faiss.IndexFlatL2(d)  # L2 distance-based index
    index.add(features)  # Add dataset features to the index
    return index

In [None]:
# Build the cosine nearest-neighbour index used as the "hashing" retriever
def build_hashing_index(features):
    """Fit a scikit-learn ``NearestNeighbors`` model on row-normalized features.

    Each row of ``features`` is scaled by ``normalize(..., axis=1)`` and the
    result is used to fit ``NearestNeighbors(n_neighbors=5, metric='cosine')``.
    NOTE(review): the notebook calls this the "hashing"/LSH index, but no hash
    table is constructed in this function.

    Returns:
        The fitted ``NearestNeighbors`` estimator.
    """
    unit_features = normalize(features, axis=1)
    return NearestNeighbors(n_neighbors=5, metric='cosine').fit(unit_features)

In [None]:
# Retrieve similar images using FAISS
def retrieve_similar_images_faiss(query_features, index, k=5):
    """Return the ``k`` nearest database entries for each query vector.

    Args:
        query_features: Array-like of shape ``(n_queries, dim)``; a single 1-D
            vector of length ``dim`` is also accepted and treated as one query.
        index: Object exposing ``search(queries, k) -> (distances, indices)``.
        k: Number of neighbours to retrieve per query.

    Returns:
        ``(indices, distances)``, each with one row per query. Note the order is
        swapped relative to what ``index.search`` returns.
    """
    # Promote a lone vector to a (1, dim) batch and hand the index a C-contiguous
    # float32 matrix regardless of the caller's dtype or memory layout.
    queries = np.ascontiguousarray(np.atleast_2d(query_features), dtype=np.float32)
    distances, indices = index.search(queries, k)  # Retrieve top-k matches
    return indices, distances

In [None]:
# Retrieve similar images with the fitted cosine nearest-neighbour ("hashing") model
def retrieve_similar_images_hashing(query_features, nn, k=5):
    """Look up the ``k`` nearest neighbours of each query in a fitted model.

    The queries are row-normalized with ``normalize(..., axis=1)`` and passed to
    ``nn.kneighbors``.

    Returns:
        ``(indices, distances)`` — the reverse of the ``(distances, indices)``
        order that ``kneighbors`` returns.
    """
    unit_queries = normalize(query_features, axis=1)
    # kneighbors yields (distances, indices); reversing the pair gives the
    # (indices, distances) order this function returns.
    return nn.kneighbors(unit_queries, n_neighbors=k)[::-1]


In [None]:
# Compute accuracy for retrieval methods
def compute_accuracy(retrieved_indices, true_labels, database_labels):
    """Fraction of queries whose true label appears among their retrieved items.

    Args:
        retrieved_indices: One sequence of database indices per query.
        true_labels: Label of each query, aligned with ``retrieved_indices``.
        database_labels: Label of every database item, indexable by the
            retrieved indices (lists are accepted and converted to an array).

    Returns:
        ``hits / number_of_queries`` as a float; ``0.0`` when there are no
        queries.
    """
    database_labels = np.asarray(database_labels)
    total = len(retrieved_indices)
    if total == 0:
        # No queries: report 0.0 rather than dividing by zero.
        return 0.0

    correct = 0
    for i, neighbor_ids in enumerate(retrieved_indices):
        neighbor_ids = np.asarray(neighbor_ids, dtype=np.int64).ravel()
        # FAISS pads missing neighbours with -1; without this filter NumPy would
        # read -1 as "last database item" and could count a spurious hit.
        neighbor_ids = neighbor_ids[neighbor_ids >= 0]
        if np.any(database_labels[neighbor_ids] == true_labels[i]):
            correct += 1
    return correct / total

In [None]:
# Compute combined accuracy for FAISS + Hashing
def compute_combined_accuracy(faiss_indices, hashing_indices, query_labels, database_labels):
    """Fraction of queries whose label is found by at least one retriever.

    For each query the neighbours returned by both retrievers are pooled; the
    query counts as correct when its label matches any pooled neighbour.

    Args:
        faiss_indices: Per-query database indices from the FAISS retriever.
        hashing_indices: Per-query database indices from the hashing retriever.
        query_labels: Label of each query; its length sets the number of
            queries, so both index arguments need at least that many rows.
        database_labels: Label of every database item (lists are accepted and
            converted to an array).

    Returns:
        ``hits / len(query_labels)`` as a float; ``0.0`` when there are no
        queries.
    """
    database_labels = np.asarray(database_labels)
    total = len(query_labels)
    if total == 0:
        # No queries: report 0.0 rather than dividing by zero.
        return 0.0

    correct = 0
    for i in range(total):
        # Merge results from both methods
        candidate_ids = np.concatenate([
            np.asarray(faiss_indices[i]).ravel(),
            np.asarray(hashing_indices[i]).ravel(),
        ]).astype(np.int64)
        # Drop FAISS's -1 "no neighbour" padding so it is not read as the last item.
        candidate_ids = candidate_ids[candidate_ids >= 0]

        # If the correct label appears in either method's results, count as correct
        if np.any(database_labels[candidate_ids] == query_labels[i]):
            correct += 1

    return correct / total

In [None]:
# Pipeline setup: embed the CIFAR-100 images and build both retrieval indexes.
# The names assigned here (dataset, dataloader, features, labels, faiss_index,
# hashing_index) are used by the cells that follow.
if __name__ == "__main__":
    # Use the GPU when torch reports one, otherwise the CPU.
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Load the dataset, then embed every image with `model`, which is not created
    # here and must already exist from the model-loading cell.
    dataset, dataloader = load_cifar100()
    features, labels = extract_features(model, dataloader, device)

    # Build retrieval indexes (both are built from the same `features` matrix)
    faiss_index = build_faiss_index(features)
    hashing_index = build_hashing_index(features)

Files already downloaded and verified


In [None]:
    # Select random query images for evaluation.
    # Queries are drawn from the CIFAR-100 *test* split: the indexes were built from
    # `dataset`, so sampling queries from `dataset` itself would make every query
    # retrieve its own entry and report a trivially perfect score.
    num_queries = 100  # Number of queries for evaluation
    query_dataset = datasets.CIFAR100(root="./data", train=False, download=True, transform=dataset.transform)
    rng = np.random.default_rng(0)  # Fixed seed so the evaluation is reproducible
    query_indices = rng.choice(len(query_dataset), num_queries, replace=False)
    # Fetch each sample once so the transform is not applied twice per image.
    query_samples = [query_dataset[int(i)] for i in query_indices]
    query_images = torch.stack([image for image, _ in query_samples]).to(device)
    query_labels = np.array([label for _, label in query_samples])

    # Extract features for query images (no autograd graph; float32 to match the database features)
    with torch.no_grad():
        query_features = model.encode_image(query_images).float().cpu().numpy()

In [None]:
    # Evaluate both retrievers on the same query batch. A query counts as correct
    # when its label appears among the labels of its 5 retrieved database items.
    # NOTE(review): if the queries are images that are also stored in the indexes,
    # each one retrieves itself and every score below is trivially 1.0.

    # Retrieve similar images using FAISS
    faiss_indices, _ = retrieve_similar_images_faiss(query_features, faiss_index, k=5)
    faiss_accuracy = compute_accuracy(faiss_indices, query_labels, labels)
    print("FAISS Top-5 Accuracy:", faiss_accuracy)

    # Retrieve similar images using Hashing
    hashing_indices, _ = retrieve_similar_images_hashing(query_features, hashing_index, k=5)
    hashing_accuracy = compute_accuracy(hashing_indices, query_labels, labels)
    print("Hashing Top-5 Accuracy:", hashing_accuracy)

    # Compute Combined Accuracy (correct when either retriever returns the label)
    combined_accuracy = compute_combined_accuracy(faiss_indices, hashing_indices, query_labels, labels)
    print("Combined FAISS + Hashing Top-5 Accuracy:", combined_accuracy)

FAISS Top-5 Accuracy: 1.0
Hashing Top-5 Accuracy: 1.0
Combined FAISS + Hashing Top-5 Accuracy: 1.0


In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Function to display the query image and retrieved similar images
def display_similar_images(query_index, dataset, retrieved_indices,
                           mean=(0.481, 0.457, 0.408), std=(0.268, 0.261, 0.275)):
    """Show the query image followed by its retrieved matches in one row.

    Args:
        query_index: Index of the query image in ``dataset``.
        dataset: Indexable dataset whose items are ``(image_tensor, label)`` with
            the image in channels-first layout.
        retrieved_indices: Dataset indices of the matches, in display order.
        mean: Per-channel mean used when the images were normalized. Defaults to
            the rounded CLIP statistics.
        std: Per-channel standard deviation used for normalization.
            Pass ``mean=None`` or ``std=None`` to plot the tensors unchanged.
    """
    undo_normalization = mean is not None and std is not None
    if undo_normalization:
        channel_mean = np.asarray(mean, dtype=np.float32).reshape(1, 1, -1)
        channel_std = np.asarray(std, dtype=np.float32).reshape(1, 1, -1)

    def to_displayable(image_tensor):
        # Channels-first -> channels-last, which is what imshow expects.
        pixels = np.transpose(image_tensor.numpy(), (1, 2, 0))
        if undo_normalization:
            # Normalized tensors fall outside [0, 1]; map them back to displayable
            # intensities instead of letting imshow clip them to wrong colours.
            pixels = np.clip(pixels * channel_std + channel_mean, 0.0, 1.0)
        return pixels

    # squeeze=False keeps `axes` two-dimensional even when there are no matches.
    fig, axes = plt.subplots(1, len(retrieved_indices) + 1, figsize=(15, 5), squeeze=False)
    axes = axes[0]

    # Display Query Image
    query_image, _ = dataset[query_index]
    axes[0].imshow(to_displayable(query_image))
    axes[0].set_title("Query Image")
    axes[0].axis("off")

    # Display Retrieved Images
    for i, idx in enumerate(retrieved_indices):
        retrieved_image, _ = dataset[int(idx)]
        axes[i + 1].imshow(to_displayable(retrieved_image))
        axes[i + 1].set_title(f"Match {i+1}")
        axes[i + 1].axis("off")

    plt.show()

# Select a random query image from the indexed set (`features` is what both indexes were built from)
query_index = np.random.randint(0, len(features))  # Ensure query_index is within features range
# Reuse that image's stored CLIP feature vector as a single-row float32 query
query_features = features[query_index].reshape(1, -1).astype('float32')
# One-element label array aligned with the single query row. The 100-element
# `query_labels` from the evaluation cell does not match a one-row result.
query_label = labels[query_index:query_index + 1]

# Ensure FAISS index has correct feature dimension
assert query_features.shape[1] == faiss_index.d, f"Feature dimension mismatch! Query: {query_features.shape[1]}, FAISS: {faiss_index.d}"

# The query is itself stored in both indexes, so ask for one extra neighbour and
# drop the query's own entry; otherwise "Match 1" is always the query image.
top_k = 5

# Retrieve similar images using FAISS
faiss_indices, _ = retrieve_similar_images_faiss(query_features, faiss_index, k=top_k + 1)
faiss_indices = faiss_indices[:, faiss_indices[0] != query_index][:, :top_k]

# Retrieve similar images using Hashing
hashing_indices, _ = retrieve_similar_images_hashing(query_features, hashing_index, k=top_k + 1)
hashing_indices = hashing_indices[:, hashing_indices[0] != query_index][:, :top_k]

# Compute Combined Accuracy for this single query (1.0 if either method found the label, else 0.0)
combined_accuracy = compute_combined_accuracy(faiss_indices, hashing_indices, query_label, labels)

# Print accuracy
print("Combined FAISS + Hashing Top-5 Accuracy:", combined_accuracy)

# Get final retrieved image indices
retrieved_indices = faiss_indices[0]  # Take FAISS results

# Display the Query & Retrieved Images
display_similar_images(query_index, dataset, retrieved_indices)

AssertionError: Feature dimension mismatch! Query: 25600000, FAISS: 512