In [6]:
import os
import torch
from PIL import Image
from torchvision import models, transforms
import torch.nn as nn
from annoy import AnnoyIndex

# Directory that holds the pictures to be indexed.
images_folder = "/Users/younes/Downloads/archive/PetImages/Dog"

# Keep only the directory entries whose name ends with a known image
# extension. The position of a name in `images` is what the indexing loop
# below uses as that picture's id in the Annoy index.
images = list(
    filter(
        lambda entry: entry.endswith(('.jpg', '.jpeg', '.png')),
        os.listdir(images_folder),
    )
)

# Use an ImageNet-pretrained ResNet50 as a fixed feature extractor.
# `weights` has to be a `ResNet50_Weights` member (not a model instance);
# IMAGENET1K_V1 is the checkpoint the legacy `pretrained=True` flag selected.
weights = models.ResNet50_Weights.IMAGENET1K_V1
model = models.resnet50(weights=weights)
# Swap the classification head for a pass-through so the forward pass returns
# the 2048-dimensional pooled features instead of class logits.
model.fc = nn.Identity()
# Evaluation mode: layers such as batch norm use their stored statistics.
model.eval()

# Preprocessing applied to every picture before it reaches the network:
# force a 224x224 size, then turn the PIL image into a tensor.
# NOTE(review): pretrained torchvision weights are normally paired with
# ImageNet mean/std normalisation; none is applied here -- confirm this is
# intentional and matches whatever preprocessing is used at query time.
preprocessing_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
]
transform = transforms.Compose(preprocessing_steps)

# Nearest-neighbour index over 2048-dimensional vectors (the ResNet50 feature
# size once `fc` is an identity), compared with the 'angular' metric.
annoy_index = AnnoyIndex(2048, 'angular')

# Embed every picture and register it in the Annoy index under its position
# in `images`, so a neighbour id returned later maps back to `images[id]`.
# Gradients are never needed here, so autograd bookkeeping is switched off
# for the whole loop.
with torch.no_grad():
    for i, image_name in enumerate(images):
        image_path = os.path.join(images_folder, image_name)
        try:
            # The context manager closes the underlying file handle.
            # convert('RGB') turns grayscale / palette / RGBA pictures into
            # 3-channel images so they are indexed instead of being dropped.
            with Image.open(image_path) as image:
                input_tensor = transform(image.convert('RGB')).unsqueeze(0)
        except OSError as err:
            # Unreadable or truncated files: report and move on rather than
            # aborting the whole run. The id `i` is simply left unused.
            print(f'Skipping {image_name}: {err}')
            continue

        output_tensor = model(input_tensor)
        # Hand Annoy a plain list of Python floats rather than a tensor.
        annoy_index.add_item(i, output_tensor[0].tolist())
        if i % 100 == 0:
            print(f'Processed {i} images.')

# Number of trees in the Annoy forest; a larger value is used here for
# better accuracy.
n_trees = 100
annoy_index.build(n_trees)

# Persist the built index to disk. Kept as the last expression of the cell so
# its return value is what the notebook displays.
annoy_index.save('dog_index_moreacc.ann')


Processed 0 images.
Processed 100 images.
Processed 200 images.
Processed 300 images.
Processed 400 images.
Processed 500 images.


True