## Importing libraries and loading ResNet-50

In [1]:
import torch
import torchvision.transforms as transforms
import torch.nn.functional as F
from torchvision.models import resnet50
from PIL import Image
import os
from tqdm import tqdm
import pickle

In [2]:
# Build the feature extractor: start from the pretrained ResNet-50, then keep
# every child module except the last one (the final classifier), so the
# network outputs features instead of class scores.
backbone = resnet50(pretrained=True)
feature_layers = list(backbone.children())[:-1]
model = torch.nn.Sequential(*feature_layers)

# Put the network in evaluation mode. As the last expression of the cell this
# also displays the module structure.
model.eval()



Sequential(
  (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
  (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (4): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)


## Preprocessing and feature extraction

In [3]:
# Preprocessing applied to every image before it reaches the network:
# resize to 224x224, convert to a tensor, then normalize each of the three
# channels with the mean / standard deviation below.
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]

transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=channel_mean, std=channel_std),
    ]
)

def get_image_embeddings(image_path):
    """Compute the feature tensor for a single image file.

    The image is opened with PIL, converted to RGB, passed through the
    module-level ``transform`` pipeline, given a leading batch dimension and
    run through the module-level ``model`` with gradient tracking disabled.

    Args:
        image_path: Path of the image file to process.

    Returns:
        The tensor produced by ``model`` for this image, or ``None`` if any
        step raised an exception. Failures are printed and not re-raised, so
        callers must be prepared to handle ``None``.
    """
    try:
        # Image.open is lazy and holds on to the underlying file; the context
        # manager releases it deterministically. convert() returns a new,
        # fully loaded image, so it stays usable after the file is closed.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert("RGB")

        # unsqueeze(0) adds the batch dimension in front of the image tensor.
        image_tensor = transform(image).unsqueeze(0)

        with torch.no_grad():
            features = model(image_tensor)

        # NOTE(review): with start_dim=-1 only the last dimension is
        # "flattened" into itself, so this call returns `features` with its
        # shape unchanged. If a flat per-image vector was intended,
        # start_dim=1 is probably what was meant. Left as-is because code
        # that loads the saved embeddings may rely on the current shape;
        # confirm with downstream consumers before changing it.
        embeddings = torch.flatten(features, start_dim=-1)
        return embeddings

    except Exception as e:
        # Best effort: report the failing file and let the caller carry on
        # with the remaining images.
        print(f"Error processing {image_path}: {str(e)}")
        return None

## Extracting and saving the embeddings

In [4]:
# Collect the path of every entry in the dataset directory. os.listdir
# returns entries in arbitrary order, so sort them to make the index of each
# image reproducible from one run to the next.
image_dir = 'image_dataset'
candidate_paths = sorted(
    os.path.join(image_dir, entry) for entry in os.listdir(image_dir)
)

# filenames[i] and feature_list[i] always describe the same image.
filenames = []
feature_list = []

for file in tqdm(candidate_paths):
    embedding = get_image_embeddings(file)
    if embedding is None:
        # get_image_embeddings has already printed the error. Skip the file
        # so that no None entry is stored and both lists stay aligned.
        continue
    filenames.append(file)
    feature_list.append(embedding)

# Use context managers so both files are flushed and closed as soon as the
# dump finishes, instead of whenever the anonymous file objects are collected.
with open('embeddings.pkl', 'wb') as embeddings_file:
    pickle.dump(feature_list, embeddings_file)

with open('filenames.pkl', 'wb') as filenames_file:
    pickle.dump(filenames, filenames_file)

100%|██████████| 44441/44441 [2:02:32<00:00,  6.04it/s]  
