In [None]:
# Importing required libraries

import os
import pickle
import torch
import numpy as np 
import matplotlib.pyplot as plt
import cv2
from facenet_pytorch import MTCNN
from torchvision.models import resnet50, ResNet50_Weights
import torchvision.transforms as T
from PIL import Image

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Preprocessing applied to every input image before it is fed to the network.
# The steps run in the order listed:
#   1. ToTensor  - turn the PIL image into a tensor
#   2. Resize    - rescale using a target size of 224
#   3. Normalize - standardise each channel with the given mean / std
#                  (presumably the ImageNet statistics the pretrained weights expect)
preprocess = T.Compose(
    [
        T.ToTensor(),
        T.Resize(224),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

## ResNet50 Model
A ResNet50 model with pretrained **IMAGENET1K_V2** weights is used in this notebook. The final (classification) layer is removed because we want to use ResNet50 as a **feature extractor** for the image dataset of Bollywood actors and actresses.

In [4]:
import torch.nn as nn
class Model(nn.Module):
    """ResNet-50 backbone with its last child module removed.

    The network is built from torchvision's ``resnet50`` with the
    ``IMAGENET1K_V2`` pretrained weights; every child except the final one is
    kept and chained into a single ``nn.Sequential``.
    """

    def __init__(self):
        super().__init__()
        backbone = resnet50(weights=ResNet50_Weights.IMAGENET1K_V2)
        # Keep every top-level child except the last one.
        kept_layers = list(backbone.children())[:-1]
        self.model = nn.Sequential(*kept_layers)

    def forward(self, x):
        """Run ``x`` through the truncated network and return its output."""
        return self.model(x)

In [5]:
# Instantiate the feature extractor and put it in inference mode.
# A freshly constructed nn.Module is in training mode, in which BatchNorm
# layers normalise with per-batch statistics and keep updating their running
# averages (even inside torch.no_grad()). Module.eval() switches them to the
# stored running statistics so the extracted features are deterministic.
# eval() returns the module itself, so the call can be chained.
feature_extractor = Model().eval()

# To inspect the architecture, evaluate `feature_extractor` on its own in a
# cell (or print it); eval() is not needed for that.

Store the dataset of actor images in a folder called **data**, with one sub-folder per actor (the code below reads the images from `data/<actor>/`).

You can download the dataset from here: https://www.kaggle.com/datasets/sushilyadav1998/bollywood-celeb-localized-face-dataset

In [None]:
# Every sub-folder of data/ is treated as one actor. Defining `actors` and
# `filenames` here keeps the cell runnable on a fresh kernel (they were
# previously expected to exist already). Sorting gives a reproducible order,
# since os.listdir() returns entries in arbitrary order.
actors = sorted(
    entry for entry in os.listdir('data')
    if os.path.isdir(os.path.join('data', entry))
)

# Iterate over all actors and collect the paths of their image files
filenames = []
for actor in actors:
    for file in sorted(os.listdir(os.path.join('data', actor))):
        filenames.append(os.path.join("data", actor, file))

# Save the collected filenames to a pickle file for future use.
# The `with` block guarantees the file is flushed and closed.
with open('filenames.pkl', 'wb') as fh:
    pickle.dump(filenames, fh)

In [None]:
# Initialize an empty list to store features
features_lst = []

# Load the list of filenames from the pickle file.
# NOTE: pickle.load can execute arbitrary code; only load files this notebook
# (or another trusted source) produced.
with open('filenames.pkl', 'rb') as fh:
    filenames = pickle.load(fh)

# Make sure the network is in inference mode before extracting features:
# in training mode BatchNorm layers use per-batch statistics and update their
# running averages even under torch.no_grad(). The call is idempotent.
feature_extractor.eval()

# Loop through all image files, preprocess them, and extract features using the feature extractor
for filename in filenames:
    # Open the image and convert it to RGB format; the context manager closes
    # the underlying file as soon as the converted copy exists.
    with Image.open(filename) as raw_image:
        image = raw_image.convert('RGB')

    # Preprocess the image and add a batch dimension
    input_tensor = preprocess(image).unsqueeze(0)

    # Extract features without computing gradients (no training)
    with torch.no_grad():
        features = feature_extractor(input_tensor)

    # Flatten the features to a 1-D tensor and add them to the list
    features_lst.append(features.flatten())

In [None]:
# Store the extracted feature list in a file for later use.
# Using a `with` block ensures the file is flushed and closed once the dump
# completes, instead of leaving the handle to the garbage collector.
with open("embedding.pkl", "wb") as fh:
    pickle.dump(features_lst, fh)