In [None]:
! pip install facenet_pytorch

### Import data

In [None]:
import os
import numpy as np
from facenet_pytorch import InceptionResnetV1
import pickle

In [None]:
# Project root on the Google Drive mount point.
root_dir = "/content/drive/MyDrive/Deep_learning_course/Project/"
# FastRCNN sub-folder of the project root.
project_dir = os.path.join(root_dir, "FastRCNN/")

In [None]:
# Make Google Drive visible inside the Colab runtime under /content/drive.
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
path_to_external_folder = os.path.join(root_dir, 'data/')
path_to_zip_folder = os.path.join(path_to_external_folder, 'Archive_verification.zip')
current_folder_path = '/content/localdata/'
!unzip -q $path_to_zip_folder -d $current_folder_path

### Load data

In [None]:
import torch
from torch.utils.data import Dataset
from torchvision.transforms import v2 as T

In [None]:
# Preprocessing applied to every image before it reaches the embedding network.
# facenet_pytorch standardises its inputs as (pixel - 127.5) / 128 (see its
# `fixed_image_standardization` helper), so the 0-255 pixel values are cast to
# float WITHOUT rescaling and then normalised with exactly those constants,
# instead of being squeezed into [0, 1].
# NOTE(review): assumes 3-channel uint8 images coming out of `read_image` - confirm.
trnsf = T.Compose([
              T.ToDtype(torch.float, scale=False),
              T.Normalize(mean=[127.5, 127.5, 127.5], std=[128.0, 128.0, 128.0]),
              T.ToPureTensor()
          ])

In [None]:
from torchvision.io import read_image
class VerificationDataset(Dataset):
  """Flat-folder image dataset whose labels are encoded in the file names.

  Every regular, non-hidden file directly inside ``root_dir`` is one sample.
  The label of a sample is the part of its file name that precedes the first
  underscore (``'alice_01.jpg'`` -> ``'alice'``).

  Args:
    root_dir: Folder containing the image files.
    transform: Optional callable applied to the tensor returned by
      ``read_image``. With ``None`` (the default) the raw tensor is returned.
  """

  def __init__(self, root_dir, transform=None):
    super().__init__()
    self.root_dir = root_dir
    self.transform = transform
    # Sorted so the sample order is reproducible (os.listdir order is
    # arbitrary). Sub-directories and hidden files such as '.DS_Store' are
    # skipped because they cannot be decoded as images.
    self.images = sorted(
        name for name in os.listdir(root_dir)
        if not name.startswith('.') and os.path.isfile(os.path.join(root_dir, name))
    )
    # Full path of every sample, aligned index-by-index with self.images.
    self.path = [os.path.join(root_dir, name) for name in self.images]
    # Label = file-name prefix before the first underscore.
    self.labels = [name.split('_')[0] for name in self.images]

  def __len__(self):
    """Return the number of samples."""
    return len(self.images)

  def __getitem__(self, idx):
    """Return ``(image, label)`` for the sample at position ``idx``."""
    image = read_image(self.path[idx])

    if self.transform:
      image = self.transform(image)

    label = self.labels[idx]
    return image, label

In [None]:
# Folder with the extracted images: <current_folder_path>/Faces/Faces/.
path_to_data = os.path.join(current_folder_path, "Faces", "Faces/")
# Dataset over that folder; `trnsf` is applied to each image on access.
dataset = VerificationDataset(root_dir=path_to_data, transform=trnsf)

In [None]:
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = torch.device('cuda:0')
else:
  device = torch.device('cpu')

In [None]:
# Batched, shuffled loader over the dataset.
# Page-locked (pinned) host memory only speeds up host-to-GPU copies, so it is
# requested only when running on CUDA. `pin_memory_device` is left at its
# default so a CPU-only runtime never asks the loader to pin memory for 'cpu'.
dataloader = torch.utils.data.DataLoader(
    dataset,
    batch_size=150,
    shuffle=True,
    pin_memory=(device.type == 'cuda'),
    num_workers=8
)

### Get embeddings

In [None]:
# Pretrained face-embedding network (VGGFace2 weights). It is used for
# inference only, so it is switched to eval mode; otherwise dropout and
# batch-norm run in training mode and the embeddings become noisy and dependent
# on which images share a batch.
resnet = InceptionResnetV1(pretrained='vggface2').eval().to(device)

In [None]:
# Run every batch through the network and collect one embedding per image;
# `labels` stays aligned index-by-index with `embeddings`.
embeddings = []
labels = []
# Inference only: eval mode fixes dropout / batch-norm behaviour, and no_grad
# avoids building an autograd graph for each batch.
resnet.eval()
with torch.no_grad():
  for i, (image, label) in enumerate(dataloader, start=1):
    output = resnet(image.to(device))
    # Move the batch to host memory and store its rows as separate arrays.
    embeddings.extend(output.cpu().numpy())
    labels.extend(label)
    print(i)  # progress: number of batches processed so far

In [None]:
# File inside path_to_external_folder that will hold the pickled embeddings.
embeddings_path = os.path.join(
    path_to_external_folder,
    'embeddings.pkl',
)

In [None]:
# Bundle the embeddings and their labels into one (embeddings, labels) tuple.
vec_id = embeddings, labels

In [None]:
# Serialise `vec_id` to `embeddings_path` in binary mode.
with open(embeddings_path, mode='wb') as out_file:
  pickle.dump(vec_id, out_file)