# IA Project - Face Recognition with Dynamic Triplet Loss

References: https://openaccess.thecvf.com/content_ICCV_2019/papers/Zhang_Learning_Local_Descriptors_With_a_CDF-Based_Dynamic_Soft_Margin_ICCV_2019_paper.pdf

Dataset: http://vis-www.cs.umass.edu/lfw/#download

In [37]:
import torch
from matplotlib import pyplot as plt
from torchvision import datasets
import torchvision.transforms.functional as TF
import torchvision.transforms as T
import numpy as np
import collections
import PIL.Image
from torch.utils.data import DataLoader, Subset
import os
import random

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
  dev = torch.device("cuda")
else:
  dev = torch.device("cpu")
print(dev)

cpu


### Pre-processing data

In [38]:
# Filesystem locations used by the notebook.
# data_path is the root folder that holds one sub-folder of images per person;
# people_path is the text file parsed by readPeople().
# train_path / test_path are not read anywhere in the visible cells
# (presumably the train/test pair lists -- TODO confirm).
data_path = "./LFW_DIR"
train_path = "./data/train_pairs.txt"
test_path = "./data/test_pairs.txt"
people_path = "./data/people.txt"

In [46]:
# Per-channel mean / std handed to T.Normalize
# (the values match the widely used ImageNet statistics).
norm_mean = (0.485, 0.456, 0.406)
norm_std = (0.229, 0.224, 0.225)

# Deterministic preprocessing for evaluation.
test_transform = T.Compose([
    T.Resize(250),  # int size: shorter side is scaled to 250, aspect ratio kept (250x250 only for square inputs)
    T.CenterCrop(150),   # then take 150x150 center crop
    T.ToTensor(),
    T.Normalize(norm_mean, norm_std),
])

# Training preprocessing: same resize / normalisation, but the 150x150 crop
# position is random and the image is randomly flipped horizontally.
train_transform = T.Compose([
    T.Resize(250),
    T.RandomCrop(150),
    T.RandomHorizontalFlip(),
    T.ToTensor(),
    T.Normalize(norm_mean, norm_std),
])

In [39]:
def readPeople(people_path):
  """Read the people file into a list of whitespace-split records.

  Args:
    people_path: path of a text file with one record per line.

  Returns:
    A list with one entry per non-blank line; each entry is the list of
    whitespace-separated tokens found on that line. Other cells in this
    notebook read the person name from index 0 and the image count from
    index 1.
  """
  people_list = []
  with open(people_path, 'r') as file:
    # Iterate the file lazily instead of loading every line with readlines().
    for line in file:
      person = line.split()
      # A blank (or whitespace-only) line would produce an empty record and
      # crash any caller that indexes person[0] / person[1], so skip it.
      if person:
        people_list.append(person)
  return people_list

In [None]:
# Parse the people file into a list of token lists; the bare name on the
# last line makes the notebook display the result.
people_list= readPeople(people_path)
people_list

In [None]:
'''
# loading dataset
# dataset = datasets.ImageFolder(data_path, transform = transform)
dataset = datasets.ImageFolder(data_path)
print(f"num samples: {len(dataset)}")

num samples: 13233


In [None]:
'''
# I need to extract the subset of people that have more than one image.
# For that I need a list of the people's labels:
# loop over people_list: if a person has more than one image, copy that person
# into a list of classes. Then convert these classes to labels and build the subset.

def getIndeces(dataset, people_list):

  people_idx = []
  data_dict = dataset.class_to_idx

  for person in people_list:
    if int(person[1]) > 1:
      name = person[0]
      people_idx.append(data_dict[name])

  return people_idx
  '''

In [None]:
'''
people_idx = getIndeces(dataset, people_list)
people_idx      #lista delle labels relative alle persone con più di un'immagine
'''

In [5]:
'''
# cambiare i nomi con le labels
def getLabeledImages(data_path, people_list):
  img_path = []
  people_idx = []
  labeledImages = {}
  i = 0
  # data_dict = dataset.class_to_idx

  for person in people_list:
    if int(person[1]) > 1:    # se ho più di una immagine per persona
      label = i
      # people_idx.append(data_dict[name])
      labeledImages[label] = []
      for j in range(int(person[1])):
        path = os.path.join(data_path, person[0], person[0] + '_' + '%04d' % int(person[1]) + '.jpg')
        img_path.append(path)
      labeledImages[i] = img_path
      img_path = []
      i += 1

  return labeledImages

In [84]:
def getLabeledImages(data_path, people_list):
  """Map every image path of the multi-image people to an integer label.

  Args:
    data_path: root folder containing one sub-folder per person.
    people_list: records whose index 0 is the person name and whose
      index 1 is that person's image count (anything int() accepts).

  Returns:
    A dict 'img_path: label'. Only people with more than one image are
    included; labels are consecutive integers starting at 0, assigned in
    the order the people appear in people_list. Paths follow the pattern
    <data_path>/<name>/<name>_<NNNN>.jpg with NNNN counting from 0001.
  """
  path_to_label = {}
  next_label = 0

  for entry in people_list:
    name = entry[0]
    n_images = int(entry[1])
    if n_images <= 1:
      # People with a single image are left out entirely.
      continue

    person_dir = os.path.join(data_path, name)
    for img_id in range(1, n_images + 1):
      file_name = f"{name}_{img_id:04d}.jpg"
      path_to_label[os.path.join(person_dir, file_name)] = next_label
    next_label += 1

  return path_to_label

In [88]:
# Build the 'img_path: label' dictionary, then report how many images and
# how many distinct labels it contains.
labeledImages = getLabeledImages(data_path, people_list)
num_images = len(labeledImages)
num_labels = len(set(labeledImages.values()))
print(num_images)
print(num_labels)

9164
1680


In [93]:
class LFWDataset(torch.utils.data.Dataset):
  """Face-image dataset that yields triplets for triplet-loss training.

  Args:
    labeledImages: dict mapping image path -> identity label.
    transform: callable applied to every loaded PIL image.
    train: when True, __getitem__ returns an (anchor, positive, negative)
      triplet of transformed images; when False it returns
      (transformed image, label) for the indexed image.
  """

  def __init__(self, labeledImages, transform, train = True):
    self.train = train
    self.transform = transform
    self.labeledImages = labeledImages
    self.images = list(labeledImages.keys())

    # Group the paths by identity once, so sampling a triplet does not have
    # to rescan the whole dataset for every item.
    self._paths_by_label = {}
    for path in self.images:
      self._paths_by_label.setdefault(labeledImages[path], []).append(path)

  def __len__(self):
    return len(self.images)

  def _load(self, path):
    """Open the image at path, force 3-channel RGB and apply the transform."""
    # The context manager closes the file handle; convert() returns an
    # independent in-memory copy, so the transform never sees a closed file.
    with PIL.Image.open(path) as img:
      return self.transform(img.convert("RGB"))

  def _sample_triplet_paths(self, index):
    """Return (anchor_path, positive_path, negative_path) for index.

    The positive shares the anchor's label but is a different file; the
    negative always has a different label than the anchor.

    Raises:
      IndexError: the anchor's identity has no other image.
      ValueError: the dataset holds fewer than two identities.
    """
    anchor_path = self.images[index]
    anchor_label = self.labeledImages[anchor_path]

    positives = [p for p in self._paths_by_label[anchor_label] if p != anchor_path]
    if not positives:
      raise IndexError(f"no positive image available for {anchor_path!r}")
    positive_path = random.choice(positives)

    if len(self._paths_by_label) < 2:
      raise ValueError("at least two identities are required to sample a negative")
    # Rejection sampling: uniform over all images of other identities.
    # The anchor itself can never be picked because it carries anchor_label.
    negative_path = random.choice(self.images)
    while self.labeledImages[negative_path] == anchor_label:
      negative_path = random.choice(self.images)

    return anchor_path, positive_path, negative_path

  def __getitem__(self, index):
    if not self.train:
      # Evaluation mode: a single image with its label instead of an
      # implicit None.
      path = self.images[index]
      return self._load(path), self.labeledImages[path]

    anchor_path, positive_path, negative_path = self._sample_triplet_paths(index)

    anchor_img = self._load(anchor_path)
    positive_img = self._load(positive_path)
    negative_img = self._load(negative_path)

    return anchor_img, positive_img, negative_img

In [117]:
# Training dataset over labeledImages using the augmenting train_transform
# (train defaults to True, so items are anchor/positive/negative triplets).
train_dataset = LFWDataset(labeledImages, train_transform)

TypeError: ignored

In [None]:
'''
num_data = len(dataset)
list_idx = list(range(num_data))
random.shuffle(list_idx)

train_frac = 0.5
test_frac = 0.3
val_frac = 0.2

# training
num_train = int(num_data*train_frac)
num_data = num_data - num_train
train_idx = list_idx[num_data:]
list_idx = list_idx[:num_data]

# test
num_test = int(num_data*test_frac)
num_data = num_data - num_test
test_idx = list_idx[num_data:]
list_idx = list_idx[:num_data]

# val
num_val= int(num_data*val_frac)
num_data = num_data - num_val
val_idx = list_idx[num_data:]
list_idx = list_idx[:num_data]

# create subsets
train_dataset = Subset(dataset, train_idx)
test_dataset = Subset(dataset, test_idx)
val_dataset = Subset(dataset, val_idx)
'''

In [None]:
'''
print(
    len(train_dataset),
    len(test_dataset),
    len(val_dataset)
)
'''

6616 1985 926
