In [1]:
import os
import torch
from torch import nn, optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from facenet_pytorch import InceptionResnetV1
import random
from PIL import Image
import torch.nn.functional as F
import heapq
import matplotlib.pyplot as plt
from collections import Counter

  from .autonotebook import tqdm as notebook_tqdm


In [9]:
class TripletDataset(Dataset):
    """Dataset yielding (anchor, positive, negative) image triplets.

    Expected directory layout under ``root_dir``:

    * ``has_faces_label=True``:  ``<root>/<individual>/images/<img>`` with a
      matching YOLO annotation ``<root>/<individual>/labels/<img stem>.txt``.
      Images are cropped to the face bounding box (class id ``0``) when one is
      present in the annotation file.
    * ``has_faces_label=False``: ``<root>/<individual>/<img>``; images are
      used as-is.

    Args:
        root_dir: Directory containing one sub-directory per individual.
        transform: Optional callable applied to each of the three PIL images.
            When ``None`` the PIL images are returned unchanged.
        has_faces_label: Whether the dataset ships YOLO face annotations.

    Attributes:
        data: List of samples; ``(img_path, label_path)`` tuples when
            ``has_faces_label`` is true, plain image paths otherwise.
        labels: 1-D integer tensor with the class id of each sample.
        class_to_idx: Mapping from individual name to class id.
    """

    # YOLO class id used for face annotations in the label files.
    FACE_CLASS_ID = "0"

    def __init__(self, root_dir, transform=None, has_faces_label=True):
        self.root_dir = root_dir
        self.transform = transform
        self.data = []
        self.labels = []
        self.has_faces_label = has_faces_label

        # Sort so class ids are reproducible (os.listdir order is arbitrary) and
        # keep only directories so stray files at the root are not treated as
        # individuals.
        individuals = sorted(
            entry for entry in os.listdir(root_dir)
            if os.path.isdir(f"{root_dir}/{entry}")
        )
        # Maps each individual (AD, BS, ...) to an integer id.
        self.class_to_idx = {ind: i for i, ind in enumerate(individuals)}

        # For every individual, register each image (and its annotation file
        # when applicable) together with the individual's class id.
        for ind in individuals:
            if self.has_faces_label:
                img_dir = f"{root_dir}/{ind}/images"
                label_dir = f"{root_dir}/{ind}/labels"
            else:
                img_dir = f"{root_dir}/{ind}"
                label_dir = None
            if not os.path.isdir(img_dir):
                continue
            for img_name in os.listdir(img_dir):
                img_path = os.path.join(img_dir, img_name)
                if self.has_faces_label:
                    # Derive the annotation name from the image stem so any
                    # image extension works, not only ".png".
                    label_name = os.path.splitext(img_name)[0] + ".txt"
                    label_path = os.path.join(label_dir, label_name)
                    if not os.path.exists(label_path):
                        continue  # images without annotation are skipped
                    self.data.append((img_path, label_path))
                else:
                    self.data.append(img_path)
                self.labels.append(self.class_to_idx[ind])

        self.labels = torch.tensor(self.labels)

    def __len__(self):
        """Return the number of registered samples."""
        return len(self.data)

    def read_yolo_label(self, label_path, img_width, img_height):
        """Read the face bounding box from a YOLO annotation file.

        Args:
            label_path: Path to the YOLO ``.txt`` annotation file.
            img_width: Width in pixels of the annotated image.
            img_height: Height in pixels of the annotated image.

        Returns:
            ``(x1, y1, x2, y2)`` in pixels, or ``None`` when the file is empty
            or contains no face annotation. If several face lines exist, the
            last one is used.
        """
        with open(label_path, "r") as f:
            lines = f.readlines()

        # Keep the face box (class 0), not the body box.
        face_fields = None
        for line in lines:
            fields = line.split()
            if len(fields) >= 5 and fields[0] == self.FACE_CLASS_ID:
                face_fields = fields
        if face_fields is None:
            return None  # no face bounding box found

        x_center, y_center, width, height = map(float, face_fields[1:5])

        # Convert normalised coordinates to pixels.
        x_center *= img_width
        y_center *= img_height
        width *= img_width
        height *= img_height

        x1 = int(x_center - width / 2)
        y1 = int(y_center - height / 2)
        x2 = int(x_center + width / 2)
        y2 = int(y_center + height / 2)

        return (x1, y1, x2, y2)

    def _load_image(self, sample):
        """Load one entry of ``self.data`` as an RGB PIL image, face-cropped if annotated."""
        if self.has_faces_label:
            img_path, label_path = sample
        else:
            img_path, label_path = sample, None

        # The context manager releases the file handle; convert() returns an
        # independent, fully loaded image.
        with Image.open(img_path) as raw:
            image = raw.convert("RGB")

        if label_path is not None:
            bbox = self.read_yolo_label(label_path, image.width, image.height)
            if bbox:
                image = image.crop(bbox)
        return image

    def __getitem__(self, idx):
        """Return an ``(anchor, positive, negative)`` triplet for sample ``idx``.

        The positive is drawn at random among the other samples of the anchor's
        class (falling back to the anchor itself if it is the only one); the
        negative is drawn at random among samples of every other class.

        Raises:
            ValueError: If the dataset contains a single class, so no negative
                sample can be drawn.
        """
        anchor_idx = int(idx)
        if anchor_idx < 0:
            anchor_idx += len(self.data)
        anchor_label = self.labels[anchor_idx]

        # Plain Python lists keep random.choice away from Tensor truthiness /
        # indexing quirks and yield ordinary ints usable as list indices.
        positive_indices = torch.where(self.labels == anchor_label)[0].tolist()
        negative_indices = torch.where(self.labels != anchor_label)[0].tolist()
        if not negative_indices:
            raise ValueError(
                "TripletDataset needs at least two classes to sample a negative example"
            )

        # Avoid the degenerate triplet where the positive is the anchor itself.
        other_positives = [i for i in positive_indices if i != anchor_idx]
        positive_idx = random.choice(other_positives or positive_indices)
        negative_idx = random.choice(negative_indices)

        anchor_image = self._load_image(self.data[anchor_idx])
        pos_image = self._load_image(self.data[positive_idx])
        neg_image = self._load_image(self.data[negative_idx])

        if self.transform:
            return (
                self.transform(anchor_image),
                self.transform(pos_image),
                self.transform(neg_image),
            )
        # Without a transform, hand back the PIL images instead of failing.
        return anchor_image, pos_image, neg_image

In [3]:
def train_model(model, train_loader, val_loader, criterion, optimizer, device, save_model_file, num_epochs=10, patience=3):
    """Train ``model`` with a triplet criterion, using early stopping on the validation loss.

    Args:
        model: Network mapping a batch of images to a batch of embeddings.
        train_loader: Iterable of ``(anchor, positive, negative)`` training batches.
        val_loader: Iterable of ``(anchor, positive, negative)`` validation batches.
        criterion: Loss module called as ``criterion(anchor_emb, positive_emb, negative_emb)``.
        optimizer: Optimizer updating the trainable parameters of ``model``.
        device: Device on which the model, criterion and batches are placed.
        save_model_file: Path where the best ``state_dict`` is written.
        num_epochs: Maximum number of epochs.
        patience: Number of consecutive epochs without validation improvement
            after which training stops.

    Side effects:
        On exit, ``model`` holds the weights of the epoch with the lowest
        validation loss and those weights are saved to ``save_model_file``
        (nothing is restored or saved if no epoch produced a finite improvement).
    """
    import copy  # local import: only needed to snapshot the best weights

    model.to(device)
    criterion.to(device)

    best_val_loss = float("inf")
    best_model_state = None
    epochs_without_improvement = 0

    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch + 1}/{num_epochs}")
        print("-----------------------")

        model.train()  # training mode: weights are updated from the gradients
        running_loss = 0.0

        # For each mini-batch: compute embeddings and loss, then update the weights.
        for anchor, positive, negative in train_loader:
            anchor, positive, negative = anchor.to(device), positive.to(device), negative.to(device)
            optimizer.zero_grad()  # reset gradients, since .backward() accumulates them

            # Compute the embeddings.
            anchor_output = model(anchor)
            positive_output = model(positive)
            negative_output = model(negative)

            # Compute the loss from the embeddings.
            loss = criterion(anchor_output, positive_output, negative_output)
            loss.backward()  # back-propagate to obtain the gradients of the loss
            optimizer.step()  # update the model weights

            running_loss += loss.item()

        epoch_loss = running_loss / len(train_loader)
        print(f"Train Loss: {epoch_loss:.4f}")

        # Compute the loss on the validation set.
        model.eval()  # evaluation mode (BatchNorm and Dropout behave differently)
        val_loss = 0.0
        with torch.no_grad():
            for anchor, positive, negative in val_loader:
                anchor, positive, negative = anchor.to(device), positive.to(device), negative.to(device)

                anchor_output = model(anchor)
                positive_output = model(positive)
                negative_output = model(negative)

                loss = criterion(anchor_output, positive_output, negative_output)
                val_loss += loss.item()

        val_loss /= len(val_loader)
        print(f"Val Loss: {val_loss:.4f}")

        # Early stopping: stop when the validation loss has not improved for
        # `patience` consecutive epochs.
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            # state_dict() tensors share storage with the live parameters, so a
            # shallow dict copy would keep changing with later optimizer steps.
            # A deep copy freezes the weights as they are at this epoch.
            best_model_state = copy.deepcopy(model.state_dict())
            epochs_without_improvement = 0
        else:
            epochs_without_improvement += 1
            print(f"Early stopping: {epochs_without_improvement}/{patience}")

            if epochs_without_improvement >= patience:
                print("Early stopping déclenché. Arrêt de l'entraînement.")
                break

    if best_model_state is not None:
        model.load_state_dict(best_model_state)
        torch.save(best_model_state, save_model_file)

ChimpRec:

In [None]:
# Locations of the preprocessed ChimpRec recognition dataset and of the checkpoint to write.
# NOTE(review): this checkpoint path is identical to the one used in the CCR cell,
# so running both cells writes to the same file — confirm this is intended.
dataset_path = "C:/Users/julie/OneDrive - UCL/Master_2/Mémoire/Chimprec Dataset/ChimpRec Recognition/Preprocess" 
model_file = "C:/Users/julie/OneDrive - UCL/Master_2/Mémoire/ChimpRec/Code/recognition/FaceNet/facenet_triplet_loss_5_layers_fc.pth"

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Image preprocessing pipeline.
transform = transforms.Compose([
    transforms.Resize(size=(160, 160)),  # resize every image to 160x160
    transforms.ToTensor(),  # PIL image -> tensor
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),  # per RGB channel: [0, 1] -> [-1, 1]
])

# Datasets (face annotations available, so the default has_faces_label=True is used).
train_dataset = TripletDataset(f"{dataset_path}/train", transform=transform)
val_dataset = TripletDataset(f"{dataset_path}/val", transform=transform)

# DataLoaders split the datasets into mini-batches; only the training set is shuffled.
train_loader = DataLoader(
    train_dataset,
    batch_size=64,
    shuffle=True,
    num_workers=0,
    pin_memory=True,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=32,
    shuffle=False,
    num_workers=0,
    pin_memory=True,
)

facenet = InceptionResnetV1(pretrained='vggface2').eval()

# Freeze the whole network, then unfreeze its last 5 child modules.
facenet.requires_grad_(False)
for layer in list(facenet.children())[-5:]:
    for param in layer.parameters():
        param.requires_grad = True

facenet = facenet.to(device)

criterion = nn.TripletMarginLoss(margin=1.0)
optimizer = optim.Adam(facenet.parameters(), lr=0.001)

train_model(
    facenet,
    train_loader,
    val_loader,
    criterion,
    optimizer,
    device,
    model_file,
    num_epochs=100,
    patience=10,
)



Epoch 1/1
-----------------------
Train Loss: 0.8116
Val Loss: 0.7202


CCR:

In [None]:
# Locations of the CCR recognition dataset and of the checkpoint to write.
# NOTE(review): this checkpoint path is identical to the one used in the ChimpRec cell,
# so running both cells writes to the same file — confirm this is intended.
dataset_path = "C:/Users/julie/Documents/Unif/Mémoire/CCR_recognition_dataset"
model_file = "C:/Users/julie/OneDrive - UCL/Master_2/Mémoire/ChimpRec/Code/recognition/FaceNet/facenet_triplet_loss_5_layers_fc.pth"

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Image preprocessing pipeline.
transform = transforms.Compose([
    transforms.Resize(size=(160, 160)),  # resize every image to 160x160
    transforms.ToTensor(),  # PIL image -> tensor
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),  # per RGB channel: [0, 1] -> [-1, 1]
])

# Datasets (no face annotations here: images are read directly from each individual's folder).
train_dataset = TripletDataset(f"{dataset_path}/train", transform=transform, has_faces_label=False)
val_dataset = TripletDataset(f"{dataset_path}/val", transform=transform, has_faces_label=False)

# DataLoaders split the datasets into mini-batches; only the training set is shuffled.
train_loader = DataLoader(
    train_dataset,
    batch_size=64,
    shuffle=True,
    num_workers=0,
    pin_memory=True,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=32,
    shuffle=False,
    num_workers=0,
    pin_memory=True,
)

facenet = InceptionResnetV1(pretrained='vggface2').eval()

# Freeze the whole network, then unfreeze its last 5 child modules.
facenet.requires_grad_(False)
for layer in list(facenet.children())[-5:]:
    for param in layer.parameters():
        param.requires_grad = True

facenet = facenet.to(device)

criterion = nn.TripletMarginLoss(margin=1.0)
optimizer = optim.Adam(facenet.parameters(), lr=0.001)

train_model(
    facenet,
    train_loader,
    val_loader,
    criterion,
    optimizer,
    device,
    model_file,
    num_epochs=100,
    patience=10,
)



Epoch 1/1
-----------------------
Train Loss: 0.8840
Val Loss: 0.8122
