In [None]:
import os
import torch
from torch import nn, optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from facenet_pytorch import InceptionResnetV1
import random
from PIL import Image
import torch.nn.functional as F
import heapq
# import matplotlib.pyplot as plt
from collections import Counter

<h2>Entraînement:<h2>

In [10]:
class TripletDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.data = []
        self.labels = []
        
        individuals = os.listdir(root_dir)
        self.class_to_idx = {ind: i for i, ind in enumerate(individuals)} #lie les individus (AD, BS, ...) à un ID
        
        #Pour chaque individu, on prend chaque image et le label correspondant et on les ajoute dans les attributs de classe
        for ind in individuals:
            #img_dir =f"{root_dir}/{ind}/images"
            #label_dir = f"{root_dir}/{ind}/labels"
            img_dir =f"{root_dir}/{ind}"
            if os.path.exists(img_dir): #and os.path.exists(label_dir):
                for img_name in os.listdir(img_dir):
                    img_path = os.path.join(img_dir, img_name)
                    #label_path = os.path.join(label_dir, img_name.replace(".png", ".txt"))
                    #if os.path.exists(label_path):
                     #   self.data.append((img_path, label_path)) #On ajoute une paire avec l'image et les annotations de sa bbox
                     #   self.labels.append(self.class_to_idx[ind])
                    self.data.append(img_path)
                    self.labels.append(self.class_to_idx[ind])
        
        self.labels = torch.tensor(self.labels)

    def __len__(self):
        return len(self.data)

    def read_yolo_label(self, label_path, img_width, img_height):
        with open(label_path, "r") as f:
            lines = f.readlines()
        if not lines:
            return None  # Pas de bounding box trouvée
        
        #On récupère la bbox du visage et pas celle du corps
        face_annotation = None
        for line in lines: 
            if line[0] == "0":  #0 est le label des annotations des visages
                face_annotation = line
        _, x_center, y_center, width, height = map(float, face_annotation.split())
        
        # Conversion des coordonnées normalisées en pixels
        x_center *= img_width
        y_center *= img_height
        width *= img_width
        height *= img_height
        
        x1 = int(x_center - width / 2)
        y1 = int(y_center - height / 2)
        x2 = int(x_center + width / 2)
        y2 = int(y_center + height / 2)
        
        return (x1, y1, x2, y2)

    def __getitem__(self, idx):
        #img_path, label_path = self.data[idx]
        img_path = self.data[idx]
        
        anchor_label = self.labels[idx]
        
        image = Image.open(img_path).convert("RGB")
        #bbox = self.read_yolo_label(label_path, image.width, image.height)
    
        #if bbox:
         #   image = image.crop(bbox)
        
        if self.transform:
            anchor_img = self.transform(image)
        
        positive_indices = torch.where(self.labels == anchor_label)[0]
        negative_indices = torch.where(self.labels != anchor_label)[0]
        
        positive_idx = random.choice(positive_indices)
        negative_idx = random.choice(negative_indices)
        
        """pos_img_path, pos_label_path = self.data[positive_idx]
        neg_img_path, neg_label_path = self.data[negative_idx]"""
        pos_img_path = self.data[positive_idx]
        neg_img_path = self.data[negative_idx]
        
        pos_image = Image.open(pos_img_path).convert("RGB")
        neg_image = Image.open(neg_img_path).convert("RGB")
        
        """pos_bbox = self.read_yolo_label(pos_label_path, pos_image.width, pos_image.height)
        neg_bbox = self.read_yolo_label(neg_label_path, neg_image.width, neg_image.height)
        
        if pos_bbox:
            pos_image = pos_image.crop(pos_bbox)
        if neg_bbox:
            neg_image = neg_image.crop(neg_bbox)"""
        
        if self.transform:
            positive_img = self.transform(pos_image)
            negative_img = self.transform(neg_image)
        
        return anchor_img, positive_img, negative_img

In [11]:
def train_model(model, train_loader, val_loader, criterion, optimizer, device, num_epochs=10, patience=3):
    model.to(device)
    criterion.to(device)
    
    best_val_loss = float("inf")
    best_model_state = None 
    epochs_without_improvement = 0

    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch + 1}/{num_epochs}")
        print("-----------------------")
        
        model.train() #mets le modèle en mode entrainement (modèle màj ses poids en fonction des gradients)
        running_loss = 0.0
        
        # Pour chaque mini-batch, on calcule les embedding, la loss et on màj les poids en fonction
        for anchor, positive, negative in train_loader:
            anchor, positive, negative = anchor.to(device), positive.to(device), negative.to(device)
            optimizer.zero_grad() #réinitialise les gradinets car .backward() les accumule
            
            #calcule les embeddings
            anchor_output = model(anchor)
            positive_output = model(positive)
            negative_output = model(negative)

            #calcule la loss en fonction des embeddings
            loss = criterion(anchor_output, positive_output, negative_output)
            loss.backward() #calcule le gradient de la loss grâce à la back propagation
            optimizer.step() #màj les poids du modèle

            running_loss += loss.item()

        epoch_loss = running_loss / len(train_loader)
        print(f"Train Loss: {epoch_loss:.4f}")

        # Calcule la loss sur le validation set
        model.eval() #mets le modèle en mode evaluation: les poids sont figés (+Batch Normalization et Dropout, fonctionnent différemment)
        val_loss = 0.0
        with torch.no_grad():
            for anchor, positive, negative in val_loader:
                anchor, positive, negative = anchor.to(device), positive.to(device), negative.to(device)
                
                anchor_output = model(anchor)
                positive_output = model(positive)
                negative_output = model(negative)
                
                loss = criterion(anchor_output, positive_output, negative_output)
                val_loss += loss.item()

        val_loss /= len(val_loader)
        print(f"Val Loss: {val_loss:.4f}")

        #Si le score ne s'est pas amélioré depuis un certains nombres d'epcoh, on stoppe plus tôt l'entrainement 
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_model_state = model.state_dict().copy()
            epochs_without_improvement = 0
        else:
            epochs_without_improvement += 1
            print(f"Early stopping: {epochs_without_improvement}/{patience}")

            if epochs_without_improvement >= patience:
                print("Early stopping déclenché. Arrêt de l'entraînement.")
                break
    if best_model_state:
        model.load_state_dict(best_model_state)
        #torch.save(best_model_state, "C:/Users/julie/OneDrive - UCL/Master_2/Mémoire/ChimpRec/Code/recognition/FaceNet/facenet_triplet_loss_5_layers.pth")
        torch.save(best_model_state,  "C:/Users/julie/Documents/Unif/Mémoire/CCR_recognition_dataset/facenet_triplet_loss_5_layers.pth")

In [None]:
#Chemin du dataset
#dataset_path = "C:/Users/julie/OneDrive - UCL/Master_2/Mémoire/Chimprec Dataset/ChimpRec Recognition/Preprocess" C:\Users\julie\Documents\Unif\Mémoire\CCR_recognition_dataset
dataset_path = "C:/Users/Theo/Documents/Unif/test_recognition"

# Transformations des images
transform = transforms.Compose([
    transforms.Resize((160, 160)), #redimensionne en une image (160,160)
    transforms.ToTensor(), #convertit l'image en tensor
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]) #normalise chaque canal de couleur RGB (passe de [0,1] -> [-1,1]
])


In [13]:
# Chargement des datasets
train_dataset = TripletDataset(f"{dataset_path}/train", transform=transform)
val_dataset = TripletDataset(f"{dataset_path}/val", transform=transform)

#DataLoader facilite le chargement, le traitement et la gestion des données lors de l'entraînement et l'évaluation d'un modèle
#permet de diviser un dataset en mini-batches, d'appliquer du shuffling et de paralléliser
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=0, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=0, pin_memory=True)

facenet = InceptionResnetV1(pretrained='vggface2').eval()

# Débloquer les 5 dernières couches
for param in facenet.parameters():
    param.requires_grad = False
for layer in list(facenet.children())[-5:]:
    for param in layer.parameters():
        param.requires_grad = True

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
facenet = facenet.to(device)

criterion = nn.TripletMarginLoss(margin=1.0)
optimizer = optim.Adam(facenet.parameters(), lr=0.001)

train_model(facenet, train_loader, val_loader, criterion, optimizer, device, num_epochs=100, patience=10)



Epoch 1/100
-----------------------
Train Loss: 0.9180
Val Loss: 0.7465

Epoch 2/100
-----------------------
Train Loss: 0.8625
Val Loss: 0.7578
Early stopping: 1/10

Epoch 3/100
-----------------------
Train Loss: 0.8251
Val Loss: 0.7321

Epoch 4/100
-----------------------
Train Loss: 0.8175
Val Loss: 0.7128

Epoch 5/100
-----------------------
Train Loss: 0.8094
Val Loss: 0.6656

Epoch 6/100
-----------------------
Train Loss: 0.7962
Val Loss: 0.6901
Early stopping: 1/10

Epoch 7/100
-----------------------
Train Loss: 0.7897
Val Loss: 0.6630

Epoch 8/100
-----------------------
Train Loss: 0.7821
Val Loss: 0.7056
Early stopping: 1/10

Epoch 9/100
-----------------------
Train Loss: 0.7813
Val Loss: 0.6970
Early stopping: 2/10

Epoch 10/100
-----------------------
Train Loss: 0.7770
Val Loss: 0.6898
Early stopping: 3/10

Epoch 11/100
-----------------------
Train Loss: 0.7717
Val Loss: 0.6765
Early stopping: 4/10

Epoch 12/100
-----------------------
Train Loss: 0.7701
Val Loss: 0.

<h2>Tests:<h2>

In [None]:
# Fonction pour obtenir l'embedding de l'image
def get_embedding(model, img_tensor):
    with torch.no_grad():
        embedding = model(img_tensor)
    return F.normalize(embedding, p=2, dim=1)  # Normalisation L2

def get_most_predicted_class(labels): 
    counter = {}
    for label in labels: 
        ind = list(train_dataset.class_to_idx.keys())[label.item()]
        if ind in counter.keys(): 
            counter[ind] = counter[ind] + 1
        else: 
            counter[ind] = 1
    return max(counter, key=counter.get)

# Comparer avec les embeddings des autres images dans le dataset pour identifier le chimpanzé
def compare_embeddings(input_embedding, dataset, model, k=5):
    distances = []  # Stocke (distance, label, image recadrée)
    
    for img_path, label_path in dataset.data:
        image = Image.open(img_path).convert("RGB")
        bbox = dataset.read_yolo_label(label_path, image.width, image.height)
        
        if bbox:
            image = image.crop(bbox) 
        
        img_tensor = transform(image).unsqueeze(0).to(device)
        embedding = get_embedding(model, img_tensor)
        distance = F.pairwise_distance(input_embedding, embedding).item()
        distances.append((distance, dataset.labels[dataset.data.index((img_path, label_path))], image))

    # Trouver les k plus proches voisins
    k_nearest = heapq.nsmallest(k, distances, key=lambda x: x[0])  # Trier par distance croissante

    # Récupérer les labels des k voisins
    k_labels = [label for _, label, _ in k_nearest]
    for x in k_labels:
        print(list(train_dataset.class_to_idx.keys())[x.item()])

    # Trouver la classe majoritaire
    predicted_label = get_most_predicted_class(k_labels)

    return predicted_label, k_nearest[0][0]  # Retourne la classe prédite et la plus petite distance


def predict_face(img_path, facenet, train_dataset): 
    img = Image.open(img_path).convert("RGB")

    # Appliquer les transformations
    transform = transforms.Compose([
        transforms.Resize((160, 160)),
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    ])

    img_tensor = transform(img).unsqueeze(0).to(device)  # Ajouter une dimension batch et déplacer sur GPU/CPU
    input_embedding = get_embedding(facenet, img_tensor)

    # Comparer l'embedding de l'image d'entrée avec ceux du dataset
    predicted_label, min_distance = compare_embeddings(input_embedding, train_dataset, facenet, 5)

    # Afficher l'identité prédite
    # Convertir les keys en liste et obtenir la clé correspondant à l'index

    print(f"Identité prédite : {predicted_label} avec une distance de {min_distance}")



In [None]:
"""# Charger le modèle
facenet = InceptionResnetV1(pretrained='vggface2').eval()

# Débloquer les dernières couches (même chose que lors de l'entraînement)
for param in facenet.parameters():
    param.requires_grad = False
for layer in list(facenet.children())[-5:]:
    for param in layer.parameters():
        param.requires_grad = True

# Charger les poids du modèle
model_path = "C:/Users/julie/OneDrive - UCL/Master_2/Mémoire/ChimpRec/Code/recognition/FaceNet/facenet_triplet_loss_5_layers.pth"
facenet.load_state_dict(torch.load(model_path))

# Déplacer le modèle vers le bon device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
facenet = facenet.to(device)

train_dataset = TripletDataset(f"{dataset_path}/train", transform=transform)"""

In [None]:
"""test_dataset = "C:/Users/julie/Documents/Unif/Mémoire/Test_recognition3"
imgs = os.listdir(test_dataset)

for img in imgs: 
    print(img)
    predict_face(f"{test_dataset}/{img}", facenet, train_dataset)"""

AD.png
AD
AD
AD
AD
AD
{'AD': 5}
AD
Identité prédite : AD avec une distance de 0.09823525696992874
DK.png
DK
DK
DK
DK
DK
{'DK': 5}
DK
Identité prédite : DK avec une distance de 0.27688586711883545
KR.png
KR
KR
KR
KR
KR
{'KR': 5}
KR
Identité prédite : KR avec une distance de 0.0667252466082573
TC.png
TC
TC
TC
TC
TC
{'TC': 5}
TC
Identité prédite : TC avec une distance de 0.20659366250038147
TS.png
TS
TS
IV
TS
IV
{'TS': 3, 'IV': 2}
TS
Identité prédite : TS avec une distance de 0.2588600218296051


<h2>Test de classificateur:<h2>

In [17]:
import os
import torch
from torch import nn, optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from facenet_pytorch import InceptionResnetV1
import random
from PIL import Image
import torch.nn.functional as F
import heapq
import matplotlib.pyplot as plt
from collections import Counter

class TripletDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.data = []
        self.labels = []
        
        individuals = os.listdir(root_dir)
        self.class_to_idx = {ind: i for i, ind in enumerate(individuals)}
        
        """ for ind in individuals:
            img_dir =f"{root_dir}/{ind}/images"
            label_dir = f"{root_dir}/{ind}/labels"
            if os.path.exists(img_dir) and os.path.exists(label_dir):
                for img_name in os.listdir(img_dir):
                    img_path = os.path.join(img_dir, img_name)
                    label_path = os.path.join(label_dir, img_name.replace(".png", ".txt"))
                    if os.path.exists(label_path):
                        with open(label_path, "r") as f:
                            lines = f.readlines()
                        if len(lines) != 0:
                            self.data.append((img_path, label_path))
                            self.labels.append(self.class_to_idx[ind])"""

        for ind in individuals:
            #img_dir =f"{root_dir}/{ind}/images"
            #label_dir = f"{root_dir}/{ind}/labels"
            img_dir =f"{root_dir}/{ind}"
            if os.path.exists(img_dir): #and os.path.exists(label_dir):
                for img_name in os.listdir(img_dir):
                    img_path = os.path.join(img_dir, img_name)
                    #label_path = os.path.join(label_dir, img_name.replace(".png", ".txt"))
                    #if os.path.exists(label_path):
                     #   self.data.append((img_path, label_path)) #On ajoute une paire avec l'image et les annotations de sa bbox
                     #   self.labels.append(self.class_to_idx[ind])
                    self.data.append(img_path)
                    self.labels.append(self.class_to_idx[ind])
        
        self.labels = torch.tensor(self.labels)

    def __len__(self):
        return len(self.data)

    def read_yolo_label(self, label_path, img_width, img_height):
        with open(label_path, "r") as f:
            lines = f.readlines()
        if not lines:
            return None  
        
        face_annotation = None
        for line in lines: 
            if line[0] == "0":
                face_annotation = line
        _, x_center, y_center, width, height = map(float, face_annotation.split())
        
        x_center *= img_width
        y_center *= img_height
        width *= img_width
        height *= img_height
        
        x1 = int(x_center - width / 2)
        y1 = int(y_center - height / 2)
        x2 = int(x_center + width / 2)
        y2 = int(y_center + height / 2)
        
        return (x1, y1, x2, y2)

    def __getitem__(self, idx):
        #img_path, label_path = self.data[idx]
        img_path = self.data[idx]
        label = self.labels[idx]
        
        image = Image.open(img_path).convert("RGB")
        """bbox = self.read_yolo_label(label_path, image.width, image.height)
    
        if bbox:
            image = image.crop(bbox)"""
        
        """plt.imshow(image)
        plt.axis("off")  # Enlever les axes pour une meilleure lisibilité
        plt.title(f"Label: {label}")  # Afficher le label de l'image
        plt.show()
        """
        if self.transform:
            image = self.transform(image)

        return image, label

def train_classifier(model, train_loader, val_loader, criterion, optimizer, device, num_epochs=10, patience=3):
    model.to(device)
    criterion.to(device)
    
    best_val_loss = float("inf")
    best_model_state = None 
    epochs_without_improvement = 0

    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch + 1}/{num_epochs}")
        print("-----------------------")
        
        model.train()
        running_loss = 0.0
        correct, total = 0, 0
        
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            
            logits = model(images)
            loss = criterion(logits, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = torch.max(logits, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

        epoch_loss = running_loss / len(train_loader)
        accuracy = correct / total
        print(f"Train Loss: {epoch_loss:.4f}, Accuracy: {accuracy:.4f}")

        model.eval()
        val_loss = 0.0
        correct, total = 0, 0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                
                logits = model(images)
                loss = criterion(logits, labels)
                val_loss += loss.item()
                
                _, predicted = torch.max(logits, 1)
                correct += (predicted == labels).sum().item()
                total += labels.size(0)

        val_loss /= len(val_loader)
        val_accuracy = correct / total
        print(f"Val Loss: {val_loss:.4f}, Accuracy: {val_accuracy:.4f}")

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_model_state = model.state_dict().copy()
            epochs_without_improvement = 0
        else:
            epochs_without_improvement += 1
            print(f"Early stopping: {epochs_without_improvement}/{patience}")

            if epochs_without_improvement >= patience:
                break
    if best_model_state:
        model.load_state_dict(best_model_state)
        #torch.save(best_model_state, "C:/Users/julie/OneDrive - UCL/Master_2/Mémoire/ChimpRec/Code/recognition/FaceNet/facenet_triplet_loss_5_layers_fc.pth")
        torch.save(best_model_state, "C:/Users/julie/Documents/Unif/Mémoire/CCR_recognition_dataset/facenet_triplet_loss_10_layers_fc.pth")

# Définition des transformations pour les images
transform = transforms.Compose([
    transforms.Resize((160, 160)),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

num_classes = len(os.listdir(dataset_path + "/train"))
facenet = InceptionResnetV1(pretrained='vggface2', classify=False)
facenet.fc = nn.Linear(512, num_classes)  # Ajout d'une couche fully connected pour la classification
for param in facenet.fc.parameters():
    param.requires_grad = True
facenet = facenet.to(device)
for param in facenet.parameters():
    param.requires_grad = False
for layer in list(facenet.children())[-10:]:
    for param in layer.parameters():
        param.requires_grad = True

#dataset_path = "C:/Users/julie/OneDrive - UCL/Master_2/Mémoire/Chimprec Dataset/ChimpRec Recognition/Preprocess" C:/Users/julie/Documents/Unif/Mémoire/CCR_recognition_dataset
dataset_path = "C:/Users/julie/Documents/Unif/Mémoire/CCR_recognition_dataset"
train_dataset = TripletDataset(f"{dataset_path}/train", transform=transform)
val_dataset = TripletDataset(f"{dataset_path}/val", transform=transform)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=0, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=0, pin_memory=True)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(filter(lambda p: p.requires_grad, facenet.parameters()), lr=0.001)

train_classifier(facenet, train_loader, val_loader, criterion, optimizer, device, num_epochs=50, patience=10)



Epoch 1/50
-----------------------
Train Loss: 5.7695, Accuracy: 0.5590
Val Loss: 6.2758, Accuracy: 0.0672

Epoch 2/50
-----------------------
Train Loss: 5.5332, Accuracy: 0.7391
Val Loss: 5.8671, Accuracy: 0.3104

Epoch 3/50
-----------------------
Train Loss: 5.4308, Accuracy: 0.8262
Val Loss: 5.6438, Accuracy: 0.5412

Epoch 4/50
-----------------------
Train Loss: 5.4203, Accuracy: 0.8352
Val Loss: 5.8924, Accuracy: 0.3352
Early stopping: 1/10

Epoch 5/50
-----------------------
Train Loss: 5.4481, Accuracy: 0.7960
Val Loss: 5.8364, Accuracy: 0.3216
Early stopping: 2/10

Epoch 6/50
-----------------------
Train Loss: 5.3729, Accuracy: 0.8762
Val Loss: 5.7791, Accuracy: 0.4160
Early stopping: 3/10

Epoch 7/50
-----------------------
Train Loss: 5.4014, Accuracy: 0.8424
Val Loss: 5.6073, Accuracy: 0.5848

Epoch 8/50
-----------------------
Train Loss: 5.3809, Accuracy: 0.8732
Val Loss: 5.7284, Accuracy: 0.4684
Early stopping: 1/10

Epoch 9/50
-----------------------
Train Loss: 5.38