In [2]:
import os
import torch
from torch import nn, optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from facenet_pytorch import InceptionResnetV1
from PIL import Image

In [3]:
class TripletDataset(Dataset):
    def __init__(self, root_dir, transform=None, has_faces_label=True):
        self.root_dir = root_dir
        self.transform = transform
        self.data = []
        self.labels = []
        self.has_faces_label = has_faces_label
        
        individuals = os.listdir(root_dir)
        self.class_to_idx = {ind: i for i, ind in enumerate(individuals)}

        for ind in individuals:
            if self.has_faces_label:
                img_dir =f"{root_dir}/{ind}/images"
                label_dir = f"{root_dir}/{ind}/labels"
            else: 
                img_dir =f"{root_dir}/{ind}"
            if os.path.exists(img_dir):
                for img_name in os.listdir(img_dir):
                    img_path = os.path.join(img_dir, img_name)
                    if self.has_faces_label: 
                        label_path = os.path.join(label_dir, img_name.replace(".png", ".txt"))
                        if os.path.exists(label_path):
                           self.data.append((img_path, label_path)) #On ajoute une paire avec l'image et les annotations de sa bbox
                           self.labels.append(self.class_to_idx[ind])
                    else: 
                        self.data.append(img_path)
                        self.labels.append(self.class_to_idx[ind])
        
        self.labels = torch.tensor(self.labels)

    def __len__(self):
        return len(self.data)

    def read_yolo_label(self, label_path, img_width, img_height):
        with open(label_path, "r") as f:
            lines = f.readlines()
        if not lines:
            return None  
        
        face_annotation = None
        for line in lines: 
            if line[0] == "0":
                face_annotation = line
        _, x_center, y_center, width, height = map(float, face_annotation.split())
        
        x_center *= img_width
        y_center *= img_height
        width *= img_width
        height *= img_height
        
        x1 = int(x_center - width / 2)
        y1 = int(y_center - height / 2)
        x2 = int(x_center + width / 2)
        y2 = int(y_center + height / 2)
        
        return (x1, y1, x2, y2)

    def __getitem__(self, idx):
        label = self.labels[idx]
        if self.has_faces_label: 
            img_path, label_path = self.data[idx]
        else: 
            img_path = self.data[idx]
        
        image = Image.open(img_path).convert("RGB")
        if self.has_faces_label:
            bbox = self.read_yolo_label(label_path, image.width, image.height)
        
            if bbox:
                image = image.crop(bbox)
        
        """plt.imshow(image)
        plt.axis("off")  # Enlever les axes pour une meilleure lisibilité
        plt.title(f"Label: {label}")  # Afficher le label de l'image
        plt.show()
        """
        if self.transform:
            image = self.transform(image)

        return image, label



In [4]:
def train_classifier(model, train_loader, val_loader, criterion, optimizer, device, save_model_file, num_epochs=10, patience=3):
    model.to(device)
    criterion.to(device)
    
    best_val_loss = float("inf")
    best_model_state = None 
    epochs_without_improvement = 0

    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch + 1}/{num_epochs}")
        print("-----------------------")
        
        model.train()
        running_loss = 0.0
        correct, total = 0, 0
        
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            
            logits = model(images)
            loss = criterion(logits, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = torch.max(logits, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

        epoch_loss = running_loss / len(train_loader)
        accuracy = correct / total
        print(f"Train Loss: {epoch_loss:.4f}, Accuracy: {accuracy:.4f}")

        model.eval()
        val_loss = 0.0
        correct, total = 0, 0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                
                logits = model(images)
                loss = criterion(logits, labels)
                val_loss += loss.item()
                
                _, predicted = torch.max(logits, 1)
                correct += (predicted == labels).sum().item()
                total += labels.size(0)

        val_loss /= len(val_loader)
        val_accuracy = correct / total
        print(f"Val Loss: {val_loss:.4f}, Accuracy: {val_accuracy:.4f}")

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_model_state = model.state_dict().copy()
            epochs_without_improvement = 0
        else:
            epochs_without_improvement += 1
            print(f"Early stopping: {epochs_without_improvement}/{patience}")

            if epochs_without_improvement >= patience:
                break
    if best_model_state:
        model.load_state_dict(best_model_state)
        torch.save(best_model_state, save_model_file)

ChimpRec:

In [6]:
dataset_path = "C:/Users/julie/OneDrive - UCL/Master_2/Mémoire/Chimprec Dataset/ChimpRec Recognition/Preprocess"
model_file = "C:/Users/julie/OneDrive - UCL/Master_2/Mémoire/ChimpRec/Code/recognition/FaceNet/facenet_triplet_loss_5_layers_fc.pth"

# Définition des transformations pour les images
transform = transforms.Compose([
    transforms.Resize((160, 160)),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

num_classes = len(os.listdir(dataset_path + "/train"))
facenet = InceptionResnetV1(pretrained='vggface2', classify=False)
facenet.fc = nn.Linear(512, num_classes)  # Ajout d'une couche fully connected pour la classification
for param in facenet.fc.parameters():
    param.requires_grad = True
facenet = facenet.to(device)
for param in facenet.parameters():
    param.requires_grad = False
for layer in list(facenet.children())[-10:]:
    for param in layer.parameters():
        param.requires_grad = True

train_dataset = TripletDataset(f"{dataset_path}/train", transform=transform)
val_dataset = TripletDataset(f"{dataset_path}/val", transform=transform)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=0, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=0, pin_memory=True)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(filter(lambda p: p.requires_grad, facenet.parameters()), lr=0.001)

train_classifier(facenet, train_loader, val_loader, criterion, optimizer, device, model_file, num_epochs=1, patience=10)


Epoch 1/1
-----------------------
Train Loss: 6.1029, Accuracy: 0.4286
Val Loss: 6.0374, Accuracy: 0.3529


CCR:

In [8]:
dataset_path = "C:/Users/julie/Documents/Unif/Mémoire/CCR_recognition_dataset"
model_file = "C:/Users/julie/OneDrive - UCL/Master_2/Mémoire/ChimpRec/Code/recognition/FaceNet/facenet_triplet_loss_5_layers_fc.pth"

# Définition des transformations pour les images
transform = transforms.Compose([
    transforms.Resize((160, 160)),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

num_classes = len(os.listdir(dataset_path + "/train"))
facenet = InceptionResnetV1(pretrained='vggface2', classify=False)
facenet.fc = nn.Linear(512, num_classes)  # Ajout d'une couche fully connected pour la classification
for param in facenet.fc.parameters():
    param.requires_grad = True
facenet = facenet.to(device)
for param in facenet.parameters():
    param.requires_grad = False
for layer in list(facenet.children())[-10:]:
    for param in layer.parameters():
        param.requires_grad = True

train_dataset = TripletDataset(f"{dataset_path}/train", transform=transform, has_faces_label=False)
val_dataset = TripletDataset(f"{dataset_path}/val", transform=transform, has_faces_label=False)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=0, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=0, pin_memory=True)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(filter(lambda p: p.requires_grad, facenet.parameters()), lr=0.001)

train_classifier(facenet, train_loader, val_loader, criterion, optimizer, device, model_file, num_epochs=1, patience=10)



Epoch 1/1
-----------------------
Train Loss: 5.7227, Accuracy: 0.5874
Val Loss: 6.1138, Accuracy: 0.0996
