Création d'un dataset adapté pour utiliser le modèle de reconnaissance sur le dataset CCR:

In [74]:
import pandas as pd
import cv2
import os
import random
import math
from tqdm import tqdm
import numpy as np
import random
import matplotlib.pyplot as plt
from PIL import Image

In [75]:
# Machine-specific locations used by the rest of the notebook.
# Root folder of the raw videos; the code below joins it with a year
# sub-folder ("2012" / "2013") and the video file name.
videos_path = "C:/Users/julie/OneDrive - UCL/Master_2/Mémoire/ChimpRec/ChimpRec-Dataset/CCR/data/videos"
# CSV of face annotations, loaded with pandas further down.
annotations_path = "C:/Users/julie/OneDrive - UCL/Master_2/Mémoire/ChimpRec/ChimpRec-Dataset/CCR/annotations/face_data.csv"
# Output root; the "train", "val" and "test" folders are created under it.
dataset_path = "C:/Users/julie/Documents/Unif/Mémoire/CCR_recognition_dataset"

In [76]:
def get_video_resolution(video_path):
    """Return the ``(width, height)`` in pixels of the video at ``video_path``.

    Args:
        video_path: Path of the video file, passed to ``cv2.VideoCapture``.

    Returns:
        A ``(width, height)`` tuple of ints. If the video cannot be opened a
        warning is printed and ``(0, 0)`` is returned, so that callers can
        tell the failure apart from a genuinely small video.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            # Make the failure visible instead of silently reporting a
            # zero-sized video.
            print(f"Erreur ouverture vidéo : {video_path}")
            return 0, 0
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    finally:
        # Always free the capture handle, even if a property read fails.
        cap.release()
    return width, height


In [77]:
# Load the face annotations.
df = pd.read_csv(annotations_path)

# Labels that are removed from the list of individuals to process.
excluded_labels = ['NEGATIVE', 'VELU', 'PAMA', 'YO']
individuals_name = [
    label for label in df['label'].unique() if label not in excluded_labels
]

# Map every annotated video name to its (width, height) resolution.
videos_resolution = {}
# List the 2012 folder once: a set gives O(1) membership tests instead of
# re-reading the directory for every video.
videos_2012 = set(os.listdir(os.path.join(videos_path, "2012")))
for video in df['video'].unique():
    # Any video that is not found in the 2012 folder is looked up in 2013.
    year = 2012 if video in videos_2012 else 2013
    videos_resolution[video] = get_video_resolution(os.path.join(videos_path, str(year), video))

def _top_up_from_train(subset, train_subset, target, label):
    """Move random rows from ``train_subset`` into ``subset`` up to ``target`` rows.

    Returns a ``(subset, train_subset)`` pair: the (possibly enlarged) subset
    and the train subset without the rows that were moved. If the train
    subset is too small, everything available is moved and a warning is
    printed instead of raising.
    """
    missing = target - len(subset)
    if missing <= 0:
        return subset, train_subset
    if missing > len(train_subset):
        print(f"Pas assez d'images dans le train set pour compléter {label}")
        missing = len(train_subset)
    if missing == 0:
        return subset, train_subset
    extra = train_subset.sample(n=missing, random_state=42, replace=False)
    return pd.concat([subset, extra]), train_subset.drop(extra.index)


def _sample_at_most(frames, n, label):
    """Return ``n`` random rows of ``frames``, or all of them if fewer exist."""
    if len(frames) < n:
        print(f"Seulement {len(frames)} images disponibles pour {label} (demandé : {n})")
        n = len(frames)
    if n == 0:
        return frames
    return frames.sample(n=n, random_state=42, replace=False)


# individual -> {"train": DataFrame, "validation": DataFrame, "test": DataFrame}
data_split = {}

for individual in individuals_name:
    print(individual)
    df_individual = df[df['label'] == individual].copy()

    # NOTE(review): no seed is set for `random` in the visible code, so the
    # assignment of videos to splits changes from one run to the next.
    videos = list(df_individual['video'].unique())
    random.shuffle(videos)

    # Split the videos 70% train, 15% validation, 15% test.
    train_end = int(0.7 * len(videos))
    val_end = int(0.85 * len(videos))
    train_videos = videos[:train_end]
    val_videos = videos[train_end:val_end]
    test_videos = videos[val_end:]

    # Keep only the crops that are large enough (more than 100 px on each
    # side). Annotation sizes are multiplied by the video resolution to get
    # pixels. A boolean mask keeps the original columns, dtypes and index
    # even when no row qualifies.
    frame_widths = df_individual['video'].map(lambda v: videos_resolution[v][0])
    frame_heights = df_individual['video'].map(lambda v: videos_resolution[v][1])
    large_enough = (
        (df_individual['w'] * frame_widths > 100)
        & (df_individual['h'] * frame_heights > 100)
    )
    df_filtered = df_individual[large_enough]

    train_subset = df_filtered[df_filtered['video'].isin(train_videos)]
    val_subset = df_filtered[df_filtered['video'].isin(val_videos)]
    test_subset = df_filtered[df_filtered['video'].isin(test_videos)]

    # If the validation or test set does not hold enough images, take the
    # missing ones from the train set.
    # NOTE(review): rows moved this way come from training videos, so those
    # videos contribute frames to more than one split.
    val_subset, train_subset = _top_up_from_train(
        val_subset, train_subset, 250, f"{individual} (validation)"
    )
    test_subset, train_subset = _top_up_from_train(
        test_subset, train_subset, 250, f"{individual} (test)"
    )

    # Cap the requested sizes at what is available to avoid the ValueError
    # that `sample` raises when asked for more rows than exist.
    train_frames = _sample_at_most(train_subset, 1000, f"{individual} (train)")
    val_frames = _sample_at_most(val_subset, 250, f"{individual} (validation)")
    test_frames = _sample_at_most(test_subset, 250, f"{individual} (test)")

    # Store the results.
    data_split[individual] = {
        "train": train_frames,
        "validation": val_frames,
        "test": test_frames
    }


JEJE
PELEY
FANLE
TUA
FANWA
FANA
JIRE
JOYA
FLANLE
FOAF


In [None]:
# Function that extracts the face crops and saves them as images
def save_cropped_faces(data, split):
    """Crop every annotated face from its video frame and save it as a JPEG.

    Args:
        data: Mapping of individual name to a DataFrame of annotations. Each
            row is read for "year", "video", "frame", "x", "y", "w" and "h";
            the box values are multiplied by the frame size to get pixels.
        split: Name of the split folder under ``dataset_path``. For "train"
            and "val" the images go into one sub-folder per individual; for
            any other value they are written directly in the split folder.

    Rows whose video or frame cannot be read, or whose crop is empty, are
    reported with a printed message and skipped.
    """
    split_path = os.path.join(dataset_path, split)
    per_individual_dirs = split in ["train", "val"]

    for individual, frames in data.items():
        if per_individual_dirs:
            individual_path = os.path.join(split_path, individual)
        else:
            individual_path = split_path
        os.makedirs(individual_path, exist_ok=True)

        # Number of images saved so far; also the suffix of the next file name.
        saved_count = 0

        for _, row in frames.iterrows():
            video_path = os.path.join(videos_path, str(row["year"]), row["video"])
            cap = cv2.VideoCapture(video_path)
            try:
                if not cap.isOpened():
                    print(f"+Erreur ouverture vidéo : {row['video']}")
                    continue

                cap.set(cv2.CAP_PROP_POS_FRAMES, row["frame"])
                ret, frame = cap.read()
            finally:
                # Release the capture on every path, including the early skip.
                cap.release()

            if not ret or frame is None:
                print(f"Impossible de lire la frame {row['frame']} de {row['video']}")
                continue

            h, w, _ = frame.shape
            x1 = int(row["x"] * w)
            y1 = int(row["y"] * h)
            x2 = x1 + int(row["w"] * w)
            y2 = y1 + int(row["h"] * h)

            # Clamp the box to the frame borders.
            x1, y1 = max(0, x1), max(0, y1)
            x2, y2 = min(w, x2), min(h, y2)

            face_crop = frame[y1:y2, x1:x2]

            # Skip boxes that end up empty after clamping.
            if face_crop.size == 0:
                print(f"face_crop vide pour {row['video']} frame {row['frame']}")
                continue

            # OpenCV decodes frames as BGR while PIL interprets arrays as RGB:
            # convert first, otherwise red and blue are swapped in the file.
            face_rgb = cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB)

            filepath = os.path.join(individual_path, f"{individual}_{saved_count}.jpg")
            Image.fromarray(face_rgb).save(filepath)
            saved_count += 1

In [79]:
# Create the split folders if they do not exist yet.
for split in ("train", "val", "test"):
    os.makedirs(os.path.join(dataset_path, split), exist_ok=True)

# Save the images: each key of the per-individual split dict is written to
# its output folder, in the order train, validation, test.
for split_key, split_dir in (("train", "train"), ("validation", "val"), ("test", "test")):
    frames_by_individual = {
        name: parts[split_key] for name, parts in data_split.items()
    }
    save_cropped_faces(frames_by_individual, split_dir)
