In [None]:
import sys 
#path to append: "./ChimpRec/Code"
sys.path.append(...)

from chimplib.imports import pd, cv2, os, np, Image
from chimplib.utils import yolo_to_pixel_coord, face_to_yolo_relative_to_body

In [None]:
#Configuration cell: every `...` below is a placeholder that must be replaced
#with a real path before this cell is run.

#Path to the CCR dataset video folder; videos are looked up as <videos_path>/<year>/<video>
videos_path = ...
#Path to the CCR body annotations file (body_data.csv)
bodies_annotations_path = ...
#Path to the CCR face annotations file (face_data.csv)
faces_annotations_path = ...
#Path to the CCR frame annotations file (frame_data.csv)
frames_annotations_path = ...
#Path to the folder where the dataset is to be created
#(images and labels are written to its images/train and labels/train subfolders)
output_dataset_path = ...

#Load the three annotation tables into DataFrames.
#NOTE(review): frames_annotations is not read by the cells visible here - confirm it is still needed.
bodies_annotations = pd.read_csv(bodies_annotations_path)
faces_annotations = pd.read_csv(faces_annotations_path)
frames_annotations = pd.read_csv(frames_annotations_path)

In [None]:
# @input:
# video_path: path to the video file
# frame_number: index of the frame to extract (passed to cv2.CAP_PROP_POS_FRAMES)
# @output:
# frame: the extracted frame converted from BGR to RGB, or None if the file does
#        not exist or the frame cannot be read (an error message is printed)
def get_frame(video_path, frame_number):
    # Guard clause: nothing to open if the file is not on disk.
    if not os.path.exists(video_path):
        print(f"Erreur : Impossible de trouver {video_path}")
        return None

    cap = cv2.VideoCapture(video_path)
    try:
        # Seek to the requested frame, then decode it.
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
        ret, frame = cap.read()
    finally:
        # Always free the capture handle, even if seeking or decoding raises.
        cap.release()

    # read() reports failure through its flag (e.g. frame index past the end of the video).
    if not ret or frame is None:
        print(f"Erreur : Impossible de lire la frame {frame_number} de {video_path}")
        return None

    # OpenCV decodes to BGR; callers of this function work with RGB images.
    return cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        

# @input:
# annot_row: a single annotation from the CCR dataset (pd.Series or dict) providing
#            the keys "year", "video", "frame", "x", "y", "w" and "h"
# box_type: type of object to crop ("face" or "body")
# @output:
# cropped_img: cropped image (RGB) of the object, or None if the frame is missing
#              or the box has no area inside the frame
# bbox_pixels: pixel coordinates (x1, y1, x2, y2) of the cropped bbox, clamped to the
#              frame boundaries, or None in the same error cases
def get_cropped_object(annot_row, box_type):
    year = annot_row["year"]
    video = annot_row["video"]
    frame_number = annot_row["frame"]
    coord = annot_row["x"], annot_row["y"], annot_row["w"], annot_row["h"]

    # Videos are stored as <videos_path>/<year>/<video>.
    video_path = f"{videos_path}/{year}/{video}"
    frame = get_frame(video_path, frame_number)
    if frame is None:
        print(f"Frame {frame_number} from {video_path} is missing.")
        return None, None

    height, width, _ = frame.shape
    # Retrieve pixel coordinates and clamp them to the image boundaries.
    # NOTE(review): the corners are used directly as slice indices below, so
    # yolo_to_pixel_coord presumably returns integers - confirm in chimplib.utils.
    x1, y1, x2, y2 = yolo_to_pixel_coord(coord, width, height, box_type=box_type)
    x1, y1 = max(0, x1), max(0, y1)
    x2, y2 = min(width, x2), min(height, y2)

    # A box lying entirely outside the frame (or with zero size) would yield an
    # empty crop; report it as an error instead of returning an unusable image.
    if x2 <= x1 or y2 <= y1:
        print(f"Empty {box_type} box in frame {frame_number} from {video_path}.")
        return None, None

    # Return the cropped image and the coordinates that were actually used.
    return frame[y1:y2, x1:x2], (x1, y1, x2, y2)

<h3>Creation of the dataset: 154 images per individual, each cropped around the chimpanzee's body, with the face bounding boxes re-expressed relative to the cropped image</h3>

In [None]:
# Number of face annotations sampled per individual
# (13 individuals, so approximately 2000 annotations kept in total).
IMAGES_PER_INDIVIDUAL = 154
# Columns that identify one individual in one frame of one video.
MATCH_KEYS = ["year", "video", "frame", "label"]

# Make sure the output folders exist before anything is written into them.
images_dir = os.path.join(output_dataset_path, "images", "train")
labels_dir = os.path.join(output_dataset_path, "labels", "train")
os.makedirs(images_dir, exist_ok=True)
os.makedirs(labels_dir, exist_ok=True)

# One row per key, so the inner merge below can only filter faces, never duplicate them.
body_keys = bodies_annotations[MATCH_KEYS].drop_duplicates()

#ignore annotations linked to NEGATIVE
filtered_labels = faces_annotations[faces_annotations["label"] != "NEGATIVE"]["label"].unique()

for name in filtered_labels:
    # Index of the next sample written for this individual (used in the file names).
    counter = 0

    #Keep only the annotations on the face which have a corresponding labelled body
    valid_faces = faces_annotations[faces_annotations["label"] == name].merge(
        body_keys,
        on=MATCH_KEYS,
        how="inner"
    )

    # Keep a reproducible random subset; never ask for more rows than exist,
    # since DataFrame.sample raises when n exceeds the population.
    n_samples = min(IMAGES_PER_INDIVIDUAL, len(valid_faces))
    keeped_face_annotations = valid_faces.sample(n=n_samples, random_state=42)

    for _, face in keeped_face_annotations.iterrows():
        # Find the corresponding body annotation
        matching_body = bodies_annotations[
            (bodies_annotations["year"] == face["year"]) &
            (bodies_annotations["video"] == face["video"]) &
            (bodies_annotations["frame"] == face["frame"]) &
            (bodies_annotations["label"] == face["label"])
        ]

        if matching_body.empty:
            print(f"No body annotation found for {name} in frame {face['frame']} of {face['video']}.")
            continue

        # Boolean indexing returns a DataFrame; get_cropped_object needs a single row.
        body_row = matching_body.iloc[0]
        cropped_body, body_coord = get_cropped_object(body_row, "body")
        cropped_face, face_coord = get_cropped_object(face, "face")

        # Skip the sample if either crop failed (get_cropped_object already printed why).
        if cropped_body is None or cropped_face is None:
            continue

        # Express the face box relative to the cropped body image.
        face_relative_to_body = face_to_yolo_relative_to_body(body_coord, face_coord)

        #save image
        image_pil = Image.fromarray(np.uint8(cropped_body))
        image_path = os.path.join(images_dir, f"{name}{counter}.png")
        image_pil.save(image_path)

        #save annotations (single class, id 0, followed by the box values)
        label_path = os.path.join(labels_dir, f"{name}{counter}.txt")
        with open(label_path, "w") as file:
            file.write("0 " + " ".join(map(str, face_relative_to_body)))

        counter += 1
