In [1]:
import os
import pandas as pd
import json
import cv2
from tqdm import tqdm
import random
import math

In [2]:
bboxes_path = "/home/theo/Documents/Unif/Master/ChimpRec/ChimpRec-Dataset/chimpbehave/bboxes"
videos_path = "/home/theo/Documents/Unif/Master/ChimpRec/ChimpRec-Dataset/chimpbehave/original"

dataset_path = "/home/theo/Documents/Unif/Master/ChimpRec/ChimpRec-Dataset/Chimpanzee_detection_dataset" # output dataset

video_dim = (1080, 1920) # height, width

In [3]:
# @input: n: size of the sample
# proportion: (proportion*100)% of the numbers in [0, n-1]
# @outputs
# No numbers in common between the outputs
def segment_dataset(n, proportion):
    numbers = list(range(n))
    random.shuffle(numbers)

    return numbers[:math.ceil(n*proportion)]

In [4]:
# @input:
# path: path of the json file to be read
# @output:
# frame_annotations: a list of lists: [[class x1 y1 x2 y2 confidence], ...] corresponding to the frames of the video
def read_json(path):

    try:
        with open(path, 'r') as file:
            data = json.load(file)
    except: 
        print(f"file: <{path}> not found")
        return "none"

    frame_annotations = []

    for block in data:
        annotations = block["track_bboxes"][0][0]
        frame_annotations.append(annotations)

    return frame_annotations

In [5]:
# @input:
# type: "train", "val" or "test" --> indicates which part of the dataset it is
# sample_id: name of the video without the extension
# idxs: indexes of the images to keep
# @output:
# nothing, the image are saved in the output folder
def extract_images_from_video(type, sample_id, idxs):

    output_folder = f"{dataset_path}/images/{type}"
    video = cv2.VideoCapture(f"{videos_path}/{sample_id}.mp4")

    idxs = sorted(idxs)

    for frame_count in idxs:
        # Set the video to the specific frame
        video.set(cv2.CAP_PROP_POS_FRAMES, frame_count)

        success, frame = video.read()
        
        # If frame was successfully read, save it
        if success:
            frame_filename = f"{output_folder}/{sample_id}_{frame_count}.jpg"
            cv2.imwrite(frame_filename, frame)
        else:
            print(f"Could not read frame {frame_count} in video {sample_id}")

    # Release the video capture object
    video.release()

In [6]:
# @inputs:
# x1: top    left  corner of the box (x coordinate)
# y1: top    left  corner of the box (y coordinate)
# x2: bottom right corner of the box (x coordinate)
# y2: bottom right corner of the box (y coordinate)
# @outputs:
# cx: relative coordinate of the center of the image (x coordinate) 
# cy: relative coordinate of the center of the image (y coordinate) 
# h : height of the bbox
# w : width of the bbox
def convert_coordinates_to_yolo_format(x1, y1, x2, y2):
    H, W = video_dim

    cx = ((x1 + x2) / 2) / W
    cy = ((y1 + y2) / 2) / H

    h = (y2 - y1) / H
    w = (x2 - x1) / W
    
    return cx, cy, w, h

In [7]:
# @inputs:
# type: "train", "val" or "test" --> indicates which part of the dataset it is
# annotations: data extracted from the json file
# filename: name of the file to be saved
# idxs: indexes of the images to keep
# img_dim: dimensions of the images in the video
def extract_data(type, annotations, filename, idxs):
    output_folder = f"{dataset_path}/labels/{type}"
    for idx in idxs:
        clss, x1, y1, x2, y2, confidence = annotations[idx]
        cx, cy, w, h = convert_coordinates_to_yolo_format(x1, y1, x2, y2)
        with open(f"{output_folder}/{filename}_{idx}.txt", 'w') as file:
            file.write(f"{clss} {cx} {cy} {w} {h}")
        file.close()


In [8]:
csv_path = "/home/theo/Documents/Unif/Master/ChimpRec/ChimpRec-Dataset/chimpbehave/labels.csv"
labels = pd.read_csv(csv_path)

# in the labels.csv file, the action performed by the chimpanzee is indicated. Here is the correspondence:
# 0 : sitting
# 1 : standing
# 2 : walking
# 3 : hanging
# 4 : climbing_up
# 5 : climbing_down 
# 6 : running
# some actions are more static than others. 
# Static actions provide more redundant information in videos
# This is the reason why we want to keep a diffenrent proportion
# according to the actions performed by the chimps.
# These proportions are defined below:

prop_wrt_action = {
    0:         0.001, # 0.1%
    1:         0.005, # 0.5%
    2:         0.4,  # 40%
    3:         0.6,  # 60%
    4:         1,  # 100%
    5:         1,   # 100%
    6:         1,   # 100%
    "default": 0.1    # 10%
}

In [9]:
filenames = os.listdir("/home/theo/Documents/Unif/Master/ChimpRec/ChimpRec-Dataset/chimpbehave/bboxes")
n_files = len(filenames)

proportion_kept = 0.8

train_prop = 0.85*proportion_kept
val_prop = (1-train_prop)*proportion_kept

numbers = list(range(n_files))
random.shuffle(numbers)

train_video_idxs = numbers[:math.ceil(train_prop*n_files)]
val_video_idxs = numbers[math.ceil(train_prop*n_files):]

def process_dataset(video_idxs, type):
    # progression bar added
    for video_idx in tqdm(video_idxs, desc=f"Processing videos: {type}", colour="green"):
        filename = filenames[video_idx]
        sample_id = filename.strip("_bboxes.json")

        # type of movement performed by the chimp
        try: class_id = int(labels.loc[labels["new_filename_with_ext"] == f"{sample_id}.mp4"]["class_id"])
        except: 
            class_id = "default" # the correspondence between the file names is wrong, default class
            print("default class")

        proportion = prop_wrt_action[class_id]

        try :
            json_path = f'{bboxes_path}/{sample_id}_bboxes.json'
            annotations = read_json(json_path)

            idxs = segment_dataset(len(annotations), proportion*0.1)

            extract_data(type, annotations, sample_id, idxs)
            extract_images_from_video(type, sample_id, idxs)
        except:
            print(f"sample <{sample_id}> couldn't be treated.")

In [10]:
process_dataset(train_video_idxs, "train")
process_dataset(val_video_idxs, "val")

  try: class_id = int(labels.loc[labels["new_filename_with_ext"] == f"{sample_id}.mp4"]["class_id"])
Processing videos: train:  32%|[32m███▏      [0m| 279/869 [00:40<01:46,  5.54it/s]

default class


Processing videos: train:  67%|[32m██████▋   [0m| 578/869 [01:23<00:31,  9.29it/s]

default class


Processing videos: train:  74%|[32m███████▎  [0m| 639/869 [01:34<00:23,  9.94it/s]

default class
file: </home/theo/Documents/Unif/Master/ChimpRec/ChimpRec-Dataset/chimpbehave/bboxes/ipynb_checkpoint_bboxes.json> not found
sample <ipynb_checkpoint> couldn't be treated.


Processing videos: train: 100%|[32m██████████[0m| 869/869 [02:09<00:00,  6.73it/s]
Processing videos: val: 100%|[32m██████████[0m| 153/153 [00:19<00:00,  7.86it/s]
