In [12]:
import pandas as pd
import cv2
import os
import random
import math
from tqdm import tqdm

In [13]:
# Root of the local ChimpRec dataset checkout. Set the CHIMPREC_DATASET_ROOT
# environment variable to run the notebook on another machine; when it is not
# set, the original author's location is used.
dataset_root = os.environ.get(
    "CHIMPREC_DATASET_ROOT",
    "/home/theo/Documents/Unif/Master/ChimpRec/ChimpRec-Dataset",
)

# Directory containing the source videos.
video_path = f"{dataset_root}/CCR/videos"
# CSV file with the body bounding-box annotations.
annotations_path = f"{dataset_root}/CCR/metadata/annotations/body_data.csv"
# Output root: frames are written to images/<split>, labels to labels/<split>.
dataset_path = f"{dataset_root}/Chimpanzee_detection_dataset"

In [14]:
# Load the body annotations (columns seen in the output below: year, video,
# frame, track, x, y, w, h, label).
df = pd.read_csv(annotations_path)
# Quick sanity check: show the rows that belong to one video.
print(df[df["video"] == "17.mp4"])

         year   video  frame  track         x         y         w         h  \
1894695  2013  17.mp4      1      1  0.275000  0.303125  0.666667  0.461111   
1894696  2013  17.mp4      2      1  0.279167  0.303125  0.662500  0.461111   
1894697  2013  17.mp4      3      1  0.280208  0.302083  0.665625  0.462963   
1894698  2013  17.mp4      4      1  0.284375  0.303125  0.660417  0.461111   
1894699  2013  17.mp4      5      1  0.280208  0.303125  0.666667  0.461111   
...       ...     ...    ...    ...       ...       ...       ...       ...   
1910112  2013  17.mp4  11036    272  0.276042  0.109375  0.354167  0.748148   
1910113  2013  17.mp4  11037    272  0.275000  0.110417  0.353125  0.738889   
1910114  2013  17.mp4  11038    272  0.272917  0.110417  0.355208  0.744444   
1910115  2013  17.mp4  11039    272  0.273958  0.109375  0.352083  0.737037   
1910116  2013  17.mp4  11040    272  0.271875  0.111458  0.352083  0.733333   

            label  
1894695  NOTCHIMP  
1894696  NO

In [15]:
# Fraction of each video's frames that is sampled for the dataset.
# Previous setting: proportion_kept = 0.002 (approximately 1500 images)
proportion_kept = 0.0007

In [16]:
def segment_dataset(n, proportion):
    """Pick evenly spaced integers from the range [0, n-1].

    @input:
        n: size of the sample (e.g. the number of frames of a video)
        proportion: fraction of the n values to keep; int(n * proportion)
            values are selected
    @output:
        list of the selected integers in increasing order, or an empty list
        when int(n * proportion) is 0
    """
    count = int(n * proportion)
    if count == 0:
        return []

    # Distance (possibly fractional) between two consecutive picks.
    stride = n / count

    picks = []
    for k in range(count):
        picks.append(int(k * stride))
    return picks

In [17]:
def get_len_video(path_to_video):
    """Return the number of frames OpenCV reports for a video.

    @input:
        path_to_video: file name of the video, relative to `video_path`
    @output:
        the frame count as an int, or 0 (after printing a message) when the
        video cannot be opened
    """
    capture = cv2.VideoCapture(f"{video_path}/{path_to_video}")

    if capture.isOpened():
        n_frames = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
        capture.release()
        return n_frames

    print(f"Could not open video: {path_to_video}")
    return 0

In [18]:
def preprocess_coordinates(coord, img_width, img_height):
    """Convert a corner-based box into a centre-based, normalised box.

    @input:
        coord: (x, y, w, h) where (x, y) is the corner of the box with the
            smallest coordinates and (w, h) its size, all expressed as
            fractions of the image dimensions
        img_width: width of the image in pixels
        img_height: height of the image in pixels
    @output:
        [cx, cy, width, height]: centre and size of the box, each divided by
        the matching image dimension
    """
    x, y, w, h = coord

    # Snap the corners to whole pixels. Horizontal values are scaled by the
    # image width and vertical values by the image height; scaling y by the
    # width gives wrong results for every non-square image.
    x1 = int(x * img_width)
    y1 = int(y * img_height)
    x2 = int(x * img_width + w * img_width)
    y2 = int(y * img_height + h * img_height)

    cx = (x1 + x2) / 2
    cy = (y1 + y2) / 2

    width = x2 - x1
    height = y2 - y1

    # Back to fractions of the image size.
    cx_norm = cx / img_width
    cy_norm = cy / img_height

    width_norm = width / img_width
    height_norm = height / img_height

    return [cx_norm, cy_norm, width_norm, height_norm]

In [19]:
def extract_metadata(video_type, video_id, frame_number, img_dim):
    """Write the label file of one frame.

    One line "0 cx cy w h" is written per annotation row of the frame, with
    the values returned by `preprocess_coordinates`. The file is created even
    when the frame has no annotation (it is then empty).

    @input:
        video_type: name of the split sub-folder ("train", "val" or "test")
        video_id: name of the video with the ".mp4" extension
        frame_number: value looked up in the "frame" column of `df`
        img_dim: (width, height) of the frame in pixels
    @output:
        nothing, the labels are saved in
        {dataset_path}/labels/{video_type}/{video name}_{frame_number}.txt
    """
    img_W, img_H = img_dim
    sample_id = video_id.replace(".mp4", "")
    sub_df = df.loc[(df["video"] == video_id) & (df["frame"] == frame_number)]

    labels_folder = f"{dataset_path}/labels/{video_type}"
    # open() does not create missing directories, so make sure it exists.
    os.makedirs(labels_folder, exist_ok=True)
    output_file = f"{labels_folder}/{sample_id}_{frame_number}.txt"

    # NOTE(review): every row is written with class 0, whatever its "label"
    # value is (the annotations contain e.g. NOTCHIMP) - confirm this is wanted.
    lines = []
    for _, row in sub_df.iterrows():
        box = (row['x'], row['y'], row['w'], row['h'])
        cx, cy, W, H = preprocess_coordinates(box, img_W, img_H)
        lines.append(f"0 {cx} {cy} {W} {H}")

    # The with-block closes the file; no explicit close() is needed.
    with open(output_file, "w") as file:
        file.write("\n".join(lines))
    
    


In [20]:
def extract_data(type, video_id):
    """Extract the sampled, annotated frames of one video with their labels.

    The frame indexes are chosen by `segment_dataset` from the video length
    and `proportion_kept`. A frame is saved only when it has at least one
    annotation row in `df` and can be read from the video.

    @input:
        type: "train", "val" or "test" --> indicates which part of the dataset it is
        video_id: name of the video with the extension
    @output:
        nothing, the images are saved in {dataset_path}/images/{type} and the
        labels are written by `extract_metadata`
    """
    sample_id = video_id[:-4]

    output_folder = f"{dataset_path}/images/{type}"
    # cv2.imwrite does not create missing directories.
    os.makedirs(output_folder, exist_ok=True)

    # Filter the (large) annotation table once per video instead of once per
    # sampled frame.
    video_annotations = df.loc[df["video"] == video_id]
    annotated_frames = set(video_annotations["frame"].tolist())

    len_video = get_len_video(video_id)
    idxs = sorted(segment_dataset(len_video, proportion_kept))

    video = cv2.VideoCapture(f"{video_path}/{video_id}")
    try:
        for frame_count in idxs:
            # Skip frames without annotation before paying for the seek/decode.
            if frame_count not in annotated_frames:
                continue

            # NOTE(review): the annotation "frame" column appears to start at 1
            # while CAP_PROP_POS_FRAMES is 0-based - confirm there is no
            # off-by-one between the saved image and its labels.
            video.set(cv2.CAP_PROP_POS_FRAMES, frame_count)
            success, frame = video.read()
            if not success:
                continue

            frame_filename = f"{output_folder}/{sample_id}_{frame_count}.jpg"
            # imwrite reports failure through its return value; do not write
            # labels for an image that was not saved.
            if not cv2.imwrite(frame_filename, frame):
                print(f"Could not write image: {frame_filename}")
                continue

            # extract the annotations
            extract_metadata(type, video_id, frame_count, (frame.shape[1], frame.shape[0]))
    finally:
        # Release the video capture object, even if a frame failed
        video.release()

In [21]:
# os.listdir() returns the entries in arbitrary order; sort them so that an
# index always designates the same video.
filenames = sorted(os.listdir(video_path))
n_files = len(filenames)

# Fraction of the videos assigned to the training split (the rest is validation).
train_prop = 0.7
# Fixed seed: re-running the notebook must put every video in the same split,
# otherwise frames already extracted on disk end up in both train and val.
split_seed = 42

numbers = list(range(n_files))
# Dedicated generator so the global `random` state is left untouched.
random.Random(split_seed).shuffle(numbers)

n_train = math.ceil(train_prop * n_files)
train_video_idxs = numbers[:n_train]
val_video_idxs = numbers[n_train:]

def process_dataset(video_idxs, type):
    """Run `extract_data` on every selected video, showing a progress bar.

    @input:
        video_idxs: indexes into `filenames` of the videos to process
        type: "train", "val" or "test" --> split the videos are extracted into
    @output:
        nothing, the work is done by `extract_data`
    """
    progress = tqdm(video_idxs, desc=f"Processing videos: {type}", colour="green")
    for video_idx in progress:
        extract_data(type, filenames[video_idx])
        

        

In [None]:
# Build the training split first, then the validation split.
for split_idxs, split_name in ((train_video_idxs, "train"), (val_video_idxs, "val")):
    process_dataset(split_idxs, split_name)

Processing videos: train:  21%|[32m██        [0m| 4/19 [00:23<01:20,  5.38s/it]