In [None]:
import sys
sys.path.append('C:/Users/Theo/Documents/Unif/ChimpRec/Code')

from chimplib.imports import pd, cv2, os, random, math, tqdm

In [None]:
# Locations used throughout the notebook. The "..." values look like
# placeholders that have to be replaced with real paths before running.
annotations_path = "..." # path to the CSV file holding the bounding-box labels
videos_path = "..." # path to the folder containing the videos

dataset_path = "..." # output dataset root: images/<split>/ and labels/<split>/ are written under it


# Annotation table; the cells below read its "video", "frame", "x", "y", "w" and "h" columns
df = pd.read_csv(annotations_path)

In [None]:
# proportion of the frames of each video that is kept
# (approximately 1500 images over the whole dataset)
proportion_kept = 0.0015

In [16]:
def segment_dataset(n, proportion):
    """Pick evenly spaced indices out of the range [0, n-1].

    @input:
        n: size of the sample (e.g. the number of frames of a video)
        proportion: fraction of the n indices to keep; int(n * proportion)
            indices are selected
    @outputs:
        List of int(n * proportion) integer indices, starting at 0 and spaced
        n / int(n * proportion) apart (each one truncated to an integer).
        The list is empty when int(n * proportion) is 0.
    """
    count = int(n * proportion)
    if count == 0:
        return []

    # Fractional stride, so that the picks are spread over the whole range
    stride = n / count

    picks = []
    for rank in range(count):
        picks.append(int(rank * stride))
    return picks

In [None]:
def get_len_video(path_to_video):
    """Return the length of a video in terms of frames.

    @input:
        path_to_video: file name of the video, relative to videos_path
    @outputs:
        Frame count reported by OpenCV (CAP_PROP_FRAME_COUNT) converted to
        int, or 0 (after printing a message) when the video cannot be opened.
    """
    capture = cv2.VideoCapture(f"{videos_path}/{path_to_video}")

    if capture.isOpened():
        frame_total = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
        capture.release()
        return frame_total

    print(f"Could not open video: {path_to_video}")
    return 0

In [None]:
def preprocess_coordinates(coord, img_width, img_height):
    """Convert a top-left anchored box into YOLO (centre-based) format.

    @input:
        coord: (x, y, w, h) of the box. x and w are scaled by img_width,
            y and h by img_height, i.e. they are treated as fractions of the
            image dimensions with (x, y) the top-left corner.
        img_width: width of the image (in pixels)
        img_height: height of the image (in pixels)
    @outputs:
        [cx, cy, width, height]: centre and size of the box, each divided by
        the matching image dimension (YOLO format).
    """
    x, y, w, h = coord

    # Pixel corners of the box. The vertical coordinates must be scaled by
    # the image HEIGHT; scaling y by the width shifts every box on
    # non-square images and can push cy outside [0, 1].
    x1 = int(x * img_width)
    y1 = int(y * img_height)
    x2 = int(x * img_width + w * img_width)
    y2 = int(y * img_height + h * img_height)

    # Centre of the box, in pixels
    cx = (x1 + x2) / 2
    cy = (y1 + y2) / 2

    # Size of the box, in pixels
    width = x2 - x1
    height = y2 - y1

    # Back to values relative to the image dimensions
    cx_norm = cx / img_width
    cy_norm = cy / img_height

    width_norm = width / img_width
    height_norm = height / img_height

    return [cx_norm, cy_norm, width_norm, height_norm]

In [None]:
def extract_metadata(video_type, video_id, frame_number, img_dim):
    """Write the YOLO label file of one frame of one video.

    @input:
        video_type: "train", "val" or "test" --> sub-folder of the labels directory
        video_id: name of the video with the extension
        frame_number: index of the frame whose annotations are exported
        img_dim: (width, height) of the frame, in pixels
    @output:
        nothing, the labels are saved in
        {dataset_path}/labels/{video_type}/{video name}_{frame_number}.txt,
        one line "0 cx cy w h" per annotated box (every box gets class id 0).
    """
    img_W, img_H = img_dim
    # Drop the extension whatever it is, so that the label file gets the same
    # stem as the image saved for this frame.
    sample_id = os.path.splitext(video_id)[0]
    sub_df = df.loc[(df["video"] == video_id) & (df["frame"] == frame_number)]

    # Make sure the destination exists: open() does not create folders.
    labels_folder = f"{dataset_path}/labels/{video_type}"
    os.makedirs(labels_folder, exist_ok=True)
    output_file = f"{labels_folder}/{sample_id}_{frame_number}.txt"

    label_lines = []
    for _, row in sub_df.iterrows():
        box = (row['x'], row['y'], row['w'], row['h'])
        cx, cy, W, H = preprocess_coordinates(box, img_W, img_H)
        label_lines.append(f"0 {cx} {cy} {W} {H}")

    # The context manager closes the file; no explicit close() is needed.
    with open(output_file, "w") as file:
        file.write("\n".join(label_lines))


In [None]:
def extract_data(type, video_id):
    """Export a sample of the annotated frames of a video, with their labels.

    @input:
        type: "train", "val" or "test" --> indicates which part of the dataset it is
        video_id: name of the video with the extension
    @output:
        nothing. For every sampled frame (see segment_dataset and
        proportion_kept) that has at least one annotation and can be read,
        the image is saved as {dataset_path}/images/{type}/{video name}_{frame}.jpg
        and its label file is written by extract_metadata.
    """
    # Drop the extension whatever its length is
    sample_id = os.path.splitext(video_id)[0]

    # cv2.imwrite fails silently (returns False) when the folder is missing
    output_folder = f"{dataset_path}/images/{type}"
    os.makedirs(output_folder, exist_ok=True)

    len_video = get_len_video(video_id)
    idxs = sorted(segment_dataset(len_video, proportion_kept))
    if not idxs:
        # Unreadable or too short video: nothing to extract
        return

    # Annotations of this video only; filtered once instead of once per frame
    video_annotations = df.loc[df["video"] == video_id]

    video = cv2.VideoCapture(f"{videos_path}/{video_id}")
    try:
        for frame_count in idxs:
            # Skip frames without annotations before paying for the seek/decode
            if not (video_annotations["frame"] == frame_count).any():
                continue

            # Set the video to the specific frame
            video.set(cv2.CAP_PROP_POS_FRAMES, frame_count)
            success, frame = video.read()
            if not success:
                continue

            frame_filename = f"{output_folder}/{sample_id}_{frame_count}.jpg"
            if not cv2.imwrite(frame_filename, frame):
                # Do not write a label file for an image that was not saved
                print(f"Could not write image: {frame_filename}")
                continue

            # extract the annotations
            extract_metadata(type, video_id, frame_count, (frame.shape[1], frame.shape[0]))
    finally:
        # Release the video capture object, even if an iteration failed
        video.release()

In [None]:
# Video-level train/validation split.
# os.listdir returns the entries in arbitrary order, so they are sorted first:
# together with the seeded shuffle below, this makes the split identical on
# every run (and on every machine holding the same files).
filenames = sorted(os.listdir(videos_path))
n_files = len(filenames)

# training proportion
train_prop = 0.85

# seed of the shuffle; change it to draw a different split
split_seed = 42

numbers = list(range(n_files))
# Dedicated generator: the global random state is left untouched
random.Random(split_seed).shuffle(numbers)

# The first ceil(train_prop * n_files) shuffled videos go to training, the rest to validation
n_train = math.ceil(train_prop*n_files)
train_video_idxs = numbers[:n_train]
val_video_idxs = numbers[n_train:]

def process_dataset(video_idxs, type):
    """Extract the frames and labels of every video of one split.

    @input:
        video_idxs: indexes (into filenames) of the videos of the split
        type: "train", "val" or "test" --> name of the split, forwarded to extract_data
    @output:
        nothing, extract_data saves the data of each video; a progress bar is displayed
    """
    progress = tqdm(video_idxs, desc=f"Processing videos: {type}", colour="green")
    for position in progress:
        extract_data(type, filenames[position])

In [None]:
# Build the training split first, then the validation split; each call goes
# through the videos of the split and exports their sampled frames and labels.
process_dataset(train_video_idxs, "train")
process_dataset(val_video_idxs, "val")