# Prepare MKR splits

In [2]:
# Set up imports
import os
import cv2
import sys
import glob
import json
import random
from tqdm import tqdm
from natsort import natsorted

# Imports from endo utils project
sys.path.append("../ext/endo_utils/data_utils/")
from io_utils import write_list_to_text_file,\
                                check_and_create_folder, print_elements_of_list
from process_utils import get_sub_dirs

# ------- Pseudo code ------- #
# 1. Initialize paths:
# For folds equal to number of surgeries:
# Iterate over the surgeries,
#   Create fold name 
#   If surgery number == fold number
#   Assign to validation 
#   Else assign to training 
#   Write training and validation text files
# --------------------------- #

In [None]:
# --- Split configuration --- #
# Dataset root; its sub-directories (as returned by get_sub_dirs) are the
# surgery folders that get distributed over the folds.
dataroot = "/home/lalith/data/13_adaptor_mkr_dataset/"
op_dirs = get_sub_dirs(dataroot)

# Split text files are written below <splits_root>/<splits_name>/fold_<k>/.
splits_root, splits_name = "../splits/", "mkr_dataset"

# Number of folds to generate.
# NOTE(review): the pseudo code describes one fold per surgery, so this
# presumably matches the number of surgery folders in dataroot - confirm.
num_folds = 4

def get_images_in_surgery_folder(surgery_path):
    """Build the split-file entries for every PNG frame of one surgery.

    Each sub-directory of ``surgery_path`` (as returned by ``get_sub_dirs``)
    is treated as a video whose frames live in ``<video>/images/*.png``.

    Args:
        surgery_path: Path of the surgery folder to scan.

    Returns:
        A list of strings, one per frame, of the form
        ``"<surgery_name>/<video_name> <frame_name>"`` where ``frame_name``
        is the image file name without its extension. Frames of a video are
        listed in natural sort order; the list is empty if no PNG is found.
    """
    indices = []
    # The surgery folder name is the same for every video, so compute it once.
    surgery_name = os.path.basename(surgery_path)

    # NOTE(review): assumes get_sub_dirs returns paths that can be joined and
    # globbed directly (not bare folder names) - confirm against endo_utils.
    for video in get_sub_dirs(surgery_path):
        # Glob inside the current video folder. Joining the whole `videos`
        # list here (as before) raises a TypeError in os.path.join.
        image_filepaths = natsorted(glob.glob(os.path.join(video, "images", "*.png")))

        # Prepare text file information
        rel_path_name = os.path.join(surgery_name, os.path.basename(video))  # Folder name
        indices += [
            ' '.join((rel_path_name, os.path.basename(os.path.splitext(path)[0])))
            for path in image_filepaths
        ]
    return indices


# Leave-one-surgery-out splits: for fold k, the k-th entry of op_dirs is the
# validation set and all remaining entries form the training set.
# `num_folds` is an int, so it must be wrapped in range() to be iterated.
for fold in range(num_folds):
    fold_number = fold + 1  # Fold folders and messages are 1-based.

    # Make sure the output folder exists, e.g. splits/mkr_dataset/fold_1.
    # The helper's return value is not needed here.
    check_and_create_folder(os.path.join(splits_root, splits_name, "fold_" + str(fold_number)))

    # Each fold is built exactly once. The former extra loop over op_dirs
    # recomputed and rewrote the same files once per surgery.
    train_indices, val_indices = [], []
    for i, session in enumerate(op_dirs):
        # If fold then validation else put into training list
        if i == fold:
            val_indices += get_images_in_surgery_folder(surgery_path=session)
        else:
            train_indices += get_images_in_surgery_folder(surgery_path=session)

    # Template: <splits_root>/<splits_name>/fold_<k>/<train|val>_files.txt
    f_writepath = os.path.join(splits_root, splits_name, "fold_{}", "{}_files.txt")
    write_list_to_text_file(save_path=f_writepath.format(fold_number, "train"),
                            text_list=train_indices,
                            verbose=False)
    write_list_to_text_file(save_path=f_writepath.format(fold_number, "val"),
                            text_list=val_indices,
                            verbose=False)
    print("Fold {}: Extracted {} training files and {} validation files and wrote them to disk".format(str(fold_number),
                                                                                                       len(train_indices),
                                                                                                       len(val_indices)))