# **Libraries**

In [None]:
import os
import random

### Making csv files for each split
Run this code once, the first time, to create the CSV files for the train, validation and test splits.

In [None]:
def get_split_videos(split, light):
    """
    Return the unique video names found in the image folder of a split.

    Every entry of the split folder is expected to be named with
    underscore-separated fields, where the first field is the class and the
    third field from the end is the video number. Each entry is mapped to
    '<class>_<light>/video_<number>' and duplicates are collapsed.

    Parameters:
    split (str): 'test' selects the test folder; any other value selects the train folder.
    light (str): The light condition (e.g., 'NBI', 'WLI'); it is used both in the folder
        path and in the returned names.

    Returns:
    clases (list): Unique video names grouped by the first letter of the class
        ('a', then 'h', then 's', case-insensitive) and sorted alphabetically inside
        each group, so the result is identical on every run.

    Notes:
    - Entries with fewer than three underscore-separated fields (e.g. '.DS_Store' or
      '.ipynb_checkpoints') cannot hold a video number and are skipped.
    - Names whose class does not start with 'a', 'h' or 's' are not returned.
    - The sort is lexicographic, so 'video_10' comes before 'video_3'.
    """
    if split == 'test':
        path = '../../../data/binary/test_' + light + '/'
    else:
        path = '../../../data/binary/train_' + light + '/'

    videos = set()
    for file in os.listdir(path):
        parts = file.split('_')
        if len(parts) < 3:
            # Not a frame file: there is no third-from-last field to read.
            continue
        videos.add(parts[0] + '_' + light + '/video_' + parts[-3])

    # Set iteration order changes between interpreter runs, so sort before
    # grouping to give callers (and any seeded sampling) a stable order.
    ordered = sorted(videos)

    clases = []
    for initial in ('a', 'h', 's'):
        clases.extend(video for video in ordered if video[0].lower() == initial)

    return clases


In [None]:
# Build the train / validation / test video lists.
# Each class found in the train folder is split 80/20 into train and validation;
# the test folder supplies the test videos.
# NOTE: `light` and `task` are not assigned in this cell; they must already be
# defined in the notebook session before it runs.
gen_path = '../../../../../data/polyp_original/' + light + '/'
random.seed(14)

train_videos = get_split_videos(split='train', light=light)
test_videos = get_split_videos(split='test', light=light)


def _videos_of_class(videos, initial):
    """Return, sorted, the names in `videos` whose first letter is `initial` (case-insensitive)."""
    # Sorting makes the seeded sampling below independent of the order in
    # which the names arrive.
    return sorted(video for video in videos if video[0].lower() == initial)


def _train_val_split(videos, train_fraction=0.8):
    """Randomly pick int(len * train_fraction) names for training; the rest, in order, is validation."""
    chosen = random.sample(videos, int(len(videos) * train_fraction))
    chosen_set = set(chosen)
    held_out = [video for video in videos if video not in chosen_set]
    return chosen, held_out


train_ade = _videos_of_class(train_videos, 'a')
train_hyp = _videos_of_class(train_videos, 'h')
train_ser = _videos_of_class(train_videos, 's')

# Sample in a fixed class order (ade, hyp, ser) so the random stream is
# consumed identically on every run.
train_ade2, val_ade = _train_val_split(train_ade)
train_hyp2, val_hyp = _train_val_split(train_hyp)
train_ser2, val_ser = _train_val_split(train_ser)

if task == 'multiclass':
    train = [train_ade2, train_hyp2, train_ser2]
    val = [val_ade, val_hyp, val_ser]
else:
    # Any other task drops the 's' class from every split.
    train = [train_ade2, train_hyp2]
    val = [val_ade, val_hyp]
    test_ser = _videos_of_class(test_videos, 's')
    excluded = set(test_ser)
    test_videos = [video for video in test_videos if video not in excluded]

# Flatten the per-class lists into one list per split.
train_videos = [video for group in train for video in group]
val_videos = [video for group in val for video in group]

print("for task: ", task)
print("amount of train: ", len(train_videos))
print("amount of val: ", len(val_videos))
print("amount of test: ", len(test_videos))

In [None]:
# Write one CSV per split ('train', 'val', 'test'), named '<split><light>.csv'.
# Each row is '<image path>,<class name>', where the class name is the text
# before the first underscore of the video name.
clases = [train_videos, val_videos, test_videos]
split_names = ['train', 'val', 'test']

for split_name, tipo in zip(split_names, clases):
    # The context manager closes the file even if listing a video folder fails.
    with open(split_name + light + '.csv', '+w') as csvfile:
        for video in tipo:
            current_name = video.split('/')[0].split('_')[0]

            # Serrated videos are left out of the binary task.
            if task == 'binary' and current_name == 'serrated':
                continue

            video_path = gen_path + video + '/'
            # os.listdir returns entries in arbitrary order; sort them so the
            # CSV rows come out in the same order on every run.
            for image in sorted(os.listdir(video_path)):
                csvfile.write(video_path + image + ',' + current_name + '\n')

print("Finished!") 