In [1]:
# Split the dataset stored in the data folder into train, test and validation sets

import os
import shutil
import random

# Path to the directory where the original dataset is stored
data_dir = "data"

# Remove empty video folders from each phase of the dataset.
# The loops below walk the layout data/<phase>/<video>/.
for phase in os.listdir(data_dir):
    phase_dir = os.path.join(data_dir, phase)
    # Skip stray files (e.g. .DS_Store): os.listdir() on a non-directory raises.
    if not os.path.isdir(phase_dir):
        continue
    for video in os.listdir(phase_dir):
        video_dir = os.path.join(phase_dir, video)
        # os.rmdir() only succeeds on empty directories, so frames are never deleted here.
        if os.path.isdir(video_dir) and not os.listdir(video_dir):
            os.rmdir(video_dir)

In [2]:
# Phases of the dataset: every entry of data_dir except the split folders, so that
# re-running this cell after the folders exist does not list them as phases.
split_names = ("train", "test", "validation")
phases = [entry for entry in os.listdir(data_dir) if entry not in split_names]

# Create the train, test and validation folders.
# exist_ok=True keeps the cell re-runnable; os.mkdir() raises FileExistsError
# when the folder is already there.
for split in split_names:
    os.makedirs(os.path.join(data_dir, split), exist_ok=True)

In [7]:
# Divide the data into train, test and validation sets

import os
import shutil
import random

# Fraction of each phase's videos assigned to every split.
train_ratio = 0.8
test_ratio = 0.1
validation_ratio = 0.1
# The three fractions must cover the whole dataset (tolerance absorbs float rounding).
assert abs(train_ratio + test_ratio + validation_ratio - 1.0) < 1e-9, "split ratios must sum to 1"
data_dir = "data"

# Phase folders are the sub-directories of data_dir other than the split folders
# themselves. Stray files are skipped because os.listdir() is later called on
# every phase path and would raise on a non-directory.
phases = [
    phase
    for phase in os.listdir(data_dir)
    if phase not in ("train", "test", "validation")
    and os.path.isdir(os.path.join(data_dir, phase))
]

Phase: CalotTriangleDissection Train: 64 Test: 8 Validation: 8
Phase: GallbladderDissection Train: 64 Test: 8 Validation: 8
Phase: CleaningCoagulation Train: 59 Test: 7 Validation: 8
Phase: GallbladderRetraction Train: 64 Test: 8 Validation: 8
Phase: ClippingCutting Train: 64 Test: 8 Validation: 8
Phase: Preparation Train: 56 Test: 7 Validation: 8
Phase: GallbladderPackaging Train: 64 Test: 8 Validation: 8


In [7]:
# For every phase, find the video folder holding the fewest frames and
# report the phase name, that video and its frame count
for phase in phases:
    phase_dir = os.path.join(data_dir, phase)
    # Start from an empty name and a large sentinel count
    min_video, min_frames = "", 10000000
    for video in os.listdir(phase_dir):
        n_frames = len(os.listdir(os.path.join(phase_dir, video)))
        # Strict comparison: on a tie the first video listed is kept
        if n_frames >= min_frames:
            continue
        min_video, min_frames = video, n_frames
    print(f"Phase: {phase} Video: {min_video} Frames: {min_frames}")

Phase: CalotTriangleDissection Video: video78 Frames: 288
Phase: GallbladderDissection Video: video60 Frames: 106
Phase: CleaningCoagulation Video: video80 Frames: 22
Phase: GallbladderRetraction Video: video31 Frames: 4
Phase: ClippingCutting Video: video50 Frames: 35
Phase: Preparation Video: video62 Frames: 20
Phase: GallbladderPackaging Video: video04 Frames: 38


In [8]:

# Display the phase folder names currently held in `phases`
print(phases)

['CalotTriangleDissection', 'GallbladderDissection', 'CleaningCoagulation', 'GallbladderRetraction', 'ClippingCutting', 'Preparation', 'GallbladderPackaging']


In [16]:
# Copy the videos to the train, test and validation folders
# E.g.: data/Preparation/video01 -> data/train/Preparation/video01

import tqdm 

# A fixed seed plus a sorted listing make the split reproducible: os.listdir()
# returns entries in arbitrary order and an unseeded shuffle differs on every
# run, so a re-run could otherwise place the same video in a different split.
split_seed = 42
rng = random.Random(split_seed)

# NOTE(review): the split is drawn independently for each phase, so the same video
# name may land in "train" for one phase and "test" for another. Confirm that this
# is intended, or draw one split over video names and reuse it for every phase.

for phase in phases:
    videos = sorted(os.listdir(os.path.join(data_dir, phase)))
    rng.shuffle(videos)

    # Slice boundaries. int() truncates, so any rounding remainder goes to validation.
    train_end = int(train_ratio * len(videos))
    test_end = int((train_ratio + test_ratio) * len(videos))

    # Splitting the videos into train, test and validation without overlapping
    train_videos = videos[:train_end]
    test_videos = videos[train_end:test_end]
    validation_videos = videos[test_end:]

    # Check overlap between train, test and validation
    assert len(set(train_videos).intersection(set(test_videos))) == 0
    assert len(set(train_videos).intersection(set(validation_videos))) == 0
    assert len(set(test_videos).intersection(set(validation_videos))) == 0
    # Every video must be assigned to exactly one split
    assert len(train_videos) + len(test_videos) + len(validation_videos) == len(videos)

    print(f"Phase: {phase} Train: {len(train_videos)} Test: {len(test_videos)} Validation: {len(validation_videos)}")

    # shutil.copytree() raises FileExistsError if the destination already exists,
    # so clear data/train, data/test and data/validation before re-running this cell.
    for split, split_videos in (
        ("train", train_videos),
        ("test", test_videos),
        ("validation", validation_videos),
    ):
        for video in tqdm.tqdm(split_videos):
            shutil.copytree(
                os.path.join(data_dir, phase, video),
                os.path.join(data_dir, split, phase, video),
            )

    print(f"{phase} phase completed")

Phase: CalotTriangleDissection Train: 64 Test: 8 Validation: 8


100%|██████████| 64/64 [02:26<00:00,  2.28s/it]
100%|██████████| 8/8 [00:47<00:00,  5.95s/it]
100%|██████████| 8/8 [00:24<00:00,  3.06s/it]


CalotTriangleDissection phase completed
Phase: GallbladderDissection Train: 64 Test: 8 Validation: 8


100%|██████████| 64/64 [04:05<00:00,  3.84s/it]
100%|██████████| 8/8 [00:26<00:00,  3.35s/it]
100%|██████████| 8/8 [00:37<00:00,  4.68s/it]


GallbladderDissection phase completed
Phase: CleaningCoagulation Train: 59 Test: 7 Validation: 8


100%|██████████| 59/59 [01:10<00:00,  1.19s/it]
100%|██████████| 7/7 [00:10<00:00,  1.44s/it]
100%|██████████| 8/8 [00:08<00:00,  1.07s/it]


CleaningCoagulation phase completed
Phase: GallbladderRetraction Train: 64 Test: 8 Validation: 8


100%|██████████| 64/64 [00:25<00:00,  2.53it/s]
100%|██████████| 8/8 [00:04<00:00,  1.82it/s]
100%|██████████| 8/8 [00:02<00:00,  3.55it/s]


GallbladderRetraction phase completed
Phase: ClippingCutting Train: 64 Test: 8 Validation: 8


100%|██████████| 64/64 [01:20<00:00,  1.26s/it]
100%|██████████| 8/8 [00:09<00:00,  1.25s/it]
100%|██████████| 8/8 [00:07<00:00,  1.12it/s]


ClippingCutting phase completed
Phase: Preparation Train: 56 Test: 7 Validation: 8


100%|██████████| 56/56 [00:35<00:00,  1.56it/s]
100%|██████████| 7/7 [00:04<00:00,  1.72it/s]
100%|██████████| 8/8 [00:08<00:00,  1.05s/it]


Preparation phase completed
Phase: GallbladderPackaging Train: 64 Test: 8 Validation: 8


100%|██████████| 64/64 [00:33<00:00,  1.90it/s]
100%|██████████| 8/8 [00:04<00:00,  1.74it/s]
100%|██████████| 8/8 [00:04<00:00,  1.88it/s]

GallbladderPackaging phase completed



