In [1]:
import os
import sys
import cv2
import shutil
import random
import pandas as pd
import numpy as np
from os.path import join
from tqdm import tqdm
import inspect

sys.path.append('../include')
from utils import get_frame_from_time, NameMapping, get_video_info

In [2]:
# Step 0: Load all necessary data
base_dir = 'C:\\Users\\Simon\\Data\\2024_SICS_Phase\\'
videos_folder_name = 'Videos 2'
videos = os.listdir(join(base_dir, videos_folder_name))
df = pd.read_csv(join(base_dir, '20240408_1529_cleaned_annotations.csv'))

print(f'Number of found videos: {len(videos)}')
print(f'Number of annotations: {len(df)}')

# Step 0.1: Create the output directories.
# makedirs(exist_ok=True) does nothing when the directory already exists, so the
# cell can be re-run without a separate (and racy) os.path.exists check.
for out_dir in ('groundTruth', 'features', 'videos'):
    os.makedirs(join(base_dir, out_dir), exist_ok=True)

df.head()

Number of found videos: 72
Number of annotations: 62


Unnamed: 0,Video ID,Annotator,SR_bridle_suture_start,SR_bridle_suture_stop,peritomy_start,peritomy_stop,cautery_start,cautery_stop,scleral_groove_start,scleral_groove_stop,...,OVD_wash_start,OVD_wash_stop,stromal_hydration_start,stromal_hydration_stop,tunnel_suture_start,tunnel_suture_stop,conjunctival_cautery_start,conjunctival_cautery_stop,mapped_video_id,new_video_id
0,Video 5,Dr. SNP,,,0:00,0:23,0:29,1:14,1:19,1:26,...,12:15,13:55,14:02,14:32,,,15:08,15:37,Video 5.mp4,case_100.mp4
1,Video 7,Dr. SNP,,,,,,,0:00,0:32,...,13:38,14:21,14:30,15:01,,,15:15,15:58,Video 7.mp4,case_101.mp4
2,Video 8,Dr. SNP,,,,,0:12,0:26,0:36,0:47,...,11:56,12:36,12:43,13:21,,,13:33,14:00,Video 8.mp4,case_102.mp4
3,Video 9,Dr. SNP,,,0:00,0:23,,,0:31,0:48,...,14:39,16:07,16:28,17:23,,,20:06,21:05,Video 9.mp4,case_103.mp4
4,Video 10,Dr. SNP,,,0:00,0:04,,,0:10,0:13,...,7:46,8:19,8:24,9:18,,,9:28,9:45,Video 10.mp4,case_104.mp4


In [3]:
# Step 1: Generate class mappings

#df = df.rename(columns={'capsulorrhexix_stop': 'capsulorrhexis_stop'})

def generate_mapping(dataframe):
    """Build a class-name -> integer-id mapping from the annotation columns.

    The first two and the last two columns of ``dataframe`` are ignored. For
    every remaining column the class name is the column name with its final
    ``_``-separated token removed (e.g. ``peritomy_start`` -> ``peritomy``).
    Ids are handed out in order of first appearance, starting at 1; id 0 is
    reserved for ``"background"``.

    Args:
        dataframe: DataFrame whose column names encode the classes.

    Returns:
        dict mapping class name to its integer id.
    """
    class_ids = {"background": 0}
    for column in dataframe.columns[2:-2]:
        # Everything before the last underscore is the class name.
        phase = column.rpartition('_')[0]
        if phase not in class_ids:
            # The next free id always equals the current number of entries.
            class_ids[phase] = len(class_ids)
    return class_ids
        
mapping = generate_mapping(df)
print(mapping)

# Step 1.1: Persist the class mapping, one "<id>: <class name>" entry per line
mapping_path = join(base_dir, 'class_mapping.txt')
with open(mapping_path, 'w') as mapping_file:
    mapping_file.writelines(
        f'{class_id}: {class_name}\n' for class_name, class_id in mapping.items()
    )

{'background': 0, 'SR_bridle_suture': 1, 'peritomy': 2, 'cautery': 3, 'scleral_groove': 4, 'incision': 5, 'tunnel': 6, 'sideport': 7, 'AB_injection_and_wash': 8, 'OVD_injection': 9, 'capsulorrhexis': 10, 'main_incision_entry': 11, 'hydroprocedure': 12, 'nucleus_prolapse': 13, 'nucleus_delivery': 14, 'cortical_wash': 15, 'OVD_IOL_insertion': 16, 'OVD_wash': 17, 'stromal_hydration': 18, 'tunnel_suture': 19, 'conjunctival_cautery': 20}


In [4]:
#[print(elm + ',') for elm in mapping.keys()]

In [5]:
# Step 2: Generate frame-wise annotations and write them to file 
 
def process_annotation(row, base_dir, fps=30):
    """Turn one annotation row into a frame-wise array of class ids.

    The row layout is: two leading metadata columns, then alternating
    start/stop timestamp columns per class, then two trailing columns
    (``mapped_video_id`` is read from the row by name).

    Args:
        row: One row of the annotation DataFrame.
        base_dir: Data root that contains the ``videos_folder_name`` folder.
        fps: Frame rate used to convert timestamps into frame indices.
            Defaults to 30, the value that was previously hard-coded.

    Returns:
        numpy array with one entry per video frame holding the class id
        (0 = background, i.e. not covered by any annotated interval).
    """
    video_path = join(base_dir, videos_folder_name, row['mapped_video_id'])
    _, vid_len, video_fps = get_video_info(video_path)
    num_frames = int(vid_len)
    print(f"Working on video {row['Video ID']} with {num_frames} frames and {video_fps} fps...")

    # NOTE(review): timestamps are converted with `fps`, not with the frame rate
    # reported for the video above. When the two differ, labels drift relative
    # to the real frames -- confirm which rate the downstream features use.
    annotation = np.zeros(num_frames)
    for i in range(2, len(row) - 2, 2):
        start_frame = get_frame_from_time(row.iloc[i], fps=fps)
        end_frame = get_frame_from_time(row.iloc[i+1], fps=fps)
        # Skip phases without a complete start/stop pair.
        if start_frame is None or end_frame is None:
            continue

        cur_class = "_".join(row.index[i].split('_')[:-1])
        if end_frame > num_frames:
            # The slice assignment below clips silently; make the loss visible.
            print(f"Warning: {cur_class} ends at frame {end_frame} but the video only has "
                  f"{num_frames} frames; the annotation is truncated.")
        annotation[start_frame:end_frame] = int(mapping[cur_class])
    return annotation


# Generate frame-wise annotations for each annotated video
print(f'####### Processing {len(df)} annotations #######')

# Inverse lookup (class id -> class name); it does not change between videos,
# so it is built once instead of on every iteration.
inv_class = {v: k for k, v in mapping.items()}

bg_perc = []
for _, row in df.iterrows():
    anno = process_annotation(row, base_dir)

    # Sanity check: seconds per class (frame count / 30) and background share
    class_ids, frame_counts = np.unique(anno, return_counts=True)
    class_occurence = {inv_class[k]: v/30.0 for k, v in zip(class_ids, frame_counts)}
    total_count = sum(class_occurence.values())
    # A fully annotated video has no background entry; treat that as 0 seconds
    # rather than failing with a KeyError.
    background_count = class_occurence.get("background", 0.0)
    # Guard against an empty annotation (video with zero frames).
    bg_ratio = background_count / total_count if total_count else 0.0
    bg_perc.append(bg_ratio)
    print(f'Percentage of background: {bg_ratio}, {class_occurence}')

    # Convert ids back to class names and write one label per frame
    anno_list = [inv_class[elm] for elm in anno.tolist()]
    with open(join(base_dir, 'groundTruth', f'{os.path.splitext(row["new_video_id"])[0]}.txt'), 'w') as f:
        f.write("\n".join(anno_list))

print(f'Average background percentage: {np.mean(bg_perc)}')

####### Processing 62 annotations #######
Working on video Video 5 with 28043 frames and 29.17378281668799 fps...
Percentage of background: 0.21823627999857362, {'background': 204.0, 'peritomy': 23.0, 'cautery': 45.0, 'scleral_groove': 7.0, 'incision': 8.0, 'tunnel': 83.0, 'sideport': 12.0, 'AB_injection_and_wash': 35.0, 'OVD_injection': 9.0, 'capsulorrhexis': 95.0, 'main_incision_entry': 22.0, 'hydroprocedure': 9.0, 'nucleus_prolapse': 21.0, 'nucleus_delivery': 8.0, 'cortical_wash': 111.0, 'OVD_IOL_insertion': 86.0, 'OVD_wash': 100.0, 'stromal_hydration': 30.0, 'conjunctival_cautery': 26.766666666666666}
Working on video Video 7 with 30599 frames and 29.997873776148097 fps...
Percentage of background: 0.3431157880976502, {'background': 349.96666666666664, 'scleral_groove': 32.0, 'incision': 114.0, 'tunnel': 50.0, 'sideport': 10.0, 'AB_injection_and_wash': 16.0, 'OVD_injection': 5.0, 'capsulorrhexis': 85.0, 'main_incision_entry': 22.0, 'hydroprocedure': 28.0, 'nucleus_prolapse': 15.0, 

In [22]:
# Step 3: Copy the videos into the 'videos' folder under their unified names
# In some cases this should be executed first, also think about rescaling first / or after
# See downscale script in script folder

CHANGE_FILES = False # Set to True if you want to copy the files

# The flag never changes inside the loop, so it is checked once up front
# instead of breaking out of the first iteration.
if CHANGE_FILES:
    # iterrows() has no length; passing total lets tqdm show real progress.
    for _, row in tqdm(df.iterrows(), total=len(df)):
        shutil.copy2(join(base_dir, videos_folder_name, row['mapped_video_id']), join(base_dir, 'videos', f'{row["new_video_id"]}'))

0it [00:00, ?it/s]


In [6]:
# Sanity check on the extracted ground truth: print a few random frames per file
# with their timestamp so they can be compared against the video by hand.

gt_files = os.listdir(join(base_dir, 'groundTruth'))
# Randomly select up to 5 files. random.sample raises ValueError when asked for
# more items than are available, so the sample size is capped.
selected_files = random.sample(gt_files, min(5, len(gt_files)))

# Iterate over the selected files
for file in selected_files:
    print(f"Sanity check on gt file: {file}")

    # Read the file (one class label per line / frame)
    with open(join(base_dir, 'groundTruth', file), 'r') as f:
        lines = f.readlines()

    # Randomly select up to 10 lines together with their 0-based line numbers
    selected_lines = random.sample(list(enumerate(lines)), min(10, len(lines)))

    # Print the selected lines with their timestamp (line number / 30 fps)
    for line_number, line in selected_lines:
        seconds = (line_number + 1) / 30.0
        print(f"Timestamp: {int(seconds)}sec - {int(seconds // 60)}:{int(seconds % 60):02d}, class {line.strip()}")
    print()


Sanity check on gt file: case_133.txt
Timestamp: 421sec - 7:01, class conjunctival_cautery
Timestamp: 468sec - 7:48, class background
Timestamp: 251sec - 4:11, class OVD_IOL_insertion
Timestamp: 529sec - 8:49, class background
Timestamp: 404sec - 6:44, class conjunctival_cautery
Timestamp: 209sec - 3:29, class OVD_IOL_insertion
Timestamp: 289sec - 4:49, class background
Timestamp: 53sec - 0:53, class sideport
Timestamp: 73sec - 1:13, class capsulorrhexis
Timestamp: 516sec - 8:36, class background

Sanity check on gt file: case_106.txt
Timestamp: 133sec - 2:13, class incision
Timestamp: 776sec - 12:56, class cortical_wash
Timestamp: 630sec - 10:30, class background
Timestamp: 144sec - 2:24, class incision
Timestamp: 1020sec - 17:00, class OVD_IOL_insertion
Timestamp: 505sec - 8:25, class background
Timestamp: 509sec - 8:29, class hydroprocedure
Timestamp: 1252sec - 20:52, class conjunctival_cautery
Timestamp: 989sec - 16:29, class OVD_IOL_insertion
Timestamp: 411sec - 6:51, class backgr

In [7]:
# Incorporate additional remarks into ground truth files
remarks = pd.read_csv(join(base_dir, 'processed_remarks2.csv'))

# Built from base_dir instead of repeating the absolute data path.
# NOTE(review): this is a different annotation file than the one loaded in
# Step 0 ('20240408_1529_cleaned_annotations.csv') -- confirm that is intended.
name_mapping = NameMapping(join(base_dir, 'cleaned_annotations_v1.csv'))

def parse_remark_cell(cell1, cell2, fps=30):
    """Parse a pair of remark cells into a frame interval and a class name.

    Each cell is split at its first ``:`` into a label and a timestamp. The
    class name is the label of ``cell1`` with its last ``_``-separated token
    removed (presumably the start/stop suffix -- verify against the CSV).
    A class that is missing from ``mapping`` is only reported via ``print``;
    it is still returned.

    Args:
        cell1: Remark cell holding the label and the start timestamp.
        cell2: Remark cell holding the label and the stop timestamp.
        fps: Frame rate passed to ``get_frame_from_time``.

    Returns:
        Tuple ``(start_frame, end_frame, class_name)``.
    """
    label, _, start_text = cell1.partition(':')
    cell_class = label.rpartition('_')[0]

    if cell_class not in mapping:
        print(f'Class {cell_class} not found in mapping')

    start_frame = get_frame_from_time(start_text, fps=fps)

    end_text = cell2.partition(':')[2]
    end_frame = get_frame_from_time(end_text, fps=fps)

    return start_frame, end_frame, cell_class
    
    

# Overwrite the frame labels of every remarked interval in the matching
# ground truth file. The first column of a remark row is the video id; the
# remaining columns are read as (start cell, stop cell) pairs.
for _, row in remarks.iterrows():
    video_id = row.iloc[0]
    print('Working on remarks for video: ', video_id)

    gt_file = name_mapping.get_gt_from_id(video_id)
    gt_path = join(base_dir, 'groundTruth', gt_file) if gt_file else None
    if gt_path is None or not os.path.exists(gt_path):
        print(f'Ground truth file {gt_file} not found')
        continue

    # Context managers make sure the file handles are closed again.
    with open(gt_path, 'r') as f:
        gt_lines = f.readlines()

    # Stop one column early so that `col + 1` is always a valid position,
    # even if the row ends with an unpaired column.
    for col in range(1, len(row) - 1, 2):
        if pd.isna(row.iloc[col]) or pd.isna(row.iloc[col + 1]):
            continue
        start, end, cur_class = parse_remark_cell(row.iloc[col], row.iloc[col + 1], fps=30)

        if start is None or end is None:
            print(f'Could not parse remark times for class {cur_class}, skipping')
            continue

        # NOTE(review): a class that is not in `mapping` is still written here
        # (parse_remark_cell only prints a message) -- confirm this is wanted.
        # Clamp to the file length so a remark that runs past the end of the
        # video cannot raise an IndexError.
        for frame in range(start, min(end, len(gt_lines))):
            gt_lines[frame] = cur_class + '\n'

    with open(gt_path, 'w') as f:
        f.writelines(gt_lines)
        
        
        
        
        

Working on remarks for video:  Video 7
Working on remarks for video:  Video 8
Working on remarks for video:  Video 9
Working on remarks for video:  Video 19
Working on remarks for video:  Video 20
Working on remarks for video:  Video 21
Working on remarks for video:  Video 22
Working on remarks for video:  Easy cataract 3
Working on remarks for video:  video N18
Working on remarks for video:  Video 17
Working on remarks for video:  Video 15
Working on remarks for video:  Video N29
Working on remarks for video:  Video N39
Working on remarks for video:  Video N40 
Working on remarks for video:  Video N42
Working on remarks for video:  Video 58
Working on remarks for video:  Video 68
Working on remarks for video:  Video 12
Working on remarks for video:  Video 13
Working on remarks for video:  Video 5
Working on remarks for video:  Video 9
Working on remarks for video:  Video 8
Working on remarks for video:  video n10
Working on remarks for video:  video n11
Working on remarks for video:  