## Dataset preparation CholecT50

In [5]:
def find_videos_with_overlapping_actions(data):
    """Report whether any frame of a video holds more than one valid instance.

    Args:
        data: Parsed label dict for one video. Frames are read from its
            'annotations' mapping (frame id -> list of instance vectors).

    Returns:
        True as soon as a frame is found with two or more instances whose
        first element (the triplet ID) is not -1; that frame's valid
        instances are printed before returning. False if no frame qualifies.
    """
    frames = data.get('annotations', {})
    for frame_instances in frames.values():
        valid_instances = []
        for candidate in frame_instances:
            # A triplet ID of -1 marks a null instance; leave it out.
            if candidate[0] != -1:
                valid_instances.append(candidate)
        if len(valid_instances) >= 2:
            print("valid_instances", valid_instances)
            return True
    return False

def find_videos_with_chiral_actions(data, chirality_map):
    """Report whether a video contains at least one verb that belongs to a chiral pair.

    Args:
        data: Parsed label dict for one video. Frames are read from its
            'annotations' mapping (frame id -> list of instance vectors).
        chirality_map: Mapping of verb ID -> paired verb ID. IDs may be
            strings or integers; both sides of the comparison are normalised
            to strings, so an int-keyed map matches the same way as a
            str-keyed one.

    Returns:
        True as soon as an instance whose verb ID (index 7 of the instance
        vector) appears among the map's keys or values is found; the matching
        verb ID is printed before returning. False otherwise.
    """
    # Every verb ID that takes part in a chiral pair, as strings. Without this
    # normalisation an integer-valued map could never match the stringified
    # verb ID below.
    chiral_verb_ids = {str(verb) for verb in chirality_map.keys()}
    chiral_verb_ids |= {str(verb) for verb in chirality_map.values()}

    annotations = data.get('annotations', {})
    for instances in annotations.values():
        for instance in instances:
            # Verb ID is the 8th item (index 7) in the instance vector.
            verb_id = str(instance[7])
            if verb_id in chiral_verb_ids:
                print("verb_id", verb_id)
                return True
    return False

def find_videos_with_bounding_boxes(data):
    """Report whether any instance of a video carries a non-null bounding box.

    Args:
        data: Parsed label dict for one video. Frames are read from its
            'annotations' mapping (frame id -> list of instance vectors).

    Returns:
        True as soon as an instance is found whose instrument box
        (indices 3-6) or target box (indices 10-13) has any coordinate other
        than -1.0; the offending coordinates are printed before returning.
        False if every box in every instance is entirely -1.0.
    """
    # Box label -> slice of the instance vector holding its (x, y, w, h).
    # The instrument box is examined before the target box.
    bbox_fields = (
        ("instrument_bbox_coords", slice(3, 7)),
        ("target_bbox_coords", slice(10, 14)),
    )

    for frame_instances in data.get('annotations', {}).values():
        for vector in frame_instances:
            for label, span in bbox_fields:
                coords = vector[span]
                # -1.0 is the null marker; anything else means a real box.
                if any(value != -1.0 for value in coords):
                    print(label, coords)
                    return True
    return False


In [None]:
import os
import json
from collections import defaultdict, Counter
# NOTE(review): `chirality_verb_pairs` used below is not defined in this cell;
# presumably it is supplied by this wildcard import - confirm and import it by name.
from dataset_mapping import *
# Parenthesised import list: an unparenthesised list may not end with a comma
# (the previous form was a SyntaxError).
from cholect50 import (
    find_videos_with_overlapping_actions,
    find_videos_with_chiral_actions,
    find_videos_with_bounding_boxes,
)

# Directory holding one JSON label file per video. Set the
# CHOLECT50_LABELS_DIR environment variable to point elsewhere; the original
# local path remains the default.
DATA_DIRECTORY = os.environ.get(
    "CHOLECT50_LABELS_DIR",
    r"C:\Users\rezow\Rezowan\Datasets\CholecT50\labels",
)
json_dir = DATA_DIRECTORY


def _sorted_ids(video_ids):
    """Return the IDs sorted; fall back to string order when types are mixed.

    A video ID is the file's 'video' field when present and the file name
    otherwise, so one list can hold both ints and strings, which plain
    sorted() refuses to compare.
    """
    try:
        return sorted(video_ids)
    except TypeError:
        return sorted(video_ids, key=str)


# Collect the label files up front. A missing directory is reported and
# treated as "no files" so the report below still prints instead of crashing
# inside os.listdir.
if os.path.isdir(json_dir):
    json_filenames = sorted(
        name for name in os.listdir(json_dir) if name.endswith('.json')
    )
else:
    print(f"Directory not found: {json_dir}")
    json_filenames = []

# Lists to store the video IDs based on findings
videos_with_bboxes = []
videos_with_overlaps = []
videos_with_chirality = []

# Run every analysis on each label file.
for filename in json_filenames:
    filepath = os.path.join(json_dir, filename)
    try:
        with open(filepath, 'r') as f:
            video_data = json.load(f)

        video_id = video_data.get('video', filename)
        print(f"Processing file {filename} with video ID: {video_id}")

        if find_videos_with_bounding_boxes(video_data):
            videos_with_bboxes.append(video_id)

        if find_videos_with_overlapping_actions(video_data):
            videos_with_overlaps.append(video_id)

        if find_videos_with_chiral_actions(video_data, chirality_verb_pairs):
            videos_with_chirality.append(video_id)

    except Exception as e:
        # Best effort: one unreadable or malformed file must not stop the
        # scan of the remaining files.
        print(f"Could not process file {filename}: {e}")

# --- Print the Final Report ---
print("--- Surgical Video Annotation Analysis Report ---")
print(f"\nVideos with Bounding Boxes: {_sorted_ids(videos_with_bboxes)}")
print(f"Total: {len(videos_with_bboxes)}")

print(f"\nVideos with Overlapping Actions: {_sorted_ids(videos_with_overlaps)}")
print(f"Total: {len(videos_with_overlaps)}")

print(f"\nVideos with Chiral Actions: {_sorted_ids(videos_with_chirality)}")
print(f"Total: {len(videos_with_chirality)}")
print("\n-------------------------------------------------")

Processing file VID01.json with video ID: 1
valid_instances [[7, 0, 1, -1, -1, -1, -1, 0, 0, 1, -1, -1, -1, -1, 0], [96, 2, 1, -1, -1, -1, -1, 9, 14, 1, -1, -1, -1, -1, 0]]
verb_id 0
Processing file VID02.json with video ID: 2
valid_instances [[19, 0, 1.0, -1.0, -1.0, -1.0, -1.0, 1, 8, 1.0, -1.0, -1.0, -1.0, -1.0, 5], [44, 1, 1.0, -1.0, -1.0, -1.0, -1.0, 1, 8, 1.0, -1.0, -1.0, -1.0, -1.0, 5], [88, 5, 1.0, -1.0, -1.0, -1.0, -1.0, 7, 7, 1.0, -1.0, -1.0, -1.0, -1.0, 5]]
verb_id 5
Processing file VID04.json with video ID: 4
valid_instances [[7, 0, 1.0, -1.0, -1.0, -1.0, -1.0, 0, 0, 1.0, -1.0, -1.0, -1.0, -1.0, 0], [17, 0, 1.0, -1.0, -1.0, -1.0, -1.0, 1, 0, 1.0, -1.0, -1.0, -1.0, -1.0, 0]]
verb_id 0
Processing file VID05.json with video ID: 5
valid_instances [[17, 0, 1.0, -1.0, -1.0, -1.0, -1.0, 1, 0, 1.0, -1.0, -1.0, -1.0, -1.0, 3], [96, 2, 1.0, -1.0, -1.0, -1.0, -1.0, 9, 14, 1.0, -1.0, -1.0, -1.0, -1.0, 3]]
verb_id 0
Processing file VID06.json with video ID: 6
valid_instances [[92, 5, 1.0

In [None]:
import os 
from pathlib import Path
from tqdm import tqdm
import argparse 
import json 

# Gesture ID ("G1" .. "G15") -> short natural-language description of the
# gesture. load_jigsaw_data looks gesture IDs up here to fill the "action"
# field of each gesture record.
gesture_map = {
    "G1": "reaching for the needle with right hand",
    "G2": "positioning the tip of the needle", 
    "G3": "pushing needle through the tissue",
    "G4": "transferring needle from left to right",
    "G5": "moving to center of workspace with needle in grip",
    "G6": "pulling suture with left hand",
    "G7": "pulling suture with right hand",
    "G8": "orienting needle",
    "G9": "using right hand to help tighten suture",
    "G10": "loosening more suture",
    "G11": "dropping suture and moving to end points",
    "G12": "reaching for needle with left hand",
    "G13": "making C loop around right hand",
    "G14": "reaching for suture with right hand",
    "G15": "pulling suture with both hands"
}


def load_jigsaw_data(json_file_path, gesture_descriptions=None):
    """Load gesture sequences and per-trial metadata from a JIGSAW JSON file.

    The file is expected to look like
    ``{"tasks": {task: {"videos": {trial: {"gesture_sequence": [...],
    "skill_level_self_proclaimed": ...}}}}}`` where each gesture entry is a
    dict with "gesture_id", "start_frame" and "end_frame". Entries that are
    not dicts or have an empty gesture ID are ignored, and trials without any
    usable gesture are skipped.

    Args:
        json_file_path: Path of the JSON file to read.
        gesture_descriptions: Optional mapping of gesture ID -> description
            used for each gesture's "action". Defaults to the module-level
            ``gesture_map``. A gesture ID missing from the mapping no longer
            aborts the load with a KeyError: the ID itself is used as its
            description and a DEBUG line is printed.

    Returns:
        ``(all_sequences, metadata_dict)``. ``all_sequences`` is a list of
        gesture-ID lists, one per trial. ``metadata_dict`` maps each
        sequence's index in that list to a dict with "trial_name",
        "task_name", "skill_level", "durations" and "gesture_data".
        ``([], {})`` is returned when the file cannot be read or parsed.
    """
    # Resolve the lookup table lazily so the module-level default is only
    # touched when the caller did not supply a mapping.
    descriptions = gesture_map if gesture_descriptions is None else gesture_descriptions

    try:
        with open(json_file_path, 'r') as f:
            data = json.load(f)
        print(f"DEBUG: Loaded JSON data structure keys: {list(data.keys())}")
    except Exception as e:
        print(f"DEBUG: Error loading JSON file: {e}")
        return [], {}

    all_sequences = []
    metadata_dict = {}  # Metadata for each sequence, keyed by its index

    tasks = data.get("tasks", {})
    print(f"DEBUG: Found {len(tasks)} tasks")

    for task_name, task_data in tasks.items():
        print(f"DEBUG: Processing task: {task_name}")
        trials = task_data.get("videos", {})
        print(f"DEBUG: Task {task_name} has {len(trials)} trials")

        for trial_name, trial_data in trials.items():
            print(f"DEBUG: Processing trial: {trial_name}")
            gesture_sequence = trial_data.get("gesture_sequence", [])
            skill_level = trial_data.get("skill_level_self_proclaimed", "Unknown")

            if not gesture_sequence:
                continue

            gesture_data = []  # Full record per gesture (id, action, timing)
            gesture_ids = []   # Bare ID sequence returned to the caller

            for g in gesture_sequence:
                if not isinstance(g, dict):
                    continue
                gesture_id = g.get("gesture_id", "")
                if not gesture_id:
                    continue

                start_frame = g.get("start_frame", 0)
                end_frame = g.get("end_frame", 0)
                # Non-positive spans are clamped to a duration of 1.
                duration = end_frame - start_frame if end_frame > start_frame else 1

                action = descriptions.get(gesture_id)
                if action is None:
                    print(f"DEBUG: Unknown gesture ID {gesture_id!r} in trial {trial_name}; using the ID as its description")
                    action = gesture_id

                gesture_data.append({
                    "id": gesture_id,
                    "action": action,
                    "start": start_frame,
                    "end": end_frame,
                    "duration": duration
                })
                gesture_ids.append(gesture_id)

            if gesture_ids:
                all_sequences.append(gesture_ids)
                sequence_index = len(all_sequences) - 1

                metadata_dict[sequence_index] = {
                    "trial_name": trial_name,
                    "task_name": task_name,
                    "skill_level": skill_level,
                    "durations": [g["duration"] for g in gesture_data],
                    "gesture_data": gesture_data
                }
                print(f"DEBUG: Added sequence {sequence_index} with {len(gesture_ids)} gestures: {gesture_ids[:5]}... gesture {gesture_data}")

    print(f"DEBUG: Total sequences loaded: {len(all_sequences)}")
    if all_sequences:
        print(f"DEBUG: First sequence example: {all_sequences[0][:5]}...")
        if metadata_dict and 0 in metadata_dict:
            sample_meta = metadata_dict[0]
            print(f"DEBUG: First metadata example - trial: {sample_meta.get('trial_name', 'N/A')}, task: {sample_meta.get('task_name', 'N/A')}")

    return all_sequences, metadata_dict



def parse_meta_file(meta_file_path):
    """Parse a whitespace-separated meta file into a dict of per-trial records.

    Each non-empty line is read as
    ``trial_id  self_proclaimed_skill  GRS_total  score_1 .. score_6``.
    Lines with fewer than three fields, or with a non-integer GRS value, are
    reported and skipped individually, so one malformed line no longer
    discards every line after it.

    Args:
        meta_file_path: Path of the meta file to read.

    Returns:
        Dict mapping trial ID -> record. "video_uid",
        "skill_level_self_proclaimed", "skill_level_GRS" and "GRS_scores" are
        filled from the line; the remaining keys are placeholders (None, or
        an empty list for "clips"). An empty dict is returned when the file
        does not exist; if another error interrupts reading, the records
        parsed so far are returned.
    """
    # Names of the six per-criterion GRS scores, in column order
    # (based on readme.txt order). Hoisted out of the loop: it is constant.
    grs_score_names = [
        "Respect_for_tissue",
        "Suture_needle_handling",  # Note: For Needle_Passing/Knot_Tying, this is Needle Handling
        "Time_and_motion",
        "Flow_of_operation",
        "Overall_performance",
        "Quality_of_final_product"
    ]

    videos = {}
    try:
        with open(meta_file_path, 'r') as f:
            for line_number, line in enumerate(f, start=1):
                parts = line.strip().split()
                if not parts:
                    print(f"Skipping empty line in {meta_file_path}")
                    continue  # skip empty lines

                # trial id, self-proclaimed skill and GRS total are mandatory.
                if len(parts) < 3:
                    print(f"Skipping malformed line {line_number} in {meta_file_path}: expected at least 3 fields, got {len(parts)}")
                    continue

                trial_id = parts[0]
                skill_level_self_proclaimed = parts[1]
                try:
                    skill_level_GRS = int(parts[2])
                    grs_scores_raw = [int(score) for score in parts[3:9]]
                except ValueError as e:
                    print(f"Skipping malformed line {line_number} in {meta_file_path}: {e}")
                    continue

                # zip stops at the shorter input, so a line with fewer than
                # six scores yields a correspondingly shorter dict.
                grs_scores = dict(zip(grs_score_names, grs_scores_raw))

                videos[trial_id] = {
                    "video_uid": trial_id,
                    "video_path": None,  # Will be populated later
                    "annotation_path": None,  # Will be populated later
                    "task": None,  # Will be populated later
                    "fps": None,  # Will be populated later
                    "features_path": None,  # Will be populated later
                    "frames": None,  # Will be populated later
                    "duration": None,  # Will be populated later
                    "description": None,  # Will be populated later
                    "skill_level_self_proclaimed": skill_level_self_proclaimed,
                    "skill_level_GRS": skill_level_GRS,
                    "GRS_scores": grs_scores,
                    "clips": []  # Will be populated later
                }
    except FileNotFoundError:
        print(f"Warning: Meta file not found: {meta_file_path}. Skipping metadata.")
    except Exception as e:
        print(f"Error parsing meta file {meta_file_path}: {e}")
    return videos

# Meta file of the knot-tying task in the local JIGSAW copy.
jigsaw_knot_tying = r"C:\Users\rezow\Rezowan\Datasets\jigsaw\Knot_Tying\meta_file_Knot_Tying.txt"
# Joining a single path component returns it unchanged, so the path is
# handed to the parser directly.
Videos = parse_meta_file(jigsaw_knot_tying)


In [7]:
Videos

{'Knot_Tying_B001': {'video_uid': 'Knot_Tying_B001',
  'video_path': None,
  'annotation_path': None,
  'task': None,
  'fps': None,
  'features_path': None,
  'frames': None,
  'duration': None,
  'description': None,
  'skill_level_self_proclaimed': 'N',
  'skill_level_GRS': 13,
  'GRS_scores': {'Respect_for_tissue': 2,
   'Suture_needle_handling': 2,
   'Time_and_motion': 2,
   'Flow_of_operation': 2,
   'Overall_performance': 2,
   'Quality_of_final_product': 3},
  'clips': []},
 'Knot_Tying_B002': {'video_uid': 'Knot_Tying_B002',
  'video_path': None,
  'annotation_path': None,
  'task': None,
  'fps': None,
  'features_path': None,
  'frames': None,
  'duration': None,
  'description': None,
  'skill_level_self_proclaimed': 'N',
  'skill_level_GRS': 9,
  'GRS_scores': {'Respect_for_tissue': 1,
   'Suture_needle_handling': 1,
   'Time_and_motion': 2,
   'Flow_of_operation': 2,
   'Overall_performance': 1,
   'Quality_of_final_product': 2},
  'clips': []},
 'Knot_Tying_B003': {'vid