In [None]:
import numpy as np
from PIL import Image
import pycocotools.mask as mask_util
import json
import cv2


In [None]:
# Running id counters used by the conversion cells: both the next image id and
# the next annotation id start at 1 and are incremented as records are emitted.
img_count = ann_count = 1

In [None]:
# Module-level accumulator of distinct (r, g, b) tuples. get_colors() adds to
# this set, so colours persist across calls until this cell is run again.
colors = set()

In [None]:
def get_colors(image_path, max_colors=1000):
    """Add the distinct RGB colours of an image to the shared ``colors`` set.

    The image is converted to RGB and scanned pixel by pixel, column by
    column. Each pixel's ``(r, g, b)`` tuple is added to the module-level
    ``colors`` set, so colours accumulate across calls. Scanning stops as soon
    as that set holds at least ``max_colors`` entries.

    Args:
        image_path: Path of the image file to open.
        max_colors: Size of ``colors`` at which scanning stops.

    Returns:
        The module-level ``colors`` set itself (not a copy).
    """
    rgb_image = Image.open(image_path).convert("RGB")
    n_columns, n_rows = rgb_image.size

    # x is the outer loop, so pixels are visited in column-major order.
    for col in range(n_columns):
        for row in range(n_rows):
            red, green, blue = rgb_image.getpixel((col, row))
            colors.add((red, green, blue))

            # The cap is checked after every insertion and counts colours
            # collected by earlier calls too, because the set is shared.
            if len(colors) >= max_colors:
                return colors

    return colors

In [None]:
def create_coco_categories(class_dict):
    """Build the COCO ``categories`` list from a class-id -> class-name mapping.

    Args:
        class_dict: Mapping whose keys are class ids (anything ``int()``
            accepts) and whose values are the class names.

    Returns:
        A list with one dict per entry of ``class_dict``, in iteration order,
        each holding ``id`` (the class id plus one), ``name`` and an empty
        ``supercategory``.

    NOTE(review): ids produced here are offset by +1, whereas gray_to_coco
    stores the raw id it was given as ``category_id`` -- confirm the two
    numbering schemes are meant to line up.
    """
    return [
        {
            "id": int(raw_id) + 1,
            "name": label,
            "supercategory": "",  # fill this in if a supercategory is needed
        }
        for raw_id, label in class_dict.items()
    ]

In [None]:
def get_dicts_by_field_value(data, field_name, target_value):
    """Return the dicts in ``data`` whose ``field_name`` equals ``target_value``.

    A dict that lacks ``field_name`` is treated as having the value ``None``.
    The returned list keeps the input order and holds the original dict
    objects, not copies.
    """
    matches = []
    for record in data:
        if record.get(field_name) == target_value:
            matches.append(record)
    return matches

def sort_dicts_by_field(data, field_name, reverse=False):
    """Return a new list of the dicts in ``data`` ordered by ``field_name``.

    The input is left untouched. A dict that lacks ``field_name`` contributes
    ``None`` as its sort key. The sort is stable; ``reverse=True`` gives
    descending order.
    """
    def field_of(record):
        return record.get(field_name)

    ordered = list(data)
    ordered.sort(key=field_of, reverse=reverse)
    return ordered

In [None]:
def rgb_to_coco(image_path, image_id, color_to_category_id):
    """Convert a colour-coded segmentation mask into COCO annotations.

    One annotation is produced for every colour of ``color_to_category_id``
    that occurs in the image; all pixels of that colour form a single mask.

    Args:
        image_path: Path of the colour-coded mask image.
        image_id: Value stored as ``image_id`` in every produced annotation.
        color_to_category_id: Mapping from a 3-element colour (r, g, b) to the
            category id to record for pixels of that colour.

    Returns:
        A list of annotation dicts with the keys ``id``, ``image_id``,
        ``category_id``, ``segmentation`` (the RLE dict returned by
        ``mask_util.encode``), ``area``, ``bbox`` (as returned by
        ``mask_util.toBbox``) and ``iscrowd``.

    Side effects:
        Advances the module-level ``ann_count`` by one per annotation.
    """
    # `ann_count` is rebound below; without this declaration Python treats it
    # as a local name and the first read raises UnboundLocalError.
    global ann_count

    # Force three channels so the (1, 1, 3) colour comparison below broadcasts
    # for palette, greyscale and RGBA files as well as plain RGB ones.
    with Image.open(image_path) as img:
        img_array = np.array(img.convert("RGB"))

    annotations = []

    for color, category_id in color_to_category_id.items():
        # Pixels where all three channels match the current colour.
        color_mask = np.all(
            img_array == np.array(color).reshape(1, 1, 3), axis=2
        ).astype(np.uint8)

        # Colour not present in this image: nothing to annotate.
        if not color_mask.any():
            continue

        # Run-length encode the mask, then derive area and box from the RLE.
        rle = mask_util.encode(np.asfortranarray(color_mask))
        area = mask_util.area(rle)
        bbox = mask_util.toBbox(rle).tolist()

        annotations.append({
            "id": ann_count,
            "image_id": image_id,
            "category_id": category_id,
            "segmentation": rle,
            "area": area.item(),
            "bbox": bbox,
            "iscrowd": 0,
        })
        ann_count += 1

    return annotations

In [None]:
def gray_to_coco(image_path, image_id, category_ids):
    """Convert a grey-level segmentation mask into COCO annotations.

    The image is converted to 8-bit greyscale; for every id in
    ``category_ids`` that occurs as a grey value, all pixels with that value
    form one mask and yield one annotation.

    Args:
        image_path: Path of the mask image.
        image_id: Value stored as ``image_id`` in every produced annotation.
        category_ids: Iterable of integer grey values to look for. NumPy
            integers are accepted.

    Returns:
        A list of annotation dicts with the keys ``id``, ``image_id``,
        ``category_id``, ``segmentation`` (the RLE dict returned by
        ``mask_util.encode``), ``area``, ``bbox`` (as returned by
        ``mask_util.toBbox``) and ``iscrowd``.

    Side effects:
        Advances the module-level ``ann_count`` by one per annotation.

    NOTE(review): this assumes the grey value after ``convert('L')`` equals
    the class id -- confirm for the mask files actually used. Also note that
    create_coco_categories numbers categories as class id + 1 while the raw
    id is stored here; verify the two are meant to agree.
    """
    global ann_count

    with Image.open(image_path) as img:
        img_array = np.array(img.convert('L'))

    annotations = []

    for category_id in category_ids:
        # Binary mask of the pixels that carry this grey value.
        mask = (img_array == category_id).astype(np.uint8)

        # Class not present in this image: nothing to annotate.
        if not mask.any():
            continue

        # Run-length encode the mask, then derive area and box from the RLE.
        rle = mask_util.encode(np.asfortranarray(mask))
        area = mask_util.area(rle)
        bbox = mask_util.toBbox(rle).tolist()

        annotations.append({
            "id": ann_count,
            "image_id": image_id,
            # Store a plain int: ids coming from np.arange(...) are NumPy
            # integers, which json.dump refuses to serialise.
            "category_id": int(category_id),
            "segmentation": rle,
            "area": area.item(),
            "bbox": bbox,
            "iscrowd": 0,
        })
        ann_count += 1

    return annotations

In [None]:
# Load the dataset description; later cells read its `frames` list and its
# `class_dict` mapping. The encoding is given explicitly so decoding does not
# depend on the platform's default locale.
with open("raw_info_cholec_seg8k.json", "r", encoding="utf-8") as f:
    raw_info = json.load(f)

In [None]:
# Distinct video ids that occur anywhere in the frame list.
video_id_set = {frame_info['video_id'] for frame_info in raw_info['frames']}

In [None]:
# Flat list that a later cell fills with every frame, one video after another.
all_frames = []
# Class ids as keyed in the metadata. When the cells run in order, the next
# cell rebinds `cat_ids` to range(0, 13) before this value is read.
cat_ids = raw_info['class_dict'].keys()

In [None]:
# The thirteen class ids 0..12; this replaces the dict-keys view assigned in
# the previous cell.
cat_ids = range(13)

In [None]:
# Collect the frames of every video, ordered by file name inside each video,
# and record each frame's position within its video.
#
# Videos are visited in sorted order: iterating the bare set gives an
# arbitrary order (for string ids it can differ between interpreter runs),
# which would make the order of `all_frames`, and the image ids later assigned
# from it, unstable.
#
# NOTE(review): file names are compared as strings. If frame numbers are not
# zero-padded, "frame_100" sorts before "frame_80" -- confirm against the
# actual file names.
for video_id in sorted(video_id_set):
    frames = get_dicts_by_field_value(raw_info['frames'], "video_id", video_id)
    frames = sort_dicts_by_field(frames, "file_name")

    for idx, frame in enumerate(frames):
        # Stored on the frame dict itself, so raw_info['frames'] sees it too.
        frame['order_in_video'] = idx
        all_frames.append(frame)
        
        

In [None]:
# Show the frame count and a short preview rather than dumping every frame
# dict into the notebook output.
len(all_frames), all_frames[:3]

In [None]:
# Accumulators for the COCO `images` and `annotations` sections; the
# conversion loop in the next cell appends to both.
coco_images, coco_annotations = [], []
    

In [None]:

# Build the COCO `images` and `annotations` lists from every frame that has a
# ground-truth mask.
for frame in all_frames:
    # Frames without a ground-truth mask are not detection keyframes.
    is_det_keyframe = frame['gt_path'] is not None
    if not is_det_keyframe:
        continue

    # The mask is read here only to obtain the image dimensions.
    gt = cv2.imread(frame['gt_path'])
    if gt is None:
        # cv2.imread reports a missing or unreadable file by returning None.
        raise FileNotFoundError(
            f"Could not read ground-truth mask: {frame['gt_path']}"
        )
    height, width = gt.shape[:2]

    img_info = {
        'file_name': frame['file_name'],
        'height': height,
        'width': width,
        'id': img_count,
        'is_det_keyframe': is_det_keyframe,
        # Take the id from the frame itself. The bare name `video_id` is only
        # a leftover loop variable from an earlier cell and would stamp the
        # same video onto every image.
        'video_id': frame['video_id'],
        'frame_id': None,
        'order_in_video': frame['order_in_video'],
    }
    coco_images.append(img_info)

    # Class ids 0..12 are passed as plain Python ints so the `category_id`
    # values stored in the annotations stay JSON-serialisable.
    coco_annotations.extend(gray_to_coco(frame['gt_path'], img_count, range(13)))
    img_count += 1
    
    
    

In [None]:
# The RLE `counts` produced by mask_util.encode are bytes, which json cannot
# serialise; turn them into text. Values that are already text are left alone,
# so re-running this cell is harmless.
for ann in coco_annotations:
    counts = ann["segmentation"]["counts"]
    if isinstance(counts, bytes):
        ann["segmentation"]["counts"] = counts.decode("utf-8")

In [None]:
# COCO `categories` section, built from the class-id -> name mapping in the
# dataset description.
coco_categories = create_coco_categories(raw_info['class_dict'])

In [None]:
# Top-level COCO document: the three sections assembled in the cells above.
coco_info = dict(
    images=coco_images,
    annotations=coco_annotations,
    categories=coco_categories,
)

In [None]:
# Serialise before opening the file: if some value cannot be encoded, the
# error is raised here and an existing coco_annotations.json is left intact,
# instead of being truncated and then only partially rewritten.
coco_json = json.dumps(coco_info, indent=4)
with open("coco_annotations.json", "w") as f:
    f.write(coco_json)

In [None]:
# One entry per video ({"video_id": ..., "frames": [...]}), filled by the loop
# in the next cell.
videos_info = []

In [None]:
# Index the COCO image ids by file name once, so each frame is resolved with a
# dict lookup rather than a scan of `coco_images`. setdefault keeps the first
# image seen for a given file name.
# NOTE(review): the lookup ignores the video id; if two videos can share a
# file name their frames would resolve to the same image id -- confirm that
# file names are unique across videos.
image_id_by_file_name = {}
for img in coco_images:
    image_id_by_file_name.setdefault(img['file_name'], img['id'])

# Sorted so the order of videos in the output is the same on every run.
for video_id in sorted(video_id_set):
    frames = get_dicts_by_field_value(raw_info['frames'], "video_id", video_id)
    frames = sort_dicts_by_field(frames, "file_name")

    current_video_frames = []
    for frame in frames:
        # Frames without a ground-truth mask never received a COCO image
        # entry; they get id None instead of aborting the whole export.
        frame['id'] = image_id_by_file_name.get(frame['file_name'])
        frame['path'] = frame['file_path']
        current_video_frames.append(frame)

    videos_info.append({
        "video_id": video_id,
        "frames": current_video_frames
    })

In [None]:
# Summarise each video as (video_id, number of frames) rather than dumping the
# full nested structure into the notebook output.
[(video["video_id"], len(video["frames"])) for video in videos_info]

In [None]:
# Serialise before opening the file: if some value cannot be encoded, the
# error is raised here and an existing cholecseg8k_videos.json is left intact,
# instead of being truncated and then only partially rewritten.
videos_json = json.dumps(videos_info, indent=4)
with open("cholecseg8k_videos.json", "w") as f:
    f.write(videos_json)