In [None]:
import os
from glob import glob
from PIL import Image
import numpy as np
import json
import random
from copy import deepcopy
from tqdm import tqdm
import cv2

In [None]:
# Skeleton of a COCO detection file. It carries the dataset metadata, the
# license table and the three cell categories; "images" and "annotations"
# start empty and are meant to be filled on a copy of this template.
coco_json_template = {
    "info": {
        "description": "IHC Biomarker Dataset",
        "url": "http://cocodataset.org",
        "version": "1.0",
        "year": 2025,
        "contributor": "Research Team",
        "date_created": "2025/09/29",
    },
    "licenses": [
        {
            "url": "http://creativecommons.org/licenses/by-nc-sa/2.0/",
            "id": 1,
            "name": "Attribution-NonCommercial-ShareAlike License",
        },
    ],
    # Category ids are 1-based and follow the order of the names below.
    "categories": [
        {"id": category_id, "name": category_name, "supercategory": "biomarker"}
        for category_id, category_name in enumerate(
            (
                "pd-l1 negative tumor cell",  # tumor cell without PD-L1 staining
                "pd-l1 positive tumor cell",  # tumor cell with PD-L1 staining
                "non-tumor cell",             # any cell that is not a tumor cell
            ),
            start=1,
        )
    ],
    "images": [],
    "annotations": [],
}

def crop_label(crop_h,crop_w,label,input_size):
    """Return the annotations whose box origin lies inside one square tile.

    The tile covers rows ``[crop_h, crop_h + input_size)`` and columns
    ``[crop_w, crop_w + input_size)`` of the source image. Each kept
    annotation is returned as a copy whose bbox origin is expressed relative
    to the tile's top-left corner; bbox width and height are left unchanged
    (boxes are not clipped to the tile).

    Args:
        crop_h: Row (y) offset of the tile's top edge in the source image.
        crop_w: Column (x) offset of the tile's left edge in the source image.
        label: Dict with an ``'annotations'`` list; every entry has a
            ``'bbox'`` laid out as ``[x, y, width, height]``.
        input_size: Side length of the square tile.

    Returns:
        A new list of annotation dicts. ``label`` itself is never modified,
        so the same ``label`` can be cropped repeatedly for different tiles.
    """
    kept = []
    for annotation in label['annotations']:
        x = annotation['bbox'][0]
        y = annotation['bbox'][1]
        # x is a column coordinate and therefore pairs with crop_w; y pairs
        # with crop_h. The interval is half-open so that a box sitting exactly
        # on a tile border belongs to one tile only and never lands on
        # coordinate == input_size, which is outside the tile.
        inside_columns = crop_w <= x < crop_w + input_size
        inside_rows = crop_h <= y < crop_h + input_size
        if not (inside_columns and inside_rows):
            continue
        # Copy before shifting: mutating the caller's annotation would make
        # every later tile of the same image see already-shifted coordinates.
        shifted = dict(annotation)
        shifted['bbox'] = [x - crop_w, y - crop_h] + list(annotation['bbox'][2:])
        kept.append(shifted)
    return kept

def _append_tile_records(image_info, annotation_info, tile_name, tile_labels, image_id, annotation_id, input_size):
    """Append one COCO image entry and its annotation entries; return the next free (image_id, annotation_id)."""
    image_info.append({
        "id": image_id,
        "width": input_size,
        "height": input_size,
        "file_name": tile_name,
        "license": 1,
    })
    for tile_label in tile_labels:
        bbox = list(tile_label['bbox'])  # [x, y, width, height]
        annotation_info.append({
            "id": annotation_id,
            "image_id": image_id,
            "category_id": tile_label['category_id'],
            "bbox": bbox,
            # "area" is part of the COCO annotation schema and is read by
            # COCO evaluators; for a box annotation it is width * height.
            "area": bbox[2] * bbox[3],
            "iscrowd": 0
        })
        annotation_id += 1
    return image_id + 1, annotation_id


def custom_coco_json_creation(json_data,file_path,image_id,annotation_id,input_size=512,class_name='train',output_root='../../data/coco_IGNITE/'):
    """Cut one annotated image into ``input_size`` squares and build COCO records for them.

    If both image sides are larger than ``input_size`` the image is tiled on a
    regular grid; the last row/column is shifted back so it ends exactly at the
    image border (it overlaps its neighbour, and annotations in the overlap
    appear in both tiles). Otherwise the top-left ``input_size`` x
    ``input_size`` region of the image is pasted onto a white canvas.

    Every tile is written as a PNG to ``<output_root>/<class_name>/``. Only
    tiles that contain at least one annotation get an image/annotation record;
    tiles without annotations are still saved but are not referenced.

    Args:
        json_data: Dict with ``json_data['image']`` holding ``width``,
            ``height`` and ``file_name``, plus the ``'annotations'`` list
            consumed by ``crop_label``.
        file_path: Directory containing the source image.
        image_id: First COCO image id to assign.
        annotation_id: First COCO annotation id to assign.
        input_size: Side length of the output tiles.
        class_name: Split sub-directory the tiles are written to.
        output_root: Root directory of the converted dataset.

    Returns:
        ``(image_info, annotaion_info, image_id, annotation_id)`` where the two
        lists hold the new COCO records and the two ids are the next unused
        ids.
    """
    w=json_data['image']['width']
    h=json_data['image']['height']
    source_name=json_data['image']['file_name']
    # splitext keeps dots that are part of the name ("a.b.png" -> "a.b");
    # split('.')[0] would truncate it to "a" and make distinct files collide.
    stem=os.path.splitext(source_name)[0]
    out_dir=os.path.join(output_root,class_name)
    os.makedirs(out_dir,exist_ok=True)
    image_info=[]
    annotaion_info=[]

    # The context manager closes the source file handle once all tiles are cut.
    with Image.open(os.path.join(file_path,source_name)) as image:
        if min(h,w) > input_size:
            # Ceil division so the right/bottom remainder strips are covered too.
            h_count=-(-h//input_size)
            w_count=-(-w//input_size)
            for hi in range(h_count):
                # Shift the last row/column back so the tile stays inside the image.
                h1=min(hi*input_size,h-input_size)
                for wi in range(w_count):
                    w1=min(wi*input_size,w-input_size)
                    tile_name=stem+'_'+str(hi)+'_'+str(wi)+'.png'
                    crop_image=image.crop((w1,h1,w1+input_size,h1+input_size))
                    crop_image.save(os.path.join(out_dir,tile_name))
                    crop_labels=crop_label(h1,w1,json_data,input_size)
                    if len(crop_labels)==0:
                        continue
                    image_id,annotation_id=_append_tile_records(
                        image_info,annotaion_info,tile_name,crop_labels,
                        image_id,annotation_id,input_size)
        else:
            # At least one image side is smaller than or equal to input_size:
            # pad onto a white canvas instead of tiling.
            h1=0
            w1=0
            tile_name=stem+'_'+str(h1)+'_'+str(w1)+'.png'
            # Force three channels so grayscale / RGBA inputs fit the canvas.
            pixels=np.asarray(image.convert('RGB'))[:input_size,:input_size,:]
            pad_image=np.full((input_size,input_size,3),255,dtype=np.uint8)
            pad_image[:pixels.shape[0],:pixels.shape[1],:]=pixels
            # Save through PIL: the array is RGB, while cv2.imwrite interprets
            # arrays as BGR and would swap the red and blue channels on disk.
            Image.fromarray(pad_image).save(os.path.join(out_dir,tile_name))
            crop_labels=crop_label(h1,w1,json_data,input_size)
            if len(crop_labels)!=0:
                image_id,annotation_id=_append_tile_records(
                    image_info,annotaion_info,tile_name,crop_labels,
                    image_id,annotation_id,input_size)

    return image_info, annotaion_info,image_id, annotation_id



In [None]:
# Convert every per-image annotation file into tiles and distribute the
# source images over train / val / test COCO datasets.

# glob returns files in arbitrary order; sorting makes the seeded split below
# assign the same image to the same split on every run.
json_list=sorted(glob('../../data/IGNITE/annotations/pdl1/individual/*.json'))
file_path='../../data/IGNITE/images/pdl1/pdl1/'
output_root='../../data/coco_IGNITE/'

# Fixed seed and a private generator so the split is reproducible and does not
# depend on other users of the global `random` state.
SPLIT_SEED=42
split_rng=random.Random(SPLIT_SEED)

# Per-split accumulator: the COCO document plus the next free ids.
split_state={}
for split_name in ('train','val','test'):
    os.makedirs(output_root+split_name+'/',exist_ok=True)
    split_state[split_name]={
        'json': deepcopy(coco_json_template),
        'image_id': 1,
        'annotation_id': 1,
    }

class_name='train'
for json_path in tqdm(json_list):
    # randrange(10) draws 0..9, giving an 80/10/10 split; randint(0,10) is
    # inclusive on both ends and would draw 11 values (8/11, 1/11, 2/11).
    dataset_classification=split_rng.randrange(10)
    if dataset_classification<8:
        class_name='train'
    elif dataset_classification==8:
        class_name='val'
    else:
        class_name='test'

    with open(json_path,'r') as f:
        data=json.load(f)

    state=split_state[class_name]
    image_info, annotaion_info, state['image_id'], state['annotation_id']=custom_coco_json_creation(
        data,file_path,state['image_id'],state['annotation_id'],class_name=class_name)
    state['json']['images'].extend(image_info)
    state['json']['annotations'].extend(annotaion_info)

for split_name in ('train','val','test'):
    with open(output_root+split_name+'/_annotations.coco.json','w') as f:
        json.dump(split_state[split_name]['json'],f)

# Keep the per-split names available for any later cells that rely on them.
train_json=split_state['train']['json']
val_json=split_state['val']['json']
test_json=split_state['test']['json']
train_image_id=split_state['train']['image_id']
val_image_id=split_state['val']['image_id']
test_image_id=split_state['test']['image_id']
train_annotation_id=split_state['train']['annotation_id']
val_annotation_id=split_state['val']['annotation_id']
test_annotation_id=split_state['test']['annotation_id']