In [None]:
import json
import pandas as pd
import random
import skimage.io as io
import os
from pycocotools.coco import COCO
import matplotlib.pyplot as plt
import matplotlib.patches as patches


# Confidence threshold: predictions scoring below this value are not turned
# into pseudo-label annotations.
CONF_THR = 0.7


# Path of the training annotation JSON (backing it up before running is
# recommended).
# NOTE(review): this line held an unquoted `${train_json_dir}` template
# placeholder, which is not valid Python. It is quoted here so the cell parses;
# replace it with the real path to your train JSON before running.
train_json_path = "${train_json_dir}"

# Path of the submission CSV to merge in (change this to your own path).
submission_path = "AVG_csv_dir"

# Root directory and base file name (without extension) of the generated JSON.
output_root = '../dataset_pseudo'
output_json_name = 'pseudo_train'
output_path = os.path.join(output_root, output_json_name + '.json')

# ---------------------------------------------------------------------------------------------------------------- #

# Build a pseudo-labelled training set: load the existing training annotation
# JSON, convert every sufficiently confident prediction from the submission CSV
# into an annotation entry, and write the merged JSON to `output_path`.
with open(train_json_path, encoding='utf-8') as f:
    train_json = json.load(f)

df = pd.read_csv(submission_path)
# Each row is unpacked positionally below as (prediction string, image name).
submission_list = df.values.tolist()

images = []
annotations = []

# Continue numbering after the largest id already in use. Taking the maximum
# (rather than the last element) avoids id collisions when the lists are not
# sorted by id, and the default keeps an empty list from raising.
start_id = max((img['id'] for img in train_json['images']), default=-1)
start_anno_id = max((ann['id'] for ann in train_json['annotations']), default=-1)

print('PSEUDO LABELING START...')

for bboxes, image_name in submission_list:
    # One candidate image id is consumed per CSV row, whether or not the row
    # ends up contributing an image.
    start_id += 1

    # pandas reads an empty prediction cell as NaN (a float), which has no
    # .split(); such rows simply have no boxes to add.
    if not isinstance(bboxes, str):
        continue

    fields = bboxes.split()
    num_bbox = 0

    # Predictions are flat groups of six tokens:
    # class, confidence, left, top, right, bottom.
    # A trailing incomplete group is ignored.
    for offset in range(0, len(fields) - 5, 6):
        _class, conf, left, top, right, bottom = fields[offset:offset + 6]

        # Filter on confidence first so discarded boxes cost no further work.
        if float(conf) < CONF_THR:
            continue

        left, top, right, bottom = float(left), float(top), float(right), float(bottom)
        width, height = right - left, bottom - top

        start_anno_id += 1
        num_bbox += 1

        annotations.append({
            'image_id': start_id,
            'category_id': int(_class),
            'area': round(width * height, 2),
            # Stored as [x, y, width, height].
            'bbox': [round(left, 1), round(top, 1), round(width, 1), round(height, 1)],
            'iscrowd': 0,
            'id': start_anno_id,
        })

    # Only register the image when at least one box survived the threshold.
    if num_bbox != 0:
        images.append({
            # NOTE(review): every image is recorded as 1024x1024 — confirm this
            # holds for the whole dataset.
            'width': 1024,
            'height': 1024,
            'file_name': image_name,
            'license': 0,
            'flickr_url': None,
            # Was misspelled 'coco_ur'; the COCO image field is 'coco_url'.
            'coco_url': None,
            'date_captured': "2023-05-17 16:16:16",
            'id': start_id,
        })

print('PSEUDO LABELING COMPLETED...\n')
print(f'{len(images)} 개의 images가 추가됨.')
print(f'{len(annotations)} 개의 annotations이 추가됨.\n')

train_json['images'] += images
train_json['annotations'] += annotations

# Create the destination directory if needed so the write below cannot fail
# just because the folder is missing.
output_dir = os.path.dirname(output_path)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

with open(output_path, 'w', encoding='utf-8') as f:
    json.dump(train_json, f)

# Report success only after the file has actually been written.
print(f'{output_path} 에 저장됨.')

In [None]:
# Visual sanity check: draw one randomly chosen pseudo-labelled image together
# with the boxes and class names recorded for it in `annotations`.

# Class names, indexed by an annotation's `category_id`.
classes = ("General trash", "Paper", "Paper pack", "Metal", "Glass",
           "Plastic", "Styrofoam", "Plastic bag", "Battery", "Clothing")

if not images:
    # random.randrange(0) would raise ValueError; nothing to show in that case.
    print('No pseudo-labelled images to display.')
else:
    index = random.randrange(len(images))

    # `image_id` rather than `id`, so the builtin id() is not shadowed.
    file_name, image_id = images[index]['file_name'], images[index]['id']
    print(file_name)
    I = io.imread(os.path.join('../dataset_pseudo', file_name))
    plt.rcParams["figure.figsize"] = (12, 12)
    plt.axis('off')
    plt.imshow(I)
    ax = plt.gca()
    for annotation in annotations:
        if annotation['image_id'] != image_id:
            continue
        print(annotation)
        # bbox is stored as [x, y, width, height].
        x, y, box_w, box_h = annotation['bbox']
        bb = patches.Rectangle((x, y), box_w, box_h, linewidth=2,
                               edgecolor='tomato', facecolor='none')
        ax.add_patch(bb)
        # Place the class label slightly above the box's top-left corner.
        ax.text(x, y - 10, classes[int(annotation['category_id'])],
                weight='bold', color='tomato', fontsize=13)
    plt.show()