In [None]:
# DACON 병변검출 AI 경진대회에 공유된 코드를 수정했습니다.
# https://dacon.io/competitions/official/235855/codeshare/3743?page=1&dtype=recent

# Prepare Data

In [None]:
# Mount Google Drive at /content/drive; later cells read the dataset archive
# and the model weights from /content/drive/MyDrive and write the result CSV there.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import pandas as pd
import numpy as np
import json
import matplotlib.pyplot as plt
import cv2
import base64
import time
import math
import datetime
import os
from PIL import Image
from io import BytesIO
from tqdm import tqdm
from glob import glob

import torch
import torchvision
import torch.distributed as dist
from torch.utils.data import Dataset
from torchvision import transforms
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
from collections import defaultdict, deque

import seaborn as sns
from pathlib import Path
import scipy 
from joblib import Parallel , delayed

In [None]:
# When running on Colab, extract the train and test folders into '/content'.
# data.zip can be downloaded from https://dacon.io/competitions/official/235855/data
# unzip flags: -u only extracts new or updated files, -q suppresses the file listing.
!unzip -uq '/content/drive/MyDrive/Detection_of_lesions/data.zip' -d '/content'

In [None]:
# Only the files inside the test folder are needed, so collect their paths
# with glob and keep the list in sorted order.
test_files = glob('/content/test/*')
test_files.sort()

In [None]:
# Sanity check: the test data is considered correctly loaded only when
# exactly 20874 entries were found.
print(
    '데이터를 정상적으로 불러왔습니다.'
    if len(test_files) == 20874
    else '데이터 길이에 문제가 있습니다.'
)

In [None]:
# Side length (in pixels) that bounding boxes are scaled to and normalised by.
IMG_SIZE = 256
# Directory the dataset archive was extracted into.
base_path = Path('/content')
# Every entry of the test folder whose name starts with 'test'.
test_path = [*base_path.joinpath('test').glob('test*')]

In [None]:
# Load the class table shipped with the dataset.
label_info = pd.read_csv(base_path / 'class_id_info.csv')
# Map the first column of each row to (second column - 1), i.e. a zero-based id.
categories = dict((row[0], row[1] - 1) for row in label_info.to_numpy())
label_info

In [None]:
# json file 을 읽은후 yolo format으로 bbox를 만들고 image를 decoding하여 image를 해당폴더에 생성합니다.

def xyxy2coco(xyxy):
    """Convert a corner-format box to ``[x, y, width, height]``.

    Args:
        xyxy: Four values ``(x1, y1, x2, y2)``.

    Returns:
        A list ``[x1, y1, x2 - x1, y2 - y1]``.
    """
    left, top, right, bottom = xyxy
    return [left, top, right - left, bottom - top]

def xyxy2yolo(xyxy, img_size=None):
    """Convert a corner-format box to normalised ``[xc, yc, w, h]``.

    Args:
        xyxy: Four values ``(x1, y1, x2, y2)`` in pixels.
        img_size: Side length used for normalisation. Defaults to the
            module-level ``IMG_SIZE`` when omitted.

    Returns:
        A list ``[x_centre, y_centre, width, height]``, each divided by the
        side length.
    """
    size = IMG_SIZE if img_size is None else img_size

    x1, y1, x2, y2 = xyxy
    w, h = x2 - x1, y2 - y1
    # Use the exact centre. Rounding the half-extent to a whole pixel would
    # shift the centre of odd-sized boxes by up to half a pixel.
    xc = x1 + w / 2  # xmin + width/2
    yc = y1 + h / 2  # ymin + height/2
    return [xc / size, yc / size, w / size, h / size]

def scale_bbox(img, xyxy, img_size=None):
    """Rescale a pixel box from ``img``'s resolution to a square target size.

    Args:
        img: Object whose ``shape[0]`` and ``shape[1]`` are read as the
            source height and width respectively.
        xyxy: Four values ``(x1, y1, x2, y2)`` in source pixels.
        img_size: Target side length. Defaults to the module-level
            ``IMG_SIZE`` when omitted.

    Returns:
        ``[x1, y1, x2, y2]`` as ints in the target resolution.
    """
    size = IMG_SIZE if img_size is None else img_size

    # Per-axis scaling factors from source resolution to the target size.
    src_height, src_width = img.shape[0], img.shape[1]
    scale_x = size / src_width
    scale_y = size / src_height

    x1, y1, x2, y2 = xyxy
    # Each value is rounded to 4 decimals and then truncated by int(), so
    # 1.5 becomes 1 while 9.99999 still becomes 10.
    return [
        int(np.round(x1 * scale_x, 4)),
        int(np.round(y1 * scale_y, 4)),
        int(np.round(x2 * scale_x, 4)),
        int(np.round(y2 * scale_y, 4)),
    ]  # xmin, ymin, xmax, ymax

def save_image_label(json_file,mode): 
    """Decode one annotation JSON, save its image as PNG and optionally write labels.

    Uses the module-level ``new_image_path``, ``new_label_path``,
    ``test_size_memo`` and ``categories``.

    Args:
        json_file: Path of a JSON file with the keys ``'file_name'`` and
            ``'imageData'`` (base64-encoded image bytes) and, in train mode,
            ``'shapes'``.
        mode: ``'train'`` additionally writes a label file with one row per
            shape; any other value only saves the image.

    Returns:
        The original boxes (``points[0] + points[2]`` of every shape). The
        list is empty unless ``mode == 'train'``.

    Raises:
        ValueError: If the embedded image data cannot be decoded.
    """
    with open(json_file, 'r') as f:
        record = json.load(f)

    image_id = record['file_name'].replace('.json', '')

    # Decode the base64 payload into an image array.
    encoded = np.frombuffer(base64.b64decode(record['imageData']), np.uint8)
    image = cv2.imdecode(encoded, cv2.IMREAD_COLOR)
    if image is None:
        # imdecode signals failure by returning None instead of raising.
        raise ValueError(f'could not decode imageData of {image_id}')
    cv2.imwrite(str(new_image_path / (image_id + '.png')), image)

    # Remember the image height, keyed by the matching label file name.
    # NOTE(review): only the height is stored; the later rescaling step
    # multiplies both x and y values by it, which is exact only for square
    # images -- confirm.
    test_size_memo[image_id + '.txt'] = image.shape[0]

    # Extract the boxes and write them as "<class> <xc> <yc> <w> <h> " rows.
    origin_bbox = []
    if mode == 'train':
        with open(new_label_path / (image_id + '.txt'), 'w') as f:
            for shape in record['shapes']:
                bbox = shape['points'][0] + shape['points'][2]
                origin_bbox.append(bbox)
                yolo_bbox = xyxy2yolo(scale_bbox(image, bbox))

                labels = [categories[shape['label']]] + yolo_bbox
                f.write(''.join(f'{value} ' for value in labels) + '\n')
    return origin_bbox

In [None]:
# Test set prepare
import multiprocessing as mp

# Output locations: decoded images go to 'images', label files to 'labels'.
save_path = Path('./test_data')
new_image_path = save_path.joinpath('images')
new_label_path = save_path.joinpath('labels')

for out_dir in (new_image_path, new_label_path):
    out_dir.mkdir(parents=True, exist_ok=True)

# Convert every test JSON with a thread pool sized to the CPU count.
# save_image_label records each image's size in test_size_memo as a side effect.
test_size_memo = {}
jobs = (delayed(save_image_label)(str(test_json), 'test') for test_json in tqdm(test_path))
tmp = Parallel(n_jobs=mp.cpu_count(), prefer="threads")(jobs)

In [None]:
# Download YOLOv5: clone the ultralytics repository and install its requirements.
! git clone https://github.com/ultralytics/yolov5
! pip install -r yolov5/requirements.txt

# Get Result

In [None]:
# Run YOLOv5 detection on the prepared test images with the trained weights.
# --save-txt writes the detections as text label files and --save-conf adds the
# confidence to each row; the conversion step below expects that extra column.
!python ./yolov5/detect.py --weights /content/drive/MyDrive/best.pt --img 256 --source ./test_data/images --save-conf --save-txt

# Convert output

In [None]:
# Convert detection label files (*.txt) into one DataFrame.
# path example : '/content/yolov5/runs/detect/exp3/labels'

def conv_txt_to_df(path):
    """Collect every ``*.txt`` label file under ``path`` into a DataFrame.

    Args:
        path: Directory holding the label files, as a string (with or
            without a trailing slash) or a path-like object.

    Returns:
        A DataFrame with the columns ``file_name, class_id, x_cent, y_cent,
        width, height, confidence``. Files are read in sorted order, each
        non-blank line becomes one row, and all values stay strings. The
        frame is empty when no label file exists.
    """
    columns = ['file_name', 'class_id', 'x_cent', 'y_cent', 'width', 'height', 'confidence']
    pattern = os.path.join(os.fspath(path), '*.txt')

    rows = []
    for txt in sorted(glob(pattern)):
        filename = os.path.basename(txt)
        with open(txt, 'r') as label_file:
            for line in label_file:
                fields = line.split()
                if not fields:
                    # A blank line would otherwise yield a row holding only the file name.
                    continue
                rows.append([filename] + fields)
    return pd.DataFrame(rows, columns=columns)

In [None]:
# NOTE: if detect.py has been run more than once, change 'exp' to 'exp2',
# 'exp3', ... so the path points at the run you want to convert.
result_df = conv_txt_to_df('/content/yolov5/runs/detect/exp/labels')
result_df

In [None]:
# Print a summary (columns, dtypes, non-null counts) of the parsed detections.
result_df.info()

In [None]:
def make_size(x):
    """Return the size recorded in ``test_size_memo`` for label file name ``x``."""
    return test_size_memo[x]


# Turn the normalised box values back into source-image pixels.
# NOTE(review): test_size_memo holds one number per image (the height stored by
# save_image_label) and it is applied to both axes here, which is exact only
# for square images -- confirm.
result_df['size'] = result_df.file_name.apply(make_size)
for column in ('x_cent', 'y_cent', 'width', 'height'):
    result_df[column] = result_df[column].astype(float) * result_df['size']

# The helper column is no longer needed once the scaling is done.
result_df = result_df.drop(columns='size')
result_df

In [None]:
# Point each row at its source JSON file, shift class ids up by one (undoing
# the -1 applied when `categories` was built) and make confidence numeric.
result_df.file_name = result_df.file_name.apply(lambda name: name.replace('.txt', '.json'))
result_df.class_id = result_df.class_id.apply(lambda cid: int(cid) + 1)
result_df.confidence = result_df.confidence.apply(float)

# Box edges derived from the centre and the half extents.
half_w = result_df.width / 2
half_h = result_df.height / 2
left = result_df.x_cent - half_w
right = result_df.x_cent + half_w
top = result_df.y_cent - half_h
bottom = result_df.y_cent + half_h

# Corners in the order: top-left, top-right, bottom-right, bottom-left.
corners = [(left, top), (right, top), (right, bottom), (left, bottom)]
for idx, (corner_x, corner_y) in enumerate(corners, start=1):
    result_df[f'point{idx}_x'] = corner_x
    result_df[f'point{idx}_y'] = corner_y

result_df = result_df.drop(columns=['x_cent', 'y_cent', 'width', 'height'])
result_df

In [None]:
# Display the converted detections.
result_df

In [None]:
# Keep at most the 30000 detections with the highest confidence.
result_df = result_df.sort_values('confidence', ascending=False).head(30000)

In [None]:
# NOTE: change the output file name below as needed!
result_df.to_csv('/content/drive/MyDrive/Detection_of_lesions/result/result.csv', index = False)

In [None]:
# Lowest confidence among the detections that were kept.
result_df.confidence.min()

# Check

In [None]:
# Preview up to 30 of the PNG images found in the detection output folder.
# NOTE(review): these files are presumably the images detect.py saved with the
# predicted boxes already drawn, so nothing is drawn on top here -- confirm.
test_output_list = sorted(glob('/content/yolov5/runs/detect/exp/*.png'))

plt.figure(figsize=(25,30))
# Slice instead of range(30) so fewer than 30 outputs do not raise IndexError.
for i, image_file in enumerate(test_output_list[:30]):
    ax = plt.subplot(6, 5, i + 1)
    # Open the file by path; the images are on disk, not base64 payloads.
    with Image.open(image_file) as pil_img:
        img = np.array(pil_img, np.uint8)
    ax.imshow(img)
    ax.set_title(os.path.basename(image_file))
plt.show()