In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import seaborn as sns
from pathlib import Path
import base64
import cv2
import matplotlib.pyplot as plt
import json 
from tqdm.notebook import tqdm 
import scipy 
from joblib import Parallel , delayed

In [None]:
# Mount Google Drive at /content/drive (only works inside Google Colab).
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Extract the dataset archive from Drive into /content
# (-u: only extract new or updated files, -q: quiet).
!unzip -uq '/content/drive/MyDrive/Detection_of_lesions/data.zip' -d '/content'

In [None]:
# Side length (pixels) that scale_bbox rescales boxes to and xyxy2yolo normalises by.
IMG_SIZE = 128
# Root folder the dataset archive is extracted into.
base_path = Path('/content')
# Annotation files handed to save_image_label (each is opened and parsed as JSON).
train_path = list((base_path /'train').glob('train*'))
test_path = list((base_path / 'test').glob('test*'))

In [None]:
# Class table shipped with the dataset: one row per class name and its one-based id.
label_info = pd.read_csv(base_path / 'class_id_info.csv')

# Map class name -> zero-based class index for the YOLO label files.
# Columns are addressed by name so the mapping does not silently break if the
# CSV gains or reorders columns.
categories = {
    class_name: int(class_id) - 1
    for class_name, class_id in zip(label_info['class'], label_info['class_id'])
}
label_info

Unnamed: 0,class,class_id
0,01_ulcer,1
1,02_mass,2
2,04_lymph,3
3,05_bleeding,4


In [None]:
def xyxy2coco(xyxy):
    """Convert a corner box ``[x1, y1, x2, y2]`` to ``[x, y, width, height]``."""
    left, top, right, bottom = xyxy
    return [left, top, right - left, bottom - top]

def xyxy2yolo(xyxy, img_size=None):
    """Convert a pixel box ``[x1, y1, x2, y2]`` to normalised ``[xc, yc, w, h]``.

    Args:
        xyxy: Box corners in pixels (top-left x/y followed by bottom-right x/y).
        img_size: Side length the coordinates are divided by. When omitted the
            notebook-level ``IMG_SIZE`` is used.

    Returns:
        ``[xc, yc, w, h]`` where every value has been divided by ``img_size``.
    """
    if img_size is None:
        img_size = IMG_SIZE

    x1, y1, x2, y2 = xyxy
    w, h = x2 - x1, y2 - y1
    # Keep the exact centre. Rounding it to a whole pixel before normalising
    # shifts the box by up to half a pixel, and the label is a float anyway.
    xc = x1 + w / 2  # xmin + width/2
    yc = y1 + h / 2  # ymin + height/2
    return [xc / img_size, yc / img_size, w / img_size, h / img_size]

def scale_bbox(img, xyxy, img_size=None):
    """Rescale a pixel box from the image's own resolution to a square target size.

    Args:
        img: Array whose ``shape[0]`` is the height and ``shape[1]`` the width
            of the image the box was drawn on.
        xyxy: Box corners ``[x1, y1, x2, y2]`` in that image's pixels.
        img_size: Target side length. When omitted the notebook-level
            ``IMG_SIZE`` is used.

    Returns:
        ``[x1, y1, x2, y2]`` as integers in the ``img_size`` x ``img_size`` frame.
    """
    if img_size is None:
        img_size = IMG_SIZE

    # Get scaling factor
    scale_x = img_size / img.shape[1]
    scale_y = img_size / img.shape[0]

    x1, y1, x2, y2 = xyxy
    # Round to the nearest pixel. int() on its own truncates toward zero, which
    # systematically shrinks boxes toward the top-left corner.
    return [
        int(np.round(x1 * scale_x)),
        int(np.round(y1 * scale_y)),
        int(np.round(x2 * scale_x)),
        int(np.round(y2 * scale_y)),
    ]  # xmin, ymin, xmax, ymax

def save_image_label(json_file, mode):
    """Decode one annotation JSON into a PNG and, for training data, a YOLO label file.

    The output folders and the class mapping are read from the notebook-level
    ``new_image_path``, ``new_label_path`` and ``categories`` at call time.

    Args:
        json_file: Path of a JSON file containing ``file_name``, base64-encoded
            ``imageData`` and a ``shapes`` list (each shape has ``points`` and
            ``label``).
        mode: ``'train'`` also writes ``<image_id>.txt`` with one
            ``class xc yc w h`` line per shape; any other value writes only
            the image.

    Returns:
        The boxes in original pixel coordinates, one ``[x1, y1, x2, y2]`` list
        per shape (empty unless ``mode == 'train'``).

    Raises:
        ValueError: If the embedded image data cannot be decoded.
    """
    with open(json_file, 'r') as f:
        annotation = json.load(f)

    image_id = annotation['file_name'].replace('.json', '')

    # decode image data
    encoded = np.frombuffer(base64.b64decode(annotation['imageData']), np.uint8)
    image = cv2.imdecode(encoded, cv2.IMREAD_COLOR)
    if image is None:
        # imdecode signals failure by returning None; fail with a clear message
        # instead of letting imwrite choke on it.
        raise ValueError(f'could not decode image data in {json_file}')
    cv2.imwrite(str(new_image_path / (image_id + '.png')), image)

    # extract bbox
    origin_bbox = []
    if mode == 'train':
        with open(new_label_path / (image_id + '.txt'), 'w') as f:
            for shape in annotation['shapes']:
                # Take the extremes over all points so the box is correct
                # regardless of how many corners are listed or in which order.
                xs = [point[0] for point in shape['points']]
                ys = [point[1] for point in shape['points']]
                bbox = [min(xs), min(ys), max(xs), max(ys)]
                origin_bbox.append(bbox)

                yolo_box = xyxy2yolo(scale_bbox(image, bbox))
                fields = [categories[shape['label']]] + yolo_box
                f.write(' '.join(str(value) for value in fields) + '\n')
    return origin_bbox

In [None]:
import multiprocessing as mp

# Output folders for the converted training set
save_path = Path('./train_data')
new_image_path = save_path.joinpath('images')  # decoded PNG images
new_label_path = save_path.joinpath('labels')  # YOLO label files

new_image_path.mkdir(parents=True, exist_ok=True)
new_label_path.mkdir(parents=True, exist_ok=True)

# Convert every training JSON in parallel on a thread pool, one worker per CPU core
tmp = Parallel(n_jobs=mp.cpu_count(), prefer="threads")(
    delayed(save_image_label)(str(train_json), 'train')
    for train_json in tqdm(train_path)
)

  0%|          | 0/62622 [00:00<?, ?it/s]

In [None]:
from sklearn.model_selection import train_test_split

# glob() yields files in arbitrary filesystem order, so sort first; otherwise
# the fixed random_state would not give the same split from run to run.
images_path = sorted(new_image_path.glob('*'))

# All extracted images are used: 60% for training, 40% held out for validation.
train_path_list, valid_path_list = train_test_split(
    images_path, test_size=0.4, random_state=1
)

In [None]:
# One image path per line. Every line must end with a bare newline: a space
# after '\n' would be prepended to the next path, which then no longer names
# an existing file and is dropped by the data loader.
with open('train_dataset.txt', 'w') as f:
    f.writelines(f'{image_path}\n' for image_path in train_path_list)
with open('valid_dataset.txt', 'w') as f:
    f.writelines(f'{image_path}\n' for image_path in valid_path_list)

In [None]:
import torch
from IPython.display import Image, clear_output  # to display images

# Clear earlier cell output, then report the torch version and the device in
# use (name of CUDA device 0 when CUDA is available, otherwise 'CPU').
clear_output()
print(f"Setup complete. Using torch {torch.__version__} ({torch.cuda.get_device_properties(0).name if torch.cuda.is_available() else 'CPU'})")

In [None]:
# 욜로 다운로드
! git clone https://github.com/ultralytics/yolov5
! pip install -r yolov5/requirements.txt

Collecting PyYAML>=5.3.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 12.5 MB/s 
Collecting thop
  Downloading thop-0.0.31.post2005241907-py3-none-any.whl (8.7 kB)
Installing collected packages: thop, PyYAML
  Attempting uninstall: PyYAML
    Found existing installation: PyYAML 3.13
    Uninstalling PyYAML-3.13:
      Successfully uninstalled PyYAML-3.13
Successfully installed PyYAML-6.0 thop-0.0.31.post2005241907
Collecting scipy
  Downloading scipy-1.7.3-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (38.1 MB)
[K     |████████████████████████████████| 38.1 MB 79.4 MB/s 
Installing collected packages: scipy
  Attempting uninstall: scipy
    Found existing installation: scipy 1.4.1
    Uninstalling scipy-1.4.1:
      Successfully uninstalled scipy-1.4.1
[31mERROR: pip's dependency resolver does not currently take into account all the packa

In [None]:
!pip install -q --upgrade wandb
# Login 
import wandb
wandb.login()

[K     |████████████████████████████████| 1.7 MB 13.0 MB/s 
[K     |████████████████████████████████| 180 kB 74.2 MB/s 
[K     |████████████████████████████████| 97 kB 7.4 MB/s 
[K     |████████████████████████████████| 140 kB 74.9 MB/s 
[K     |████████████████████████████████| 63 kB 2.1 MB/s 
[?25h  Building wheel for subprocess32 (setup.py) ... [?25l[?25hdone
  Building wheel for pathtools (setup.py) ... [?25l[?25hdone


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [None]:
# Describe the dataset for YOLOv5: image list files, class count and class names
import yaml

data_yaml = {
    'train': './train_dataset.txt',
    'val': './valid_dataset.txt',
    'nc': 4,
    'names': ['01_ulcer', '02_mass', '04_lymph', '05_bleeding'],
}

# The file is written to the current working directory as endoscopy.yaml.
with open('endoscopy.yaml', 'w') as outfile:
    yaml.dump(data_yaml, outfile, default_flow_style=True)

In [None]:
# Train YOLOv5 on ./endoscopy.yaml at 128 px for 30 epochs with batch size 100,
# saving a checkpoint every epoch under yolov5-endoscopy/endoscopy.
# NOTE(review): --weights points outside this workspace (../input/...); confirm
# that path is valid in this environment.
!python ./yolov5/train.py --img 128 \
                          --batch 100\
                          --epochs 30\
                          --data ./endoscopy.yaml\
                          --weights ../input/ultralyticsyolov5aweights/yolov5s.pt\
                          --project yolov5-endoscopy\
                          --save-period 1\
                          --name endoscopy

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
[34m[1mval: [0mNew cache created: valid_dataset.cache
Plotting labels to yolov5-endoscopy/endoscopy/labels.jpg... 

[34m[1mAutoAnchor: [0m4.92 anchors/target, 1.000 Best Possible Recall (BPR). Current anchors are a good fit to dataset ✅
Image sizes 128 train, 128 val
Using 8 dataloader workers
Logging results to [1myolov5-endoscopy/endoscopy[0m
Starting training for 30 epochs...

     Epoch   gpu_mem       box       obj       cls    labels  img_size
      0/29     1.04G   0.08343  0.009928   0.03167       116       128: 100% 564/564 [05:12<00:00,  1.81it/s]
               Class     Images     Labels          P          R     mAP@.5 mAP@.5:.95: 100% 1/1 [00:00<00:00,  5.68it/s]
                 all          1          1      0.988          1      0.995     0.0995
Saving model artifact on epoch 1

     Epoch   gpu_mem       box       obj       cls    labels  img_size
      1/29     1.04G   0.06813  0.009849   0.0190

In [None]:
import multiprocessing as mp

# Decoded test images go to ./test_data/images. Rebinding new_image_path here
# is what redirects save_image_label's output to the test folder.
save_path = Path('./test_data').resolve()
new_image_path = save_path / 'images'

new_image_path.mkdir(parents=True, exist_ok=True)

# Decode every test JSON; mode 'test' writes the image only, no label file.
tmp = Parallel(n_jobs=16, prefer="threads")(
    delayed(save_image_label)(str(test_json), 'test')
    for test_json in tqdm(test_path)
)

# List the images only after extraction. Globbing beforehand yields an empty
# list on a fresh run because nothing has been written yet.
test_path_list = sorted(new_image_path.glob('*'))

  0%|          | 0/20874 [00:00<?, ?it/s]

In [None]:
!python ./yolov5/detect.py --weights /content/yolov5-endoscopy/endoscopy/weights/best.pt \
                           --source ./test_data/images \
                           --save-txt \
                           --save-conf

[1;30;43m스트리밍 출력 내용이 길어서 마지막 5000줄이 삭제되었습니다.[0m
image 15878/20874 /content/test_data/images/test_215877.png: 640x640 Done. (0.007s)
image 15879/20874 /content/test_data/images/test_215878.png: 640x640 Done. (0.007s)
image 15880/20874 /content/test_data/images/test_215879.png: 640x640 Done. (0.007s)
image 15881/20874 /content/test_data/images/test_215880.png: 640x640 Done. (0.007s)
image 15882/20874 /content/test_data/images/test_215881.png: 640x640 Done. (0.007s)
image 15883/20874 /content/test_data/images/test_215882.png: 640x640 Done. (0.007s)
image 15884/20874 /content/test_data/images/test_215883.png: 640x640 Done. (0.007s)
image 15885/20874 /content/test_data/images/test_215884.png: 640x640 Done. (0.007s)
image 15886/20874 /content/test_data/images/test_215885.png: 640x640 Done. (0.007s)
image 15887/20874 /content/test_data/images/test_215886.png: 640x640 Done. (0.007s)
image 15888/20874 /content/test_data/images/test_215887.png: 640x640 Done. (0.007s)
image 15889/20874 /content

In [None]:
def xywh2xyxy(x):
    """Convert one centre-format box ``[xc, yc, w, h]`` to corners ``[x1, y1, x2, y2]``.

    The result is written into ``x.copy()``, so the argument itself is left
    unchanged and the return value has the same container type as the input.
    """
    xc, yc = x[0], x[1]
    half_w, half_h = x[2] / 2, x[3] / 2

    corners = x.copy()
    corners[0] = xc - half_w  # top left x
    corners[1] = yc - half_h  # top left y
    corners[2] = xc + half_w  # bottom right x
    corners[3] = yc + half_h  # bottom right y
    return corners

total_list = []
# Column-oriented accumulator for the submission table: one entry per detection.
results = {
    'file_name': [], 'class_id': [], 'confidence': [], 'point1_x': [], 'point1_y': [],
    'point2_x': [], 'point2_y': [], 'point3_x': [], 'point3_y': [], 'point4_x': [], 'point4_y': []
}

# NOTE(review): the run folder name is hard-coded; confirm it matches the
# folder the detection run actually wrote to, otherwise no label files are
# found and the result table stays empty.
result_path = Path('./yolov5/runs/detect/exp5')
result_img = list(result_path.glob('*.png'))
result_label = list(result_path.glob('labels/*.txt'))

for i in result_label:
    file_name = i.name.replace('.txt', '.json')
    img_name = file_name.replace('.json', '.png')

    # An OpenCV image has shape (height, width, channels): height comes first.
    oh, ow = cv2.imread(str(result_path / img_name)).shape[:2]

    with open(str(i), 'r') as f:
        for line in f:
            # split() copes with a missing trailing newline and stray blanks,
            # unlike slicing off the last character.
            corrdi = line.split()
            if not corrdi:
                continue
            label, xc, yc, w, h, score = corrdi
            xc, yc, w, h, score = map(float, (xc, yc, w, h, score))

            # Labels are normalised to [0, 1]; scale back to pixels.
            xc, w = xc * ow, w * ow
            yc, h = yc * oh, h * oh

            refine_cordi = np.array(xywh2xyxy([xc, yc, w, h])).astype(int)
            x_min, y_min, x_max, y_max = refine_cordi

            # Corners are listed clockwise starting from the top-left.
            row = {
                'file_name': file_name, 'class_id': label, 'confidence': score,
                'point1_x': x_min, 'point1_y': y_min,
                'point2_x': x_max, 'point2_y': y_min,
                'point3_x': x_max, 'point3_y': y_max,
                'point4_x': x_min, 'point4_y': y_max,
            }
            for column, value in row.items():
                results[column].append(value)

In [None]:
# Assemble the submission table and shift class ids from zero-based to one-based
df = pd.DataFrame(results)
df['class_id'] = df['class_id'].map(lambda raw_id: int(raw_id) + 1)
df

Unnamed: 0,file_name,class_id,confidence,point1_x,point1_y,point2_x,point2_y,point3_x,point3_y,point4_x,point4_y


In [None]:
# Write the submission CSV to Drive. Only the final 'result' folder is created
# on demand, so a missing Drive mount still fails loudly instead of silently
# writing to local disk.
submission_path = Path('/content/drive/MyDrive/Detection_of_lesions/result/final_submission.csv')
submission_path.parent.mkdir(exist_ok=True)
df.to_csv(submission_path, index=False)