In [1]:
#dataloader
from __future__ import print_function, division
import sys
import os
import torch
import glob
import numpy as np
import random
import csv
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import matplotlib.patches as patches

import torch.nn as nn
from torch.optim.lr_scheduler import ReduceLROnPlateau
import torchvision
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
from torch.utils.data.sampler import Sampler
import albumentations
import albumentations.pytorch

from pycocotools.coco import COCO

import skimage.io
import skimage.transform
import skimage.color
import skimage

from PIL import Image

In [4]:
# utils
import time
import shutil
import numpy as np
from random import sample
from tqdm import tqdm
import json

In [5]:
#test data 경로
path='C:/Users/user/Desktop/sample_flexink'

In [15]:
# new data path
save_path='C:/Users/user/Desktop'

In [16]:
classes=['halibut'] #탐지할 클래스명

In [17]:
os.listdir(path)

['01_원천데이터', '02_라벨링데이터']

# dataset 경로 저장(전체 데이터 저장 후 TVT)

In [18]:
# 파일 리스트화
img_list=[]
lab_list=[]


img_path=glob.glob(path+'/'+'01_원천데이터'+'/*')
lab_path=glob.glob(path+'/'+'02_라벨링데이터'+'/*')
for i in range(len(img_path)):
    img_path[i]=img_path[i].replace('\\','/')
    lab_path[i]=lab_path[i].replace('\\','/')
img_list.extend(img_path)
lab_list.extend(lab_path)

In [19]:
print(len(img_list)) #이미지 파일명
print(len(lab_list)) #라벨링 파일명

100
100


In [31]:
img_list[0]

'C:/Users/user/Desktop/sample_flexink/01_원천데이터/SA12_T01_FR_000002.jpg'

In [32]:
lab_list[0]

'C:/Users/user/Desktop/sample_flexink/02_라벨링데이터/SA12_T01_FR_000002.json'

# TVT변환 진행

### 1. 새 폴더 생성

### 2. 이미지, 라벨링 8:1:1 비율로 분할 진행

# 1.새 폴더 생성

In [25]:
def createFolder(save_dir):
    try:
        for tvt in ['train', 'test', 'valid']:
            for IL in ['images', 'labels']:
                dir = save_dir + '/' + 'coco_flexing' + '/' + tvt + '/' + IL 
                if not os.path.exists(dir):
                    os.makedirs(dir)
                else:
                    print('Folder has already been created')
    except OSError:
        print('Error: Creating directory. ' + save_dir)

In [26]:
createFolder(save_dir=save_path) #바탕화면에 새 폴더 생성->해당 폴더에 TVT나누어 이미지 라벨링 파일 저장 해야함

Folder has already been created
Folder has already been created
Folder has already been created
Folder has already been created
Folder has already been created
Folder has already been created


# 2. 이미지, 라벨링 8:1:1 비율로 분할 진행

In [33]:
def get_tv_idx(tl, p = 0.8, m = 0.5):
    total_idx = range(tl)
    train_idx = sample(total_idx, int(tl * p)) #전체에서 80% 데이터 추출
    temp_idx = set(total_idx) - set(train_idx) #전체 인덱스값에서 트레인 데이터에 해당하는 인덱스 제거
    valid_idx = sample(temp_idx, int(len(temp_idx) * m)) #남은 인덱스값에서 50%추출->전체데이터에서 10%추출
    test_idx = set(temp_idx) - set(valid_idx) #전체에서 10%데이터 추출

    return train_idx, valid_idx, list(test_idx)

# 데이터 분할(index)

In [34]:
train_idx, valid_idx, test_idx = get_tv_idx(len(img_list))

In [35]:
print(len(train_idx))
print(len(valid_idx))
print(len(test_idx))

80
10
10


# TVT별 이미지, 라벨링 데이터 경로 추출

In [38]:
train_img_list = [img_list[idx] for idx in train_idx]
train_lab_list= [lab_list[idx] for idx in train_idx]
valid_img_list = [img_list[idx] for idx in valid_idx]
valid_lab_list=[lab_list[idx] for idx in valid_idx]
test_img_list = [img_list[idx] for idx in test_idx]
test_lab_list= [lab_list[idx] for idx in test_idx]

# COCO dataset 형식 작성

## RetinaNet은 Train, Valid, Test 당 각각 한개의 annotations 파일이 필요함

In [14]:
coco_data = {
    "info": {
        "year": "2023",
        "version": "1.0",
        "description": "MYUNGSUN",
        "contributor": "",
        "url": "MYUNGSUN",
        "date_created": "2022-11-16 09:27:18"
    },
    "licenses": [
        {
            "id": 1,
            "url": "MYUNGSUN",
            "name": "Image of halibut"
        }
    ],
    "categories": [
        {
            "id": 0,
            "name": "background",
            "supercategory": "none"
        },
        {
            "id": 1,
            "name": "halibut",
            "supercategory": "none"
        }
    ],
    "images": [],
    "annotations": []
}

# 이미지 및 객체 ID 변수 초기화
image_id = 0
annotation_id = 0

In [9]:
for label_file in lab_list: 
    with open(label_file, "r",encoding='UTF-8') as f:
        label_data = json.load(f)
    
    # coco_dataset의 images항목 채우기
    image_info=label_data['IMAGE']
    image_info['id']=image_id
    coco_data['images'].append(image_info)
    
    # annotations 항목 채우기 객체 정보 추가
    annotation_info = label_data["ANNOTATION_INFO"]
    for annotation in annotation_info:
        annotation["id"] = annotation_id
        annotation["image_id"] = image_id
        coco_data["annotations"].append(annotation)
        annotation_id+=1
    image_id += 1 #image_id +1씩

In [10]:
print(len(coco_data['images']))
print(len(coco_data['annotations']))

2065
20294


In [11]:
#이미지 라벨링 파일 변경
for image in coco_data['images']:
    image['width']=image.pop('WIDTH')
    image['height']=image.pop('HEIGHT')
    image['file_name']=image.pop('IMAGE_FILE_NAME')

In [14]:
for annotation in coco_data['annotations']:
    if annotation['LIFECYCLE'] == 'LA' and annotation['DISEASES'] == 0:
        annotation['category_id'] = 1
    elif annotation['LIFECYCLE'] == 'LA' and annotation['DISEASES'] == 1:
        annotation['category_id'] = 2
    ### 좌표값 반환
    x=annotation['XTL']
    y=annotation['YTL']
    width=annotation['XBR']-annotation['XTL']
    height=annotation['YBR']-annotation['YTL']
    annotation['bbox']=[x,y,width,height]
    #좌표값 삭제
    annotation.pop('XTL')
    annotation.pop('YTL')
    annotation.pop('XBR')
    annotation.pop('YBR')
    #필요없는 항목 삭제
    annotation.pop('ID')
    annotation.pop('SPECIES')
    annotation.pop('LIFECYCLE')
    annotation.pop('DISEASES')
    annotation.pop('TYPE')
    annotation.pop('CROWDSOURSING_OPERATION_ALTERNATIVE')
    # area 계산
    bbox=annotation['bbox']
    area = bbox[2] * bbox[3]
    annotation['area']=area
    annotation['segmentation']=[]
    annotation['iscrowd']=0
    

In [19]:
output_file = "instances_train.json"
with open(train_path+'/'+'annotations'+'/'+output_file, "w") as f:
    json.dump(coco_data, f)

# Vaild COCO dataset

In [33]:
# 파일 리스트화
val_img_list=[]
val_lab_list=[]

for i in classes:
    val_img=glob.glob(valid_path+'/'+'images'+'/'+i+'/*')
    val_lab=glob.glob(valid_path+'/'+'annotations'+'/'+i+'/*')
    for j in range(len(val_img)):
        val_img[j]=val_img[j].replace('\\','/')
        val_lab[j]=val_lab[j].replace('\\','/')
    val_img_list.extend(val_img)
    val_lab_list.extend(val_lab)

In [34]:
print(len(val_img_list))
print(len(val_lab_list))

2065
2065


In [35]:
coco_data_val = {
    "info": {
        "year": "2023",
        "version": "1.0",
        "description": "Exported from labelon.kr",
        "contributor": "",
        "url": "https://www.labelon.kr",
        "date_created": "2022-11-16 09:27:18"
    },
    "licenses": [
        {
            "id": 1,
            "url": "https://images.labelon.kr",
            "name": "Image of honeybee disease diagnosis"
        }
    ],
    "categories": [
        {
            "id": 0,
            "name": "background",
            "supercategory": "none"
        },
        {
            "id": 1,
            "name": "LA",
            "supercategory": "none"
        },
        {
            "id": 2,
            "name": "disease",
            "supercategory": "none"
        }
    ],
    "images": [],
    "annotations": []
}

# 이미지 및 객체 ID 변수 초기화
image_id = 0
annotation_id = 0

In [36]:
for label_file in val_lab_list:
    with open(label_file, "r",encoding='UTF-8') as f:
        label_data = json.load(f)
    
    # coco_dataset의 images항목 채우기
    image_info=label_data['IMAGE']
    image_info['id']=image_id
    coco_data_val['images'].append(image_info)
    
    # annotations 항목 채우기 객체 정보 추가
    annotation_info = label_data["ANNOTATION_INFO"]
    for annotation in annotation_info:
        annotation["id"] = annotation_id
        annotation["image_id"] = image_id
        coco_data_val["annotations"].append(annotation)
        annotation_id+=1
    image_id += 1 #image_id +1씩

In [37]:
print(len(coco_data_val['images']))
print(len(coco_data_val['annotations']))

2065
20258


In [38]:
#이미지 라벨링 파일 변경
for image in coco_data_val['images']:
    image['width']=image.pop('WIDTH')
    image['height']=image.pop('HEIGHT')
    image['file_name']=image.pop('IMAGE_FILE_NAME')

In [39]:
for annotation in coco_data_val['annotations']:
    if annotation['LIFECYCLE'] == 'LA' and annotation['DISEASES'] == 0:
        annotation['category_id'] = 1
    elif annotation['LIFECYCLE'] == 'LA' and annotation['DISEASES'] == 1:
        annotation['category_id'] = 2
    ### 좌표값 반환
    x=annotation['XTL']
    y=annotation['YTL']
    width=annotation['XBR']-annotation['XTL']
    height=annotation['YBR']-annotation['YTL']
    annotation['bbox']=[x,y,width,height]
    #좌표값 삭제
    annotation.pop('XTL')
    annotation.pop('YTL')
    annotation.pop('XBR')
    annotation.pop('YBR')
    #필요없는 항목 삭제
    annotation.pop('ID')
    annotation.pop('SPECIES')
    annotation.pop('LIFECYCLE')
    annotation.pop('DISEASES')
    annotation.pop('TYPE')
    annotation.pop('CROWDSOURSING_OPERATION_ALTERNATIVE')
    # area 계산
    bbox=annotation['bbox']
    area = bbox[2] * bbox[3]
    annotation['area']=area
    annotation['segmentation']=[]
    annotation['iscrowd']=0

In [40]:
output_file = "instances_val.json"
with open(valid_path+'/'+'annotations'+'/'+output_file, "w") as f:
    json.dump(coco_data_val, f)

# Test COCO build

In [5]:
# 파일 리스트화
test_img_list=[]
test_lab_list=[]

for i in classes:
    test_img=glob.glob(test_path+'/'+'images'+'/'+i+'/*')
    test_lab=glob.glob(test_path+'/'+'annotations'+'/'+i+'/*')
    for j in range(len(test_img)):
        test_img[j]=test_img[j].replace('\\','/')
        test_lab[j]=test_lab[j].replace('\\','/')
    test_img_list.extend(test_img)
    test_lab_list.extend(test_lab)

In [6]:
len(test_img_list)

450

In [7]:
test_img_list

['C:/Users/user/Desktop/bee_disease/test/images/Larva/01_1_R_LA_NA_20220707_01_053.jpg',
 'C:/Users/user/Desktop/bee_disease/test/images/Larva/01_1_R_LA_NA_20220707_02_248.jpg',
 'C:/Users/user/Desktop/bee_disease/test/images/Larva/01_1_R_LA_NA_20220707_05_118.jpg',
 'C:/Users/user/Desktop/bee_disease/test/images/Larva/01_1_R_LA_NA_20220708_02_245.jpg',
 'C:/Users/user/Desktop/bee_disease/test/images/Larva/01_1_R_LA_NA_20220712_01_621.jpg',
 'C:/Users/user/Desktop/bee_disease/test/images/Larva/01_1_R_LA_NA_20220714_02_1206.jpg',
 'C:/Users/user/Desktop/bee_disease/test/images/Larva/01_1_R_LA_NA_20220714_02_1949.jpg',
 'C:/Users/user/Desktop/bee_disease/test/images/Larva/01_1_R_LA_NA_20220714_03_0730.jpg',
 'C:/Users/user/Desktop/bee_disease/test/images/Larva/01_1_R_LA_NA_20220714_03_0966.jpg',
 'C:/Users/user/Desktop/bee_disease/test/images/Larva/01_1_R_LA_NA_20220714_04_0976.jpg',
 'C:/Users/user/Desktop/bee_disease/test/images/Larva/01_1_R_LA_NA_20220714_05_0092.jpg',
 'C:/Users/user

In [11]:
coco_data_test = {
    "info": {
        "year": "2023",
        "version": "1.0",
        "description": "Exported from labelon.kr",
        "contributor": "",
        "url": "https://www.labelon.kr",
        "date_created": "2022-11-16 09:27:18"
    },
    "licenses": [
        {
            "id": 1,
            "url": "https://images.labelon.kr",
            "name": "Image of honeybee disease diagnosis"
        }
    ],
    "categories": [
        {
            "id": 0,
            "name": "background",
            "supercategory": "none"
        },
        {
            "id": 1,
            "name": "LA",
            "supercategory": "none"
        },
        {
            "id": 2,
            "name": "disease",
            "supercategory": "none"
        }
    ],
    "images": [],
    "annotations": []
}

# 이미지 및 객체 ID 변수 초기화
image_id = 0
annotation_id = 0

In [12]:
for label_file in test_lab_list:
    with open(label_file, "r",encoding='UTF-8') as f:
        label_data = json.load(f)
    
    # coco_dataset의 images항목 채우기
    image_info=label_data['IMAGE']
    image_info['id']=image_id
    coco_data_test['images'].append(image_info)
    
    # annotations 항목 채우기 객체 정보 추가
    annotation_info = label_data["ANNOTATION_INFO"]
    for annotation in annotation_info:
        annotation["id"] = annotation_id
        annotation["image_id"] = image_id
        coco_data_test["annotations"].append(annotation)
        annotation_id+=1
    image_id += 1 #image_id +1씩

In [13]:
coco_data_test

{'info': {'year': '2023',
  'version': '1.0',
  'description': 'Exported from labelon.kr',
  'contributor': '',
  'url': 'https://www.labelon.kr',
  'date_created': '2022-11-16 09:27:18'},
 'licenses': [{'id': 1,
   'url': 'https://images.labelon.kr',
   'name': 'Image of honeybee disease diagnosis'}],
 'categories': [{'id': 0, 'name': 'background', 'supercategory': 'none'},
  {'id': 1, 'name': 'LA', 'supercategory': 'none'},
  {'id': 2, 'name': 'disease', 'supercategory': 'none'}],
 'images': [{'IMAGE_URL': 'https://images.labelon.kr/2022/07/14/7fcea8fc45e541e4b4daf6c2eeaa23e4.jpg',
   'IMAGE_FILE_NAME': '01_1_R_LA_NA_20220707_01_053.jpg',
   'WIDTH': 1920,
   'HEIGHT': 1080,
   'id': 0},
  {'IMAGE_URL': 'https://images.labelon.kr/2022/07/14/07f287cb02cd4e4dad0b8df5283d7848.jpg',
   'IMAGE_FILE_NAME': '01_1_R_LA_NA_20220707_02_248.jpg',
   'WIDTH': 1920,
   'HEIGHT': 1080,
   'id': 1},
  {'IMAGE_URL': 'https://images.labelon.kr/2022/07/14/efe65ae0fb2b48669ba97203b11b4e95.jpg',
   'IMA

In [14]:
#이미지 라벨링 파일 변경
for image in coco_data_test['images']:
    image['width']=image.pop('WIDTH')
    image['height']=image.pop('HEIGHT')
    image['file_name']=image.pop('IMAGE_FILE_NAME')

In [15]:
for annotation in coco_data_test['annotations']:
    if annotation['LIFECYCLE'] == 'LA' and annotation['DISEASES'] == 0:
        annotation['category_id'] = 1
    elif annotation['LIFECYCLE'] == 'LA' and annotation['DISEASES'] == 1:
        annotation['category_id'] = 2
    ### 좌표값 반환
    x=annotation['XTL']
    y=annotation['YTL']
    width=annotation['XBR']-annotation['XTL']
    height=annotation['YBR']-annotation['YTL']
    annotation['bbox']=[x,y,width,height]
    #좌표값 삭제
    annotation.pop('XTL')
    annotation.pop('YTL')
    annotation.pop('XBR')
    annotation.pop('YBR')
    #필요없는 항목 삭제
    annotation.pop('ID')
    annotation.pop('SPECIES')
    annotation.pop('LIFECYCLE')
    annotation.pop('DISEASES')
    annotation.pop('TYPE')
    annotation.pop('CROWDSOURSING_OPERATION_ALTERNATIVE')
    # area 계산
    bbox=annotation['bbox']
    area = bbox[2] * bbox[3]
    annotation['area']=area
    annotation['segmentation']=[]
    annotation['iscrowd']=0

In [16]:
output_file = "instances_test.json"
with open(test_path+'/'+'annotations'+'/'+output_file, "w") as f:
    json.dump(coco_data_test, f)