# STEP BY STEP

1. train.txt / val.txt 생성 -> ImageSets/Main으로 이동
2. 이미지와 라벨 하나로 통합
3. Annotations xml파일 생성
4. 라벨(클래스) 이름 파일 만들기 (dataset 폴더 아래에 .names 파일 생성)

In [2]:
import os
import json
import random
import shutil
import tqdm
import cv2

# uniform 데이터로 먼저 실행한 뒤, 경로를 natural 데이터로 바꿔서 다시 실행하기

# 1. train/val .txt 생성

In [4]:
# Path settings
root_path = './dataset/Trash_data/uniform_data'
trainfile_path = os.path.join(root_path,'train','labels')
validfile_path = os.path.join(root_path,'val','labels')

# Collect the label file names with their extension stripped.
# Sorted so the generated split files are reproducible (os.listdir order is arbitrary).
train_list = sorted(os.path.splitext(f)[0] for f in os.listdir(trainfile_path))
valid_list = sorted(os.path.splitext(f)[0] for f in os.listdir(validfile_path))

# open(..., 'w') does not create parent directories, so make sure
# ImageSets/Main exists before writing the split files into it.
imageset_path = os.path.join(root_path, 'ImageSets', 'Main')
os.makedirs(imageset_path, exist_ok=True)

# Write one sample name per line into train.txt / val.txt.
for file_list, file_txt in zip([train_list, valid_list], ['train.txt', 'val.txt']):
    with open(os.path.join(imageset_path, file_txt), 'w') as file:
        for obj in tqdm.tqdm(file_list):
            # obj is already a str, so it can be written directly.
            file.write(obj + '\n')

100%|██████████| 25935/25935 [00:00<00:00, 632459.70it/s]
100%|██████████| 6527/6527 [00:00<00:00, 543362.29it/s]


# 2. 이미지와 라벨 통합

In [5]:
# Path settings
root_path = './dataset/Trash_data/uniform_data'

# Move every image and label file out of the train/val split folders into a
# single JPEGImages / labels directory directly under root_path.
for mode_il in ['images','labels']:
    to_path = "JPEGImages" if mode_il=='images' else "labels"
    dest_dir = os.path.join(root_path, to_path)
    # The destination must already be a directory: if it does not exist,
    # shutil.move renames the source file *to* that path instead of moving
    # it *into* it, and subsequent moves can overwrite that file.
    os.makedirs(dest_dir, exist_ok=True)
    for mode_tv in ['train','val']:
        file_path = os.path.join(root_path,mode_tv,mode_il)
        file_list = os.listdir(file_path)
        for file_name in tqdm.tqdm(file_list):
            shutil.move(os.path.join(file_path,file_name), dest_dir)

100%|██████████| 25935/25935 [00:28<00:00, 899.10it/s] 
100%|██████████| 6527/6527 [00:07<00:00, 917.92it/s] 
100%|██████████| 25935/25935 [00:24<00:00, 1044.35it/s]
100%|██████████| 6527/6527 [00:06<00:00, 1073.81it/s]


# 3. xml파일 생성

In [18]:
# xml파일 생성을 위한 라이브러리
import xml.etree.ElementTree as ET
from xml.dom import minidom

# Dataset locations used by the XML generation step: the image folder, the
# label folder that is read, and the folder the XML files are written to.
root_path = './dataset/Trash_data/uniform_data'
image_path, label_path, to_xml_path = (
    os.path.join(root_path, sub_dir)
    for sub_dir in ('JPEGImages', 'labels', 'Annotations')
)

# Build and return the annotation XML tree for one image
def create_xml_annotation(image_path, image_filename, width, height, depth, objects):
    """Build an ``<annotation>`` element tree from per-object label lines.

    Args:
        image_path: Path of the image. Written to ``<path>``; its directory
            part is written to ``<folder>``.
        image_filename: File name written to ``<filename>``.
        width: Image width, written to ``<size>`` and used to scale the
            horizontal box coordinates.
        height: Image height, written to ``<size>`` and used to scale the
            vertical box coordinates.
        depth: Channel count written to ``<size>/<depth>``.
        objects: Iterable of text lines, each holding at least five
            whitespace-separated numbers:
            ``class x_center y_center box_width box_height``. The four box
            values are multiplied by ``width`` / ``height``, so they are
            presumably normalised YOLO coordinates. Blank lines are skipped.

    Returns:
        The root ``xml.etree.ElementTree.Element`` (``<annotation>``) with one
        ``<object>`` child per non-blank label line. Box corners are clamped
        to ``[0, width]`` / ``[0, height]`` and truncated to integers.

    Raises:
        ValueError: If a non-blank line has fewer than five fields or a field
            is not a number.
    """
    def add_child(parent, tag, text=None):
        # Append <tag> to parent, optionally with text, and return it.
        child = ET.SubElement(parent, tag)
        if text is not None:
            child.text = text
        return child

    def to_pixel(value, upper):
        # Clamp to the image extent, then truncate exactly like int() did before.
        return str(int(min(max(value, 0), upper)))

    annotation = ET.Element('annotation')
    add_child(annotation, 'folder', os.path.dirname(image_path))
    add_child(annotation, 'filename', image_filename)
    add_child(annotation, 'path', image_path)
    source = add_child(annotation, 'source')
    add_child(source, 'database', 'Unknown')
    size = add_child(annotation, 'size')
    add_child(size, 'width', str(width))
    add_child(size, 'height', str(height))
    add_child(size, 'depth', str(depth))
    add_child(annotation, 'segmented', '0')

    for line in objects:
        # split() without an argument tolerates repeated spaces, tabs and the
        # trailing newline; a blank line yields no fields and is skipped.
        fields = line.split()
        if not fields:
            continue
        class_id, x_center, y_center, box_w, box_h = map(float, fields[:5])

        object_elem = add_child(annotation, 'object')
        add_child(object_elem, 'name', str(int(class_id)))
        add_child(object_elem, 'pose', 'Unspecified')
        add_child(object_elem, 'truncated', '0')
        add_child(object_elem, 'difficult', '0')
        bndbox = add_child(object_elem, 'bndbox')

        # Convert centre/size form to corner form in pixels.
        add_child(bndbox, 'xmin', to_pixel((x_center - box_w/2)*width, width))
        add_child(bndbox, 'ymin', to_pixel((y_center - box_h/2)*height, height))
        add_child(bndbox, 'xmax', to_pixel((x_center + box_w/2)*width, width))
        add_child(bndbox, 'ymax', to_pixel((y_center + box_h/2)*height, height))

    return annotation

# Generate one XML annotation file per label file
# open(..., 'w') does not create directories, so make sure the output folder exists.
os.makedirs(to_xml_path, exist_ok=True)
label_list = os.listdir(label_path)
for label_name in tqdm.tqdm(label_list):
    with open(os.path.join(label_path,label_name), "r") as file:
        objects = file.readlines()
    file_name = os.path.splitext(label_name)[0]
    # NOTE(review): assumes every image is a .jpg of size 640x640 with 3
    # channels; the real image is never opened here - confirm for the dataset.
    image_name = file_name + '.jpg'
    annotation = create_xml_annotation(os.path.join(image_path,image_name), image_name, 640, 640, 3, objects)
    xml_string = ET.tostring(annotation, encoding='unicode')
    xml_pretty = minidom.parseString(xml_string).toprettyxml(indent="\t")
    # Drop the first line, i.e. the XML declaration emitted by toprettyxml.
    xml_pretty = "\n".join(xml_pretty.split("\n")[1:])
    # Without a declaration, XML parsers assume UTF-8, so write UTF-8
    # explicitly instead of relying on the platform default encoding.
    with open(os.path.join(to_xml_path,file_name+'.xml'), 'w', encoding='utf-8') as xml_file:
        xml_file.write(xml_pretty)

100%|██████████| 32462/32462 [03:06<00:00, 173.62it/s]


# 모델 학습

In [2]:
# Run train.py via the shell: model type d1, VOC-style dataset under
# dataset/Trash_data (train/val sets from uniform_data), 29 classes named in
# dataset/trash.names, 1 epoch, batch size 16, ssd_random_crop augmentation.
!python train.py --model-type d1  --use-pretrain True --dataset-type voc --dataset dataset/Trash_data --num-classes 29 --class-names dataset/trash.names --voc-train-set uniform_data,train --voc-val-set uniform_data,val  --epochs 1 --batch-size 16 --augment ssd_random_crop 

Tensorboard engine is running at http://localhost:6006/
loading dataset...
creating index...
index created!
lr: 1e-06


2023-07-19 17:27:34.066005: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE SSE2 SSE3 SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  ag__.for_stmt(ag__.ld(self).updates, None, loop_body, get_state, set_state, (), {'iterate_names': 'u'})
  ag__.for_stmt(ag__.ld(self).updates, None, loop_body, get_state, set_state, (), {'iterate_names': 'u'})
  for u in self.updates:
'rm'은(는) 내부 또는 외부 명령, 실행할 수 있는 프로그램, 또는
배치 파일이 아닙니다.

  0%|          | 0/408 [00:00<?, ?it/s]
  0%|          | 1/408 [00:00<03:24,  1.99it/s]
  0%|          | 2/408 [00:01<03:28,  1.95it/s]
  1%|          | 3/408 [00:01<03:21,  2.01it/s]
  1%|          | 4/408 [00:01<03:18,  2.04it/s]
  1%|          | 5/408 [00:02<03:08,  2.14it/s]
  1%|▏         | 6/408 [00:02<03:07,  2.15it/s]
  2%|▏         |

In [1]:
# Run train.py via the shell: model type d0, VOC-style dataset under
# dataset/pothole_voc (train/val sets from dataset_1), 1 class named in
# dataset/pothole.names, 1 epoch, batch size 8, ssd_random_crop augmentation.
!python train.py --model-type d0  --use-pretrain True --dataset-type voc --dataset dataset/pothole_voc --num-classes 1 --class-names dataset/pothole.names --voc-train-set dataset_1,train --voc-val-set dataset_1,val  --epochs 1 --batch-size 8 --augment ssd_random_crop 

height : 16 / width : 16
height : <class 'int'> / width : <class 'int'>
height : 8 / width : 8
height : <class 'int'> / width : <class 'int'>
height : 16 / width : 16
height : <class 'int'> / width : <class 'int'>
height : 8 / width : 8
height : <class 'int'> / width : <class 'int'>
height : 16 / width : 16
height : <class 'int'> / width : <class 'int'>
height : 8 / width : 8
height : <class 'int'> / width : <class 'int'>
height : 16 / width : 16
height : <class 'int'> / width : <class 'int'>
height : 8 / width : 8
height : <class 'int'> / width : <class 'int'>
Tensorboard engine is running at http://localhost:6006/
loading dataset...
creating index...
index created!
lr: 1e-06
height : None / width : None
height : <class 'NoneType'> / width : <class 'NoneType'>


2023-07-19 19:49:44.565726: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE SSE2 SSE3 SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  ag__.for_stmt(ag__.ld(self).updates, None, loop_body, get_state, set_state, (), {'iterate_names': 'u'})
  ag__.for_stmt(ag__.ld(self).updates, None, loop_body, get_state, set_state, (), {'iterate_names': 'u'})
  for u in self.updates:
'rm'은(는) 내부 또는 외부 명령, 실행할 수 있는 프로그램, 또는
배치 파일이 아닙니다.

  0%|          | 0/17 [00:00<?, ?it/s]
  6%|▌         | 1/17 [00:00<00:03,  5.14it/s]
 12%|█▏        | 2/17 [00:00<00:02,  5.22it/s]
 18%|█▊        | 3/17 [00:00<00:02,  5.70it/s]
 24%|██▎       | 4/17 [00:00<00:02,  6.28it/s]
 29%|██▉       | 5/17 [00:00<00:01,  6.21it/s]
 35%|███▌      | 6/17 [00:00<00:01,  6.60it/s]
 41%|████      | 7/17 [