In [1]:
!pip install pybboxes albumentations==1.3.0 --upgrade typing-extensions torch

Defaulting to user installation because normal site-packages is not writeable
Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.[0m


In [2]:
import os
from sklearn.model_selection import train_test_split
import shutil
from shutil import copyfile
import numpy as np
from collections import defaultdict
import glob
import random
import cv2
import albumentations as A

In [3]:
def seed_everything(seed=42):
    random.seed(seed) # random
    np.random.seed(seed) # np
    
seed_everything()

In [4]:
DATA_PATH = os.getcwd()
DATA_PATH

'/home/work/KISTI_PCB2'

In [5]:
# ipynb.checkpoint 삭제하는 코드
# rm -rf `find -type d -name .ipynb_checkpoints`

In [6]:
BOXING_TXT = 'BOXING_TXT_1013'
Client_PCB = ['C1_PCB', 'C2_PCB']
zip_list = ['POP_CORN_TXT', 'DELAMINATION_TXT', 'SCRATCH_TXT']

all_files = []

for folder_name in zip_list:
    path_an = f"./{BOXING_TXT}/{folder_name}"
    for path, subdirs, files in os.walk(path_an):
#         print([path, subdirs, files])
        for name in files:
            all_files.append(os.path.join(folder_name, name))

new_files = []
all_labels = []
for file_name in all_files:
    new_name = file_name.replace("SCRATCH_TXT", "BASIC_MATERIAL_SCRATCH_TXT")
    new_files.append(new_name)
    
    if 'SCRATCH' in new_name:
        all_labels.append(0)
    elif 'DELAMINATION' in new_name:
        all_labels.append(1)
    elif 'POP_CORN' in new_name:
        all_labels.append(2)
    else:
        print('no named file')
        break

all_files = new_files
print(len(all_files), len(all_labels))

485 485


# Object Detection에 사용될 데이터셋 생성
- *BADPCB만 사용*
- PCB (중앙집중형 Object Detection에 사용)
- C1_PCB (Client 1 Object Detection에 사용) - 중앙집중형 및 연합학습에 사용
- C2_PCB (Client 2 Object Detection에 사용) - 중앙집중형 및 연합학습에 사용

In [7]:
X_train, X_test, y_train, y_test = train_test_split(all_files, all_labels, test_size = 0.1, stratify=all_labels, random_state=2023)

In [8]:
print('전체 데이터 이미지 수:' , end = ' ')
print(all_labels.count(0), all_labels.count(1), all_labels.count(2))
print('X_train 이미지 수:' , end = ' ')
print(sum(1 for item in X_train if "SCRATCH" in item), sum(1 for item in X_train if "DELAMINATION" in item), sum(1 for item in X_train if "POP_CORN" in item))
print('X_test 이미지 수:' , end = ' ')
print(sum(1 for item in X_test if "SCRATCH" in item), sum(1 for item in X_test if "DELAMINATION" in item), sum(1 for item in X_test if "POP_CORN" in item))

전체 데이터 이미지 수: 79 53 353
X_train 이미지 수: 71 48 317
X_test 이미지 수: 8 5 36


In [9]:
# directory 삭제 함수
def remove_directory(directory_path):
    if os.path.exists(directory_path):
        shutil.rmtree(directory_path)
        print(f"Removed directory: {directory_path}")
    else:
        print(f"Directory does not exist: {directory_path}")

# directory 생성 함수
def create_directory(directory_path):
    os.makedirs(directory_path, exist_ok=True)
    print(f"Created directory: {directory_path}")
    
# 파일 복사 및 이름 변경 함수
def copy_and_rename_files(source_path, destination_path, subdirectory_name):
    for root, _, files in os.walk(source_path):
        for file_name in files:
            source_file_path = os.path.join(root, file_name)
            if os.path.exists(source_file_path):
                new_file_name = file_name.replace("/", "_")  # Replace '/' with '_'
                new_subdirectory_name = subdirectory_name
                if "SCRATCH" in subdirectory_name:
                    new_subdirectory_name = subdirectory_name.replace("SCRATCH", "BASIC_MATERIAL_SCRATCH")
                new_file_name = f"{new_subdirectory_name}_{new_file_name}"  # Add subdirectory name
                destination_file_path = os.path.join(destination_path, new_file_name)
                shutil.copy2(source_file_path, destination_file_path)
                # print(f"Copied {file_name} to {destination_path}{new_file_name}")
            else:
                print(f"{file_name} not found in {source_path}")

# label 복사 함수                
def shutil_labels(index_labels, source_directory, dest_directory):
    for filename in index_labels:
        source_path = os.path.join(source_directory, filename)
        dest_path = os.path.join(dest_directory, filename)

        # 원본 디렉토리에 해당 파일이 있는지 확인 후 복사
        if os.path.exists(source_path):
            shutil.copy(source_path, dest_path)
#             print(f"{source_directory + filename} 복사 완료")
        else:
            print(f"{source_directory + filename} 찾을 수 없음")

# image 복사 함수
def shutil_images(index_images, source_directory, dest_directory):
    for filename in index_images:
        source_path = os.path.join(source_directory, filename)
        dest_path = os.path.join(dest_directory, filename.replace('/', '_'))

        # 원본 디렉토리에 해당 파일이 있는지 확인 후 복사
        if os.path.exists(source_path):
            shutil.copy(source_path, dest_path)
#             print(f"{source_path} 복사 완료")
        else:
            print(f"{source_path} 찾을 수 없음")
            
# 파일 개수 확인
def get_files_count(folder_path):
    dirListing = os.listdir(folder_path)
    return len(dirListing)

## PCB생성

In [10]:
# 디렉토리 삭제 및 생성
remove_directory('./PCB/dataset/images')
remove_directory('./PCB/dataset/labels')
remove_directory('./PCB/train/images')
remove_directory('./PCB/train/labels')
remove_directory('./PCB/valid/images')
remove_directory('./PCB/valid/labels')
remove_directory('./PCB/test/images')
remove_directory('./PCB/test/labels')

create_directory('./PCB/dataset/images')
create_directory('./PCB/dataset/labels')
create_directory('./PCB/train/images')
create_directory('./PCB/train/labels')
create_directory('./PCB/valid/images')
create_directory('./PCB/valid/labels')
create_directory('./PCB/test/images')
create_directory('./PCB/test/labels')

Removed directory: ./PCB/dataset/images
Removed directory: ./PCB/dataset/labels
Removed directory: ./PCB/train/images
Removed directory: ./PCB/train/labels
Removed directory: ./PCB/valid/images
Removed directory: ./PCB/valid/labels
Removed directory: ./PCB/test/images
Removed directory: ./PCB/test/labels
Created directory: ./PCB/dataset/images
Created directory: ./PCB/dataset/labels
Created directory: ./PCB/train/images
Created directory: ./PCB/train/labels
Created directory: ./PCB/valid/images
Created directory: ./PCB/valid/labels
Created directory: ./PCB/test/images
Created directory: ./PCB/test/labels


In [11]:
# dataset/labels에 txt파일 복사
file_paths = [
    f"./{BOXING_TXT}/POP_CORN_TXT/",
    f"./{BOXING_TXT}/DELAMINATION_TXT/",
    f"./{BOXING_TXT}/SCRATCH_TXT/"
]

In [12]:
destination_directory = "./PCB/dataset/labels/"

# 주어진 파일 경로들에 대해 파일 복사 및 이름 변경 수행
for source_path in file_paths:
    subdirectory_name = source_path.split("/")[-2]  # Get the second-to-last part of the path
    copy_and_rename_files(source_path, destination_directory, subdirectory_name[:-4])

In [13]:
train_index, valid_index = train_test_split(X_train, test_size = 0.2, stratify=y_train, random_state=1234)
test_index = X_test

In [14]:
train_index_labels = np.array([filename.replace("_TXT", "").replace('/', '_') for filename in train_index])
valid_index_labels = np.array([filename.replace("_TXT", "").replace('/', '_') for filename in valid_index])
test_index_labels = np.array([filename.replace("_TXT", "").replace('/', '_') for filename in test_index])
train_index_images = np.array([filename.replace('_TXT', '').replace('.txt', '.png') for filename in train_index])
valid_index_images = np.array([filename.replace('_TXT', '').replace('.txt', '.png') for filename in valid_index])
test_index_images = np.array([filename.replace('_TXT', '').replace('.txt', '.png') for filename in test_index])

In [15]:
source_labels_directory = './PCB/dataset/labels/'
train_labels_directory = './PCB/train/labels/'
valid_labels_directory = './PCB/valid/labels/'
test_labels_directory = './PCB/test/labels/'
source_images_directory = './DATA/PCB_DEFECTS_230728/'
train_images_directory = './PCB/train/images/'
valid_images_directory = './PCB/valid/images/'
test_images_directory = './PCB/test/images/'

In [16]:
shutil_labels(train_index_labels, source_labels_directory, train_labels_directory)
shutil_labels(valid_index_labels, source_labels_directory, valid_labels_directory)
shutil_labels(test_index_labels, source_labels_directory, test_labels_directory)
shutil_images(train_index_images, source_images_directory, train_images_directory)
shutil_images(valid_index_images, source_images_directory, valid_images_directory)
shutil_images(test_index_images, source_images_directory, test_images_directory)

In [17]:
if __name__ == "__main__":
    print(get_files_count("./PCB/train/images/"), get_files_count("./PCB/train/labels/"), end = ' ')
    print(get_files_count("./PCB/valid/images/"), get_files_count("./PCB/valid/labels/"), end = ' ')
    print(get_files_count("./PCB/test/images/"), get_files_count("./PCB/test/labels/"))


348 348 88 88 49 49


## C1_PCB, C2_PCB 생성

In [18]:
# test set이 주어진다면 
# C1_files, C2_files, C1_labels, C2_labels = train_test_split(all_files, all_labels, test_size = 0.5, stratify=all_labels, random_state=1234)
C1_files, C2_files, C1_labels, C2_labels = train_test_split(X_train, y_train, test_size = 0.5, stratify=y_train, random_state=1234)

In [19]:
for client in ['C1_PCB', 'C2_PCB']:
    # 디렉토리 삭제 및 생성
    remove_directory(f'./{client}/dataset/images')
    remove_directory(f'./{client}/dataset/labels')
    remove_directory(f'./{client}/train/images')
    remove_directory(f'./{client}/train/labels')
    remove_directory(f'./{client}/valid/images')
    remove_directory(f'./{client}/valid/labels')
    remove_directory(f'./{client}/test/images')
    remove_directory(f'./{client}/test/labels')
    create_directory(f'./{client}/dataset/images')
    create_directory(f'./{client}/dataset/labels')
    create_directory(f'./{client}/train/images')
    create_directory(f'./{client}/train/labels')
    create_directory(f'./{client}/valid/images')
    create_directory(f'./{client}/valid/labels')
    create_directory(f'./{client}/test/images')
    create_directory(f'./{client}/test/labels')
    
    # dataset/labels에 txt파일 복사
    destination_directory = f"./{client}/dataset/labels/"

    # 주어진 파일 경로들에 대해 파일 복사 및 이름 변경 수행
    for source_path in file_paths:
        subdirectory_name = source_path.split("/")[-2]  # Get the second-to-last part of the path
        copy_and_rename_files(source_path, destination_directory, subdirectory_name[:-4])

Removed directory: ./C1_PCB/dataset/images
Removed directory: ./C1_PCB/dataset/labels
Removed directory: ./C1_PCB/train/images
Removed directory: ./C1_PCB/train/labels
Removed directory: ./C1_PCB/valid/images
Removed directory: ./C1_PCB/valid/labels
Removed directory: ./C1_PCB/test/images
Removed directory: ./C1_PCB/test/labels
Created directory: ./C1_PCB/dataset/images
Created directory: ./C1_PCB/dataset/labels
Created directory: ./C1_PCB/train/images
Created directory: ./C1_PCB/train/labels
Created directory: ./C1_PCB/valid/images
Created directory: ./C1_PCB/valid/labels
Created directory: ./C1_PCB/test/images
Created directory: ./C1_PCB/test/labels
Removed directory: ./C2_PCB/dataset/images
Removed directory: ./C2_PCB/dataset/labels
Removed directory: ./C2_PCB/train/images
Removed directory: ./C2_PCB/train/labels
Removed directory: ./C2_PCB/valid/images
Removed directory: ./C2_PCB/valid/labels
Removed directory: ./C2_PCB/test/images
Removed directory: ./C2_PCB/test/labels
Created di

In [20]:
for idx, (files, labels) in enumerate([(C1_files, C1_labels), (C2_files, C2_labels)]):

    train_index, valid_index = train_test_split(files, test_size = 0.2, stratify=labels, random_state=1234)
    test_index = X_test
#     print(len(train_index), len(valid_index), len(test_index))

    train_index_labels = np.array([filename.replace("_TXT", "").replace('/', '_') for filename in train_index])
    valid_index_labels = np.array([filename.replace("_TXT", "").replace('/', '_') for filename in valid_index])
    test_index_labels = np.array([filename.replace("_TXT", "").replace('/', '_') for filename in test_index])
    train_index_images = np.array([filename.replace('_TXT', '').replace('.txt', '.png') for filename in train_index])
    valid_index_images = np.array([filename.replace('_TXT', '').replace('.txt', '.png') for filename in valid_index])
    test_index_images = np.array([filename.replace('_TXT', '').replace('.txt', '.png') for filename in test_index])
    
    # label C1_PCB과 C2_PCB에 분할
    if idx == 0:
        source_labels_directory = './C1_PCB/dataset/labels/'
        train_labels_directory = './C1_PCB/train/labels/'
        valid_labels_directory = './C1_PCB/valid/labels/'
        test_labels_directory = './C1_PCB/test/labels/'
        source_images_directory = './DATA/PCB_DEFECTS_230728/'
        train_images_directory = './C1_PCB/train/images/'
        valid_images_directory = './C1_PCB/valid/images/'
        test_images_directory = './C1_PCB/test/images/'

    else:
        source_labels_directory = './C2_PCB/dataset/labels/'
        train_labels_directory = './C2_PCB/train/labels/'
        valid_labels_directory = './C2_PCB/valid/labels/'
        test_labels_directory = './C2_PCB/test/labels/'
        source_images_directory = './DATA/PCB_DEFECTS_230728/'
        train_images_directory = './C2_PCB/train/images/'
        valid_images_directory = './C2_PCB/valid/images/'
        test_images_directory = './C2_PCB/test/images/'

    shutil_labels(train_index_labels, source_labels_directory, train_labels_directory)
    shutil_labels(valid_index_labels, source_labels_directory, valid_labels_directory)
    shutil_labels(test_index_labels, source_labels_directory, test_labels_directory)
    shutil_images(train_index_images, source_images_directory, train_images_directory)
    shutil_images(valid_index_images, source_images_directory, valid_images_directory)
    shutil_images(test_index_images, source_images_directory, test_images_directory)

In [21]:
if __name__ == "__main__":
    print(get_files_count("./C1_PCB/train/images/"), get_files_count("./C1_PCB/train/labels/"), end = ' ')
    print(get_files_count("./C1_PCB/valid/images/"), get_files_count("./C1_PCB/valid/labels/"), end = ' ')
    print(get_files_count("./C1_PCB/test/images/"), get_files_count("./C1_PCB/test/labels/"))
    
    print(get_files_count("./C2_PCB/train/images/"), get_files_count("./C2_PCB/train/labels/"), end = ' ')
    print(get_files_count("./C2_PCB/valid/images/"), get_files_count("./C2_PCB/valid/labels/"), end = ' ')
    print(get_files_count("./C2_PCB/test/images/"), get_files_count("./C2_PCB/test/labels/"))

174 174 44 44 49 49
174 174 44 44 49 49


# Image Classification에 사용될 데이터 셋 생성
- *기존 PCB, C1_PCB, C2_PCB에 GOODPCB 포함*
- PCB_CLF (중앙집중형 Image classification에 사용)
- PCB_C1_CLF (Client 1 Image classification에 사용)
- PCB_C2_CLF (Client 2 Image classification에 사용)

In [22]:
goodpcb_files = []

path_an = f"./DATA/GOODPCB"
for path, subdirs, files in os.walk(path_an):
#     print([path, subdirs, files])
    for name in files:
        goodpcb_files.append(os.path.join('GOODPCB', name))
        
print(len(goodpcb_files))

90


In [23]:
X_train, X_test = train_test_split(goodpcb_files, test_size = 0.1, random_state=1234)

In [24]:
print(len(X_train), len(X_test))

81 9


## PCB_CLF 생성

In [25]:
remove_directory('./PCB_CLF/train')
remove_directory('./PCB_CLF/test')
create_directory('./PCB_CLF/train')
create_directory('./PCB_CLF/test')

Removed directory: ./PCB_CLF/train
Removed directory: ./PCB_CLF/test
Created directory: ./PCB_CLF/train
Created directory: ./PCB_CLF/test


In [26]:
source_directories = ['./PCB/train/images', './PCB/valid/images', './PCB/test/images']
dest_directories = ['./PCB_CLF/train', './PCB_CLF/train', './PCB_CLF/test']

for source_dir, dest_dir in zip(source_directories, dest_directories):
    # source 디렉토리의 모든 파일을 dest 디렉토리로 복사
    for filename in os.listdir(source_dir):
        if filename.endswith('.png'):
            source_file = os.path.join(source_dir, filename)
            dest_file = os.path.join(dest_dir, filename)
            shutil.copy(source_file, dest_file)

In [27]:
train_index = X_train
test_index = X_test

train_index_images = np.array([filename for filename in train_index])
test_index_images = np.array([filename for filename in test_index])


source_images_directory = './DATA/'
# goodpcb image C1_PCB과 C2_PCB에 분할

train_images_directory = './PCB_CLF/train/'
test_images_directory = './PCB_CLF/test/'

shutil_images(train_index_images, source_images_directory, train_images_directory)
shutil_images(test_index_images, source_images_directory, test_images_directory)

In [28]:
if __name__ == "__main__":
    print(get_files_count("./PCB_CLF/train/"), end = ' ')
    print(get_files_count("./PCB_CLF/test/"))


517 58


## C1_PCB_CLF, C2_PCB_CLF 생성

In [29]:
# test set이 주어진다면 위 행 삭제 후 아래 주석행 실행
# C1_files, C2_files = train_test_split(goodpcb_files, test_size = 0.5, random_state=1234)
C1_files, C2_files = train_test_split(X_train, test_size = 0.5, random_state=1234)

In [30]:
print(len(C1_files), len(C2_files))

40 41


In [31]:
for client in ['C1_PCB_CLF', 'C2_PCB_CLF']:
    # 디렉토리 삭제 및 생성
    remove_directory(f'./{client}/train')
    remove_directory(f'./{client}/test')
    create_directory(f'./{client}/train')
    create_directory(f'./{client}/test')

Removed directory: ./C1_PCB_CLF/train
Removed directory: ./C1_PCB_CLF/test
Created directory: ./C1_PCB_CLF/train
Created directory: ./C1_PCB_CLF/test
Removed directory: ./C2_PCB_CLF/train
Removed directory: ./C2_PCB_CLF/test
Created directory: ./C2_PCB_CLF/train
Created directory: ./C2_PCB_CLF/test


In [32]:
source_directories = ['./C1_PCB/train/images', './C1_PCB/valid/images', './C2_PCB/train/images', './C2_PCB/valid/images', './C1_PCB/test/images', './C2_PCB/test/images']
dest_directories = ['./C1_PCB_CLF/train', './C1_PCB_CLF/train', './C2_PCB_CLF/train', './C2_PCB_CLF/train', './C1_PCB_CLF/test', './C2_PCB_CLF/test']

for source_dir, dest_dir in zip(source_directories, dest_directories):
    # source 디렉토리의 모든 파일을 dest 디렉토리로 복사
    for filename in os.listdir(source_dir):
        if filename.endswith('.png'):
            source_file = os.path.join(source_dir, filename)
            dest_file = os.path.join(dest_dir, filename)
            shutil.copy(source_file, dest_file)

In [33]:
for idx, files in enumerate([C1_files, C2_files]):

    train_index = files
    test_index = X_test
#     print(len(train_index), len(valid_index), len(test_index))

    train_index_images = np.array([filename for filename in train_index])
    test_index_images = np.array([filename for filename in test_index])
    
    
    source_images_directory = './DATA/'
    # goodpcb image C1_PCB과 C2_PCB에 분할
    if idx == 0:
        train_images_directory = './C1_PCB_CLF/train/'
        test_images_directory = './C1_PCB_CLF/test/'

    else:
        train_images_directory = './C2_PCB_CLF/train/'
        test_images_directory = './C2_PCB_CLF/test/'
    
    shutil_images(train_index_images, source_images_directory, train_images_directory)
    shutil_images(test_index_images, source_images_directory, test_images_directory)

In [34]:
if __name__ == "__main__":
    
    
    print(get_files_count("./C1_PCB_CLF/train/"), end = ' ')
    print(get_files_count("./C1_PCB_CLF/test/"))
    
    print(get_files_count("./C2_PCB_CLF/train/"), end =' ')
    print(get_files_count("./C2_PCB_CLF/test/"))

258 58
259 58


In [35]:
# remove_directory(f'./C1_PCB/dataset/')
# remove_directory(f'./C2_PCB/dataset/')

In [36]:
if __name__ == "__main__":
    print(get_files_count("./PCB_CLF/train/"), end = ' ')
    print(get_files_count("./PCB_CLF/test/"))
    
    print(get_files_count("./C1_PCB_CLF/train/"), end = ' ')
    print(get_files_count("./C1_PCB_CLF/test/"))
    
    print(get_files_count("./C2_PCB_CLF/train/"), end =' ')
    print(get_files_count("./C2_PCB_CLF/test/"))

    print(get_files_count("./PCB/train/images/"), get_files_count("./PCB/train/labels/"), end = ' ')
    print(get_files_count("./PCB/valid/images/"), get_files_count("./PCB/valid/labels/"), end = ' ')
    print(get_files_count("./PCB/test/images/"), get_files_count("./PCB/test/labels/"))
    
    print(get_files_count("./C1_PCB/train/images/"), get_files_count("./C1_PCB/train/labels/"), end = ' ')
    print(get_files_count("./C1_PCB/valid/images/"), get_files_count("./C1_PCB/valid/labels/"), end = ' ')
    print(get_files_count("./C1_PCB/test/images/"), get_files_count("./C1_PCB/test/labels/"))
    
    print(get_files_count("./C2_PCB/train/images/"), get_files_count("./C2_PCB/train/labels/"), end = ' ')
    print(get_files_count("./C2_PCB/valid/images/"), get_files_count("./C2_PCB/valid/labels/"), end = ' ')
    print(get_files_count("./C2_PCB/test/images/"), get_files_count("./C2_PCB/test/labels/"))

517 58
258 58
259 58
348 348 88 88 49 49
174 174 44 44 49 49
174 174 44 44 49 49


# 라벨 개수 확인

In [37]:
# 라벨 개수
def label_counts(dir):
    # 경로 설정
    label_dir = dir
    file_pattern = "*.txt"

    # 각 숫자별 이름을 저장하는 딕셔너리 초기화
    number_names = {
        0: 'SCRATCH_1',
        1: 'DELAMINATION',
        2: 'POP_CORN_1',
        3: 'POP_CORN_2',
        4: 'others',
        5: 'others',
        6: 'others',
        7: 'others',
        8: 'others'
    }

    # 각 숫자별 개수를 저장할 딕셔너리 초기화
    number_counts = defaultdict(int)

    # 지정된 디렉토리에서 모든 txt 파일 찾기
    file_paths = glob.glob(os.path.join(label_dir, file_pattern))

    # 각 파일을 읽어서 첫 번째 숫자 개수 세기
    for file_path in file_paths:
        with open(file_path, 'r') as file:
            lines = file.readlines()
            for line in lines:
                parts = line.split()
                if parts:  # 비어있지 않은 줄만 처리
                    try:
                        number = int(float(parts[0]))  # 소수점 이하를 버리고 정수로 변환
                        if 0 <= number <= len(number_names):  # 0부터 5 사이의 숫자만 고려
                            number_counts[number] += 1
                    except ValueError:
                        pass

    # 숫자별 개수를 정렬하여 출력
    sorted_counts = sorted(number_counts.items(), key=lambda x: x[0])
    for number, count in sorted_counts:
        name = number_names[number]
        print(f"{name}: {count}개")

    # 총계 계산 및 출력
    total_count = sum(count for _, count in sorted_counts)
    print(f"총계: {total_count}개")

In [38]:
print('Train')
label_counts("./PCB/train/labels/")
print('\nValid')
label_counts("./PCB/valid/labels/")
print('\nTest')
label_counts("./PCB/test/labels/")

Train
SCRATCH_1: 749개
DELAMINATION: 130개
POP_CORN_1: 364개
POP_CORN_2: 30개
총계: 1273개

Valid
SCRATCH_1: 144개
DELAMINATION: 33개
POP_CORN_1: 92개
POP_CORN_2: 5개
총계: 274개

Test
SCRATCH_1: 101개
DELAMINATION: 22개
POP_CORN_1: 49개
POP_CORN_2: 5개
총계: 177개


In [39]:
print('Train')
label_counts("./C1_PCB/train/labels/")
print('\nValid')
label_counts("./C1_PCB/valid/labels/")
print('\nTest')
label_counts("./C1_PCB/test/labels/")

Train
SCRATCH_1: 372개
DELAMINATION: 75개
POP_CORN_1: 188개
POP_CORN_2: 9개
총계: 644개

Valid
SCRATCH_1: 103개
DELAMINATION: 15개
POP_CORN_1: 57개
POP_CORN_2: 6개
총계: 181개

Test
SCRATCH_1: 101개
DELAMINATION: 22개
POP_CORN_1: 49개
POP_CORN_2: 5개
총계: 177개


In [40]:
print('Train')
label_counts("./C2_PCB/train/labels/")
print('\nValid')
label_counts("./C2_PCB/valid/labels/")
print('\nTest')
label_counts("./C2_PCB/test/labels/")

Train
SCRATCH_1: 302개
DELAMINATION: 54개
POP_CORN_1: 175개
POP_CORN_2: 17개
총계: 548개

Valid
SCRATCH_1: 116개
DELAMINATION: 19개
POP_CORN_1: 36개
POP_CORN_2: 3개
총계: 174개

Test
SCRATCH_1: 101개
DELAMINATION: 22개
POP_CORN_1: 49개
POP_CORN_2: 5개
총계: 177개


# PCB_CLF GOODPCB 증강

In [41]:
# 입력 폴더와 출력 폴더를 동일하게 설정합니다.
input_output_folder = ["./PCB_CLF/train/", "./C1_PCB_CLF/train/", "./C2_PCB_CLF/train/"]

transform = A.Compose([
    A.Resize(416, 416),
    A.RandomCrop(width=256, height=256, p=1),
    A.RandomRotate90(p=0.5),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.Resize(416, 416),

])

for i_o_folder in input_output_folder:
    # 입력 폴더의 이미지 파일을 반복하며 증강 및 저장합니다.
    for filename in os.listdir(i_o_folder):
        if filename.endswith(".png") and "GOODPCB" in filename:
            image_path = os.path.join(i_o_folder, filename)
            image = cv2.imread(image_path)

            # 이미지를 5회 증강합니다.
            for i in range(5):
                augmented = transform(image=image)
                augmented_image = augmented["image"]

                # 저장할 파일 이름을 생성합니다.
                new_filename = filename.replace(".png", f"_AUG_{i}.png")
                output_path = os.path.join(i_o_folder, new_filename)

                # 이미지를 덮어쓰기 형식으로 저장합니다.
                cv2.imwrite(output_path, augmented_image)
                print(f"저장 완료: {output_path}")

저장 완료: ./PCB_CLF/train/GOODPCB_56_AUG_0.png
저장 완료: ./PCB_CLF/train/GOODPCB_56_AUG_1.png
저장 완료: ./PCB_CLF/train/GOODPCB_56_AUG_2.png
저장 완료: ./PCB_CLF/train/GOODPCB_56_AUG_3.png
저장 완료: ./PCB_CLF/train/GOODPCB_56_AUG_4.png
저장 완료: ./PCB_CLF/train/GOODPCB_64_AUG_0.png
저장 완료: ./PCB_CLF/train/GOODPCB_64_AUG_1.png
저장 완료: ./PCB_CLF/train/GOODPCB_64_AUG_2.png
저장 완료: ./PCB_CLF/train/GOODPCB_64_AUG_3.png
저장 완료: ./PCB_CLF/train/GOODPCB_64_AUG_4.png
저장 완료: ./PCB_CLF/train/GOODPCB_74_AUG_0.png
저장 완료: ./PCB_CLF/train/GOODPCB_74_AUG_1.png
저장 완료: ./PCB_CLF/train/GOODPCB_74_AUG_2.png
저장 완료: ./PCB_CLF/train/GOODPCB_74_AUG_3.png
저장 완료: ./PCB_CLF/train/GOODPCB_74_AUG_4.png
저장 완료: ./PCB_CLF/train/GOODPCB_16_AUG_0.png
저장 완료: ./PCB_CLF/train/GOODPCB_16_AUG_1.png
저장 완료: ./PCB_CLF/train/GOODPCB_16_AUG_2.png
저장 완료: ./PCB_CLF/train/GOODPCB_16_AUG_3.png
저장 완료: ./PCB_CLF/train/GOODPCB_16_AUG_4.png
저장 완료: ./PCB_CLF/train/GOODPCB_14_AUG_0.png
저장 완료: ./PCB_CLF/train/GOODPCB_14_AUG_1.png
저장 완료: ./PCB_CLF/train/GOODPCB_1

In [42]:
print(get_files_count("./PCB_CLF/train/"))
print(get_files_count("./C1_PCB_CLF/train/"))
print(get_files_count("./C2_PCB_CLF/train/"))

922
458
464


In [43]:
import os
import matplotlib.pyplot as plt

# 디렉토리 경로 설정
directory = ["./PCB_CLF/train/", "./C1_PCB_CLF/train/", "./C2_PCB_CLF/train/"]

for direc in directory:
    # 디렉토리 내의 파일 목록 가져오기
    file_list = os.listdir(direc)

    # "GOODPCB" 이름이 포함된 파일 개수 세기
    goodpcb_file_count = sum(1 for file in file_list if "GOODPCB" in file)

    # 그 외 파일 개수 계산
    total_file_count = len(file_list) - goodpcb_file_count

    # 그래프 그리기
    labels = ['GOODPCB', 'Other']
    file_counts = [goodpcb_file_count, total_file_count]
    print(labels, file_counts)

['GOODPCB', 'Other'] [486, 436]
['GOODPCB', 'Other'] [240, 218]
['GOODPCB', 'Other'] [246, 218]
