In [1]:
import json
import os
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array, ImageDataGenerator
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import load_model, Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.utils import to_categorical
import pickle

In [2]:
# Image folder and matching JSON annotation folder used by the cells below.
# NOTE(review): both paths point into the dataset's "Validation" split even
# though the variables are named train_* — confirm this is intended.
train_image_dir = '재활용품_분류_및_선별_데이터/01-1.정식개방데이터/Validation/01.원천데이터/VS_1.영상추출_01.금속캔_001.철캔'
train_label_dir = '재활용품_분류_및_선별_데이터/01-1.정식개방데이터/Validation/02.라벨링데이터/VL_1.영상추출_01.금속캔_001.철캔'

# Size (in pixels) every image is resized to when it is loaded.
img_height = 128
img_width = 128

In [4]:
# Walk the label directory and collect (image path, annotation) pairs.
#
# The accumulators are created here so that the cell works on a fresh kernel
# and does not append duplicate entries when it is re-run.
images = []
image_labels = []

# os.listdir() returns entries in arbitrary order; sorting keeps the sample
# order (and therefore any later seeded split) the same on every run.
for json_file in sorted(os.listdir(train_label_dir)):
    if not json_file.endswith('.json'):
        continue
    json_path = os.path.join(train_label_dir, json_file)

    # Read the label data from the JSON file.
    with open(json_path, encoding='utf-8') as f:
        data = json.load(f)

    # The annotation names the image it belongs to.
    img_file = data['IMAGE_INFO']['FILE_NAME']
    img_path = os.path.join(train_image_dir, img_file)

    # Keep the pair only if the image really exists and is a .jpg file.
    if os.path.exists(img_path) and img_file.endswith('.jpg'):
        images.append(img_path)
        image_labels.append(data['ANNOTATION_INFO'])
    else:
        print(f"Image file {img_path} not found or not a jpg file.")

In [5]:
# Report how many image paths and annotation records were collected.
n_images = len(images)
n_labels = len(image_labels)
print(f"Number of images: {n_images}")
print(f"Number of labels: {n_labels}")

Number of images: 3013
Number of labels: 3013


In [6]:
# Load every collected image at the configured size, scale its pixel values
# by 1/255 (i.e. into the [0, 1] range), and stack everything into a single
# NumPy array.
train_images = np.array([
    img_to_array(load_img(path, target_size=(img_height, img_width))) / 255.0
    for path in images
])

In [8]:
def _annotation_field(field):
    """Return `field` from the first annotation entry of every image, in order."""
    return [annotations[0][field] for annotations in image_labels]


# One list per annotated attribute; element i belongs to images[i].
class_labels = _annotation_field('CLASS')
details_labels = _annotation_field('DETAILS')
damage_labels = _annotation_field('DAMAGE')
dirtiness_labels = _annotation_field('DIRTINESS')
cover_labels = _annotation_field('COVER')
transparency_labels = _annotation_field('TRANSPARENCY')
shape_labels = _annotation_field('SHAPE')
shape_type_labels = _annotation_field('SHAPE_TYPE')


def encode_labels(labels, classes=None):
    """One-hot encode a sequence of hashable label values.

    Args:
        labels: Sequence of label values, one per sample.
        classes: Optional iterable with the full label vocabulary, in the
            column order to use. When omitted, the vocabulary is the set of
            distinct values found in `labels`, in sorted order. Pass it
            explicitly when `labels` may contain only a subset of the classes
            the model was built for; otherwise the encoded width follows the
            data (e.g. a column with a single distinct value yields one
            output column).

    Returns:
        The result of `to_categorical` for the integer-encoded labels, with
        `num_classes` equal to the vocabulary size.

    Raises:
        ValueError: If `classes` is given and a label is not part of it.
    """
    if classes is None:
        # A set has no stable iteration order for strings across interpreter
        # runs (hash randomisation), so sort to make the label -> column
        # mapping reproducible. The type name breaks ties between values
        # that share the same string form.
        vocabulary = sorted(set(labels), key=lambda v: (str(v), type(v).__name__))
    else:
        # Drop duplicates while keeping the caller's order.
        vocabulary = list(dict.fromkeys(classes))

    label_to_int = {label: i for i, label in enumerate(vocabulary)}

    unknown = set(labels) - set(label_to_int)
    if unknown:
        raise ValueError(
            f"Labels not present in `classes`: {sorted(map(str, unknown))}"
        )

    integer_labels = [label_to_int[label] for label in labels]
    return to_categorical(integer_labels, num_classes=len(vocabulary))

# One-hot encode every attribute column with the same helper, in the same
# order as the columns were extracted.
(class_labels_enc,
 details_labels_enc,
 damage_labels_enc,
 dirtiness_labels_enc,
 cover_labels_enc,
 transparency_labels_enc,
 shape_labels_enc,
 shape_type_labels_enc) = [
    encode_labels(column)
    for column in (
        class_labels,
        details_labels,
        damage_labels,
        dirtiness_labels,
        cover_labels,
        transparency_labels,
        shape_labels,
        shape_type_labels,
    )
]

In [13]:
# Split the images and all eight encoded label arrays into training and
# validation parts in one call so the rows stay aligned.
# NOTE(review): this rebinds `train_images` to the training subset, so
# re-running this cell without re-running the image-loading cell would feed
# the already-split array back in.
arrays_to_split = [
    train_images,
    class_labels_enc,
    details_labels_enc,
    damage_labels_enc,
    dirtiness_labels_enc,
    cover_labels_enc,
    transparency_labels_enc,
    shape_labels_enc,
    shape_type_labels_enc,
]

split_result = train_test_split(
    *arrays_to_split,
    test_size=0.2,
    random_state=42,
    stratify=class_labels,  # stratify on the raw class labels to mitigate class imbalance
)

# train_test_split returns a (train, test) pair for each input, in input order.
(train_images, val_images,
 train_class_labels, val_class_labels,
 train_details_labels, val_details_labels,
 train_damage_labels, val_damage_labels,
 train_dirtiness_labels, val_dirtiness_labels,
 train_cover_labels, val_cover_labels,
 train_transparency_labels, val_transparency_labels,
 train_shape_labels, val_shape_labels,
 train_shape_type_labels, val_shape_type_labels) = split_result

In [14]:
# Compile the model.
#
# The targets passed to fit() are dictionaries keyed by these output names,
# so loss and metrics are spelled out per output as well. A one-element
# metrics list is ambiguous for a model with several outputs (recent Keras
# versions reject a metrics list whose length differs from the number of
# outputs), whereas the dict form is explicit.
# NOTE(review): assumes these names match the model's output layer names —
# the model definition is not visible in this part of the notebook.
OUTPUT_NAMES = (
    'class_output',
    'details_output',
    'damage_output',
    'dirtiness_output',
    'cover_output',
    'transparency_output',
    'shape_output',
    'shape_type_output',
)

model.compile(
    optimizer='adam',
    loss={name: 'categorical_crossentropy' for name in OUTPUT_NAMES},
    metrics={name: ['accuracy'] for name in OUTPUT_NAMES},
)

In [15]:
# Train the model.
# Targets are passed as dictionaries keyed by output name, one entry per
# model output, for both the training and the validation data.
train_targets = {
    'class_output': train_class_labels,
    'details_output': train_details_labels,
    'damage_output': train_damage_labels,
    'dirtiness_output': train_dirtiness_labels,
    'cover_output': train_cover_labels,
    'transparency_output': train_transparency_labels,
    'shape_output': train_shape_labels,
    'shape_type_output': train_shape_type_labels,
}
val_targets = {
    'class_output': val_class_labels,
    'details_output': val_details_labels,
    'damage_output': val_damage_labels,
    'dirtiness_output': val_dirtiness_labels,
    'cover_output': val_cover_labels,
    'transparency_output': val_transparency_labels,
    'shape_output': val_shape_labels,
    'shape_type_output': val_shape_type_labels,
}

history = model.fit(
    train_images,
    train_targets,
    epochs=10,  # adjust the number of epochs as needed
    validation_data=(val_images, val_targets),
)


Epoch 1/10


ValueError: Arguments `target` and `output` must have the same shape. Received: target.shape=(None, 1), output.shape=(None, 2)