In [None]:
# Attach Google Drive to the Colab runtime so the dataset folders used below
# are reachable under /content/drive.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import json
from pathlib import Path
import numpy as np
import tensorflow as tf
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import LabelBinarizer
from PIL import Image
from keras import models, layers
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

In [None]:
# Directory that holds the image files referenced by the annotations
image_directory_path = '/content/drive/MyDrive/ColabNotebooks/2023/opensource/trash_image/'

# Directory that holds the JSON annotation (label) files
json_directory_path = '/content/drive/MyDrive/ColabNotebooks/2023/opensource/trash_label/'

In [None]:
# Load every annotated image together with its labels.
# Produces two parallel lists: images[i] is the pixel array that labels[i] describes.
images = []
labels = []

# Upper bound on how many annotation files are read
data_size = 1000

# Sort before slicing: rglob yields paths in filesystem order, which is not
# guaranteed to be stable, so an unsorted slice could select a different
# subset of files from one run to the next.
json_files = sorted(Path(json_directory_path).rglob('*.json'))[:data_size]

for json_file in json_files:
    # Decode the annotation as UTF-8 explicitly rather than relying on the
    # platform default encoding.
    with open(json_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # 'imagePath' is resolved relative to the image directory
    image_path = Path(image_directory_path) / data['imagePath']

    # Force 3-channel RGB so grayscale / RGBA / palette files all yield a
    # (height, width, 3) array, matching the 3-channel input the CNN is built
    # with. The context manager releases the file handle once the pixels
    # have been copied into the NumPy array.
    with Image.open(image_path) as image_file:
        image = np.array(image_file.convert('RGB'))

    # One sample per annotated shape: the full image is paired with each
    # shape's label (the shape geometry itself is not used here).
    for shape in data.get('shapes', []):
        images.append(image)
        labels.append(shape['label'])

In [None]:
# Convert the collected images and labels to NumPy arrays, one-hot encode the
# labels, and split everything into train and test sets.
if len(images) == 0:
    raise ValueError('No labeled images were loaded; check the JSON and image directory paths.')

# Images of different resolutions cannot form one rectangular array:
# np.array() on such a list emits a ragged-array deprecation warning on older
# NumPy releases and raises ValueError on newer ones. Stack when every shape
# matches; otherwise keep one image per slot in an explicit object array.
if len({np.shape(img) for img in images}) == 1:
    images = np.stack(images)
else:
    ragged_images = np.empty(len(images), dtype=object)
    for position, img in enumerate(images):
        ragged_images[position] = img
    images = ragged_images
labels = np.array(labels)

# One-hot encode the string labels (one column per entry of classes_)
label_binarizer = LabelBinarizer()
labels_encoded = label_binarizer.fit_transform(labels)

# With exactly two classes LabelBinarizer returns a single 0/1 column, which
# does not line up with a softmax output of len(classes_) == 2 units trained
# with categorical cross-entropy. Expand it to two columns, ordered like
# classes_ (column 0 -> classes_[0], column 1 -> classes_[1]).
if len(label_binarizer.classes_) == 2:
    labels_encoded = np.hstack([1 - labels_encoded, labels_encoded])

# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible
train_images, test_images, train_labels, test_labels = train_test_split(
    images, labels_encoded, test_size=0.2, random_state=42
)

  images = np.array(images)


In [None]:
# Resize every image to the fixed spatial size expected by the network
target_image_size = (224, 224)


def _resize_batch(batch):
    """Return a list with each image of `batch` resized to `target_image_size`."""
    resized = []
    for picture in batch:
        resized.append(tf.image.resize(picture, target_image_size))
    return resized


train_images_resized = _resize_batch(train_images)
test_images_resized = _resize_batch(test_images)

In [None]:
# Convert the resized images to float32 tensors and scale the pixel values
# to [0, 1]. Raw 0-255 intensities are poorly conditioned inputs for a network
# with default initialisation and the default Adam learning rate; small input
# values train far more reliably.
# NOTE(review): assumes 8-bit image data (max value 255) - confirm if other
# image formats are ever loaded.
PIXEL_MAX = 255.0
train_images_tensor = tf.convert_to_tensor(train_images_resized, dtype=tf.float32) / PIXEL_MAX
test_images_tensor = tf.convert_to_tensor(test_images_resized, dtype=tf.float32) / PIXEL_MAX

# Convert the encoded labels to float32 tensors as well
train_labels_tensor = tf.convert_to_tensor(train_labels, dtype=tf.float32)
test_labels_tensor = tf.convert_to_tensor(test_labels, dtype=tf.float32)

In [None]:
# Build the CNN: three convolution + max-pooling stages, then a dense
# classifier with one softmax unit per label class.
num_classes = len(label_binarizer.classes_)
input_shape = (target_image_size[0], target_image_size[1], 3)

model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(num_classes, activation='softmax'),
])

# Compile with Adam and categorical cross-entropy, tracking accuracy
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Train the model; the held-out test tensors are used as validation data
# so per-epoch validation metrics are reported alongside the training ones.
epochs, batch_size = 5, 16

history = model.fit(
    x=train_images_tensor,
    y=train_labels_tensor,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(test_images_tensor, test_labels_tensor),
)

In [None]:
# Report loss and accuracy on the held-out test set.
# Evaluate on the same tensors that were passed to model.fit as validation
# data; the previously referenced `test_images_np` is never defined in this
# notebook and raised NameError on a fresh kernel.
test_loss, test_accuracy = model.evaluate(test_images_tensor, test_labels_tensor)
print('\n-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*')
print(f'Dataset Size: {data_size}')
print(f'Test Loss: {test_loss:.4f}')
print(f'Test Accuracy: {test_accuracy:.4f}')
print('-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*')