In [84]:
## 1. Mass image data pre-classification ##
##    1) Category : [57, 113, 227, 228, 229, 455]
##    2) Result : [57: 60, 113: 64, 227~229(=228): 170, 455: 64]

import os
import shutil


def move_files_with_pattern(source_folder, target_folder, pattern):
    
    if not os.path.exists(target_folder):
        os.makedirs(target_folder)
        print(f"Created folder: {target_folder}")
    
    
    for file_name in os.listdir(source_folder):
        if pattern in file_name:
            source_path = os.path.join(source_folder, file_name)
            target_path = os.path.join(target_folder, file_name)
            shutil.move(source_path, target_path)
            print(f"Moved: {file_name} to {target_folder}")

# Image folder paths
source_folder = r"C:\Users\SAMSUNG\OneDrive\Desktop\image\image"  # Before Classification
target_folder = r"C:\Users\SAMSUNG\OneDrive\Desktop\image\228"  # After Classification
pattern = "229.0"  # mass classification target

# Execute
move_files_with_pattern(source_folder, target_folder, pattern)

Moved: LPMS_B_V111_R_B_30.0_229.0_4.8G_240910142214553.png to C:\Users\SAMSUNG\OneDrive\Desktop\image\228
Moved: LPMS_B_V111_R_B_30.0_229.0_5.9G_240910142136523.png to C:\Users\SAMSUNG\OneDrive\Desktop\image\228
Moved: LPMS_B_V111_R_B_30.0_229.0_6.0G_240910142206553.png to C:\Users\SAMSUNG\OneDrive\Desktop\image\228
Moved: LPMS_B_V111_R_B_30.0_229.0_6.2G_240910142210563.png to C:\Users\SAMSUNG\OneDrive\Desktop\image\228
Moved: LPMS_B_V111_R_B_30.0_229.0_7.4G_240910142218563.png to C:\Users\SAMSUNG\OneDrive\Desktop\image\228
Moved: LPMS_I_V111_T_B_30.0_229.0_7.4G_240910142358523.png to C:\Users\SAMSUNG\OneDrive\Desktop\image\228
Moved: LPMS_I_V111_T_B_30.0_229.0_7.6G_240910142344523.png to C:\Users\SAMSUNG\OneDrive\Desktop\image\228
Moved: LPMS_I_V111_T_B_30.0_229.0_7.7G_240910142318553.png to C:\Users\SAMSUNG\OneDrive\Desktop\image\228
Moved: LPMS_I_V111_T_B_30.0_229.0_7.7G_240910142333533.png to C:\Users\SAMSUNG\OneDrive\Desktop\image\228
Moved: LPMS_I_V111_T_B_30.0_229.0_8.0G_2409101

In [86]:
import os
import shutil
import random


def split_dataset(source_folder, train_folder, val_folder, test_folder, train_ratio=0.8, val_ratio=0.1):
    # train, validation, test 폴더 생성
    for folder in [train_folder, val_folder, test_folder]:
        if not os.path.exists(folder):
            os.makedirs(folder)
            print(f"Created folder: {folder}")

    # 질량별 폴더 처리
    for mass_folder in os.listdir(source_folder):
        mass_path = os.path.join(source_folder, mass_folder)
        if os.path.isdir(mass_path):  # 질량별 폴더인지 확인
            images = [f for f in os.listdir(mass_path) if os.path.isfile(os.path.join(mass_path, f))]
            random.shuffle(images)  # 이미지를 랜덤으로 섞음

            # 데이터 분할
            train_count = int(len(images) * train_ratio)
            val_count = int(len(images) * val_ratio)
            train_images = images[:train_count]
            val_images = images[train_count:train_count + val_count]
            test_images = images[train_count + val_count:]

            # Train 데이터 이동 (질량별 폴더 유지)
            target_class_folder = os.path.join(train_folder, mass_folder)
            if not os.path.exists(target_class_folder):
                os.makedirs(target_class_folder)
            for file in train_images:
                src_path = os.path.join(mass_path, file)
                dst_path = os.path.join(target_class_folder, file)
                shutil.move(src_path, dst_path)

            # Validation 데이터 이동 (폴더 없이 섞임)
            for file in val_images:
                src_path = os.path.join(mass_path, file)
                dst_path = os.path.join(val_folder, file)
                shutil.move(src_path, dst_path)

            # Test 데이터 이동 (폴더 없이 섞임)
            for file in test_images:
                src_path = os.path.join(mass_path, file)
                dst_path = os.path.join(test_folder, file)
                shutil.move(src_path, dst_path)

            print(f"Split {mass_folder}: {len(train_images)} train, {len(val_images)} val, {len(test_images)} test")


# 경로 설정
source_folder = r"C:\Users\SAMSUNG\OneDrive\Desktop\image"  # 질량별 데이터가 있는 폴더
train_folder = r"C:\Users\SAMSUNG\OneDrive\Desktop\image\train"  # Train 데이터 폴더
val_folder = r"C:\Users\SAMSUNG\OneDrive\Desktop\image\validation"  # Validation 데이터 폴더
test_folder = r"C:\Users\SAMSUNG\OneDrive\Desktop\image\test"  # Test 데이터 폴더

# 함수 실행
split_dataset(source_folder, train_folder, val_folder, test_folder, train_ratio=0.8, val_ratio=0.1)


Created folder: C:\Users\SAMSUNG\OneDrive\Desktop\image\train
Created folder: C:\Users\SAMSUNG\OneDrive\Desktop\image\validation
Created folder: C:\Users\SAMSUNG\OneDrive\Desktop\image\test
Split 113: 51 train, 6 val, 7 test
Split 228: 136 train, 17 val, 17 test
Split 455: 51 train, 6 val, 7 test
Split 57: 48 train, 6 val, 6 test
Split test: 29 train, 3 val, 5 test
Split train: 0 train, 0 val, 0 test
Split validation: 30 train, 3 val, 5 test


In [None]:
## 3. Training / Evaluation ##

In [66]:
import os
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import confusion_matrix, classification_report
import numpy as np
import matplotlib.pyplot as plt

In [67]:
##  3.2. Path setting

train_dir = r"C:\Users\SAMSUNG\OneDrive\Desktop\image\train"
test_dir = r"C:\Users\SAMSUNG\OneDrive\Desktop\image\test"
model_save_path = "efficientnet_model.keras"

In [68]:
##  3.3. Hyperparameter setting

batch_size = 8
img_size = (224, 224)  # EfficientNet 기본 입력 크기
epochs = 20
learning_rate = 0.0001  # 학습률 줄임

In [69]:
##  3.4. Data load & augmentation

train_datagen = ImageDataGenerator(
    rescale=1.0/255,  # 픽셀 값을 0~1로 정규화
    rotation_range=15,  # 약간의 회전 허용
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1
)
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical'
)

# Test 데이터 로드
test_datagen = ImageDataGenerator(rescale=1.0/255)
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=img_size,
    batch_size=batch_size,
    class_mode=None,
    shuffle=False
)

Found 323 images belonging to 4 classes.
Found 0 images belonging to 0 classes.


In [70]:
## 3.5. Model justification (Transfer Learning)

# 모델 정의 (EfficientNetB0)
base_model = EfficientNetB0(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
x = GlobalAveragePooling2D()(base_model.output)
x = Dense(256, activation='relu')(x)
predictions = Dense(train_generator.num_classes, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)

# Base 모델 상위 레이어만 훈련 가능하도록 설정
for layer in base_model.layers[-20:]:  # 마지막 20개 레이어만 훈련 가능
    layer.trainable = True

# 모델 컴파일
model.compile(optimizer=Adam(learning_rate=learning_rate),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# 콜백 설정 (최고 성능 모델 저장)
checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
    model_save_path,
    save_best_only=True,
    monitor='accuracy',  # 정확도를 기준으로 저장
    mode='max',
    verbose=1
)

In [71]:
## 3.6. Training

# 학습 기록 저장
history = model.fit(
    train_generator,
    epochs=epochs,
    validation_data = test_generator,
    verbose=1,
    callbacks=[checkpoint_cb]
)

# 학습 과정 시각화 (Accuracy)
plt.plot(history.history['accuracy'], label='Accuracy')
plt.title('Model Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.grid(True)
plt.show()

Epoch 1/20


  self._warn_if_super_not_called()


[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3s/step - accuracy: 0.4987 - loss: 1.2972

ValueError: Must provide at least one structure