In [2]:
import numpy as np
import keras
import tensorflow as tf
from keras import layers
from sklearn.model_selection import KFold
import matplotlib.pyplot as plt

In [4]:
# 필요한 파라미터 설정
batch_size = 32
img_height = 200
img_width = 200

In [6]:
# 이미지 데이터셋 로드 및 전처리
img_train = keras.utils.image_dataset_from_directory(
    "./Dataset",
    labels="inferred",
    validation_split=0.2,
    label_mode="binary",
    subset="training",
    seed=123,
    image_size=(img_height, img_width),
    shuffle=True,
    batch_size=batch_size
)

img_test = keras.utils.image_dataset_from_directory(
    "./Dataset",
    labels="inferred",
    validation_split=0.2,
    subset="validation",
    seed=123,
    image_size=(img_height, img_width),
    shuffle=True,
    batch_size=batch_size
)

Found 360 files belonging to 2 classes.
Using 288 files for training.
Found 360 files belonging to 2 classes.
Using 72 files for validation.


In [8]:
# 데이터셋을 numpy 배열로 변환하는 함수
def dataset_to_numpy(dataset):
    images = []
    labels = []
    for image_batch, label_batch in dataset:
        images.append(image_batch.numpy())
        labels.append(label_batch.numpy())
    return np.concatenate(images), np.concatenate(labels)

# numpy 배열로 변환
train_images, train_labels = dataset_to_numpy(img_train)
validation_images, validation_labels = dataset_to_numpy(img_test)

In [10]:
# k-폴드 교차 검증 설정
k = 5
kf = KFold(n_splits=k, shuffle=True, random_state=123)

# 각 폴드에 대한 성능 저장할 리스트
accuracy_per_fold = []
loss_per_fold = []

In [18]:
# 각 폴드에 대해 모델 학습 및 검증
fold_no = 1
for train, test in kf.split(train_images, train_labels):
    # 모델 생성
    model = keras.Sequential()
    model.add(keras.Input(shape=(img_height, img_width, 3)))
    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dense(300, activation="relu"))
    model.add(keras.layers.Dense(100, activation="relu"))
    model.add(keras.layers.Dense(1, activation="softmax"))
 # 모델 컴파일
    model.compile(optimizer='adam',
                  loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                  metrics=['accuracy'])
    # 모델 학습
    print(f'폴드 {fold_no} 학습 중...')
    history = model.fit(train_images[train], train_labels[train], epochs=3, batch_size=batch_size, verbose=0)
    
    # 모델 평가
    scores = model.evaluate(train_images[test], train_labels[test], verbose=0)
    print(f'폴드 {fold_no}의 성능: 손실값 {scores[0]}; 정확도 {scores[1]*100}%')
    accuracy_per_fold.append(scores[1] * 100)
    loss_per_fold.append(scores[0])

    fold_no += 1

폴드 1 학습 중...


  output, from_logits = _get_logits(


폴드 1의 성능: 손실값 7312.9580078125; 정확도 56.896549463272095%
폴드 2 학습 중...
폴드 2의 성능: 손실값 2251.951171875; 정확도 51.724135875701904%
폴드 3 학습 중...
폴드 3의 성능: 손실값 1042.64013671875; 정확도 46.55172526836395%
폴드 4 학습 중...
폴드 4의 성능: 손실값 1684.40869140625; 정확도 57.894736528396606%
폴드 5 학습 중...
폴드 5의 성능: 손실값 9076.015625; 정확도 43.85964870452881%


In [14]:
# 폴드별 평균 성능 지표 출력
print('------------------------------------------------------------------------')
print('폴드별 성능')
for i in range(0, len(accuracy_per_fold)):
    print('------------------------------------------------------------------------')
    print(f'> 폴드 {i+1} - 손실값: {loss_per_fold[i]} - 정확도: {accuracy_per_fold[i]}%')
print('------------------------------------------------------------------------')
print('모든 폴드의 평균 성능:')
print(f'> 정확도: {np.mean(accuracy_per_fold)}% (+- {np.std(accuracy_per_fold)})')
print(f'> 손실값: {np.mean(loss_per_fold)}')
print('------------------------------------------------------------------------')

# 최종 모델 평가
final_scores = model.evaluate(img_test)
print(f'최종 모델 성능: 손실값 {final_scores[0]}; 정확도 {final_scores[1]*100}%')


------------------------------------------------------------------------
폴드별 성능
------------------------------------------------------------------------
> 폴드 1 - 손실값: 7278.0712890625 - 정확도: 43.10344755649567%
------------------------------------------------------------------------
> 폴드 2 - 손실값: 212.22683715820312 - 정확도: 53.448277711868286%
------------------------------------------------------------------------
> 폴드 3 - 손실값: 1307.2369384765625 - 정확도: 53.448277711868286%
------------------------------------------------------------------------
> 폴드 4 - 손실값: 2987.747314453125 - 정확도: 57.894736528396606%
------------------------------------------------------------------------
> 폴드 5 - 손실값: 7861.9716796875 - 정확도: 43.85964870452881%
------------------------------------------------------------------------
모든 폴드의 평균 성능:
> 정확도: 50.35087764263153% (+- 5.8439520402967355)
> 손실값: 3929.450811767578
------------------------------------------------------------------------
[1m3/3[0m [32m━━━━━━━━━━━━

  output, from_logits = _get_logits(


In [16]:
import numpy as np
import keras
import tensorflow as tf
from keras import layers
import matplotlib.pyplot as plt

# 필요한 파라미터 설정
batch_size = 32
img_height = 200
img_width = 200

# 이미지 데이터셋 로드 및 전처리
img_train = keras.utils.image_dataset_from_directory(
    "./Dataset",
    labels="inferred",
    validation_split=0.2,
    label_mode="binary",
    subset="training",
    seed=123,
    image_size=(img_height, img_width),
    shuffle=True,
    batch_size=batch_size
)

img_test = keras.utils.image_dataset_from_directory(
    "./Dataset",
    labels="inferred",
    validation_split=0.2,
    subset="validation",
    seed=123,
    image_size=(img_height, img_width),
    shuffle=True,
    batch_size=batch_size
)

# 모델 생성
model = keras.Sequential()
model.add(keras.Input(shape=(img_height, img_width, 3)))
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(300, activation="relu"))
model.add(keras.layers.Dense(100, activation="relu"))
model.add(keras.layers.Dense(1, activation="sigmoid"))

# 모델 컴파일
model.compile(optimizer='adam',
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=['accuracy'])

# 모델 학습
model.fit(
    img_train,
    validation_data=img_test,
    epochs=3
)

# 모델 평가
final_scores = model.evaluate(img_test)
print(f'최종 모델 성능: 손실값 {final_scores[0]}; 정확도 {final_scores[1]*100}%')


Found 360 files belonging to 2 classes.
Using 288 files for training.
Found 360 files belonging to 2 classes.
Using 72 files for validation.
Epoch 1/3


  output, from_logits = _get_logits(


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 106ms/step - accuracy: 0.4763 - loss: 15761.5273 - val_accuracy: 0.5556 - val_loss: 4951.4160
Epoch 2/3
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 89ms/step - accuracy: 0.5462 - loss: 4482.0142 - val_accuracy: 0.5556 - val_loss: 1774.6919
Epoch 3/3
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 89ms/step - accuracy: 0.5072 - loss: 3919.6482 - val_accuracy: 0.4444 - val_loss: 5674.6787
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.4566 - loss: 5530.7378
최종 모델 성능: 손실값 5674.6787109375; 정확도 44.44444477558136%
