In [1]:
# Mount Google Drive (Colab only) at /content/drive
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# 기본 설정
import numpy as np
import os
import PIL
import PIL.Image
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow import keras
from tensorflow.keras import layers

In [16]:
# Dataset directory on the mounted Google Drive
dataset_path = '/content/drive/MyDrive/Dataset/Dataset/'

# Build the list of image paths and the matching integer labels
image_files = []
labels = []
# os.listdir() returns entries in arbitrary order; sorting keeps the file/label
# order (and therefore the seeded splits made from it) stable between runs.
for filename in sorted(os.listdir(dataset_path)):
  if filename.endswith(('.png','.jpg')):    # keep only names ending in .png or .jpg
    image_files.append(os.path.join(dataset_path, filename))
    # Labels come from a substring test on the file name; 'o' is checked first,
    # so a name containing both letters is labelled 0.
    if 'o' in filename:    # name contains 'o' -> class 0
      labels.append(0)
    elif 'x' in filename:  # name contains 'x' -> class 1
      labels.append(1)
    else:
      labels.append(2)     # neither 'o' nor 'x' -> class 2

# Settings intended for building the image dataset with tf.data.Dataset
# NOTE(review): none of these three names is referenced in the other visible cells — confirm before relying on them
image_size = (300, 300)
BATCH_SIZE = 30   # batch size: number of samples in one mini-batch
BUFFER_SIZE = tf.data.AUTOTUNE

# Image preprocessing
def preprocess_image(image_path, target_size = (300, 300)):
  """Load one image file, resize it and scale its pixel values.

  Args:
    image_path: Path of the image file to load.
    target_size: Size passed to ``load_img`` as ``target_size``; defaults to
      (300, 300).

  Returns:
    The array produced by ``img_to_array`` divided by 255.0.
  """
  image_api = tf.keras.preprocessing.image
  loaded = image_api.load_img(image_path, target_size=target_size)  # read and resize
  pixels = image_api.img_to_array(loaded)  # image object -> numeric array
  return pixels / 255.0  # normalise pixel values

In [17]:
# Convert the image data and the labels to NumPy arrays
images = np.array(list(map(preprocess_image, image_files)))
labels = np.array(labels)

# image_files      : list of image paths; preprocess_image is applied to each entry
# preprocess_image : reads the file, resizes it and divides the pixel values by 255.0
# images           : the preprocessed images collected into a single NumPy array
# labels           : the label list converted to a NumPy array with np.array

In [18]:
# Split the data into a training set and a test set
from sklearn.model_selection import train_test_split

# random_state seeds the shuffle performed while splitting; the particular
# number does not matter as long as it stays fixed.
x_train, x_test, y_train, y_test = train_test_split(
    images,
    labels,
    test_size=0.2,
    random_state=22,
)

In [20]:
# Build the model
model = keras.Sequential([
    # Declare the input explicitly instead of passing input_shape to the first
    # layer, which Keras reports as a UserWarning for Sequential models.
    keras.Input(shape=(300, 300, 3)),
    layers.Conv2D(30, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(60, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(120, activation='relu'),
    layers.Dense(3, activation='softmax')
])

# keras.Input         : input of 300x300 pixels with 3 colour channels
# layers.Conv2D       : convolution layer with 30 filters (other counts are possible) of size 3x3
# layers.MaxPooling2D : max-pooling layer; shrinks the feature maps and keeps the strongest responses
# layers.Conv2D       : convolution layer with 60 filters to extract more features
# layers.MaxPooling2D : max-pooling layer; shrinks the feature maps further
# layers.Flatten      : flattens the multi-dimensional data to one dimension
# layers.Dense        : fully connected layer with 120 units
# layers.Dense        : fully connected output layer; softmax over the 3 classes

In [21]:
# Compile the model
# optimizer='adam'                : controls how the weights are updated to reduce the error during training
# sparse_categorical_crossentropy : loss for integer labels; quantifies the gap between predictions and targets
# metrics=['accuracy']            : metric used to monitor model performance during training
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Train the model
# x_train / y_train : training images and their labels
# epochs            : number of passes over the whole training set
# validation_data   : (x_test, y_test) images and labels used to check performance after each epoch
model.fit(
    x_train,
    y_train,
    epochs=10,
    validation_data=(x_test, y_test),
)

Epoch 1/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 6s/step - accuracy: 0.5277 - loss: 16.6006 - val_accuracy: 0.5000 - val_loss: 10.8526
Epoch 2/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 6s/step - accuracy: 0.6641 - loss: 4.3772 - val_accuracy: 0.7361 - val_loss: 0.4723
Epoch 3/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 7s/step - accuracy: 0.8931 - loss: 0.2434 - val_accuracy: 0.8750 - val_loss: 0.3258
Epoch 4/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 6s/step - accuracy: 0.9754 - loss: 0.0748 - val_accuracy: 0.8472 - val_loss: 0.3746
Epoch 5/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 6s/step - accuracy: 0.9811 - loss: 0.0384 - val_accuracy: 0.7778 - val_loss: 0.4992
Epoch 6/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 6s/step - accuracy: 0.9939 - loss: 0.0167 - val_accuracy: 0.8333 - val_loss: 0.4668
Epoch 7/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[

<keras.src.callbacks.history.History at 0x7e62bf366790>

In [22]:
# Check the accuracy on the held-out test split
_, accuracy = model.evaluate(x_test, y_test)
# Report the raw fraction first, then the same value as a percentage.
print(
    f'accuracy: {accuracy}',
    f'k-fold 미적용 정확도 : {accuracy*100}%',
    sep='\n',
)

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2s/step - accuracy: 0.7609 - loss: 0.8272
accuracy: 0.7638888955116272
k-fold 미적용 정확도 : 76.38888955116272%


In [23]:
# Basic setup for computing the accuracy with k-fold cross-validation
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score

# 5-fold cross-validation with a seeded shuffle
kfold = KFold(
    n_splits=5,
    shuffle=True,
    random_state=22,
)

# List that collects the accuracy of each fold
accuracy_scores = list()

In [24]:
# Run k-fold cross-validation
# Start from an empty list so that re-running this cell does not append to the
# scores of a previous run and skew the reported mean.
accuracy_scores = []

# NOTE: the loop rebinds x_train/x_test/y_train/y_test and model, replacing the
# hold-out split and the model created in the earlier cells.
for train_index, test_index in kfold.split(images, labels):
  x_train, x_test = images[train_index], images[test_index]
  y_train, y_test = labels[train_index], labels[test_index]

  # Create a fresh model for this fold so no weights carry over between folds
  model = keras.Sequential([
    # Declare the input explicitly instead of passing input_shape to the first
    # layer, which Keras reports as a UserWarning for Sequential models.
    keras.Input(shape=(300, 300, 3)),
    layers.Conv2D(30, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(60, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(120, activation='relu'),
    layers.Dense(3, activation='softmax')
  ])

  # Compile the model
  model.compile(optimizer='adam', loss = 'sparse_categorical_crossentropy', metrics=['accuracy'])

  # Train on this fold's training part; its test part is used for validation
  model.fit(x_train, y_train, epochs = 10, validation_data=(x_test, y_test))


  # Predict: take the class with the highest softmax output for each image
  y_pred = np.argmax(model.predict(x_test), axis=1)

  # Compute this fold's accuracy and store it
  accuracy = accuracy_score(y_test, y_pred)
  accuracy_scores.append(accuracy)

Epoch 1/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 6s/step - accuracy: 0.5208 - loss: 5.9769 - val_accuracy: 0.6250 - val_loss: 0.7025
Epoch 2/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 6s/step - accuracy: 0.8045 - loss: 0.4094 - val_accuracy: 0.7361 - val_loss: 0.5787
Epoch 3/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 6s/step - accuracy: 0.9209 - loss: 0.1548 - val_accuracy: 0.8750 - val_loss: 0.3616
Epoch 4/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 6s/step - accuracy: 0.9879 - loss: 0.0374 - val_accuracy: 0.8472 - val_loss: 0.4069
Epoch 5/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m89s[0m 7s/step - accuracy: 0.9908 - loss: 0.0189 - val_accuracy: 0.8889 - val_loss: 0.3573
Epoch 6/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 6s/step - accuracy: 1.0000 - loss: 0.0042 - val_accuracy: 0.8889 - val_loss: 0.3884
Epoch 7/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 6s/step - accuracy: 0.4712 - loss: 25.8687 - val_accuracy: 0.5833 - val_loss: 7.1396
Epoch 2/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 6s/step - accuracy: 0.5627 - loss: 3.5807 - val_accuracy: 0.7222 - val_loss: 0.7991
Epoch 3/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 6s/step - accuracy: 0.8646 - loss: 0.3299 - val_accuracy: 0.7917 - val_loss: 0.4453
Epoch 4/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 6s/step - accuracy: 0.9697 - loss: 0.1160 - val_accuracy: 0.8611 - val_loss: 0.4735
Epoch 5/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 6s/step - accuracy: 0.9908 - loss: 0.0548 - val_accuracy: 0.8333 - val_loss: 0.3434
Epoch 6/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m90s[0m 7s/step - accuracy: 0.9962 - loss: 0.0239 - val_accuracy: 0.7778 - val_loss: 0.5420
Epoch 7/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 6s/step - accuracy: 0.3418 - loss: 15.4543 - val_accuracy: 0.4722 - val_loss: 3.4133
Epoch 2/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m85s[0m 6s/step - accuracy: 0.6748 - loss: 1.6616 - val_accuracy: 0.7500 - val_loss: 0.6192
Epoch 3/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 6s/step - accuracy: 0.9246 - loss: 0.1640 - val_accuracy: 0.7917 - val_loss: 0.4518
Epoch 4/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m91s[0m 7s/step - accuracy: 0.9680 - loss: 0.1132 - val_accuracy: 0.8333 - val_loss: 0.4146
Epoch 5/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 6s/step - accuracy: 0.9820 - loss: 0.0366 - val_accuracy: 0.8333 - val_loss: 0.4420
Epoch 6/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 6s/step - accuracy: 0.9908 - loss: 0.0257 - val_accuracy: 0.8194 - val_loss: 0.5091
Epoch 7/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0



[1m2/3[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m1s[0m 1s/step



[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 894ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 6s/step - accuracy: 0.4995 - loss: 14.7368 - val_accuracy: 0.7083 - val_loss: 0.4800
Epoch 2/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 6s/step - accuracy: 0.8441 - loss: 0.3553 - val_accuracy: 0.8611 - val_loss: 0.3125
Epoch 3/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m86s[0m 6s/step - accuracy: 0.9444 - loss: 0.1429 - val_accuracy: 0.8194 - val_loss: 0.4782
Epoch 4/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 6s/step - accuracy: 0.9843 - loss: 0.0590 - val_accuracy: 0.8611 - val_loss: 0.3358
Epoch 5/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 6s/step - accuracy: 0.9908 - loss: 0.0186 - val_accuracy: 0.8611 - val_loss: 0.4133
Epoch 6/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 6s/step - accuracy: 0.9939 - loss: 0.0080 - val_accuracy: 0.8472 - val_loss: 0.4609
Epoch 7/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 7s/step - accuracy: 0.5240 - loss: 24.7223 - val_accuracy: 0.6806 - val_loss: 1.1369
Epoch 2/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 6s/step - accuracy: 0.7499 - loss: 0.8797 - val_accuracy: 0.7778 - val_loss: 0.5106
Epoch 3/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 6s/step - accuracy: 0.9005 - loss: 0.1749 - val_accuracy: 0.8472 - val_loss: 0.4454
Epoch 4/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 6s/step - accuracy: 0.9857 - loss: 0.0723 - val_accuracy: 0.8611 - val_loss: 0.3895
Epoch 5/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 6s/step - accuracy: 0.9955 - loss: 0.0305 - val_accuracy: 0.8333 - val_loss: 0.4126
Epoch 6/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 6s/step - accuracy: 1.0000 - loss: 0.0083 - val_accuracy: 0.8611 - val_loss: 0.4775
Epoch 7/10
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0

In [26]:
# Print the mean accuracy over the folds, as a fraction and as a percentage
print(
    f'accuracy : {np.mean(accuracy_scores)}',
    f'k-fold 적용 정확도 : {np.mean(accuracy_scores)*100}%',
    sep='\n',
)

accuracy : 0.8416666666666666
k-fold 적용 정확도 : 84.16666666666666%
