### 체스 말 분류
> https://www.kaggle.com/datasets/niteshfre/chessman-image-dataset

In [1]:
from glob import glob
import os

# 경로 설정
root = './datasets/chess/original/'

# root경로 내의 모든 폴더(*)
directories = glob(os.path.join(root, '*'))
directory_names = []
for directory in directories:
    # 폴더 명을 하나씩 directory_names에 담아줌
    directory_names.append(directory[directory.rindex('\\') + 1:])

print(directory_names)

['Bishop', 'King', 'Knight', 'Pawn', 'Queen', 'Rook']


In [2]:
# directory_names에 담긴 이름을 통해 폴더 내의 파일 명을 변경. ex)King01
for name in directory_names:
    for i, file_name in enumerate(os.listdir(os.path.join(root, name))):
        old_file = os.path.join(root + name + '/', file_name)
        new_file = os.path.join(root + name + '/', name + str(i + 1) + '.png')

        os.rename(old_file, new_file)

FileExistsError: [WinError 183] 파일이 이미 있으므로 만들 수 없습니다: './datasets/chess/original/Bishop/Bishop10.png' -> './datasets/chess/original/Bishop/Bishop2.png'

In [3]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# 이미지의 모든 픽셀값(0 ~ 255)을 255로 나누어줘서 0 ~ 1값으로 정규화
image_data_generator = ImageDataGenerator(rescale=1./255)

# root경로를 불러오고 target_size를 통해 width와 height를 244 X 244로 조정, 다중 분류이기 때문에 categorical을 통해 원핫 인코딩
generator = image_data_generator.flow_from_directory(root, target_size=(244, 244), batch_size=32, class_mode='categorical')
print(generator.class_indices)

Found 556 images belonging to 6 classes.
{'Bishop': 0, 'King': 1, 'Knight': 2, 'Pawn': 3, 'Queen': 4, 'Rook': 5}


In [4]:
import pandas as pd

# 파일 내의 각 사진의 경로와 인코딩 된 타겟을 DataFrame으로 만들어
c_df = pd.DataFrame({'file_paths': generator.filepaths, 'targets': generator.classes})
c_df

Unnamed: 0,file_paths,targets
0,./datasets/chess/original/Bishop\Bishop1.png,0
1,./datasets/chess/original/Bishop\Bishop10.png,0
2,./datasets/chess/original/Bishop\Bishop11.png,0
3,./datasets/chess/original/Bishop\Bishop12.png,0
4,./datasets/chess/original/Bishop\Bishop13.png,0
...,...,...
551,./datasets/chess/original/Rook\Rook95.png,5
552,./datasets/chess/original/Rook\Rook96.png,5
553,./datasets/chess/original/Rook\Rook97.png,5
554,./datasets/chess/original/Rook\Rook98.png,5


In [5]:
# file_paths 피쳐에 \로 된 부분을 /로 변경
c_df.loc[:, 'file_paths'] = c_df.file_paths.apply(lambda x: x.replace('\\', '/'))
c_df

Unnamed: 0,file_paths,targets
0,./datasets/chess/original/Bishop/Bishop1.png,0
1,./datasets/chess/original/Bishop/Bishop10.png,0
2,./datasets/chess/original/Bishop/Bishop11.png,0
3,./datasets/chess/original/Bishop/Bishop12.png,0
4,./datasets/chess/original/Bishop/Bishop13.png,0
...,...,...
551,./datasets/chess/original/Rook/Rook95.png,5
552,./datasets/chess/original/Rook/Rook96.png,5
553,./datasets/chess/original/Rook/Rook97.png,5
554,./datasets/chess/original/Rook/Rook98.png,5


In [6]:
# 타겟 비중 확인
c_df.targets.value_counts()

targets
3    107
2    106
5    102
0     87
4     78
1     76
Name: count, dtype: int64

In [7]:
from sklearn.model_selection import train_test_split

# c_df에서 train데이터와 test데이터 분리
X_train, X_test, y_train, y_test = \
train_test_split(c_df.file_paths, c_df.targets, stratify=c_df.targets, test_size=0.2, random_state=124)

# 위에서 분리한 train데이터에서 validation 데이터 분리
X_train, X_val, y_train, y_val = \
train_test_split(X_train, y_train, stratify=y_train, test_size=0.2, random_state=124)

print(y_train.value_counts())
print(y_val.value_counts())
print(y_test.value_counts())

targets
2    68
3    68
5    65
0    56
4    49
1    49
Name: count, dtype: int64
targets
3    17
2    17
5    16
0    14
4    13
1    12
Name: count, dtype: int64
targets
3    22
5    21
2    21
0    17
4    16
1    15
Name: count, dtype: int64


In [8]:
import shutil

# chess파일 경로
chess_root = './datasets/chess/'

# train데이터를 train폴더를 만들어 저장
for file_path in X_train:
    chess_dir = file_path[len(chess_root + 'original/'): file_path.rindex('/')]
    destination = os.path.join(chess_root, 'train/' + chess_dir)

    # 해당 경로에 destination경로가 없다면 생성
    if not os.path.exists(destination):
        os.makedirs(destination)

    shutil.copy2(file_path, destination)

In [9]:
# validation
for file_path in X_val:
    chess_dir = file_path[len(chess_root + 'original/'): file_path.rindex('/')]
    destination = os.path.join(chess_root, 'validation/' + chess_dir)

    if not os.path.exists(destination):
        os.makedirs(destination)

    shutil.copy2(file_path, destination)

In [10]:
# test
for file_path in X_test:
    chess_dir = file_path[len(chess_root + 'original/'): file_path.rindex('/')]
    destination = os.path.join(chess_root, 'test/' + chess_dir)

    if not os.path.exists(destination):
        os.makedirs(destination)

    shutil.copy2(file_path, destination)

In [40]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

IMAGE_SIZE = 299

train_dir = './datasets/jellyfish/train'
validation_dir = './datasets/jellyfish/valid'
test_dir = './datasets/jellyfish/test'

train_data_generator = ImageDataGenerator(rescale=1./255)
validation_data_generator = ImageDataGenerator(rescale=1./255)
test_data_generator = ImageDataGenerator(rescale=1./255)

train_generator = train_data_generator.flow_from_directory(
    train_dir,
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=16,
    class_mode='categorical'
)

validation_generator = validation_data_generator.flow_from_directory(
    validation_dir,
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=16,
    class_mode='categorical'
)

test_generator = test_data_generator.flow_from_directory(
    test_dir,
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=16,
    class_mode='categorical'
)

print(train_generator.class_indices)
print(validation_generator.class_indices)
print(test_generator.class_indices)

Found 900 images belonging to 6 classes.
Found 39 images belonging to 6 classes.
Found 40 images belonging to 6 classes.
{'Moon_jellyfish': 0, 'barrel_jellyfish': 1, 'blue_jellyfish': 2, 'compass_jellyfish': 3, 'lions_mane_jellyfish': 4, 'mauve_stinger_jellyfish': 5}
{'Moon_jellyfish': 0, 'barrel_jellyfish': 1, 'blue_jellyfish': 2, 'compass_jellyfish': 3, 'lions_mane_jellyfish': 4, 'mauve_stinger_jellyfish': 5}
{'Moon_jellyfish': 0, 'barrel_jellyfish': 1, 'blue_jellyfish': 2, 'compass_jellyfish': 3, 'lions_mane_jellyfish': 4, 'mauve_stinger_jellyfish': 5}


In [41]:
target_name = {v: k for k, v in train_generator.class_indices.items()}
target_name

{0: 'Moon_jellyfish',
 1: 'barrel_jellyfish',
 2: 'blue_jellyfish',
 3: 'compass_jellyfish',
 4: 'lions_mane_jellyfish',
 5: 'mauve_stinger_jellyfish'}

In [43]:
train_target_names = []
for target in train_generator.classes:
    train_target_names.append(target_name[target])

valid_target_names = []
for target in validation_generator.classes:
    valid_target_names.append(target_name[target])

test_target_names = []
for target in test_generator.classes:
    test_target_names.append(target_name[target])

In [44]:
import pandas as pd

train_df = pd.DataFrame({'file_paths': train_generator.filepaths, 'target_names': train_target_names, 'targets': train_generator.classes})
train_df.file_paths = train_df.file_paths.apply(lambda file_path: file_path.replace('\\', '/'))

validation_df = pd.DataFrame({'file_paths': validation_generator.filepaths, 'target_names': valid_target_names, 'targets': validation_generator.classes})
validation_df.file_paths = validation_df.file_paths.apply(lambda file_path: file_path.replace('\\', '/')) 

test_df = pd.DataFrame({'file_paths': test_generator.filepaths, 'target_names': test_target_names, 'targets': test_generator.classes})
test_df.file_paths = test_df.file_paths.apply(lambda file_path: file_path.replace('\\', '/'))

train_df

Unnamed: 0,file_paths,target_names,targets
0,./datasets/jellyfish/train/Moon_jellyfish/01.jpg,Moon_jellyfish,0
1,./datasets/jellyfish/train/Moon_jellyfish/02.jpg,Moon_jellyfish,0
2,./datasets/jellyfish/train/Moon_jellyfish/03.jpg,Moon_jellyfish,0
3,./datasets/jellyfish/train/Moon_jellyfish/04.jpg,Moon_jellyfish,0
4,./datasets/jellyfish/train/Moon_jellyfish/05.jpg,Moon_jellyfish,0
...,...,...,...
895,./datasets/jellyfish/train/mauve_stinger_jelly...,mauve_stinger_jellyfish,5
896,./datasets/jellyfish/train/mauve_stinger_jelly...,mauve_stinger_jellyfish,5
897,./datasets/jellyfish/train/mauve_stinger_jelly...,mauve_stinger_jellyfish,5
898,./datasets/jellyfish/train/mauve_stinger_jelly...,mauve_stinger_jellyfish,5


In [45]:
print(train_df.shape)
print(validation_df.shape)
print(test_df.shape)

(900, 3)
(39, 3)
(40, 3)


In [46]:
import numpy as np
from tensorflow.keras.utils import Sequence
from sklearn.utils import shuffle
import cv2

IMAGE_SIZE = 299
BATCH_SIZE = 16

class Dataset(Sequence):
    def __init__(self, file_paths, targets, batch_size=BATCH_SIZE, aug=None, preprocess=None, shuffle=False):
        self.file_paths = file_paths
        self.targets = targets
        self.batch_size = batch_size
        self.aug = aug
        self.preprocess = preprocess
        self.shuffle = shuffle

        if self.shuffle:
            # 에포크 종료 시, 객체 생성 및 데이터 섞기
            self.on_epoch_end()

    # __len__()는 전체 데이터 건수에서 batch_size 단위로 나눈 데이터 수
    # 예를 들어, 1000개의 데이터를 30 batch_size로 설정하면, 1 batch당 33.33..개이다.
    # 이 때, 소수점은 무조건 올려서 33 + 1 = 34개로 설정한다.
    def __len__(self):
        return int(np.ceil(len(self.targets) / self.batch_size))

    # batch_size 단위로 이미지 배열과 타켓 데이터들을 가져온 뒤 변환한 값을 리턴한다.
    def __getitem__(self, index):
        file_paths_batch = self.file_paths[index * self.batch_size: (index + 1) * self.batch_size]
        targets_batch = self.targets[index * self.batch_size: (index + 1) * self.batch_size]

        results_batch = np.zeros((file_paths_batch.shape[0], IMAGE_SIZE, IMAGE_SIZE, 3))

        for i in range(file_paths_batch.shape[0]):
            image = cv2.cvtColor(cv2.imread(file_paths_batch[i]), cv2.COLOR_BGR2RGB)
            image = cv2.resize(image, (IMAGE_SIZE, IMAGE_SIZE))

            if self.aug is not None:
                image = self.aug(image=image)['image']

            if self.preprocess is not None:
                image = self.preprocess(image)
                    
            results_batch[i] = image

        return results_batch, targets_batch
        
    def on_epoch_end(self):
        if self.shuffle:
            self.file_paths, self.targets = shuffle(self.file_paths, self.targets)        

In [47]:
import albumentations as A
from tensorflow.keras.applications.xception import preprocess_input as xception_preprocess_input

# train 데이터의 파일 경로를 리스트로 담아줌
train_file_paths = train_df['file_paths'].values
# target을 get_dummies를 통해 원핫인코딩
train_targets = pd.get_dummies(train_df['targets']).values

validation_file_paths = validation_df['file_paths'].values
validation_targets = pd.get_dummies(validation_df['targets']).values

test_file_paths = test_df['file_paths'].values
test_targets = pd.get_dummies(test_df['targets']).values

# aug = A.Compose([
#     A.VerticalFlip(p=0.5),
#     A.HorizontalFlip(p=0.5)
# ])

train_dataset = Dataset(train_file_paths, 
                        train_targets, 
                        batch_size=BATCH_SIZE,
                        preprocess=xception_preprocess_input, 
                        shuffle=True)

validation_dataset = Dataset(validation_file_paths, 
                        validation_targets, 
                        batch_size=BATCH_SIZE, 
                        preprocess=xception_preprocess_input)

test_dataset = Dataset(test_file_paths, 
                        test_targets, 
                        batch_size=BATCH_SIZE, 
                        preprocess=xception_preprocess_input)

In [48]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense , Conv2D , Dropout , Flatten , Activation, MaxPooling2D , GlobalAveragePooling2D
from tensorflow.keras.layers import BatchNormalization

from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications import ResNet50V2
from tensorflow.keras.applications import Xception
from tensorflow.keras.applications import MobileNetV2

def create_model(model_name='vgg16', verbose=False):
    input_tensor = Input(shape=(IMAGE_SIZE, IMAGE_SIZE, 3))
    if model_name == 'vgg16':
        model = VGG16(input_tensor=input_tensor, include_top=False, weights='imagenet')
    elif model_name == 'resnet50': # ResNet50, 74.9% ; ResNet50V2, 76.0%
        model = ResNet50V2(input_tensor=input_tensor, include_top=False, weights='imagenet')
    elif model_name == 'xception': # Inception을 기초로 한 모델
        model = Xception(input_tensor=input_tensor, include_top=False, weights='imagenet')
    elif model_name == 'mobilenet':
        model = MobileNetV2(input_tensor=input_tensor, include_top=False, weights='imagenet')

    x = model.output

    # 분류기
    x = GlobalAveragePooling2D()(x)
    if model_name != 'vgg16':
        x = Dropout(rate=0.5)(x)
    x = Dense(50, activation='relu')(x)
    if model_name != 'vgg16':
        x = Dropout(rate=0.5)(x)
    output = Dense(6, activation='softmax', name='output')(x)
    
    model = Model(inputs=input_tensor, outputs=output)
    
    if verbose:
        model.summary()
    
    return model

In [49]:
from tensorflow.keras.losses import SparseCategoricalCrossentropy, CategoricalCrossentropy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import Accuracy

model = create_model(model_name='xception', verbose=True)
# model.compile(optimizer=Adam(), loss=SparseCategoricalCrossentropy(), metrics=['acc'])
model.compile(optimizer=Adam(), loss=CategoricalCrossentropy(), metrics=['acc'])

In [50]:
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping

mcp_cb = ModelCheckpoint(
    filepath="./callback_files/weights.{epoch:03d}-{val_loss:.4f}-{acc:.4f}.weights.h5",
    monitor='val_loss',
    save_best_only=False,
    save_weights_only=True,
    mode='min'
)

rlr_cb = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.1,
    patience=2,
    mode='min'
)

ely_cb = EarlyStopping(
    monitor='val_loss',
    patience=5,
    mode='min'
)

In [None]:
N_EPOCHS = 20
BATCH_SIZE = 16

history = model.fit(train_dataset,
                    batch_size=BATCH_SIZE,
                    epochs=N_EPOCHS, 
                    validation_data=validation_dataset,
                    callbacks=[mcp_cb, rlr_cb, ely_cb])

Epoch 1/20


  self._warn_if_super_not_called()


[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m330s[0m 6s/step - acc: 0.4638 - loss: 1.3673 - val_acc: 0.5385 - val_loss: 4.7054 - learning_rate: 0.0010
Epoch 2/20
[1m 9/57[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m4:58[0m 6s/step - acc: 0.8796 - loss: 0.4597