### 표정 분류
- https://drive.google.com/file/d/1lpwQNwijBfaSr8knSNHKWu5KUmzYWU9d/view?usp=drive_link

#### 순서
1. 이미지 파일명 변경 (ex. happy1.png)
2. 이미지 클래스 분류
3. 경로와 클래스 column이 있는 pandas 데이터프레임 생성
4. validation 데이터 세트 분할
5. validation 이미지가 담긴 폴더 생성 (train 폴더에서 복사하므로 train 폴더의 이미지 수는 줄지 않음)

In [4]:
from glob import glob
import os

# Rename every image under train/ to '<class directory name><number>.png'.
root = './datasets/face/train/'


def rename_images(root):
    """Rename the files of each class directory under *root* to '<class><n>.png'.

    Every immediate sub-directory of *root* is treated as one class. Its
    regular files are numbered from 1 (no zero padding) in sorted filename
    order and renamed to ``<directory name><number>.png``. Only the name
    changes; the file contents are not converted.

    The rename runs in two passes (current name -> temporary name -> final
    name) so that a final name already used by another file in the same
    directory, e.g. when this cell is run a second time, is not overwritten.
    The temporary prefix is assumed not to clash with real file names.

    Args:
        root: Directory whose sub-directories each hold the images of one
            class. A missing or empty directory results in no renames.

    Returns:
        The sorted list of class directory names that were processed.
    """
    # basename() copes with whichever separator glob produced, unlike slicing
    # at the last backslash, which raises ValueError when paths use '/'.
    directory_names = [
        os.path.basename(os.path.normpath(directory))
        for directory in sorted(glob(os.path.join(root, '*')))
        if os.path.isdir(directory)  # stray files under root are not classes
    ]

    for name in directory_names:
        class_dir = os.path.join(root, name)
        # Sorted so the numbering is reproducible across runs and platforms.
        file_names = sorted(
            file_name
            for file_name in os.listdir(class_dir)
            if os.path.isfile(os.path.join(class_dir, file_name))
        )

        # Pass 1: park every file under a temporary name.
        staged = []
        for i, file_name in enumerate(file_names):
            temp_file = os.path.join(class_dir, f'__renaming_{i}__{file_name}')
            os.rename(os.path.join(class_dir, file_name), temp_file)
            staged.append(temp_file)

        # Pass 2: all final names are free now, so assign them.
        for i, temp_file in enumerate(staged):
            new_file = os.path.join(class_dir, name + str(i + 1) + '.png')
            os.rename(temp_file, new_file)

    return directory_names


directory_names = rename_images(root)

In [5]:
# Rename every image under test/ to '<class directory name><number>.png'.
root = './datasets/face/test/'


def rename_images(root):
    """Rename the files of each class directory under *root* to '<class><n>.png'.

    Every immediate sub-directory of *root* is treated as one class. Its
    regular files are numbered from 1 (no zero padding) in sorted filename
    order and renamed to ``<directory name><number>.png``. Only the name
    changes; the file contents are not converted.

    The rename runs in two passes (current name -> temporary name -> final
    name) so that a final name already used by another file in the same
    directory, e.g. when this cell is run a second time, is not overwritten.
    The temporary prefix is assumed not to clash with real file names.

    Args:
        root: Directory whose sub-directories each hold the images of one
            class. A missing or empty directory results in no renames.

    Returns:
        The sorted list of class directory names that were processed.
    """
    # basename() copes with whichever separator glob produced, unlike slicing
    # at the last backslash, which raises ValueError when paths use '/'.
    directory_names = [
        os.path.basename(os.path.normpath(directory))
        for directory in sorted(glob(os.path.join(root, '*')))
        if os.path.isdir(directory)  # stray files under root are not classes
    ]

    for name in directory_names:
        class_dir = os.path.join(root, name)
        # Sorted so the numbering is reproducible across runs and platforms.
        file_names = sorted(
            file_name
            for file_name in os.listdir(class_dir)
            if os.path.isfile(os.path.join(class_dir, file_name))
        )

        # Pass 1: park every file under a temporary name.
        staged = []
        for i, file_name in enumerate(file_names):
            temp_file = os.path.join(class_dir, f'__renaming_{i}__{file_name}')
            os.rename(os.path.join(class_dir, file_name), temp_file)
            staged.append(temp_file)

        # Pass 2: all final names are free now, so assign them.
        for i, temp_file in enumerate(staged):
            new_file = os.path.join(class_dir, name + str(i + 1) + '.png')
            os.rename(temp_file, new_file)

    return directory_names


directory_names = rename_images(root)

In [11]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

root = './datasets/face/'
dir_name = ['test', 'train']

# Scan each split so the class of every image is taken from its sub-directory.
# 'train' comes last on purpose: once the loop ends, `generator` still refers
# to the train split, which the following cells read from.
for split in dir_name:
    split_root = root + split
    print(split_root)

    # A fresh generator per split; rescale=1/255 is applied to the pixel values.
    image_data_generator = ImageDataGenerator(rescale=1. / 255)

    generator = image_data_generator.flow_from_directory(
        split_root,
        class_mode='categorical',
        batch_size=32,
        target_size=(150, 150),
    )
    print(generator.class_indices)

./datasets/face/test
Found 7178 images belonging to 7 classes.
{'angry': 0, 'disgust': 1, 'fear': 2, 'happy': 3, 'neutral': 4, 'sad': 5, 'surprise': 6}
./datasets/face/train
Found 28709 images belonging to 7 classes.
{'angry': 0, 'disgust': 1, 'fear': 2, 'happy': 3, 'neutral': 4, 'sad': 5, 'surprise': 6}


In [13]:
import pandas as pd

# Pair each image path known to the generator with its integer class label.
f_df = pd.DataFrame(
    dict(
        file_paths=generator.filepaths,
        targets=generator.classes,
    )
)
f_df

Unnamed: 0,file_paths,targets
0,./datasets/face/train\angry\angry1.png,0
1,./datasets/face/train\angry\angry10.png,0
2,./datasets/face/train\angry\angry100.png,0
3,./datasets/face/train\angry\angry1000.png,0
4,./datasets/face/train\angry\angry1001.png,0
...,...,...
28704,./datasets/face/train\surprise\surprise995.png,6
28705,./datasets/face/train\surprise\surprise996.png,6
28706,./datasets/face/train\surprise\surprise997.png,6
28707,./datasets/face/train\surprise\surprise998.png,6


In [14]:
# Replace every backslash in file_paths with '/' so all paths use one separator.
f_df.loc[:, 'file_paths'] = f_df['file_paths'].str.replace('\\', '/', regex=False)
f_df

Unnamed: 0,file_paths,targets
0,./datasets/face/train/angry/angry1.png,0
1,./datasets/face/train/angry/angry10.png,0
2,./datasets/face/train/angry/angry100.png,0
3,./datasets/face/train/angry/angry1000.png,0
4,./datasets/face/train/angry/angry1001.png,0
...,...,...
28704,./datasets/face/train/surprise/surprise995.png,6
28705,./datasets/face/train/surprise/surprise996.png,6
28706,./datasets/face/train/surprise/surprise997.png,6
28707,./datasets/face/train/surprise/surprise998.png,6


In [16]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for validation, stratified on the class label so
# both parts keep the same class proportions; the seed makes the split repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    f_df['file_paths'],
    f_df['targets'],
    test_size=0.2,
    stratify=f_df['targets'],
    random_state=124,
)

# Show the per-class counts of the training part, then of the validation part.
for labels in (y_train, y_val):
    print(labels.value_counts())

targets
3    5772
4    3972
5    3864
2    3277
0    3196
6    2537
1     349
Name: count, dtype: int64
targets
3    1443
4     993
5     966
2     820
0     799
6     634
1      87
Name: count, dtype: int64


In [17]:
import shutil

root = './datasets/face/'

# Copy every image listed in X_val from train/ into validation/<class>/.
# NOTE(review): the files are copied, not moved, so they also stay under
# train/ -- confirm that training reads X_train and not the whole train/ folder.
for file_path in X_val:
    # The class name is the image's parent folder. Taking it with
    # dirname/basename avoids slicing by a hard-coded prefix length, which
    # yields an empty name if the path does not have the expected form.
    face_dir = os.path.basename(os.path.dirname(file_path))
    destination = os.path.join(root, 'validation/' + face_dir)

    # Create the class folder on first use; exist_ok avoids a separate
    # exists() check before creating it.
    os.makedirs(destination, exist_ok=True)

    # copy2 also tries to preserve the file's metadata.
    shutil.copy2(file_path, destination)