### Image Augmentation

In [1]:
import pandas as pd
import random
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [2]:
# Load the Fashion-MNIST training CSV into a DataFrame.
train_csv_path = '../fashionmnist/fashion-mnist_train.csv'
train = pd.read_csv(train_csv_path)

In [3]:
# Work on a copy so the raw `train` frame itself is never modified.
augmented_train = train.copy()

# Split the copy into the 'label' column and every remaining column;
# the remaining columns are cast to float32.
X_label = augmented_train['label']
X_train = augmented_train.drop(columns=['label']).astype('float32')

In [4]:
# Class name -> integer label id. The id of each class is its position in
# the list below (0 through 9).
label_dict = {
    class_name: class_id
    for class_id, class_name in enumerate([
        "T-shirt/top",
        "Trouser",
        "Pullover",
        "Dress",
        "Coat",
        "Sandal",
        "Shirt",
        "Sneaker",
        "Bag",
        "Ankle_boot",
    ])
}

In [5]:
# Build, for every class selected for augmentation, the list of row positions
# of the training samples that carry that label.
#
# Results:
#   is_target[label_id]  -> True when that class should be augmented
#   target_res[label_id] -> row positions of samples with that label
#                           (stays empty for classes that are not targets)
is_target = [False for _ in range(len(label_dict))]
target_labels = ["T-shirt/top", "Sneaker", "Pullover", "Dress", "Coat",
    "Shirt"]  # garment classes to augment
for label in target_labels:
    is_target[label_dict[label]] = True
target_res = [[] for _ in range(len(label_dict))]

# Derive the row count from the data instead of hard-coding it, so the cell
# also works on a subset or a differently sized CSV.
train_count = len(X_label)

# Iterate over the raw label array by position: this avoids one label-based
# Series lookup per row and does not depend on the Series having a default
# 0..N-1 index.
for ind, label_id in enumerate(X_label.to_numpy()):
    if is_target[label_id]:
        target_res[label_id].append(ind)

In [6]:
# Convert the feature DataFrame to a NumPy array, then reshape it so that
# each row becomes one 28x28 image: result shape is (num_rows, 28, 28).
pixel_matrix = X_train.to_numpy()
origin_train = pixel_matrix.reshape(-1, 28, 28)

In [7]:
# Augment a random sample of images from each target class and append the
# results to a copy of the original training frame.
#
# Restart from a fresh copy of `train` so re-running this cell does not stack
# augmented rows on top of a previous run.
augmented_train = train.copy()
print(f"Before data shape: {origin_train.shape}")
image_generator = ImageDataGenerator(
            rotation_range=0,        # rotation range (0 = disabled)
            zoom_range=0.3,          # zoom in/out range
            shear_range=0,           # shear range (0 = disabled)
            width_shift_range=0.05,  # horizontal shift range
            height_shift_range=0.05, # vertical shift range
            horizontal_flip=False,   # no horizontal flip
            vertical_flip=False)     # no vertical flip

sample_size = 311 # number of source images sampled per target label
augment_size = 10 # number of augmented variants generated per source image

print(f"Estimated Augmented shape: ({(sample_size*len(target_labels)*augment_size) + origin_train.shape[0]}, 785)")

# Collect every frame first and concatenate once at the end; calling
# pd.concat inside the loop re-copies the whole (60k+ row) frame each time.
augmented_frames = [augmented_train]
for label in target_labels:
    # Randomly pick `sample_size` row positions of this class (no replacement).
    # NOTE: `random` is not seeded here, so the selection differs between runs.
    abstract_class = random.sample(target_res[label_dict[label]], sample_size)
    labels = np.full(augment_size, label_dict[label])
    for ind in abstract_class:
        # Stack `augment_size` whole copies of the image along a new leading
        # axis -> (augment_size, 28, 28). A scalar `reps` would instead tile
        # along the last axis, giving (28, 28 * augment_size), which the
        # reshape below would turn into scrambled images.
        images = np.tile(origin_train[ind], (augment_size, 1, 1)).reshape(-1, 28, 28, 1)
        batch_iter = image_generator.flow(images, labels, batch_size=augment_size, shuffle=False, seed=43)
        # Use the builtin next(): it goes through the iterator protocol and
        # does not rely on the iterator exposing a `.next()` method.
        aug_imgs, aug_labels = next(batch_iter)
        aug_imgs = aug_imgs.reshape(augment_size, 784)
        # Prepend the label as column 0 so each row has 785 values.
        # Assumes 'label' is the first column of `train` -- TODO confirm.
        aug_imgs = np.insert(aug_imgs, 0, aug_labels, axis=1)
        augmented_frames.append(pd.DataFrame(aug_imgs, columns=augmented_train.columns))

augmented_train = pd.concat(augmented_frames, ignore_index=True)
augmented_train = augmented_train.fillna(0.0)
print(f"After data shape: {augmented_train.shape}")

Before data shape: (60000, 28, 28)
Estimated Augmented shape: (78660, 785)
After data shape: (78660, 785)


In [8]:
# Write the combined (original + augmented) frame to CSV without the index column.
augmented_csv_path = '../Preprocessing/augmented_train.csv'
augmented_train.to_csv(augmented_csv_path, index=False)