### Image Augmentation

In [1]:
import pandas as pd
import random
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [3]:
# Load the Fashion-MNIST training CSV (path is relative to this notebook).
train_csv_path = '../fashionmnist/fashion-mnist_train.csv'
train = pd.read_csv(train_csv_path)

In [4]:
# Work on an independent (deep) copy so the frame held in `train` is not modified.
augmented_train = train.copy(deep=True)

In [5]:
# Split the working frame into the label column and the remaining (pixel)
# columns, storing the pixel values as float32.
X_label = augmented_train['label']
X_train = augmented_train.drop(columns=['label']).astype('float32')

In [6]:
# Class names listed in label order: the position of each name in this list
# is its integer label.
class_names = [
    "T-shirt/top",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle_boot",
]

# Map every class name to its integer label.
label_dict = {name: index for index, name in enumerate(class_names)}

In [7]:
# Collect, for every label selected for augmentation, the row positions of the
# training samples that carry that label.
num_classes = len(label_dict)
is_target = [False] * num_classes
target_labels = ['Shirt', 'Pullover', 'T-shirt/top'] # labels to augment: 6, 2, 0
for label in target_labels:
    is_target[label_dict[label]] = True
# target_res[k] holds the row positions of samples whose label is k
# (it stays empty for labels that are not selected).
target_res = [[] for _ in range(num_classes)]

# Take the row count from the data instead of hard-coding 60000, so the cell
# also works on a subset or a differently sized file.
train_count = len(X_label)
# Enumerate the raw label values so `ind` is always a 0-based row position
# (later used to index the NumPy image array), regardless of the Series index.
for ind, label_id in enumerate(X_label.to_numpy()):
    if is_target[label_id]:
        target_res[label_id].append(ind)

In [8]:
# Convert the pixel DataFrame to a NumPy array, then view every row as a
# 28x28 image, giving an array of shape (num_rows, 28, 28).
origin_train = X_train.to_numpy().reshape(-1, 28, 28)

In [9]:
# Generate augmented copies of randomly sampled images for every target label
# and append them to `augmented_train`.
print(f"Before shape: {augmented_train.shape}")
image_generator = ImageDataGenerator(
            rotation_range=0,       # rotation range
            zoom_range=0.11,        # zoom in/out range
            shear_range=0,          # shear range
            width_shift_range=0.13, # horizontal shift range
            height_shift_range=0.13,# vertical shift range
            horizontal_flip=False,  # horizontal flip
            vertical_flip=False)    # vertical flip

augment_size = 10 # number of augmented copies generated per source image
sample_size = 300 # number of source images sampled per target label
augmented_rows = [] # one [label, pixel_0, ..., pixel_783] list per augmented image
for label in target_labels:
    label_id = label_dict[label]
    # Randomly pick `sample_size` source images of this clothing class.
    abstract_class = random.sample(target_res[label_id], sample_size)
    for ind in abstract_class:
        # Stack `augment_size` copies of the image along a NEW leading axis.
        # np.tile(img, augment_size) on a 2-D image tiles along the last axis
        # (shape (28, 280)); reshaping that result produces scrambled images
        # rather than copies of the source image.
        source_batch = np.tile(origin_train[ind], (augment_size, 1, 1)).reshape(-1, 28, 28, 1)
        # NOTE(review): the same seed is passed for every source image, so each
        # image presumably receives the same sequence of random transforms --
        # confirm whether that is intended.
        batch_iterator = image_generator.flow(
            source_batch,
            np.zeros(augment_size),  # dummy labels; only the image batch is used
            batch_size=augment_size,
            shuffle=False,
            seed=42)
        # Builtin next() instead of the iterator's legacy .next() method.
        x_augmented = next(batch_iterator)[0]
        for augmented_image in x_augmented:
            # The label goes first, matching the original insert(0, ...);
            # assumes 'label' is the first column of the frame -- TODO confirm.
            augmented_rows.append([label_id] + augmented_image.flatten().tolist())

# Build the new rows in one DataFrame and concatenate once; concatenating
# inside the loop copies the whole training frame for every augmented image.
if augmented_rows:
    df_augmented = pd.DataFrame(augmented_rows, columns=augmented_train.columns)
    augmented_train = pd.concat([augmented_train, df_augmented], ignore_index=True)
augmented_train = augmented_train.fillna(0.0)

(60000, 785)


In [10]:
# Write the combined (original + augmented) frame to CSV without the index
# column, then report its final shape.
output_csv_path = 'augmented_train.csv'
augmented_train.to_csv(output_csv_path, index=False)
print(f"After shape: {augmented_train.shape}")

After shape: (69000, 785)
