In [1]:
# param
import platform

# Documents root differs per machine: the Linux home layout vs. the /Users layout.
BASE_FOLDER = '/home/jim/Documents' if platform.system() == 'Linux' else '/Users/jim/Documents'

# Input dataset (read-only) and the folder the processed dataset is written to.
SOURCE_FOLDER = f'{BASE_FOLDER}/clean_data_0627+0711'
TARGET_FOLDER = f'{BASE_FOLDER}/clean_data'

# Image sub-folders inside the source and target dataset folders.
SRC_IMG_FOLDER = f'{SOURCE_FOLDER}/img'
TAR_IMG_FOLDER = f'{TARGET_FOLDER}/img'

# Marker placed between the original image key and the variant index
# when naming augmented images.
AUG_KEY = '-tf'

In [2]:
import pandas as pd

# Load the train/test tables from the source dataset folder.
train_df = pd.read_csv(f'{SOURCE_FOLDER}/train.csv')
test_df = pd.read_csv(f'{SOURCE_FOLDER}/test.csv')

In [3]:
from torchvision import transforms

# Resize applied to every image that gets saved (augmented variants and originals).
tf_resize = transforms.Resize(size=(66, 200))

# Random photometric + geometric perturbation used to create augmented variants:
# a small colour jitter followed by a slight rotation / shift / zoom.
tf = transforms.Compose([
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1),
    transforms.RandomAffine(
        degrees=(-1, 1),
        translate=(0.05, 0.05),
        scale=(1, 1.05),
    ),
])

In [4]:
from PIL import Image
from matplotlib.pyplot import imshow
from ipywidgets import IntProgress
from IPython.display import display
import numpy as np
%matplotlib inline

import os

# Create the output tree. TAR_IMG_FOLDER lives inside TARGET_FOLDER, so one
# makedirs call creates both. exist_ok=True makes this safe to re-run and also
# covers the case where TARGET_FOLDER already exists but its image sub-folder
# does not (previously the image folder was only created together with the
# target folder, so later image saves would fail).
os.makedirs(TAR_IMG_FOLDER, exist_ok=True)

# Number of augmented variants generated per source image.
aug_count = 5
    
def augmentation(df):
    """Write resized and augmented images for every row of ``df`` and return the expanded table.

    For each value in ``df['im_key']`` the source image
    ``{SRC_IMG_FOLDER}/{key}.jpg`` is opened, ``aug_count`` randomly transformed
    and resized variants are saved as ``{TAR_IMG_FOLDER}/{key}{AUG_KEY}-{index}.jpg``,
    and a resized copy of the original is saved as ``{TAR_IMG_FOLDER}/{key}.jpg``.
    A notebook progress bar is displayed and advanced once per source image.

    Args:
        df: DataFrame with an ``im_key`` column; all other columns are copied
            unchanged onto the augmented rows.

    Returns:
        A new DataFrame containing the original rows followed by ``aug_count``
        copies of each row (with ``im_key`` rewritten to the augmented image
        name), shuffled with ``sample(frac=1)``. The input ``df`` is not modified.
    """
    keys = df['im_key']

    pbar = IntProgress()
    display(pbar)

    pbar.value = 0
    pbar.max = len(keys)

    for key in keys:
        # The context manager closes the source file once every variant has been
        # written, instead of leaving one open handle per image.
        with Image.open(f'{SRC_IMG_FOLDER}/{key}.jpg') as img:
            # aug imgs
            for index in range(aug_count):
                img_t = tf_resize(tf(img))
                img_t.save(f'{TAR_IMG_FOLDER}/{key}{AUG_KEY}-{index}.jpg')

            # resize origin img
            tf_resize(img).save(f'{TAR_IMG_FOLDER}/{key}.jpg')

        pbar.value += 1

    # Build all augmented rows at once rather than appending inside the loop:
    # DataFrame.append no longer exists in pandas 2.x and was quadratic anyway.
    # Positional repeat keeps the "row 0 x aug_count, row 1 x aug_count, ..."
    # order and works even when the index has duplicate labels.
    aug_rows = df.iloc[np.arange(len(df)).repeat(aug_count)].copy()
    aug_rows['im_key'] = [
        f'{key}{AUG_KEY}-{index}' for key in keys for index in range(aug_count)
    ]

    combined = pd.concat([df, aug_rows], ignore_index=True)

    # Shuffle so augmented rows are not grouped after the originals.
    return combined.sample(frac=1)
        
# Augment both splits first; the CSVs are only written once both have succeeded.
print('augmentation train data...')
train_df = augmentation(train_df)

print('augmentation test data...')
test_df = augmentation(test_df)

# Persist the expanded tables next to the generated images, without the index column.
train_df.to_csv(f'{TARGET_FOLDER}/train.csv', index=False)
test_df.to_csv(f'{TARGET_FOLDER}/test.csv', index=False)

print('finish')


augmentation train data...


IntProgress(value=0)

augmentation test data...


IntProgress(value=0)

finish
