In [0]:
import torch
import torchvision
from google.colab import drive
import os
import numpy as np
import matplotlib.pyplot as plt

In [0]:
# Mount Google Drive at /drive/ so the '/drive/My Drive/...' paths used by the
# later cells resolve. force_remount=True asks Colab to mount again even when
# a mount is already present.
drive.mount("/drive/", force_remount=True)

---

위 셀은 Google Colab에서 Google Drive를 `/drive/` 경로에 마운트하는 설정입니다.

In [0]:
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
import os


def augmentation(img, save_path, n=20):
    """Write ``n`` randomly augmented JPEG variants of one image to disk.

    The output directory is ``save_path/<prefix>``, where ``<prefix>`` is the
    name of the image's parent directory up to (not including) its first
    underscore. The same prefix is used as the filename prefix of the
    generated images.

    Args:
        img: Indexable whose first element is the path of the source image
            (the cells in this notebook pass entries of ``ImageFolder.imgs``).
        save_path: Directory under which the per-prefix output directory is
            created. Missing parent directories are created as well.
        n: Number of augmented images to generate. Values <= 0 generate
            nothing.
    """
    dataGen = ImageDataGenerator(
                rotation_range=40,
                width_shift_range=0.2,
                height_shift_range=0.2,
                shear_range=0.2,
                zoom_range=0.2,
                horizontal_flip=True,
                fill_mode='nearest')

    path = img[0]
    # Use os.path instead of splitting on '/' so the parent directory is
    # found regardless of the platform's path separator.
    prefix = os.path.basename(os.path.dirname(path)).split('_')[0]

    save = os.path.join(save_path, prefix)
    # exist_ok=True avoids the check-then-create race when several threads
    # handle images that share a prefix at the same time.
    os.makedirs(save, exist_ok=True)

    image = load_img(path)
    x = img_to_array(image)
    # flow() expects a batch dimension in front of the image array.
    x = x.reshape((1,) + x.shape)

    flow = dataGen.flow(x,
                        batch_size=1,
                        save_to_dir=save,
                        save_prefix=prefix,
                        save_format='jpg')
    # The iterator is endless and saves one image per batch it produces, so
    # pull exactly n batches. (Breaking on ``i == n`` inside an enumerate
    # loop only stops after batch n + 1 has already been written.)
    for _ in range(n):
        next(flow)

In [0]:
# Collects the paths of all dataset directories found under `directory`.
# Defined for reference only; it is not actually used in this notebook.

def getDataPath(directory):
    """Group every file-containing directory under ``directory`` by split.

    A directory is recorded under ``'train'`` when the substring ``'train'``
    occurs anywhere in its walked path, otherwise under ``'test'`` when the
    substring ``'test'`` occurs. Directories without files, or matching
    neither substring, are skipped.

    Returns:
        dict with the keys ``'train'`` and ``'test'``, each mapping to a list
        of directory paths in ``os.walk`` order.
    """
    collected = {'train': [], 'test': []}

    for current, _subdirs, filenames in os.walk(directory):
        if not filenames:
            continue
        # 'train' is tested first, so a path containing both words is
        # filed under 'train'.
        split = next((key for key in ('train', 'test') if key in current), None)
        if split is not None:
            collected[split].append(current)

    return collected

In [0]:
# On Windows, every path separator has to be changed to \\ before running.
# For example, replace
# trainDir = '/path/to/dir/'
# with
# trainDir = '\\path\\to\\dir\\'

In [0]:
from torchvision.transforms import Compose, CenterCrop, Resize, ToTensor, Normalize
from torchvision.datasets import ImageFolder


# Preprocessing pipeline applied to every image, in this order:
# CenterCrop(2688) -> Resize(224) -> ToTensor -> per-channel Normalize
# with mean 0.5 and std 0.3 on each of the three channels.
dataTransforms = Compose([
    CenterCrop(size=2688),
    Resize(size=224),
    ToTensor(),
    Normalize(mean=(0.5,) * 3, std=(0.3,) * 3),
])


# trainDir --> directory that holds the entire train dataset
# testDir  --> directory that holds the entire test dataset

trainDir = '/drive/My Drive/공유 문서/K-Data 고려대학교 빅데이터 청년인재 교육과정 1조/프로젝트/Total/train/'
testDir = '/drive/My Drive/공유 문서/K-Data 고려대학교 빅데이터 청년인재 교육과정 1조/프로젝트/Total/test/'

# One ImageFolder per split, both sharing the same preprocessing pipeline.
datasets = {
    split: ImageFolder(root=root, transform=dataTransforms)
    for split, root in (('train', trainDir), ('test', testDir))
}

In [0]:
# 테스트

from threading import Thread


# Output roots for the augmented images
saveTrainDir = '/drive/My Drive/workspace/Total/train/'
saveTestDir = '/drive/My Drive/workspace/Total/test/'

nImages = 1

# Smoke test: start one augmentation thread for the first image of the train
# set and one for the first image of the test set. Slicing with [:1] keeps
# each loop a no-op when a split contains no images.
for img in datasets['train'].imgs[:1]:
    Thread(target=augmentation, args=(img, saveTrainDir), kwargs={'n': nImages}).start()

for img in datasets['test'].imgs[:1]:
    Thread(target=augmentation, args=(img, saveTestDir), kwargs={'n': nImages}).start()

In [0]:
# Caution when running: this cell augments EVERY image of both splits.
#
# Work is sent through a bounded thread pool instead of starting one thread
# per image, so the number of images being loaded at once stays limited. The
# cell returns only after all jobs have finished, and any job that raised is
# reported instead of being lost in a background thread.
from concurrent.futures import ThreadPoolExecutor

nImages = 5
maxWorkers = 8  # upper bound on concurrently running augmentation jobs

jobs = [(img, saveTrainDir) for img in datasets['train'].imgs]
jobs += [(img, saveTestDir) for img in datasets['test'].imgs]

# Leaving the `with` block waits for every submitted job to complete.
with ThreadPoolExecutor(max_workers=maxWorkers) as pool:
    submitted = [
        (img, pool.submit(augmentation, img, saveDir, nImages))
        for img, saveDir in jobs
    ]

failed = [(img[0], job.exception()) for img, job in submitted if job.exception() is not None]
for failedPath, error in failed:
    print('augmentation failed for {}: {!r}'.format(failedPath, error))
print('augmentation finished: {} ok, {} failed'.format(len(submitted) - len(failed), len(failed)))

In [0]:
# Non-threaded variant: augment every image of both splits sequentially.

nImages = 5

# Each split is paired with its own output root so test-set augmentations
# go to saveTestDir and never end up in the training directory.
for split, saveDir in (('train', saveTrainDir), ('test', saveTestDir)):
    for img in datasets[split].imgs:
        augmentation(img, saveDir, nImages)