In [None]:
import os

import joblib  # disk caching / parallel helpers; also used to persist objects such as trained models
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from torch.utils.data import Dataset  # base class of BengaliDataset
from tqdm import tqdm  # progress bars

image augmentation을 진행하기 위한 sample 이미지 1개 불러오기

In [None]:
# Competition data directory and the four training-image parquet shards.
data_dir = "../input/bengaliai-cv19/"
files_train = [f'train_image_data_{fid}.parquet' for fid in range(4)]
# Directory and file name are passed as separate arguments so that
# os.path.join supplies the separator itself; concatenating them first only
# works while data_dir happens to end with "/".
file_name = os.path.join(data_dir, files_train[0])

In [None]:
# Load the first training shard into memory.
df_train = pd.read_parquet(file_name)

# Preview only the first rows instead of rendering the whole frame.
df_train.head()

In [None]:
# Pull the data out of pandas once: indexing plain arrays is much faster
# than indexing the DataFrame.
img_ids = df_train.loc[:, 'image_id'].values   # one id per row
img_array = df_train.iloc[:, 1:].values        # every column after the first

In [None]:
# Pick one row to serve as the augmentation sample.
idx = 314
img_id, img = img_ids[idx], img_array[idx]

In [None]:
# Shape of the raw sample before reshaping; expected to hold
# 137 * 236 = 32332 values, since it is reshaped to 137 x 236 below.
img.shape

In [None]:
# Image size in pixels used to reshape each flattened pixel row.
HEIGHT, WIDTH = 137, 236

In [None]:
# Rebuild the sample from the raw pixel row on every run, so that
# re-executing this cell cannot invert an already-inverted image.
img = img_array[idx].reshape(HEIGHT, WIDTH).astype(np.uint8)
img = 255 - img              # invert the 8-bit intensities
img = img[:, :, np.newaxis]  # add a trailing channel axis -> (HEIGHT, WIDTH, 1)

img.shape

In [None]:
# Explicit figure/axes with a title so the plot stands on its own.
fig, ax = plt.subplots()
ax.imshow(img[:, :, 0], cmap='gray')  # first channel as a 2-D image for the grey colormap
ax.set_title(f'sample {img_id}');     # trailing ';' hides the Text repr

augmentation을 진행하기 전에 채널 수를 1개에서 3개로 확장해 주어야 함 (`img`는 이미 (H, W, 1) 형태의 3차원 배열이므로, 차원이 아니라 채널을 늘리는 것)

In [None]:
# Copy the grey channel three times along axis 2 (the channel axis).
# Only the first channel is repeated, so re-running this cell still yields
# 3 channels instead of 9, 27, ...
img = np.repeat(img[:, :, :1], 3, axis=2)

img.shape

# albumentations library

In [None]:
!pip install -U albumentations

In [None]:
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

In [None]:
# Albumentations pipeline: rotations, colour jitter, a random flip,
# brightness/contrast jitter and, occasionally, one of two blurs.
transform = A.Compose(
    [
        A.Rotate(limit=20),
        A.RandomRotate90(),
        A.HueSaturationValue(p=0.3),
        A.HorizontalFlip(p=0.5),
        A.RandomBrightnessContrast(p=0.2),
        A.OneOf(
            [
                A.MotionBlur(p=0.2),
                A.MedianBlur(blur_limit=3, p=0.1),
            ],
            p=0.2,
        ),
        # ToTensorV2() is deliberately left disabled here.
    ]
)

In [None]:
# Run the pipeline on the sample; the augmented image is stored under the
# "image" key of the returned mapping.
transformed = transform(image=img)
transformed_image = transformed["image"]
print(transformed_image.shape)

In [None]:
# Original sample on the left, augmented result on the right.
f, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 16))
before_ax, after_ax = ax

before_ax.imshow(img)
before_ax.set_title('initial')

after_ax.imshow(transformed_image)
after_ax.set_title('transform')

In [None]:
# Inspect the raw values of the augmented image.
# NOTE(review): presumably a NumPy array at this point, because the tensor
# conversion is commented out in the pipeline — confirm.
transformed_image

In [None]:
class BengaliDataset(Dataset):
    """Dataset yielding one grapheme image together with its three labels.

    Every item is read from a per-image pickle named ``<image_id>.pkl``,
    reshaped to ``(img_height, img_width)``, inverted (``255 - img``), given
    a trailing channel axis and finally passed through an optional
    albumentations-style transform.

    Args:
        csv: DataFrame providing the columns ``image_id``, ``grapheme_root``,
            ``vowel_diacritic`` and ``consonant_diacritic``.
        img_height: Number of rows the stored pixel data is reshaped to.
        img_width: Number of columns the stored pixel data is reshaped to.
        transform: Optional callable invoked as ``transform(image=img)`` that
            returns a mapping with an ``"image"`` entry. ``None`` disables
            augmentation.
        img_dir: Directory containing the ``<image_id>.pkl`` files.

    ``__getitem__`` returns the tuple
    ``(img, grapheme_root, vowel_diacritic, consonant_diacritic)``.
    """

    def __init__(self, csv, img_height, img_width, transform=None,
                 img_dir='../input/train_images'):
        # Renumber the rows from 0: the author found that indexing gets mixed
        # up when `csv` is a subset of a larger frame (presumably a CV fold).
        # drop=True avoids keeping the old index as an extra column.
        self.csv = csv.reset_index(drop=True)
        self.img_ids = csv['image_id'].values
        self.img_height = img_height
        self.img_width = img_width
        self.transform = transform
        self.img_dir = img_dir

    def __len__(self):
        """Return the number of samples (rows of the label frame)."""
        return len(self.csv)

    def _load_image(self, img_id):
        """Load the stored pixel data for ``img_id`` (reshaped by the caller)."""
        # joblib.load unpickles the file, which can execute arbitrary code:
        # only point img_dir at trusted data.
        return joblib.load(os.path.join(self.img_dir, f'{img_id}.pkl'))

    def __getitem__(self, index):
        """Return ``(img, grapheme_root, vowel_diacritic, consonant_diacritic)``."""
        img = self._load_image(self.img_ids[index])
        img = img.reshape(self.img_height, self.img_width).astype(np.uint8)
        img = 255 - img              # invert the 8-bit intensities
        img = img[:, :, np.newaxis]  # (H, W) -> (H, W, 1)
        # NOTE(review): 3-channel replication (np.repeat(img, 3, 2)) was left
        # disabled in the original; enable it if the transform needs RGB input.

        if self.transform is not None:
            img = self.transform(image=img)["image"]

        # Fetch the row once instead of once per label.
        row = self.csv.iloc[index]
        return img, row.grapheme_root, row.vowel_diacritic, row.consonant_diacritic

# torch transforms

In [None]:
# Re-display the sample that the torchvision pipeline is applied to below.
plt.imshow(img)
# Shape check before the image is handed to the pipeline.
img.shape

In [None]:
from torchvision.transforms import transforms

In [None]:
# Import under an alias: the pipeline is bound to the name `transforms`
# (the following cells call it), and the alias keeps that binding from
# clobbering the module, so this cell can be re-run safely.
from torchvision import transforms as T

transforms = T.Compose([
    T.ToPILImage(),                              # ndarray -> PIL image
    T.RandomAffine(20, translate=(0.15, 0.15)),  # rotate within +/-20 deg, shift up to 15 %
    T.ToTensor(),                                # PIL image -> tensor; later cells call .shape / .permute on it
])

In [None]:
# Run the torchvision pipeline on the sample and report the result's shape.
transformed_image = transforms(img)

print(transformed_image.shape)

In [None]:
# Original sample on the left, torchvision result on the right.
f, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 16))
before_ax, after_ax = ax

before_ax.imshow(img)
before_ax.set_title('initial')

# Move axis 0 to the end before plotting.
after_ax.imshow(transformed_image.permute(1, 2, 0).numpy())
after_ax.set_title('transform')

In [None]:
class BengaliDataset(Dataset):
    """Dataset yielding one grapheme image together with its three labels.

    Every item is read from a per-image pickle named ``<image_id>.pkl``,
    reshaped to ``(img_height, img_width)``, inverted (``255 - img``), given
    a trailing channel axis and finally passed through an optional
    torchvision-style transform.

    Args:
        csv: DataFrame providing the columns ``image_id``, ``grapheme_root``,
            ``vowel_diacritic`` and ``consonant_diacritic``.
        img_height: Number of rows the stored pixel data is reshaped to.
        img_width: Number of columns the stored pixel data is reshaped to.
        transform: Optional callable invoked as ``transform(img)`` whose
            return value replaces the image. ``None`` disables augmentation.
        img_dir: Directory containing the ``<image_id>.pkl`` files.

    ``__getitem__`` returns the tuple
    ``(img, grapheme_root, vowel_diacritic, consonant_diacritic)``.
    """

    def __init__(self, csv, img_height, img_width, transform=None,
                 img_dir='../input/train_images'):
        # Renumber the rows from 0: the author found that indexing gets mixed
        # up when `csv` is a subset of a larger frame (presumably a CV fold).
        # drop=True avoids keeping the old index as an extra column.
        self.csv = csv.reset_index(drop=True)
        self.img_ids = csv['image_id'].values
        self.img_height = img_height
        self.img_width = img_width
        self.transform = transform
        self.img_dir = img_dir

    def __len__(self):
        """Return the number of samples (rows of the label frame)."""
        return len(self.csv)

    def _load_image(self, img_id):
        """Load the stored pixel data for ``img_id`` (reshaped by the caller)."""
        # joblib.load unpickles the file, which can execute arbitrary code:
        # only point img_dir at trusted data.
        return joblib.load(os.path.join(self.img_dir, f'{img_id}.pkl'))

    def __getitem__(self, index):
        """Return ``(img, grapheme_root, vowel_diacritic, consonant_diacritic)``."""
        img = self._load_image(self.img_ids[index])
        img = img.reshape(self.img_height, self.img_width).astype(np.uint8)
        img = 255 - img              # invert the 8-bit intensities
        img = img[:, :, np.newaxis]  # (H, W) -> (H, W, 1)
        # NOTE(review): 3-channel replication (np.repeat(img, 3, 2)) was left
        # disabled in the original; enable it if the transform needs RGB input.

        if self.transform is not None:
            img = self.transform(img)

        # Fetch the row once instead of once per label.
        row = self.csv.iloc[index]
        return img, row.grapheme_root, row.vowel_diacritic, row.consonant_diacritic