# Mission 1

주어진 바닐라 데이터를 가지고 이미지와 해당하는 클래스 Label (18개의 클래스 중 하나)을 생성할 수 있는 Pytorch **Dataset** Class를 직접 생성해보세요.

18개의 클래스를 만드셨다면, 그 타겟 클래스의 분포도 다시 한번 확인해보면 좋겠습니다.

In [1]:
import glob
import os

In [None]:
# Collect the path of every training image.

# Each entry under images/ is one person's folder. glob returns entries in
# arbitrary filesystem order, so sort the folders to keep the resulting
# index -> path mapping identical across runs and machines.
img_dir = sorted(glob.glob('/opt/ml/input/data/train/images/*'))
train_images_path = []

for path in img_dir:
    # Files inside a folder are sorted as well, for the same reason.
    img_path = glob.glob(os.path.join(path, '*'))
    train_images_path.extend(sorted(img_path))
len(train_images_path)

In [None]:
train_images_path[0]

In [None]:
# Worked example: derive the class label for a single image path.
path = train_images_path[0]

# The parent folder name is split on '_'; field 1 is the gender and
# field 3 is the age (e.g. '000523_female_Asian_51').
info = path.split('/')[-2].split('_')
gender = info[1]
age = int(info[3])

# Gender offset: female adds 3.
label = 3 if gender == 'female' else 0

# Age offset: 30-59 adds 1, 60 and above adds 2.
if age >= 60:
    label += 2
elif age >= 30:
    label += 1

# Mask offset from the file name: 'incorrect' adds 6, otherwise 'normal'
# adds 12, anything else adds nothing.
info = path.split('/')[-1]
label += 6 if 'incorrect' in info else (12 if 'normal' in info else 0)

print(gender, age, info)
print(label)

## Dataset Class 생성

### Map-style datasets
https://pytorch.org/docs/stable/data.html#map-style-datasets

In [None]:
import cv2
import numpy as np

In [None]:
# Read one sample image with OpenCV and print the shape of the resulting array.
# NOTE(review): `img_path` stays defined as a notebook-level variable after
# this cell; make sure later code does not pick it up by accident.
img_path = '/opt/ml/input/data/train/images/000523_female_Asian_51/incorrect_mask.jpg'
img = cv2.imread(img_path)

print(img.shape)

In [None]:
from torch.utils.data import Dataset

class MyDataset(Dataset):
    """Map-style dataset that yields ``{'image', 'label'}`` sample dicts.

    Images are read with OpenCV and the label is derived from the image path
    by ``get_label``.
    """

    def __init__(self, train_images_path, transform=None):
        """Store the image paths and an optional sample-level transform.

        Args:
            train_images_path: sequence of image file paths.
            transform: optional callable that takes the whole sample dict
                (``{'image': ..., 'label': ...}``) and returns the
                transformed sample.
        """
        self.train_images_path = train_images_path
        self.size = len(self.train_images_path)
        self.transform = transform

    def __getitem__(self, index):
        """Load the sample stored at ``index``.

        Args:
            index: position in ``train_images_path``.

        Returns:
            ``{'image': array returned by cv2.imread, 'label': int}``,
            passed through ``self.transform`` when one was given.

        Raises:
            OSError: if OpenCV cannot read the image file.
        """
        path = self.train_images_path[index]
        # Read the file that belongs to this index (not a notebook-level
        # variable), so that every index yields its own image.
        img = cv2.imread(path)  # H x W x C; the author notes 512 x 384 x 3
        if img is None:
            # cv2.imread reports failure by returning None instead of raising.
            raise OSError(f'failed to read image: {path}')

        label = self.get_label(path)
        sample = {'image': img, 'label': label}

        if self.transform is not None:
            sample = self.transform(sample)

        return sample

    def __len__(self):
        """Return the number of samples, as measured at construction time."""
        # Open question from the author: return the cached self.size, or
        # re-measure len(self.train_images_path) on every call in case the
        # list is modified after construction? The cached value is kept.
        return self.size

    def get_label(self, path):
        """Compute the class label encoded in an image path.

        Args:
            path: image path such as
                '/opt/ml/input/data/train/images/000523_female_Asian_51/incorrect_mask.jpg'.

        Returns:
            int label: gender offset (female +3) + age offset (30-59 +1,
            60 and above +2) + mask offset taken from the file name
            ('incorrect' +6, otherwise 'normal' +12).
        """
        # The parent folder name carries gender (field 1) and age (field 3).
        info = path.split('/')[-2].split('_')
        gender, age = info[1], int(info[3])
        label = 0
        if gender == 'female':
            label += 3

        if 30 <= age < 60:
            label += 1
        elif 60 <= age:
            label += 2

        # The file name carries the mask state.
        info = path.split('/')[-1]
        if 'incorrect' in info:
            label += 6
        elif 'normal' in info:
            label += 12

        return label

In [None]:
train_set = MyDataset(train_images_path)

In [None]:
sample = train_set[10]
print(sample['image'].shape)
print(sample['label'])

In [None]:
train_images_path[10]

In [None]:
print(len(train_set))
print(len(train_images_path))

## 타겟 클래스의 분포확인

In [2]:
# Group every image path by its target class to inspect the distribution.

img_dir = glob.glob('/opt/ml/input/data/train/images/*')

class_num = 18
classes = [[] for _ in range(class_num)]

for path in img_dir:
    # Person-level part of the label, read from the folder name:
    # female adds 3; age 30-59 adds 1, age 60 and above adds 2.
    info = path.split('/')[-1].split('_')
    gender, age = info[1], int(info[3])
    base = 0
    if gender == 'female':
        base += 3

    if 30 <= age < 60:
        base += 1
    elif 60 <= age:
        base += 2

    for img_path in glob.glob(os.path.join(path, '*')):
        # Match on the file name only, as get_label does, so the directory
        # part of the path can never influence the mask offset.
        file_name = img_path.split('/')[-1]
        if 'incorrect' in file_name:
            c = base + 6
        elif 'normal' in file_name:
            c = base + 12
        else:
            c = base
        classes[c].append(img_path)

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Bar chart with the number of images collected for each of the 18 classes.
class_num = 18
colors = ['#FEA443', '#F3FEB0', '#F2EDD0', '#F2D479', 
          '#C3B2AF', '#C2DDC8', '#BDCC94', '#BCBF50', 
          '#B0BAC3', '#AAB0B5', '#A9B6CC', '#A8C0CE', 
          '#A5AAA3', '#99BFB3', '#768591', '#705E78', 
          '#55967e', '#263959']

# x positions are the class ids; bar heights are the per-class image counts.
index = list(range(class_num))
count = [len(classes[class_id]) for class_id in index]

plt.figure(figsize=(8, 5))
plt.bar(index, count, color=colors)
plt.title('Class Group Exploration', fontsize=20)
plt.xlabel('class', fontsize=18)
plt.ylabel('count', fontsize=18)
plt.xticks(index, fontsize=15)
plt.show()

# Mission 2

강의 때 보여드렸던 **torchvision**에 내장된 여러 Augmentation 함수와 **albumentations** 라이브러리의 여러 transform 기법을 적용해보세요. 적용해 보신 뒤에 실제로 어떻게 변환되어 나오는지 확인해보세요. 아마 **plot** 형태로 그려서 확인해야 할 거예요.

그리고 이러한 Transforms를 추가한 Dataset이 과연 어느 정도의 성능을 가지는지 체크해보세요. 혹여 너무 무거운 프로세스라면 생각보다 느리게 동작하겠죠? 

In [None]:
from torch.utils.data import DataLoader 
from tqdm import tqdm

In [None]:
from torchvision import transforms

In [None]:
# https://tutorials.pytorch.kr/recipes/recipes/custom_dataset_transforms_loader.html
class ToTensor(object):
    """Convert the numpy image and the label of a sample dict to tensors."""

    def __call__(self, sample):
        """Return a new ``{'image', 'label'}`` dict holding torch tensors.

        Args:
            sample: dict with a numpy image under 'image' and its label
                under 'label'.
        """
        image, label = sample['image'], sample['label']

        # numpy images are laid out H x W x C while torch images are
        # C x H x W, so the channel axis is moved to the front.
        channels_first = image.transpose((2, 0, 1))

        return {
            'image': torch.from_numpy(channels_first),
            'label': torch.tensor(label),
        }

In [None]:
# Build a dataset whose samples are converted to tensors, then print the
# image shape and label of the first four samples.
transformed_dataset = MyDataset(train_images_path,
                                 transform=transforms.Compose([
                                     ToTensor()
                                 ]))

for i in range(len(transformed_dataset)):
    sample = transformed_dataset[i]

    # Tensor.size is a method: printing it without calling it only shows the
    # bound-method repr, so use .shape to display the dimensions.
    print(i, sample['image'].shape, sample['label'])

    if i == 3:
        break

### transforms - ToTensor()
→ ToTensor를 적용했을 때 (DataLoader 순회) 속도가 더 빠르다. 또 연산을 위해서는 ToTensor로 shape를 (H x W x C → C x H x W) 바꿔줘야 한다고 한다.

In [None]:
# Timing run: iterate the ToTensor-transformed dataset through a DataLoader
# created with default arguments. The loop is wrapped in tqdm and stops after
# the first four batches.
transformed_dataset = MyDataset(train_images_path,
                                 transform=transforms.Compose([
                                     ToTensor()
                                 ]))
dataloader = DataLoader(transformed_dataset)

for i, sample_batched in enumerate(tqdm(dataloader)):
    data, target = sample_batched['image'], sample_batched['label']
    print(i, data.size(), target)
    
    if i==3:
        break

In [None]:
# Timing run without any transform, for comparison with the ToTensor run:
# same default DataLoader, same four-batch loop wrapped in tqdm.
transformed_dataset = MyDataset(train_images_path)
dataloader = DataLoader(transformed_dataset)

for i, sample_batched in enumerate(tqdm(dataloader)):
    data, target = sample_batched['image'], sample_batched['label']
    print(i, data.size(), target)
    
    if i==3:
        break

### DataLoader - num_workers

→ num_workers=1 일 때 성능이 가장 좋다..

In [None]:
# num_workers experiment: batch_size=3 with num_workers=1.
# Only the first four batches are consumed.
transformed_dataset = MyDataset(train_images_path)
dataloader = DataLoader(transformed_dataset, batch_size=3, num_workers=1)

for i, sample_batched in enumerate(tqdm(dataloader)):
    data, target = sample_batched['image'], sample_batched['label']
    print(i, data.size(), target)
    
    if i==3:
        break

In [None]:
# num_workers experiment: batch_size=3 with num_workers=2.
# Only the first four batches are consumed.
transformed_dataset = MyDataset(train_images_path)
dataloader = DataLoader(transformed_dataset, batch_size=3, num_workers=2)

for i, sample_batched in enumerate(tqdm(dataloader)):
    data, target = sample_batched['image'], sample_batched['label']
    print(i, data.size(), target)
    
    if i==3:
        break

In [None]:
# num_workers experiment: batch_size=3 with num_workers=3.
# Only the first four batches are consumed.
transformed_dataset = MyDataset(train_images_path)
dataloader = DataLoader(transformed_dataset, batch_size=3, num_workers=3)

for i, sample_batched in enumerate(tqdm(dataloader)):
    data, target = sample_batched['image'], sample_batched['label']
    print(i, data.size(), target)
    
    if i==3:
        break

In [None]:
# num_workers experiment: batch_size=3 with num_workers=10.
# Only the first four batches are consumed.
transformed_dataset = MyDataset(train_images_path)
dataloader = DataLoader(transformed_dataset, batch_size=3, num_workers=10)

for i, sample_batched in enumerate(tqdm(dataloader)):
    data, target = sample_batched['image'], sample_batched['label']
    print(i, data.size(), target)
    
    if i==3:
        break

In [None]:
# num_workers experiment: batch_size=3 with num_workers=100.
# Only the first four batches are consumed.
# NOTE(review): 100 workers probably exceeds the CPU cores available on the
# machine - confirm this extreme value is intentional for the comparison.
transformed_dataset = MyDataset(train_images_path)
dataloader = DataLoader(transformed_dataset, batch_size=3, num_workers=100)

for i, sample_batched in enumerate(tqdm(dataloader)):
    data, target = sample_batched['image'], sample_batched['label']
    print(i, data.size(), target)
    
    if i==3:
        break

### torchvision - Augmentation

In [None]:
import torch

In [None]:
from torchvision import transforms

# Example torchvision pipeline: CenterCrop(10) followed by ToTensor.
# The expression is not assigned to a name; as the last statement of the
# cell its value is only displayed by the notebook.
transforms.Compose([
    transforms.CenterCrop(10),
    transforms.ToTensor(),
])

In [None]:
from torch.utils.data import Dataset

In [None]:
class MyDataset(Dataset):
    """Map-style dataset that yields ``(image, label)`` tuples.

    Intended for torchvision transforms: when a transform is given, the
    image is handed to it as a PIL image. The label is derived from the
    image path by ``get_label``.
    """

    def __init__(self, train_images_path, transform=None):
        """Store the image paths and an optional image-level transform.

        Args:
            train_images_path: sequence of image file paths.
            transform: optional callable applied to the PIL image, e.g. a
                ``torchvision.transforms.Compose`` pipeline.
        """
        self.train_images_path = train_images_path
        self.size = len(self.train_images_path)
        self.transform = transform

    def __getitem__(self, index):
        """Load the image and label stored at ``index``.

        Args:
            index: position in ``train_images_path``.

        Returns:
            ``(image, label)``. Without a transform the image is the RGB
            numpy array (H x W x C, uint8) produced by OpenCV; with a
            transform it is whatever the transform returns for the
            corresponding PIL image.

        Raises:
            OSError: if OpenCV cannot read the image file.
        """
        path = self.train_images_path[index]
        label = self.get_label(path)

        img = cv2.imread(path)  # numpy.ndarray, uint8, H x W x C, BGR order
        if img is None:
            # cv2.imread reports failure by returning None instead of raising.
            raise OSError(f'failed to read image: {path}')
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        if self.transform is not None:
            # The transform pipeline is fed a PIL image. `Image` (PIL) is
            # never imported in this notebook, so the conversion goes through
            # torchvision's ToPILImage; `transforms` is the torchvision
            # module imported earlier in the notebook.
            img = transforms.ToPILImage()(img)
            img = self.transform(img)

        return (img, label)

    def __len__(self):
        """Return the number of samples, as measured at construction time."""
        # Open question from the author: return the cached self.size, or
        # re-measure len(self.train_images_path) on every call in case the
        # list is modified after construction? The cached value is kept.
        return self.size

    def get_label(self, path):
        """Compute the class label encoded in an image path.

        Args:
            path: image path such as
                '/opt/ml/input/data/train/images/000523_female_Asian_51/incorrect_mask.jpg'.

        Returns:
            int label: gender offset (female +3) + age offset (30-59 +1,
            60 and above +2) + mask offset taken from the file name
            ('incorrect' +6, otherwise 'normal' +12).
        """
        # The parent folder name carries gender (field 1) and age (field 3).
        info = path.split('/')[-2].split('_')
        gender, age = info[1], int(info[3])
        label = 0
        if gender == 'female':
            label += 3

        if 30 <= age < 60:
            label += 1
        elif 60 <= age:
            label += 2

        # The file name carries the mask state.
        info = path.split('/')[-1]
        if 'incorrect' in info:
            label += 6
        elif 'normal' in info:
            label += 12

        return label

In [None]:
# Show the first 35 samples after a torchvision CenterCrop(280) + ToTensor
# pipeline on a 5 x 7 grid, each titled with its class id.
transformed_dataset = MyDataset(train_images_path,
                                transform=transforms.Compose([
                                     transforms.CenterCrop(280),
                                     transforms.ToTensor(),
                                 ]))
dataloader = DataLoader(transformed_dataset)

fig = plt.figure(figsize=(30, 20))
for i, sample_batched in enumerate(dataloader):
    pix, label = sample_batched

    img = np.squeeze(pix.numpy())  # drop the size-1 batch axis -> C x H x W
    img = np.transpose(img, (1, 2, 0))  # imshow wants H x W x C

    ax = fig.add_subplot(5, 7, i+1)
    ax.imshow(img)
    # label is a one-element batch tensor; .item() yields the plain class id
    # so the title reads e.g. "10" instead of "tensor(10)".
    ax.set_title(str(label[0].item()))

    if i==34:
        break

In [None]:
class MyDataset(Dataset):
    """Map-style dataset yielding ``(image, label)`` for keyword-style transforms.

    The transform, when given, is called as ``transform(image=...)`` and its
    return value is passed on unchanged.
    """

    def __init__(self, train_images_path, transform=None):
        """Remember the image paths, their count and the optional transform."""
        self.train_images_path = train_images_path
        self.size = len(self.train_images_path)
        self.transform = transform

    def __getitem__(self, index):
        """Return ``(image, label)`` for the sample at ``index``.

        Without a transform the image is the RGB array (numpy.ndarray,
        uint8, H x W x C) read through OpenCV; with a transform it is
        whatever ``self.transform(image=array)`` returns.
        """
        path = self.train_images_path[index]
        label = self.get_label(path)

        # OpenCV reads BGR; convert to RGB.
        rgb = cv2.cvtColor(cv2.imread(path), cv2.COLOR_BGR2RGB)

        if not self.transform:
            return (rgb, label)
        return (self.transform(image=rgb), label)

    def __len__(self):
        """Return the sample count captured at construction time."""
        # Open question from the author: is it better to return the cached
        # self.size, or to measure len(self.train_images_path) every time in
        # case somebody removes entries later? The cached value is returned.
        return self.size

    def get_label(self, path):
        """Derive the class label from an image path.

        Example path:
            '/opt/ml/input/data/train/images/000523_female_Asian_51/incorrect_mask.jpg'

        The label is the sum of a gender offset (female 3), an age offset
        (30-59 gives 1, 60 and above gives 2) and a mask offset taken from
        the file name ('incorrect' gives 6, otherwise 'normal' gives 12).
        """
        # Last two path components: person folder and file name.
        parts = path.rsplit('/', 2)
        fields = parts[-2].split('_')
        gender, age = fields[1], int(fields[3])

        gender_offset = 3 if gender == 'female' else 0

        if age >= 60:
            age_offset = 2
        elif age >= 30:
            age_offset = 1
        else:
            age_offset = 0

        file_name = parts[-1]
        if 'incorrect' in file_name:
            mask_offset = 6
        elif 'normal' in file_name:
            mask_offset = 12
        else:
            mask_offset = 0

        return gender_offset + age_offset + mask_offset

In [None]:
# Show the first 35 samples after an albumentations pipeline (crop,
# horizontal flip, brightness/contrast - the last two always applied) on a
# 5 x 7 grid, each titled with its class id.
transform = A.Compose([
    A.RandomCrop(width=384, height=512),
    A.HorizontalFlip(p=1),
    A.RandomBrightnessContrast(p=1),
])

transformed_dataset = MyDataset(train_images_path,
                                transform = transform)
dataloader = DataLoader(transformed_dataset)

fig = plt.figure(figsize=(30, 20))
for i, sample_batched in enumerate(dataloader):
    pix, label = sample_batched
    # The dataset passes the transform's return value through unchanged; the
    # augmented image sits under its 'image' key.
    pix = pix['image']

    img = np.squeeze(pix.numpy())  # drop the size-1 batch axis -> H x W x C

    ax = fig.add_subplot(5, 7, i+1)
    ax.imshow(img)
    # label is a one-element batch tensor; .item() yields the plain class id
    # so the title reads e.g. "10" instead of "tensor(10)".
    ax.set_title(str(label[0].item()))

    if i==34:
        break

### 어떤 방법으로 augmentation 해주는 것이 좋을까..

1. crop
2. 좌우 반전
3. cutmix
    - 음 다른 연령대랑 cutmix하는 건 음 괜찮은가?
    - 마스크 쓴거랑 안쓴거 cutmix로 하면 incorrect라고 해야하는 건가?
4. 밝기 조절

**Albumentations**
<br/>https://github.com/albumentations-team/albumentations

In [None]:
!pip install -U albumentations

In [None]:
import albumentations as A
import cv2

# Declare an augmentation pipeline
transform = A.Compose([
    A.RandomCrop(width=256, height=256),
    A.ShiftScaleRotate(p=1),
    # p is the probability of applying the transform and must lie in [0, 1];
    # 1 means "always apply" (the previous value 2 is not a valid probability).
    A.RandomBrightnessContrast(p=1),
])

# Read an image with OpenCV and convert it to the RGB colorspace
image = cv2.imread('/opt/ml/input/data/train/images/000523_female_Asian_51/incorrect_mask.jpg')
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# Augment the image; the result is a dict with the output under 'image'
transformed = transform(image=image)
transformed_image = transformed['image']

In [None]:
# Show the original and the augmented image side by side.
fig = plt.figure()

ax1 = fig.add_subplot(1, 2, 1)
ax1.imshow(image) # H x W x C
ax1.set_title('original image')  # title typo ('imgae') corrected

ax2 = fig.add_subplot(1, 2, 2)
ax2.imshow(transformed_image)
ax2.set_title('transformed image')

plt.show()

흠.. 최대 ±45도 범위의 무작위 회전(ShiftScaleRotate 기본값).. 할까 말까