## 0. Library 불러오기 및 경로설정

In [3]:
import os
import pandas as pd
import numpy as np
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

from torchvision import transforms
from torchvision.transforms import Resize, ToTensor, Normalize
import torchvision.models as models
from tqdm.notebook import tqdm

In [4]:
# Root folder of the training data; the dataset classes below read
# `train.csv` and the `images/` directory from it.
train_dir = '/opt/ml/input/data/train'

In [5]:
# Use the first CUDA device when one is available, otherwise fall back to CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
print(device)

cuda:0


## 1. Model 정의

In [6]:
class LabelSmoothingLoss(nn.Module):
    """Cross-entropy loss with optional label smoothing.

    The target distribution puts ``1 - smoothing`` on the true class and
    ``smoothing / (classes - 1)`` on every other class. With the default
    ``smoothing=0.0`` this is ordinary cross-entropy.

    Args:
        classes: Number of classes used to spread the smoothing mass. It
            should equal the size of ``pred`` along ``dim``; otherwise the
            smoothed target distribution does not sum to one.
        smoothing: Probability mass moved away from the true class.
        dim: Class dimension of ``pred``.
    """

    def __init__(self, classes=3, smoothing=0.0, dim=-1):
        super(LabelSmoothingLoss, self).__init__()
        self.confidence = 1.0 - smoothing
        self.smoothing = smoothing
        self.cls = classes
        self.dim = dim

    def forward(self, pred, target):
        """Return the mean smoothed cross-entropy.

        Args:
            pred: Raw logits with the classes laid out along ``self.dim``.
            target: Integer (int64) class indices shaped like ``pred``
                without the class dimension.

        Returns:
            A scalar tensor: the loss averaged over all target elements.
        """
        log_probs = pred.log_softmax(dim=self.dim)
        # With a single class there are no "other" classes to share the
        # smoothing mass, so avoid dividing by zero.
        off_value = self.smoothing / (self.cls - 1) if self.cls > 1 else 0.0
        with torch.no_grad():
            true_dist = torch.full_like(log_probs, off_value)
            # Scatter along the configured class dimension (not a fixed
            # dim 1) so inputs with extra leading dimensions work too.
            true_dist.scatter_(
                self.dim, target.unsqueeze(self.dim), self.confidence
            )
        return torch.mean(torch.sum(-true_dist * log_probs, dim=self.dim))


In [7]:
# One independent pretrained ResNet-152 backbone per task (age, gender, mask).
net_age, net_gender, net_mask = (
    models.resnet152(pretrained=True) for _ in range(3)
)
# Lighter alternative kept for reference: models.resnet18(pretrained=True)

In [8]:
# Replace each backbone's final fully connected layer with a task-specific
# head and move the network to the training device. The input width is read
# from the existing head instead of being hard-coded, so a different backbone
# (e.g. the resnet18 alternative) can be swapped in without editing this cell.
# Age uses 100 outputs, one per integer age label
# (assumes ages fall in 0-99 -- TODO confirm against train.csv).
net_age.fc = torch.nn.Linear(in_features=net_age.fc.in_features, out_features=100, bias=True)
net_age.to(device)
net_gender.fc = torch.nn.Linear(in_features=net_gender.fc.in_features, out_features=2, bias=True)
net_gender.to(device)
net_mask.fc = torch.nn.Linear(in_features=net_mask.fc.in_features, out_features=3, bias=True)
net_mask.to(device)
LEARNING_RATE = 0.0001

# Smoothing defaults to 0.0, so this behaves as plain cross-entropy and the
# same criterion can be shared by all three tasks.
loss_fn = LabelSmoothingLoss()
# Each optimizer updates the whole network (backbone and new head); the new
# head weights are re-initialised in place with Xavier-uniform.
optim_age = torch.optim.Adam(net_age.parameters(), lr=LEARNING_RATE)
torch.nn.init.xavier_uniform_(net_age.fc.weight)
optim_gender = torch.optim.Adam(net_gender.parameters(), lr=LEARNING_RATE)
torch.nn.init.xavier_uniform_(net_gender.fc.weight)
optim_mask = torch.optim.Adam(net_mask.parameters(), lr=LEARNING_RATE)
torch.nn.init.xavier_uniform_(net_mask.fc.weight)

Parameter containing:
tensor([[-0.0400,  0.0018, -0.0406,  ...,  0.0374, -0.0032, -0.0401],
        [ 0.0489, -0.0444,  0.0136,  ...,  0.0277,  0.0516,  0.0035],
        [-0.0239,  0.0276,  0.0374,  ...,  0.0131,  0.0393,  0.0204]],
       device='cuda:0', requires_grad=True)

## 2. Train Dataset 정의

#### age

In [9]:
class TrainAge(Dataset):
    """Dataset of ``(image, age)`` pairs for the age model.

    ``train_path`` must contain ``train.csv`` (with ``path`` and ``age``
    columns) and an ``images/`` directory holding one sub-folder per CSV row.
    Every non-hidden file in a row's folder becomes one sample labelled with
    that row's integer age.

    Args:
        train_path: Root folder of the training data.
        transform: Callable applied to each loaded PIL image, or a falsy
            value to return the image untouched.
    """

    def __init__(self, train_path, transform):
        img_root = os.path.join(train_path, 'images')
        self.transform = transform
        self.traininfo = pd.read_csv(os.path.join(train_path, 'train.csv'))

        self.X = []  # image file paths
        self.Y = []  # integer age of the person in each image
        for folder, age in zip(self.traininfo['path'], self.traininfo['age']):
            person_dir = os.path.join(img_root, folder)
            # os.listdir returns entries in arbitrary order; sort them so the
            # index -> sample mapping is reproducible across runs/machines.
            for name in sorted(os.listdir(person_dir)):
                if name.startswith('.'):  # skip hidden files
                    continue
                self.X.append(os.path.join(person_dir, name))
                self.Y.append(int(age))

    def __getitem__(self, index):
        """Return ``(image, age)`` for the sample at ``index``."""
        # Force three channels: the pipeline normalises with 3-channel
        # statistics, which fails on grayscale or RGBA files.
        image = Image.open(self.X[index]).convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image, self.Y[index]

    def _classify(self, age):
        """Map an age to its bucket: 0 (<30), 1 (30-59) or 2 (>=60)."""
        if age < 30:
            return 0
        if age < 60:
            return 1
        return 2

    def __len__(self):
        """Return the number of image samples."""
        return len(self.X)

#### gender

In [10]:
class TrainGender(Dataset):
    """Dataset of ``(image, gender)`` pairs for the gender model.

    ``train_path`` must contain ``train.csv`` (with ``path`` and ``gender``
    columns) and an ``images/`` directory holding one sub-folder per CSV row.
    Every non-hidden file in a row's folder becomes one sample labelled
    0 for ``'male'`` and 1 for ``'female'``.

    Args:
        train_path: Root folder of the training data.
        transform: Callable applied to each loaded PIL image, or a falsy
            value to return the image untouched.

    Raises:
        KeyError: If a ``gender`` value is neither ``'male'`` nor ``'female'``.
    """

    def __init__(self, train_path, transform):
        img_root = os.path.join(train_path, 'images')
        self.transform = transform
        self.traininfo = pd.read_csv(os.path.join(train_path, 'train.csv'))

        self.X = []  # image file paths
        self.Y = []  # encoded gender of the person in each image
        self.filter = {'male' : 0, 'female' : 1}
        for folder, gender in zip(self.traininfo['path'], self.traininfo['gender']):
            person_dir = os.path.join(img_root, folder)
            label = self.filter[gender]
            # os.listdir returns entries in arbitrary order; sort them so the
            # index -> sample mapping is reproducible across runs/machines.
            for name in sorted(os.listdir(person_dir)):
                if name.startswith('.'):  # skip hidden files
                    continue
                self.X.append(os.path.join(person_dir, name))
                self.Y.append(label)

    def __getitem__(self, index):
        """Return ``(image, gender_label)`` for the sample at ``index``."""
        # Force three channels: the pipeline normalises with 3-channel
        # statistics, which fails on grayscale or RGBA files.
        image = Image.open(self.X[index]).convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image, self.Y[index]

    def __len__(self):
        """Return the number of image samples."""
        return len(self.X)

#### mask

In [11]:
class TrainMask(Dataset):
    """Dataset of ``(image, mask_label)`` pairs for the mask model.

    ``train_path`` must contain ``train.csv`` (with a ``path`` column) and an
    ``images/`` directory holding one sub-folder per CSV row. Every non-hidden
    file in a row's folder becomes one sample; its label is derived from the
    first character of the file name (see ``_classify``).

    Args:
        train_path: Root folder of the training data.
        transform: Callable applied to each loaded PIL image, or a falsy
            value to return the image untouched.
    """

    def __init__(self, train_path, transform):
        img_root = os.path.join(train_path, 'images')
        self.transform = transform
        self.traininfo = pd.read_csv(os.path.join(train_path, 'train.csv'))

        self.X = []  # image file paths
        self.Y = []  # mask label derived from each file name
        for folder in self.traininfo['path']:
            person_dir = os.path.join(img_root, folder)
            # os.listdir returns entries in arbitrary order; sort them so the
            # index -> sample mapping is reproducible across runs/machines.
            for name in sorted(os.listdir(person_dir)):
                if name.startswith('.'):  # skip hidden files
                    continue
                self.X.append(os.path.join(person_dir, name))
                self.Y.append(self._classify(name[0]))

    def __getitem__(self, index):
        """Return ``(image, mask_label)`` for the sample at ``index``."""
        # Force three channels: the pipeline normalises with 3-channel
        # statistics, which fails on grayscale or RGBA files.
        image = Image.open(self.X[index]).convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image, self.Y[index]

    def _classify(self, mask):
        """Map a file name's first character to a label.

        ``'m'`` -> 0, ``'i'`` -> 1, anything else -> 2.
        """
        if mask == 'm':
            return 0
        if mask == 'i':
            return 1
        return 2

    def __len__(self):
        """Return the number of image samples."""
        return len(self.X)

In [12]:
# Shared preprocessing: bilinear resize to (512, 384), tensor conversion, then
# per-channel normalisation with mean 0.5 and std 0.2.
transform = transforms.Compose(
    [
        Resize(size=(512, 384), interpolation=Image.BILINEAR),
        ToTensor(),
        Normalize(mean=(0.5,) * 3, std=(0.2,) * 3),
    ]
)

# One dataset per task, all reading the same folder with the same transform.
age_train, gender_train, mask_train = (
    dataset_cls(train_dir, transform)
    for dataset_cls in (TrainAge, TrainGender, TrainMask)
)

# Peek at the first age sample as a sanity check.
age_train[0]

(tensor([[[ 1.2647,  1.2647,  1.2647,  ...,  1.5000,  1.5000,  1.5000],
          [ 1.2647,  1.2647,  1.2647,  ...,  1.5000,  1.5000,  1.5000],
          [ 1.2647,  1.2647,  1.2647,  ...,  1.4804,  1.5000,  1.5000],
          ...,
          [-0.0098, -0.0294, -0.0294,  ...,  0.4608,  0.5588,  0.5980],
          [-0.3431, -0.3627, -0.3627,  ...,  0.4804,  0.5588,  0.6176],
          [-0.8137, -0.8137, -0.8137,  ...,  0.4804,  0.5784,  0.6176]],
 
         [[ 1.2451,  1.2451,  1.2451,  ...,  1.4804,  1.4804,  1.4804],
          [ 1.2451,  1.2451,  1.2451,  ...,  1.4804,  1.4804,  1.4804],
          [ 1.2451,  1.2451,  1.2451,  ...,  1.4608,  1.4804,  1.4804],
          ...,
          [-0.9314, -0.9510, -0.9510,  ..., -0.5196, -0.4804, -0.4412],
          [-1.2647, -1.2843, -1.2843,  ..., -0.5000, -0.4608, -0.4020],
          [-1.7353, -1.7353, -1.7353,  ..., -0.5000, -0.4412, -0.4020]],
 
         [[ 1.2059,  1.2059,  1.2059,  ...,  1.4412,  1.4412,  1.4412],
          [ 1.2059,  1.2059,

In [13]:
# Training hyper-parameters and the folder the trained weights are written to.
NUM_EPOCH = 3
BATCH_SIZE = 32
save_dir = '/opt/ml/v2/vo'

# One shuffled loader per task; all three use the same batching settings.
age_dataloader = DataLoader(
    age_train,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,
)
gender_dataloader = DataLoader(
    gender_train,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,
)
mask_dataloader = DataLoader(
    mask_train,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,
)

## 3. Train

In [14]:
# Train the age model for NUM_EPOCH epochs and save the final weights.
# NOTE: despite the `best_test_*` names, these track the best per-epoch
# metrics on the *training* data; no held-out split is evaluated here.
best_test_accuracy = 0.
best_test_loss = 9999.
for epoch in range(NUM_EPOCH):
    running_loss = 0.
    running_acc = 0  # number of correct predictions so far (Python int)
    net_age.train()
    for images, labels in tqdm(age_dataloader):
        images = images.to(device)
        labels = labels.to(device)
        optim_age.zero_grad()  # clear gradients from the previous step
        logits = net_age(images)
        preds = logits.argmax(dim=1)  # index of the largest logit = predicted class
        loss = loss_fn(logits, labels.long())
        loss.backward()  # back-propagate the loss to compute gradients
        optim_age.step()  # update the model with the computed gradients
        # The loss is a batch mean, so weight it by batch size to get a sum.
        running_loss += loss.item() * images.size(0)
        # Accumulate as a Python int instead of keeping a tensor on the device.
        running_acc += (preds == labels).sum().item()
    # End of epoch: average over every sample in the dataset.
    epoch_loss = running_loss / len(age_dataloader.dataset)
    epoch_acc = running_acc / len(age_dataloader.dataset)
    print(f"현재 epoch-{epoch}의 평균 Loss : {epoch_loss:.3f}, 평균 Accuracy : {epoch_acc:.3f}")
    if best_test_accuracy < epoch_acc:
        best_test_accuracy = epoch_acc
    if best_test_loss > epoch_loss:
        best_test_loss = epoch_loss
print("age 학습 종료!")
print(f"최고 accuracy : {best_test_accuracy}, 최고 낮은 loss : {best_test_loss}")
# torch.save does not create missing directories; make sure the target exists.
os.makedirs(save_dir, exist_ok=True)
torch.save(net_age.state_dict(), f"{save_dir}/age.pth")

# Train the gender model for NUM_EPOCH epochs and save the final weights.
# NOTE: despite the `best_test_*` names, these track the best per-epoch
# metrics on the *training* data; no held-out split is evaluated here.
best_test_accuracy = 0.
best_test_loss = 9999.
for epoch in range(NUM_EPOCH):
    running_loss = 0.
    running_acc = 0  # number of correct predictions so far (Python int)
    net_gender.train()
    for images, labels in tqdm(gender_dataloader):
        images = images.to(device)
        labels = labels.to(device)
        optim_gender.zero_grad()  # clear gradients from the previous step
        logits = net_gender(images)
        preds = logits.argmax(dim=1)  # index of the largest logit = predicted class
        loss = loss_fn(logits, labels.long())
        loss.backward()  # back-propagate the loss to compute gradients
        optim_gender.step()  # update the model with the computed gradients
        # The loss is a batch mean, so weight it by batch size to get a sum.
        running_loss += loss.item() * images.size(0)
        # Accumulate as a Python int instead of keeping a tensor on the device.
        running_acc += (preds == labels).sum().item()
    # End of epoch: average over every sample in the dataset.
    epoch_loss = running_loss / len(gender_dataloader.dataset)
    epoch_acc = running_acc / len(gender_dataloader.dataset)
    print(f"현재 epoch-{epoch}의 평균 Loss : {epoch_loss:.3f}, 평균 Accuracy : {epoch_acc:.3f}")
    if best_test_accuracy < epoch_acc:
        best_test_accuracy = epoch_acc
    if best_test_loss > epoch_loss:
        best_test_loss = epoch_loss
print("gender 학습 종료!")
print(f"최고 accuracy : {best_test_accuracy}, 최고 낮은 loss : {best_test_loss}")
# torch.save does not create missing directories; make sure the target exists.
os.makedirs(save_dir, exist_ok=True)
torch.save(net_gender.state_dict(), f"{save_dir}/gender.pth")



HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1628.0), HTML(value='')))


현재 epoch-0의 평균 Loss : 1.068, 평균 Accuracy : 0.674


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1628.0), HTML(value='')))


현재 epoch-1의 평균 Loss : 0.213, 평균 Accuracy : 0.940


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1628.0), HTML(value='')))


현재 epoch-2의 평균 Loss : 0.108, 평균 Accuracy : 0.969
age 학습 종료!
최고 accuracy : 0.9690476059913635, 최고 낮은 loss : 0.10788756691337135


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1628.0), HTML(value='')))


현재 epoch-0의 평균 Loss : 0.054, 평균 Accuracy : 0.981


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1628.0), HTML(value='')))


현재 epoch-1의 평균 Loss : 0.020, 평균 Accuracy : 0.993


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1628.0), HTML(value='')))


현재 epoch-2의 평균 Loss : 0.017, 평균 Accuracy : 0.994
gender 학습 종료!
최고 accuracy : 0.9940860271453857, 최고 낮은 loss : 0.016571665326224325


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1628.0), HTML(value='')))




RuntimeError: CUDA out of memory. Tried to allocate 48.00 MiB (GPU 0; 31.75 GiB total capacity; 22.85 GiB already allocated; 42.50 MiB free; 22.91 GiB reserved in total by PyTorch)

In [None]:
# Train the mask model for NUM_EPOCH epochs and save the final weights.
# NOTE: despite the `best_test_*` names, these track the best per-epoch
# metrics on the *training* data; no held-out split is evaluated here.
best_test_accuracy = 0.
best_test_loss = 9999.
for epoch in range(NUM_EPOCH):
    running_loss = 0.
    running_acc = 0  # number of correct predictions so far (Python int)
    net_mask.train()
    for images, labels in tqdm(mask_dataloader):
        images = images.to(device)
        labels = labels.to(device)
        optim_mask.zero_grad()  # clear gradients from the previous step
        logits = net_mask(images)
        preds = logits.argmax(dim=1)  # index of the largest logit = predicted class
        loss = loss_fn(logits, labels.long())
        loss.backward()  # back-propagate the loss to compute gradients
        optim_mask.step()  # update the model with the computed gradients
        # The loss is a batch mean, so weight it by batch size to get a sum.
        running_loss += loss.item() * images.size(0)
        # Accumulate as a Python int instead of keeping a tensor on the device.
        running_acc += (preds == labels).sum().item()
    # End of epoch: average over every sample in the dataset.
    epoch_loss = running_loss / len(mask_dataloader.dataset)
    epoch_acc = running_acc / len(mask_dataloader.dataset)
    print(f"현재 epoch-{epoch}의 평균 Loss : {epoch_loss:.3f}, 평균 Accuracy : {epoch_acc:.3f}")
    if best_test_accuracy < epoch_acc:
        best_test_accuracy = epoch_acc
    if best_test_loss > epoch_loss:
        best_test_loss = epoch_loss
print("mask 학습 종료!")
print(f"최고 accuracy : {best_test_accuracy}, 최고 낮은 loss : {best_test_loss}")
# torch.save does not create missing directories; make sure the target exists.
os.makedirs(save_dir, exist_ok=True)
torch.save(net_mask.state_dict(), f"{save_dir}/mask.pth")

## 4. Inference

In [None]:
# Free memory before inference. Order matters: first collect unreachable
# Python objects so the tensors they hold are dropped, then ask PyTorch to
# release cached GPU memory that is no longer in use.
import gc
gc.collect()
torch.cuda.empty_cache()

In [None]:
# Root folder of the evaluation data; `info.csv` and `images/` are read from it.
test_dir = '/opt/ml/input/data/eval'

In [None]:
class TestDataset(Dataset):
    """Unlabelled image dataset used at inference time.

    Args:
        img_paths: Sequence of image file paths, one per sample.
        transform: Callable applied to each loaded PIL image, or a falsy
            value to return the image untouched.
    """

    def __init__(self, img_paths, transform):
        self.img_paths = img_paths
        self.transform = transform

    def __getitem__(self, index):
        """Return the (optionally transformed) image at ``index``."""
        # Force three channels: the inference transform normalises with
        # 3-channel statistics, which fails on grayscale or RGBA files.
        image = Image.open(self.img_paths[index]).convert('RGB')

        if self.transform:
            image = self.transform(image)
        return image

    def __len__(self):
        """Return the number of images."""
        return len(self.img_paths)

In [None]:
# Load the metadata and build the list of image paths.
submission = pd.read_csv(os.path.join(test_dir, 'info.csv'))
image_dir = os.path.join(test_dir, 'images')

# Create the test Dataset and its DataLoader (same preprocessing as training).
image_paths = [os.path.join(image_dir, img_id) for img_id in submission.ImageID]
transform = transforms.Compose([
    Resize((512, 384), Image.BILINEAR),
    ToTensor(),
    Normalize(mean=(0.5, 0.5, 0.5), std=(0.2, 0.2, 0.2)),
])
dataset = TestDataset(image_paths, transform)

loader = DataLoader(
    dataset,
    shuffle=False
)

# Use the in-memory models trained above (load saved weights with torch.load
# instead if running inference separately).
# Run on whichever device the models already live on rather than forcing
# 'cuda', which fails on CPU-only machines and can mismatch the models' device.
device = next(net_age.parameters()).device
net_age.eval()
net_gender.eval()
net_mask.eval()


# Predict age, gender and mask for every test image.
preds_age = []
preds_gender = []
preds_mask = []
with torch.no_grad():  # no gradients are needed for inference
    for images in loader:
        images = images.to(device)
        preds_age.extend(net_age(images).argmax(dim=-1).cpu().numpy())
        preds_gender.extend(net_gender(images).argmax(dim=-1).cpu().numpy())
        preds_mask.extend(net_mask(images).argmax(dim=-1).cpu().numpy())

# Combine the three predictions into a single class id:
#   age bucket (0: <30, 1: 30-59, 2: >=60)
#   + 3 if gender == 1
#   + 6 if mask == 1, or + 12 if mask == 2
values = list(zip(preds_age, preds_gender, preds_mask))
answers = []
for value in values:
    print(value)
    age, gender, mask = value
    if age < 30:
        c = 0
    elif age < 60:
        c = 1
    else:
        c = 2
    if gender == 1:
        c += 3
    if mask == 1:
        c += 6
    elif mask == 2:
        c += 12
    print(f'age : {age}, gender: {gender}, mask : {mask}, class : {c}')
    answers.append(c)
submission['ans'] = answers

# Save the submission file.
submission.to_csv(os.path.join(test_dir, 'submission4.csv'), index=False)
print('test inference is done!')