## Set Environment

In [1]:
# Check GPU: print the NVIDIA driver / CUDA version and the GPU attached to this runtime
!nvidia-smi

Wed Mar  3 09:29:01 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.39       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   34C    P0    25W / 300W |      0MiB / 16160MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [1]:
# Google Drive Mount
# Mounts Google Drive at /content/drive; the project directory used below lives under MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# Install EfficientNet
!pip install efficientnet_pytorch



In [3]:
# Import libraries
import pandas as pd
import numpy as np
import cv2
import os
import PIL
import random
import glob
import time
import pickle
from tqdm import tqdm
from sklearn.model_selection import KFold
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
import torchvision.transforms as T
from torch.utils.data import DataLoader, Dataset
from efficientnet_pytorch import EfficientNet

# Set random seed
# Seed every RNG used in this notebook: Python, NumPy, PyTorch (CPU and all CUDA devices).
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
os.environ["PYTHONHASHSEED"] = str(seed)
torch.backends.cudnn.deterministic = True
# benchmark mode lets cuDNN auto-tune and pick convolution algorithms at run time,
# which can differ between runs; it must be off for the seed above to be meaningful.
torch.backends.cudnn.benchmark = False
print(f'seed : {seed}')

# Set path
'''
The paths below assume the following directory structure.
MyDrive
└── dirty_mnist
     ├── model
     └── data
          ├── train_data
          │   └── 00000~49999.png
          └── test_data
               └── 50000~54999.png
'''
# Work from the project directory on Drive; every relative path in this notebook resolves against it.
os.chdir('/content/drive/MyDrive/dirty_mnist')
ROOT_PATH = os.getcwd()
model_path, train_data_path, test_data_path = (
    os.path.join(ROOT_PATH, sub_dir)
    for sub_dir in ('model', 'data/train_data', 'data/test_data')
)
print(f'ROOT_PATH : {ROOT_PATH}')
print(f'model_path : {model_path}')
print(f'train_data_path : {train_data_path}')
print(f'test_data_path : {test_data_path}')

# Set device
# Use the first GPU when available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f'device : {device}')
# get_device_properties only accepts CUDA devices; calling it on the CPU fallback raises.
if device.type == 'cuda' :
    print(torch.cuda.get_device_properties(device))

seed : 42
ROOT_PATH : /content/drive/MyDrive/dirty_mnist
model_path : /content/drive/MyDrive/dirty_mnist/model
train_data_path : /content/drive/MyDrive/dirty_mnist/data/train_data
test_data_path : /content/drive/MyDrive/dirty_mnist/data/test_data
device : cuda:0
_CudaDeviceProperties(name='Tesla V100-SXM2-16GB', major=7, minor=0, total_memory=16160MB, multi_processor_count=80)


## Load Train Data

In [5]:
# Load labels for train
df_labels = pd.read_csv('data/dirty_mnist_2nd_answer.csv')
# Everything after the first column is kept as the label matrix (one row per CSV row).
label_columns = df_labels.values[:, 1:]
labels = np.array(label_columns)
print(labels)

[[1 1 0 ... 1 1 1]
 [1 0 0 ... 0 1 1]
 [0 0 0 ... 1 1 0]
 ...
 [0 1 0 ... 1 0 0]
 [0 1 1 ... 1 0 0]
 [1 0 1 ... 1 0 0]]


In [6]:
# Set images path
'''
Loading from Google Drive takes a long time, so the image paths are managed in a .pkl file.
If the file does not exist, all paths are collected and then saved to the file;
if the file exists, the .pkl file is loaded instead.
'''

if not os.path.isfile('Train_Img_path.pkl') :
    # No cache yet: list the training PNGs in sorted order and persist the result.
    imgs_path = np.array(sorted(glob.glob(os.path.join(train_data_path, '*.png'))))
    with open('Train_Img_path.pkl', 'wb') as f :
        pickle.dump(imgs_path, f)
else :
    # Cache hit: reuse the previously stored array of paths.
    with open('Train_Img_path.pkl', 'rb') as f :
        imgs_path = pickle.load(f)

print(imgs_path)

['/content/drive/MyDrive/dirty_mnist/data/train_data/00000.png'
 '/content/drive/MyDrive/dirty_mnist/data/train_data/00001.png'
 '/content/drive/MyDrive/dirty_mnist/data/train_data/00002.png' ...
 '/content/drive/MyDrive/dirty_mnist/data/train_data/49997.png'
 '/content/drive/MyDrive/dirty_mnist/data/train_data/49998.png'
 '/content/drive/MyDrive/dirty_mnist/data/train_data/49999.png']


In [7]:
# Load Images
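'''
Loading from Google Drive takes a long time, so the image data is managed as a single .npy file.
If the file does not exist, all images are loaded and then saved to the file;
if the file exists, the np.array object is loaded from it.
The advantage is that the images can be loaded in about 2~3 minutes on average.
Note, however, that the file is 9GB and loading it uses about 15GB of memory or more.
'''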

if os.path.isfile('Imgs_Numpy.npy') :
    # Cache hit: load the whole image array at once and report how long it took.
    st = time.time()
    imgs = np.load('Imgs_Numpy.npy')
    print(imgs.shape)
    print(int(time.time()-st),'sec')
else :
    # No cache yet: decode every training image, stack them, and persist the array.
    imgs = []
    for img_file in tqdm(imgs_path) :
        img = cv2.imread(img_file, cv2.IMREAD_COLOR)
        # cv2.imread reports failure by returning None instead of raising. Stop here
        # rather than writing a broken array into the cache that later runs would trust.
        if img is None :
            raise FileNotFoundError(f'Failed to read image: {img_file}')
        imgs.append(img)
    imgs = np.array(imgs)
    np.save('Imgs_Numpy.npy', imgs)

(50000, 256, 256, 3)
108 sec


In [4]:
# Load train and validation index
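'''
To deal with reproducibility issues, the indices split by KFold are managed in a file.
If the file does not exist, KFold is run and the object holding the indices is saved to the file;
if the file exists, it is loaded as a List[Tuple[np.array, np.array]].
This guarantees that the same data is used every time, even if the session or runtime is terminated.
'''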

if not os.path.isfile('Train_KFold.pkl') :
    # No stored split yet: build the 5 (train_idx, valid_idx) pairs and persist them.
    kf = KFold(n_splits=5, shuffle=True, random_state=seed)
    folds = [(train_idx, valid_idx) for train_idx, valid_idx in kf.split(imgs)]
    with open('Train_KFold.pkl', 'wb') as f :
        pickle.dump(folds, f)
else :
    # Reuse the stored split.
    with open('Train_KFold.pkl', 'rb') as f :
        folds = pickle.load(f)

folds

[(array([    0,     1,     2, ..., 49997, 49998, 49999]),
  array([    4,     6,     7, ..., 49988, 49990, 49992])),
 (array([    2,     3,     4, ..., 49997, 49998, 49999]),
  array([    0,     1,    11, ..., 49979, 49986, 49993])),
 (array([    0,     1,     2, ..., 49994, 49995, 49997]),
  array([    3,     8,    14, ..., 49996, 49998, 49999])),
 (array([    0,     1,     2, ..., 49997, 49998, 49999]),
  array([    5,    15,    18, ..., 49989, 49994, 49995])),
 (array([    0,     1,     3, ..., 49996, 49998, 49999]),
  array([    2,     9,    10, ..., 49981, 49985, 49997]))]

## Define Dataset

In [9]:
# Define Dataset
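'''
Defines a Dataset that uses the np.array already loaded in memory.
The images are loaded as 3-channel COLOR images.
The transform is applied only when a transform is given;
when labels are given, the image and its label are returned,
and when there are no labels, only the image is returned.
'''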

class MnistDataset(Dataset) :
    """Dataset over samples that are already held in memory.

    Args:
        imgs: indexable collection of images; its length is the dataset length.
        labels: optional indexable collection of label vectors, indexed like `imgs`.
        transform: optional callable applied to each image before it is returned.
    """

    def __init__(self, imgs=None, labels=None, transform=None) :
        self.transform = transform
        self.labels = labels
        self.imgs = imgs

    def __len__(self) :
        """Return the number of samples (the length of `imgs`)."""
        return len(self.imgs)

    def __getitem__(self, idx) :
        """Return `(image, label)` when labels were given, otherwise only `image`."""
        sample = self.imgs[idx]
        if self.transform is not None :
            sample = self.transform(sample)
        if self.labels is None :
            return sample
        # The label vector is handed out as a float tensor.
        return sample, torch.FloatTensor(self.labels[idx])

## Define Model

In [5]:
# Define model
'''
EfficientNet b7 is used.
A Linear layer that maps to 26 dimensions is added to produce the output required by the task.
SiLU is used as the activation function.
'''

class MnistEfficientNet(nn.Module) :
    """Pretrained EfficientNet-b7 followed by a Linear(1000, 26) head with sigmoid outputs.

    Args:
        in_channels: number of input image channels, forwarded to the EfficientNet constructor.
    """

    def __init__(self, in_channels) :
        super().__init__()
        # The attribute names become part of the state_dict keys, so they are kept as-is.
        self.EffNet = EfficientNet.from_pretrained('efficientnet-b7', in_channels=in_channels)
        self.FC = nn.Linear(1000, 26)

    def forward(self, x) :
        """Return one value in (0, 1) per output unit of the final layer for each sample."""
        backbone_out = self.EffNet(x)
        head_out = self.FC(F.silu(backbone_out))
        return torch.sigmoid(head_out)

## Set Train Options

In [11]:
# Set augmentation and transform
'''
The augmentations that experiments showed to perform well are applied to the train set.
The validation set gets the same transform as training, but without the augmentations.
'''

# Values shared by the train and validation pipelines.
_resize_hw = (331, 331)
_norm_mean = (0.485, 0.456, 0.406)
_norm_std = (0.229, 0.224, 0.225)

# Training pipeline: resize, random flips and rotation, then tensor conversion and normalization.
train_transform = T.Compose([
    T.ToPILImage(),
    T.Resize(_resize_hw),
    T.RandomHorizontalFlip(p=0.6),
    T.RandomVerticalFlip(p=0.6),
    T.RandomRotation(40),
    T.ToTensor(),
    T.Normalize(mean=_norm_mean, std=_norm_std),
])

# Validation pipeline: the same steps without the random augmentations.
valid_transform = T.Compose([
    T.ToPILImage(),
    T.Resize(_resize_hw),
    T.ToTensor(),
    T.Normalize(mean=_norm_mean, std=_norm_std),
])

In [12]:
# Set hyper parameters
'''
A small batch size is used because of the model size and the resize.
When resuming training from a checkpoint (e.g. after the session ended), the lr value needs to be changed.
'''

batch_size = 8  # training batch size; the validation loader uses batch_size // 4
lr = 0.001  ## initial Adam learning rate; if started in checkpoint change this to the already decayed value (ex. lr = 0.001 * (0.75 ** 5))
epochs = 25  # upper bound of the epoch loop for each fold
lr_scheduler_step = 5  # passed to StepLR as step_size
lr_scheduler_gamma = 0.75  # passed to StepLR as gamma

## Training

In [13]:
# Set train env
'''
When training across several sessions, list only the folds you want in now_train_folds.
This allows several models to be trained at the same time, each on its own fold.
'''

# Fold numbers (indices into `folds`) to train in this session.
now_train_folds = [0, 1, 2, 3, 4]  ## if started in checkpoint change this to 0~4 (ex. now_train_folds = [4])
# Release GPU memory cached by PyTorch before training starts.
torch.cuda.empty_cache()

In [16]:
# Train in fold
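'''
When resuming training from a checkpoint, the parts marked with ## need to be changed.
The checkpoint file name must be filled in so that it can be loaded (see the model directory).
The checkpoint's val_loss must be set as valid_loss_min.
The loop must be set to skip as many epochs as the checkpoint has already trained.

During validation, the average loss of the epoch is computed.
If valid_loss is smaller than valid_loss_min, the model is considered better:
the previous model of that fold is truncated to 0 bytes and deleted, and then the model's state_dict is saved.
'''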

def _run_epoch(model, loader, criterion, description, optimizer=None) :
    """Run one pass over `loader` and return `(mean_loss, mean_acc)`.

    The model is trained when `optimizer` is given; otherwise it is only evaluated
    (eval mode, autograd disabled, no parameter update).

    Args:
        model: network returning per-label probabilities for a batch of images.
        loader: DataLoader yielding `(img, label)` batches.
        criterion: loss called as `criterion(y_probs, y)`.
        description: text shown in front of the progress bar.
        optimizer: optimizer to step after each batch, or None for evaluation.

    Returns:
        Mean of the per-batch losses and mean of the per-batch accuracies
        (probabilities thresholded at 0.5). Both are NaN for an empty loader.
    """
    is_train = optimizer is not None
    prefix = 'train' if is_train else 'valid'
    # The mode is constant for the whole pass, so set it once instead of per batch.
    model.train(is_train)

    acc_list = []
    loss_list = []
    mean_loss = mean_acc = float('nan')
    # Disabling autograd during evaluation avoids building a graph that is never used.
    with torch.set_grad_enabled(is_train), \
            tqdm(loader, total=len(loader), unit='batch') as bar :
        bar.set_description(description)
        for img, label in bar :
            X = img.to(device)
            y = label.to(device)

            if is_train :
                optimizer.zero_grad()
            y_probs = model(X)
            loss = criterion(y_probs, y)
            if is_train :
                loss.backward()
                optimizer.step()

            y_preds = y_probs.detach().cpu().numpy() > 0.5
            acc_list.append((label.numpy() == y_preds).mean())
            loss_list.append(loss.item())
            mean_acc = np.mean(acc_list)
            mean_loss = np.mean(loss_list)
            bar.set_postfix(**{f'{prefix}_loss': mean_loss, f'{prefix}_acc': mean_acc})
    return mean_loss, mean_acc


for fold in now_train_folds :
    # Modeling
    model = MnistEfficientNet(in_channels=3).to(device)
    # model.load_state_dict(torch.load(''))  ## if started in checkpoint change this to best model of now fold (ex. 'model/4fold_24epoch_0.1989_silu.pth')
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(
        optimizer, step_size=lr_scheduler_step, gamma=lr_scheduler_gamma)
    criterion = torch.nn.BCELoss()

    # Data
    train_idx = folds[fold][0]
    valid_idx = folds[fold][1]
    # Subset indexes into the shared in-memory arrays, so no per-fold copy of the
    # image array is made (fancy indexing `imgs[train_idx]` would copy it).
    train_dataset = torch.utils.data.Subset(
        MnistDataset(imgs=imgs, labels=labels, transform=train_transform), train_idx)
    valid_dataset = torch.utils.data.Subset(
        MnistDataset(imgs=imgs, labels=labels, transform=valid_transform), valid_idx)
    train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
    valid_loader = DataLoader(dataset=valid_dataset, batch_size=batch_size//4, shuffle=False)

    valid_loss_min = 999  ## if started in checkpoint change this to best model's loss (ex. valid_loss_min = 0.1989)
    for epoch in range(epochs) :
        if epoch <= -1 :  ## if started in checkpoint change this to best model's epoch (ex. epoch <= 24)
            continue

        # Train
        train_loss, train_acc = _run_epoch(
            model, train_loader, criterion,
            f'Train Epoch {epoch+1} / {epochs}', optimizer=optimizer)

        # Valid
        valid_loss, valid_acc = _run_epoch(
            model, valid_loader, criterion,
            f'Valid Epoch {epoch+1} / {epochs}')

        lr_scheduler.step()

        # Keep only the best checkpoint of this fold.
        if valid_loss < valid_loss_min :
            valid_loss_min = valid_loss
            # Match exactly the naming used by torch.save below for this fold.
            for old_ckpt in glob.glob(os.path.join(model_path, f'{fold}fold_*_silu.pth')) :  # if you want to train another model, change this
                # The old file is deliberately truncated to 0 bytes before removal
                # (NOTE(review): presumably to free Drive storage right away - confirm).
                open(old_ckpt, 'w').close()
                os.remove(old_ckpt)
            torch.save(model.state_dict(), f'{model_path}/{fold}fold_{epoch}epoch_{valid_loss:2.4f}_silu.pth') # if you want to train another model, change this

Loaded pretrained weights for efficientnet-b7


Train Epoch 25 / 25: 100%|██████████| 5000/5000 [33:18<00:00,  2.50batch/s, train_acc=0.928, train_loss=0.187]
Valid Epoch 25 / 25: 100%|██████████| 5000/5000 [06:21<00:00, 13.09batch/s, valid_acc=0.928, valid_loss=0.191]


## Set Test Data

In [6]:
# Load test images path
# Collect the test PNG paths in sorted order.
test_png_pattern = os.path.join(test_data_path, '*.png')
test_imgs_path = np.array(sorted(glob.glob(test_png_pattern)))
print(test_imgs_path)

['/content/drive/MyDrive/dirty_mnist/data/test_data/50000.png'
 '/content/drive/MyDrive/dirty_mnist/data/test_data/50001.png'
 '/content/drive/MyDrive/dirty_mnist/data/test_data/50002.png' ...
 '/content/drive/MyDrive/dirty_mnist/data/test_data/54997.png'
 '/content/drive/MyDrive/dirty_mnist/data/test_data/54998.png'
 '/content/drive/MyDrive/dirty_mnist/data/test_data/54999.png']


In [7]:
# Define test dataset
'''
To avoid running out of memory, this Dataset loads each image from its file.
The images are loaded as 3-channel COLOR images.
'''

class MnistDatasetFromFiles(Dataset) :
    """Dataset that reads each image from its file on access.

    Args:
        imgs_dir: indexable collection of image file paths (one path per sample).
        labels: optional indexable collection of label vectors, indexed like `imgs_dir`.
        transform: optional callable applied to each image before it is returned.
    """

    def __init__(self, imgs_dir=None, labels=None, transform=None) :
        self.imgs_dir = imgs_dir
        self.labels = labels
        self.transform = transform

    def __len__(self) :
        """Return the number of samples (the number of paths)."""
        return len(self.imgs_dir)

    def __getitem__(self, idx) :
        """Return `(image, label)` when labels were given, otherwise only `image`.

        Raises:
            FileNotFoundError: if the image file cannot be read.
        """
        img_file = self.imgs_dir[idx]
        img = cv2.imread(img_file, cv2.IMREAD_COLOR)
        # cv2.imread returns None on failure; fail here with the offending path
        # instead of passing None on to the transform or the caller.
        if img is None :
            raise FileNotFoundError(f'Failed to read image: {img_file}')
        if self.transform is not None :
            img = self.transform(img)
        if self.labels is not None :
            label = torch.FloatTensor(self.labels[idx])
            return img, label
        else :
            return img

## Set Testing Environment

In [8]:
# Set transform
# Test-time pipeline: resize, tensor conversion and normalization, with no random augmentation.
_test_steps = [
    T.ToPILImage(),
    T.Resize((331,331)),
    T.ToTensor(),
    T.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
]
test_transform = T.Compose(_test_steps)

# Set dataset and dataloader
# shuffle stays off so that predictions come out in the order of test_imgs_path.
test_dataset = MnistDatasetFromFiles(imgs_dir=test_imgs_path, transform=test_transform)
test_data_loader = DataLoader(test_dataset, batch_size=8, shuffle=False)

In [13]:
# Load best model's state dict
# Every .pth file under model/ is treated as one model to ensemble.
best_models = list(glob.glob('model/*.pth'))

best_models

['model/1fold_26epoch_0.1908_silu.pth',
 'model/3fold_24epoch_0.1937_silu.pth',
 'model/2fold_26epoch_0.1921_silu.pth',
 'model/4fold_24epoch_0.1911_silu.pth']

## Inference

In [14]:
# Inference
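'''
For the team ensemble, the probability distribution is saved as a .npy file instead of the final result.
The final result was produced by ensembling the inferences of each team member.
Note, however, that the final Private score comes from an ensemble that excluded the output of this code.
'''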

probs_list = []
preds_list = []
df_prediction = pd.read_csv(os.path.join(ROOT_PATH, "data/sample_submission.csv"))
n_samples = df_prediction.shape[0]
n_labels = df_prediction.shape[1] - 1  # every column except the first one

# Fail early with a clear message instead of an opaque np.concatenate error below.
if len(best_models) == 0 :
    raise FileNotFoundError('No model checkpoint found: best_models is empty')

for model_sd in best_models :
    model = MnistEfficientNet(in_channels=3).to(device)
    # map_location lets checkpoints saved on a GPU load on whatever `device` is.
    model.load_state_dict(torch.load(str(model_sd), map_location=device))
    model.eval()
    probs_array = np.zeros([n_samples, n_labels])
    preds_array = np.zeros([n_samples, n_labels])
    # Running write position; avoids hard-coding the loader's batch size here.
    offset = 0
    # Inference needs no gradients; no_grad keeps the forward pass from building a graph.
    with torch.no_grad(), \
            tqdm(test_data_loader,
                 total=len(test_data_loader),
                 unit='batch') as test_bar :
        for img in test_bar :
            probs = model(img.to(device)).cpu().numpy()
            n_batch = probs.shape[0]
            probs_array[offset : offset + n_batch, :] = probs
            preds_array[offset : offset + n_batch, :] = probs > 0.5
            offset += n_batch

    # Add a trailing model axis so the per-model results can be concatenated on axis 2.
    probs_list.append(probs_array[...,np.newaxis])
    preds_list.append(preds_array[...,np.newaxis])


# Average the probabilities over the models.
probs_array = np.concatenate(probs_list, axis = 2)
probs_mean = probs_array.mean(axis = 2)
np.save('b7_silu_probs_mean.npy', probs_mean)

# Average the thresholded predictions over the models (fraction of models voting 1).
preds_array = np.concatenate(preds_list, axis = 2)
preds_mean = preds_array.mean(axis = 2)
np.save('b7_silu_preds_mean.npy', preds_mean)

Loaded pretrained weights for efficientnet-b7


100%|██████████| 625/625 [45:38<00:00,  4.38s/batch]


Loaded pretrained weights for efficientnet-b7


100%|██████████| 625/625 [01:21<00:00,  7.64batch/s]


Loaded pretrained weights for efficientnet-b7


100%|██████████| 625/625 [01:21<00:00,  7.65batch/s]


Loaded pretrained weights for efficientnet-b7


100%|██████████| 625/625 [01:21<00:00,  7.65batch/s]


In [15]:
# Display the probabilities averaged over the loaded models.
probs_mean

array([[0.99812491, 0.00925773, 0.99730143, ..., 0.99830039, 0.03750102,
        0.84398993],
       [0.03853523, 0.4627129 , 0.66287056, ..., 0.00893928, 0.24977124,
        0.03209148],
       [0.02157063, 0.02526641, 0.61355481, ..., 0.00879913, 0.01638424,
        0.99865368],
       ...,
       [0.82898347, 0.01906913, 0.04364971, ..., 0.00350274, 0.11315994,
        0.99790683],
       [0.04948841, 0.0143809 , 0.9129397 , ..., 0.04467394, 0.10395029,
        0.85323882],
       [0.99786201, 0.93398415, 0.03226112, ..., 0.99815553, 0.99981292,
        0.99963774]])

In [16]:
# Display the thresholded predictions averaged over the loaded models.
preds_mean

array([[1.  , 0.  , 1.  , ..., 1.  , 0.  , 0.75],
       [0.  , 0.5 , 0.75, ..., 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.5 , ..., 0.  , 0.  , 1.  ],
       ...,
       [1.  , 0.  , 0.  , ..., 0.  , 0.  , 1.  ],
       [0.  , 0.  , 1.  , ..., 0.  , 0.  , 1.  ],
       [1.  , 1.  , 0.  , ..., 1.  , 1.  , 1.  ]])

In [17]:
# if you want to get prediction
'''
Note that this code is not the team's final result.
If you want the output of this code, run this cell to get a file that can be submitted.
'''

# Threshold the averaged probabilities at 0.5 and store them as 0/1 integers.
prediction = (probs_mean > 0.5).astype(int)  # also you can use "preds_mean > 0.5"
# Overwrite every column after the first one of the sample submission, then write the file.
df_prediction.iloc[:, 1:] = prediction
df_prediction.to_csv('b7_silu_prediction.csv', index=False)
df_prediction

Unnamed: 0,index,a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z
0,50000,1,0,1,0,1,1,0,1,1,0,0,0,0,1,0,1,0,0,0,1,1,1,0,1,0,1
1,50001,0,0,1,0,1,0,1,0,1,1,1,1,0,0,1,1,0,1,1,0,1,1,1,0,0,0
2,50002,0,0,1,1,1,0,1,0,1,0,1,1,0,0,0,1,0,1,1,0,1,0,1,0,0,1
3,50003,1,1,0,0,0,1,1,0,1,0,1,1,1,0,0,1,0,0,0,1,0,0,0,1,0,1
4,50004,0,0,1,0,1,1,0,0,0,0,0,1,1,0,1,0,1,0,1,1,1,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,54995,0,1,1,1,0,0,0,1,0,0,1,1,1,0,1,0,0,1,0,1,0,0,1,0,1,0
4996,54996,1,1,1,0,1,0,0,0,1,1,0,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1
4997,54997,1,0,0,1,0,1,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1,1,1,0,0,1
4998,54998,0,0,1,0,0,0,1,0,1,1,1,1,0,0,0,0,1,1,0,1,0,0,1,0,0,1
