# [모의 캐글-의료] 흉부 CT 코로나 감염 여부 분류
- 이미지 binary 분류 과제
- 담당: 이녕민M

## Import Libraries

In [1]:
import os, torch, copy, cv2, sys, random, math
# from datetime import datetime, timezone, timedelta
from PIL import Image
import numpy as np
import pandas as pd
from tqdm import tqdm
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as transforms
import gc
from torchvision import models
from sklearn.metrics import confusion_matrix

## Set Arguments & hyperparameters

In [2]:
# 시드(seed) 설정

RANDOM_SEED = 2022

torch.manual_seed(RANDOM_SEED)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(RANDOM_SEED)
random.seed(RANDOM_SEED)

In [3]:
# parameters

### 데이터 디렉토리 설정 ###
DATA_DIR= '/USER/Taeyun/'
NUM_CLS = 1

EPOCHS = 30
BATCH_SIZE = 32
LEARNING_RATE = 0.0005
EARLY_STOPPING_PATIENCE = 5
INPUT_SHAPE = 384

os.environ["CUDA_VISIBLE_DEVICES"]="0"
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## Dataloader

#### Train & Validation Set loader

In [112]:
####### 전체 upsample의 경우의 dataset
class CustomDataset(Dataset):
    def __init__(self, data_dir, mode, input_shape):
        self.data_dir = data_dir
        self.mode = mode
        self.input_shape = input_shape
        
        # Loading dataset
        self.db= self.data_loader()
        
        # Dataset split
        if self.mode == 'train':
            self.db = self.db.loc[:int(len(self.db)*0.7),:]
        elif self.mode == 'val':
            self.db = self.db.loc[int(len(self.db)*0.7):,:]
            self.db.reset_index(drop=True,inplace=True)
        else:
            print(f'!!! Invalid split {self.mode}... !!!')
            
        # Transform function
        self.transform = transforms.Compose([
                                             transforms.Resize(self.input_shape),
                                             transforms.ToTensor(),
                                             transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

    def data_loader(self):
        print('Loading ' + self.mode + ' dataset..')
        if not os.path.isdir(self.data_dir):
            print(f'!!! Cannot find {self.data_dir}... !!!')
            sys.exit()
        
        # (COVID : 1, No : 0)
        db = pd.read_csv(os.path.join(self.data_dir, 'train.csv'))
        new_db = pd.read_csv(os.path.join(self.data_dir,'new.csv'))
        
        return pd.concat([new_db,db],axis=0,ignore_index=True)

    def __len__(self):
        return len(self.db)

    def __getitem__(self, index):
        data = copy.deepcopy(self.db.loc[index])
        flag = int(data['file_name'].split('.')[0])>=646
        # Loading image
        if not flag:
            cvimg = cv2.imread(os.path.join(self.data_dir,'train',data['file_name']), cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        else:
            cvimg = cv2.imread(os.path.join(self.data_dir,'newimg',data['file_name']), cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        if not isinstance(cvimg, np.ndarray):
            raise IOError("Fail to read %s" % data['file_name'])

        # Preprocessing images
        trans_image = self.transform(Image.fromarray(cvimg))

        return trans_image, data['COVID']


## Model

## Utils
### EarlyStopper

In [5]:
class LossEarlyStopper():
    """Early stopper
    
    Attributes:
        patience (int): loss가 줄어들지 않아도 학습할 epoch 수
        patience_counter (int): loss 가 줄어들지 않을 때 마다 1씩 증가, 감소 시 0으로 리셋
        min_loss (float): 최소 loss
        stop (bool): True 일 때 학습 중단

    """

    def __init__(self, patience: int)-> None:
        self.patience = patience

        self.patience_counter = 0
        self.min_loss = np.Inf
        self.stop = False
        self.save_model = False

    def check_early_stopping(self, loss: float)-> None:
        """Early stopping 여부 판단"""  

        if self.min_loss == np.Inf:
            self.min_loss = loss
            return None

        elif loss > self.min_loss:
            self.patience_counter += 1
            msg = f"Early stopping counter {self.patience_counter}/{self.patience}"
            self.save_model=False

            if self.patience_counter == self.patience:
                self.stop = True
                
                
        elif loss <= self.min_loss:
            self.patience_counter = 0
            self.save_model = True
            msg = f"Validation loss decreased {self.min_loss} -> {loss}"
            self.min_loss = loss
        
        print(msg)

### Trainer

In [6]:
class Trainer():
    """ epoch에 대한 학습 및 검증 절차 정의"""
    
    def __init__(self, loss_fn, model, device, metric_fn, optimizer=None, scheduler=None):
        """ 초기화
        """
        self.loss_fn = loss_fn
        self.model = model
        self.device = device
        self.optimizer = optimizer
        self.scheduler = scheduler
        self.metric_fn = metric_fn

    def train_epoch(self, dataloader, epoch_index):
        """ 한 epoch에서 수행되는 학습 절차"""
        self.model.train()
        self.model.cuda()
        train_total_loss = 0
        target_lst = []
        pred_lst = []
        prob_lst = []
        for batch_index, (img, label) in enumerate(dataloader):
            
            img = img.cuda()
            label = label.cuda().float()
            
            output,aux1,aux2 = self.model(img)
            output_loss = self.loss_fn(torch.sigmoid(output).view(-1),label)
            aux1_loss = self.loss_fn(torch.sigmoid(aux1).view(-1),label)
            aux2_loss = self.loss_fn(torch.sigmoid(aux2).view(-1),label)
            
            # 기존Resnet model에서의 output
            #pred = torch.sigmoid(self.model(img)).view(-1)
            
            loss = output_loss+0.3*(aux1_loss+aux1_loss)
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
            self.scheduler.step()
            
            train_total_loss += loss.item()
            #prob_lst.extend(pred[:, 1].cpu().tolist())
            target_lst.extend(label.cpu().tolist())
            pred_lst.extend((torch.sigmoid(output).view(-1)>0.5).int().cpu().tolist())
            #del pred
            
        self.train_mean_loss = train_total_loss / batch_index
        self.train_score, f1 = self.metric_fn(y_pred=pred_lst, y_answer=target_lst)
        msg = f'Epoch {epoch_index}, Train loss: {self.train_mean_loss}, Acc: {self.train_score}, F1-Macro: {f1}'
        del img
        del label
        gc.collect()
        torch.cuda.empty_cache()
        print(msg)

    def validate_epoch(self, dataloader, epoch_index):
        """ 한 epoch에서 수행되는 검증 절차
        """
        self.model.eval()
        self.model.cuda()
        val_total_loss = 0
        target_lst = []
        pred_lst = []
        prob_lst = []

        for batch_index, (img, label) in enumerate(dataloader):
            img = img.cuda()
            label = label.float().cuda()          
            
            pred = torch.sigmoid(self.model(img)).view(-1)
            
            loss = self.loss_fn(pred,label)
            val_total_loss += loss.item()
            #prob_lst.extend(pred[:, 1].cpu().tolist())
            target_lst.extend(label.cpu().tolist())
            pred_lst.extend((pred>0.5).int().cpu().tolist())
        self.val_mean_loss = val_total_loss / batch_index
        self.validation_score, f1 = self.metric_fn(y_pred=pred_lst, y_answer=target_lst)
        msg = f'Epoch {epoch_index}, Val loss: {self.val_mean_loss}, Acc: {self.validation_score}, F1-Macro: {f1}'
        print(confusion_matrix(target_lst,pred_lst))
        print(msg)
        del img
        del label
        gc.collect()
        torch.cuda.empty_cache()
        return self.validation_score

### Metrics

In [7]:
from sklearn.metrics import accuracy_score, f1_score

def get_metric_fn(y_pred, y_answer):
    """ 성능을 반환하는 함수"""
    
    assert len(y_pred) == len(y_answer), 'The size of prediction and answer are not same.'
    accuracy = accuracy_score(y_answer, y_pred)
    f1 = f1_score(y_answer, y_pred, average='macro')
    return accuracy, f1

## Train
### 학습을 위한 객체 선언

#### Load Dataset & Dataloader

In [8]:
# Load dataset & dataloader
train_dataset = CustomDataset(data_dir=DATA_DIR, mode='train', input_shape=INPUT_SHAPE)
validation_dataset = CustomDataset(data_dir=DATA_DIR, mode='val', input_shape=INPUT_SHAPE)
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
validation_dataloader = DataLoader(validation_dataset, batch_size=BATCH_SIZE, shuffle=True)
print('Train set samples:',len(train_dataset),  'Val set samples:', len(validation_dataset))

Loading train dataset..
Loading val dataset..
Train set samples: 12481 Val set samples: 4168


In [9]:
model = models.GoogLeNet(num_classes=1,init_weights=False)
model

GoogLeNet(
  (conv1): BasicConv2d(
    (conv): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
  (conv2): BasicConv2d(
    (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (conv3): BasicConv2d(
    (conv): Conv2d(64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
  (inception3a): Inception(
    (branch1): BasicConv2d(
      (conv): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track

### GoogLeNet모델 수정 Dropout추가

In [None]:
model.inception3a.branch1.bn =nn.Sequential(
    nn.BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True),
    nn.Dropout2d(0.3)
)
model.inception3a.branch2[1].bn =nn.Sequential(
    nn.BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True),
    nn.Dropout2d(0.3)
)
model.inception3a.branch3[1].bn =nn.Sequential(
    nn.BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True),
    nn.Dropout2d(0.3)
)
model.inception3a.branch4[1].bn =nn.Sequential(
    nn.BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True),
    nn.Dropout2d(0.3)
)

model.inception3b.branch1.bn =nn.Sequential(
    nn.BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True),
    nn.Dropout2d(0.3)
)
model.inception3b.branch2[1].bn =nn.Sequential(
    nn.BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True),
    nn.Dropout2d(0.3)
)
model.inception3b.branch3[1].bn =nn.Sequential(
    nn.BatchNorm2d(96, eps=0.001, momentum=0.1, affine=True, track_running_stats=True),
    nn.Dropout2d(0.3)
)
model.inception3b.branch4[1].bn =nn.Sequential(
    nn.BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True),
    nn.Dropout2d(0.3)
)

model.inception4a.branch1.bn =nn.Sequential(
    nn.BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True),
    nn.Dropout2d(0.3)
)
model.inception4a.branch2[1].bn =nn.Sequential(
    nn.BatchNorm2d(208, eps=0.001, momentum=0.1, affine=True, track_running_stats=True),
    nn.Dropout2d(0.3)
)
model.inception4a.branch3[1].bn =nn.Sequential(
    nn.BatchNorm2d(48, eps=0.001, momentum=0.1, affine=True, track_running_stats=True),
    nn.Dropout2d(0.3)
)
model.inception4a.branch4[1].bn =nn.Sequential(
    nn.BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True),
    nn.Dropout2d(0.3)
)

model.inception4b.branch1.bn =nn.Sequential(
    nn.BatchNorm2d(160, eps=0.001, momentum=0.1, affine=True, track_running_stats=True),
    nn.Dropout2d(0.3)
)
model.inception4b.branch2[1].bn =nn.Sequential(
    nn.BatchNorm2d(224, eps=0.001, momentum=0.1, affine=True, track_running_stats=True),
    nn.Dropout2d(0.3)
)
model.inception4b.branch3[1].bn =nn.Sequential(
    nn.BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True),
    nn.Dropout2d(0.3)
)
model.inception4b.branch4[1].bn =nn.Sequential(
    nn.BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True),
    nn.Dropout2d(0.3)
)

model.inception4c.branch1.bn =nn.Sequential(
    nn.BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True),
    nn.Dropout2d(0.3)
)
model.inception4c.branch2[1].bn =nn.Sequential(
    nn.BatchNorm2d(256, eps=0.001, momentum=0.1, affine=True, track_running_stats=True),
    nn.Dropout2d(0.3)
)
model.inception4c.branch3[1].bn =nn.Sequential(
    nn.BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True),
    nn.Dropout2d(0.3)
)
model.inception4c.branch4[1].bn =nn.Sequential(
    nn.BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True),
    nn.Dropout2d(0.3)
)

model.inception4d.branch1.bn =nn.Sequential(
    nn.BatchNorm2d(112, eps=0.001, momentum=0.1, affine=True, track_running_stats=True),
    nn.Dropout2d(0.3)
)
model.inception4d.branch2[1].bn =nn.Sequential(
    nn.BatchNorm2d(288, eps=0.001, momentum=0.1, affine=True, track_running_stats=True),
    nn.Dropout2d(0.3)
)
model.inception4d.branch3[1].bn =nn.Sequential(
    nn.BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True),
    nn.Dropout2d(0.3)
)
model.inception4d.branch4[1].bn =nn.Sequential(
    nn.BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True),
    nn.Dropout2d(0.3)
)

model.inception4e.branch1.bn =nn.Sequential(
    nn.BatchNorm2d(256, eps=0.001, momentum=0.1, affine=True, track_running_stats=True),
    nn.Dropout2d(0.3)
)
model.inception4e.branch2[1].bn =nn.Sequential(
    nn.BatchNorm2d(320, eps=0.001, momentum=0.1, affine=True, track_running_stats=True),
    nn.Dropout2d(0.3)
)
model.inception4e.branch3[1].bn =nn.Sequential(
    nn.BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True),
    nn.Dropout2d(0.3)
)
model.inception4e.branch4[1].bn =nn.Sequential(
    nn.BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True),
    nn.Dropout2d(0.3)
)

model.inception5a.branch1.bn =nn.Sequential(
    nn.BatchNorm2d(256, eps=0.001, momentum=0.1, affine=True, track_running_stats=True),
    nn.Dropout2d(0.3)
)
model.inception5a.branch2[1].bn =nn.Sequential(
    nn.BatchNorm2d(320, eps=0.001, momentum=0.1, affine=True, track_running_stats=True),
    nn.Dropout2d(0.3)
)
model.inception5a.branch3[1].bn =nn.Sequential(
    nn.BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True),
    nn.Dropout2d(0.3)
)
model.inception5a.branch4[1].bn =nn.Sequential(
    nn.BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True),
    nn.Dropout2d(0.3)
)

model.inception5b.branch1.bn =nn.Sequential(
    nn.BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True),
    nn.Dropout2d(0.3)
)
model.inception5b.branch2[1].bn =nn.Sequential(
    nn.BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True),
    nn.Dropout2d(0.3)
)
model.inception5b.branch3[1].bn =nn.Sequential(
    nn.BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True),
    nn.Dropout2d(0.3)
)
model.inception5b.branch4[1].bn =nn.Sequential(
    nn.BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True),
    nn.Dropout2d(0.3)
)

#### Load model and other utils

In [11]:
# Load Model
#model = custom_CNN(NUM_CLS).to(DEVICE)

# # Save Initial Model
# torch.save(model.state_dict(), 'initial.pt')

# Set optimizer, scheduler, loss function, metric function
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
scheduler =  optim.lr_scheduler.OneCycleLR(optimizer=optimizer, pct_start=0.1, div_factor=1e5, max_lr=0.0001, epochs=EPOCHS, steps_per_epoch=len(train_dataloader))
loss_fn = nn.BCELoss()
metric_fn = get_metric_fn


# Set trainer
trainer = Trainer(loss_fn, model, DEVICE, metric_fn, optimizer, scheduler)

# Set earlystopper
early_stopper = LossEarlyStopper(patience=EARLY_STOPPING_PATIENCE)

### epoch 단위 학습 진행

In [12]:
val_acc=0
acc_cnt=0
for epoch_index in tqdm(range(EPOCHS)):
    trainer.train_epoch(train_dataloader, epoch_index)
    value = trainer.validate_epoch(validation_dataloader, epoch_index)
    
    
    #가장 높은 ACC모델 저장
    if value>=val_acc:
        if value>0.9999:
            acc_cnt+=1
            if acc_cnt==3:
                break
        else:
            acc_cnt=0
        val_acc = value
        check_point = {
            "model" : model.state_dict(),
            "optimizer" : optimizer.state_dict(),
            "scheduler" : scheduler.state_dict()            
        }
        torch.save(check_point,'best_acc.pt')
    # early_stopping check
    early_stopper.check_early_stopping(loss=trainer.val_mean_loss)
    if early_stopper.stop:
        print('Early stopped')
        break

    if early_stopper.save_model:
        check_point = {
            'model': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'scheduler': scheduler.state_dict()
        }
        torch.save(check_point, 'best_loss.pt')


  0% 0/30 [00:00<?, ?it/s]

Epoch 0, Train loss: 1.1061404610291505, Acc: 0.538418395961862, F1-Macro: 0.5378800330102946
[[2086    0]
 [2082    0]]
Epoch 0, Val loss: 0.8819122300698207, Acc: 0.5004798464491362, F1-Macro: 0.3335465302206588


  3% 1/30 [22:47<11:00:47, 1367.16s/it]

Epoch 1, Train loss: 0.6642398028801649, Acc: 0.791042384424325, F1-Macro: 0.7906193619169457
[[2086    0]
 [2066   16]]
Epoch 1, Val loss: 2.256603412444775, Acc: 0.5043186180422264, F1-Macro: 0.34202836271188825


  7% 2/30 [45:42<10:39:11, 1369.68s/it]

Early stopping counter 1/5
Epoch 2, Train loss: 0.3430234999419787, Acc: 0.9053761717811073, F1-Macro: 0.9053581979304646


 10% 3/30 [1:08:50<10:18:47, 1375.08s/it]

[[2086    0]
 [2071   11]]
Epoch 2, Val loss: 2.839922780257005, Acc: 0.5031190019193857, F1-Macro: 0.3393898442892446
Early stopping counter 2/5
Epoch 3, Train loss: 0.21465953846868033, Acc: 0.944475602916433, F1-Macro: 0.9444740502252007
[[2058   28]
 [1541  541]]
Epoch 3, Val loss: 1.2913694707246928, Acc: 0.6235604606525912, F1-Macro: 0.5660792114092457


 13% 4/30 [1:31:49<9:56:22, 1376.27s/it] 

Early stopping counter 3/5
Epoch 4, Train loss: 0.14340452122478148, Acc: 0.9642656838394359, F1-Macro: 0.9642647146131103


 17% 5/30 [1:58:49<10:03:57, 1449.49s/it]

[[2086    0]
 [1772  310]]
Epoch 4, Val loss: 1.9861489364734062, Acc: 0.5748560460652591, F1-Macro: 0.48054078872148975
Early stopping counter 4/5
Epoch 5, Train loss: 0.09206340958555348, Acc: 0.9765243169617819, F1-Macro: 0.9765234000540428
[[2025   61]
 [ 650 1432]]
Epoch 5, Val loss: 0.5883444483750142, Acc: 0.8294145873320538, F1-Macro: 0.8258902534138829
Validation loss decreased 0.8819122300698207 -> 0.5883444483750142


 20% 6/30 [2:34:15<11:00:53, 1652.24s/it]

Epoch 6, Train loss: 0.08114913748935439, Acc: 0.9804502844323372, F1-Macro: 0.9804497210492601
[[2073   13]
 [1087  995]]
Epoch 6, Val loss: 0.8250719235493587, Acc: 0.736084452975048, F1-Macro: 0.7171646882753635
Early stopping counter 1/5


 23% 7/30 [3:09:24<11:25:56, 1789.39s/it]

Epoch 7, Train loss: 0.05820279459600361, Acc: 0.986860027241407, F1-Macro: 0.9868597720714671
[[2066   20]
 [1392  690]]
Epoch 7, Val loss: 1.5043129766216645, Acc: 0.6612284069097889, F1-Macro: 0.6197897931422286
Early stopping counter 2/5


 27% 8/30 [3:44:13<11:29:04, 1879.31s/it]

Epoch 8, Train loss: 0.04851319607935296, Acc: 0.9891835590096948, F1-Macro: 0.9891833854165393
[[2062   24]
 [1342  740]]
Epoch 8, Val loss: 1.438202493924361, Acc: 0.6722648752399232, F1-Macro: 0.6356060402418247
Early stopping counter 3/5


 30% 9/30 [4:18:20<11:15:18, 1929.47s/it]

Epoch 9, Train loss: 0.04432718081680389, Acc: 0.9902251422161685, F1-Macro: 0.9902249659490716
[[2076   10]
 [1747  335]]
Epoch 9, Val loss: 2.2514071294894586, Acc: 0.5784548944337812, F1-Macro: 0.48935897229390996
Early stopping counter 4/5


 33% 10/30 [4:50:28<10:43:03, 1929.16s/it]

Epoch 10, Train loss: 0.053289747625538626, Acc: 0.9913468472077558, F1-Macro: 0.9913467444965869


 33% 10/30 [5:12:34<10:25:08, 1875.41s/it]


KeyboardInterrupt: 

## Inference
### 모델 로드

In [102]:
LOSS_MODEL_PATH = 'best_loss.pt'
ACC_MODEL_PATH = 'best_acc.pt'

### Load dataset

In [103]:
class TestDataset(Dataset):
    def __init__(self, data_dir, input_shape):
        self.data_dir = data_dir
        self.input_shape = input_shape
        
        # Loading dataset
        self.db = self.data_loader()
        
        # Transform function
        self.transform = transforms.Compose([transforms.Resize(self.input_shape),
                                             transforms.ToTensor(),
                                             transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

    def data_loader(self):
        print('Loading test dataset..')
        if not os.path.isdir(self.data_dir):
            print(f'!!! Cannot find {self.data_dir}... !!!')
            sys.exit()
        
        db = pd.read_csv(os.path.join(self.data_dir, 'sample_submission.csv'))
        return db
    
    def __len__(self):
        return len(self.db)
    
    def __getitem__(self, index):
        data = copy.deepcopy(self.db.loc[index])
        
        # Loading image
        cvimg = cv2.imread(os.path.join(self.data_dir,'test',data['file_name']), cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        if not isinstance(cvimg, np.ndarray):
            raise IOError("Fail to read %s" % data['file_name'])

        # Preprocessing images
        trans_image = self.transform(Image.fromarray(cvimg))

        return trans_image, data['file_name']

In [104]:
# Load dataset & dataloader
test_dataset = TestDataset(data_dir=DATA_DIR, input_shape=INPUT_SHAPE)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)

Loading test dataset..


### 추론 진행& 결과저장

In [105]:
model.load_state_dict(torch.load(LOSS_MODEL_PATH)['model'])

# Prediction
file_lst = []
pred_lst = []
prob_lst = []
model.eval()
with torch.no_grad():
    model.cuda()
    for batch_index, (img, file_num) in tqdm(enumerate(test_dataloader)):
        img = img.to(DEVICE)
        pred = torch.sigmoid(model(img)).view(-1)
        file_lst.extend(list(file_num))
        pred_lst.extend((pred>0.5).int().tolist())
        #prob_lst.extend(pred[:, 1].tolist())

4it [00:06,  1.75s/it]


In [106]:
df = pd.DataFrame({'file_name':file_lst, 'COVID':pred_lst})
# df.sort_values(by=['file_name'], inplace=True)
df.to_csv('loss_prediction.csv', index=False)

In [107]:
model.load_state_dict(torch.load(ACC_MODEL_PATH)['model'])

# Prediction
file_lst = []
pred_lst = []
prob_lst = []
model.eval()
with torch.no_grad():
    model.cuda()
    for batch_index, (img, file_num) in tqdm(enumerate(test_dataloader)):
        img = img.to(DEVICE)
        pred = torch.sigmoid(model(img)).view(-1)
        file_lst.extend(list(file_num))
        pred_lst.extend((pred>0.5).int().tolist())
        #prob_lst.extend(pred[:, 1].tolist())

4it [00:06,  1.69s/it]


In [108]:
df = pd.DataFrame({'file_name':file_lst, 'COVID':pred_lst})
# df.sort_values(by=['file_name'], inplace=True)
df.to_csv('acc_prediction.csv', index=False)