# [모의 캐글-의료] 흉부 CT 코로나 감염 여부 분류
- 이미지 binary 분류 과제
- 담당: 이녕민M

## Import Libraries

In [1]:
# !apt-get update && apt-get install -y python3-opencv

In [2]:
# !pip install opencv-python

In [3]:
# !pip install sklearn

In [4]:
# !pip install pandas

In [5]:
# !pip install efficientnet_pytorch

In [None]:
# !pip install imgaug

In [2]:
import os, torch, copy, cv2, sys, random
# from datetime import datetime, timezone, timedelta
from PIL import Image
import numpy as np
import pandas as pd
from tqdm import tqdm
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as transforms
import matplotlib.pyplot as plt

## Set Arguments & hyperparameters

In [3]:
# 시드(seed) 설정

RANDOM_SEED = 2022

torch.manual_seed(RANDOM_SEED)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(RANDOM_SEED)
random.seed(RANDOM_SEED)

In [4]:
os.chdir("/USER/daeyeong")  # 기준 경로 변경

In [81]:
# parameters

### 데이터 디렉토리 설정 ###
DATA_DIR= 'data'
NUM_CLS = 2

EPOCHS = 60
BATCH_SIZE = 64
LEARNING_RATE = 0.0005
EARLY_STOPPING_PATIENCE = 20
INPUT_SHAPE = 128

os.environ["CUDA_VISIBLE_DEVICES"]="0"
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## Dataloader

#### Train & Validation Set loader

In [110]:
# from sklearn.model_selection import train_test_split
# tem = pd.read_csv(os.path.join(DATA_DIR, 'train1.csv'))
# X_train, X_val, y_train, y_val = train_test_split(tem, tem['COVID'], test_size=0.1, random_state=2022)

In [111]:
class CustomDataset(Dataset):
    def __init__(self, data_dir, mode, input_shape):
        self.data_dir = data_dir
        self.mode = mode
        self.input_shape = input_shape
        
        # Loading dataset
        self.db = self.data_loader()
        
        # Dataset split
        if self.mode == 'train':
            self.db = self.db[:int(len(self.db) * 0.9)]
        elif self.mode == 'val':
            self.db = self.db[int(len(self.db) * 0.9):]
            self.db.reset_index(inplace=True)
        else:
            print(f'!!! Invalid split {self.mode}... !!!')
            
        # Transform function
        self.transform = transforms.Compose([transforms.Resize(self.input_shape),
                                             transforms.ToTensor(),
                                             transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

    def data_loader(self):
        print('Loading ' + self.mode + ' dataset..')
        if not os.path.isdir(self.data_dir):
            print(f'!!! Cannot find {self.data_dir}... !!!')
            sys.exit()
        
        # (COVID : 1, No : 0)
        db = pd.read_csv(os.path.join(self.data_dir, 'train1.csv'))
        
        return db

    def __len__(self):
        return len(self.db)

    def __getitem__(self, index):
        data = copy.deepcopy(self.db.loc[index])

        # Loading image
        cvimg = cv2.imread(os.path.join(self.data_dir,'train',data['file_name']), cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        if not isinstance(cvimg, np.ndarray):
            raise IOError("Fail to read %s" % data['file_name'])

        # Preprocessing images
        trans_image = self.transform(Image.fromarray(cvimg))

        return trans_image, data['COVID']


In [None]:
from imgaug import augmentables
import imgaug as ia
import imgaug.augmenters as iaa

In [88]:
#augment시 사용
eq_img_list = []
class Augmentation:
    def __init__(self, data_dir, mode, input_shape):
        self.data_dir = data_dir
        self.mode = mode
        self.input_shape = input_shape
        self.img = None
    
        #이미지크기 저장공간

        self.db = self.data_loader()
        if self.mode == 'train':
            self.db = self.db[:int(len(self.db) * 0.9)]
        elif self.mode == 'val':
            self.db = self.db[int(len(self.db) * 0.9):]
            self.db.reset_index(inplace=True)
        else:
            print(f'!!! Invalid split {self.mode}... !!!')
            
        # Transform function
        self.transform = transforms.Compose([transforms.Resize(self.input_shape),
                                             transforms.ToTensor(),
                                             transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

    def data_loader(self):
        print('Loading ' + self.mode + ' dataset..')
        if not os.path.isdir(self.data_dir):
            print(f'!!! Cannot find {self.data_dir}... !!!')
            sys.exit()
        
        # (COVID : 1, No : 0)
        db = pd.read_csv(os.path.join(self.data_dir, 'train.csv'))
       
        return db
    def Equalization(self):
    #    // for cnt in tqdm(range(646)):
        for i in tqdm(range(646)):
            # img = eq_img_list
            db = pd.read_csv(os.path.join(self.data_dir, 'train.csv'))
            data = copy.deepcopy(db.loc[i])
            cvimg = cv2.imread(os.path.join(self.data_dir,'train',data['file_name']))
            cvimg_yuv = cv2.cvtColor(cvimg, cv2.COLOR_BGR2YUV)

            img_clahe = cvimg_yuv.copy()
            clahe = cv2.createCLAHE(clipLimit = 3.0 , tileGridSize=(8,8))
            img_clahe[:,:,0] = clahe.apply(img_clahe[:,:,0])
            img_clahe = cv2.cvtColor(img_clahe, cv2.COLOR_YUV2BGR)

                # img_eq = cv2_imshow(img_clahe)
            # cv2.waitKey()
            cv2.destroyAllWindows()
            eq_img_list.append(img_clahe)
            # os.makedirs('/content/drive/MyDrive/이어드림/project2/data/aug_img', exist_ok=True)
            cv2.imwrite(f'{self.data_dir}/train/{i + 646}.png', img_clahe)
            self.img = eq_img_list
        return eq_img_list

    def augmentation(self):
        
        for i in tqdm(range(646)):
            
            db = pd.read_csv(os.path.join(self.data_dir, 'train.csv'))
            data = copy.deepcopy(db.loc[i])
            sss =cv2.imread(os.path.join(self.data_dir,'train',data['file_name']))
            
            # cv2_imshow(sss)
            augmentation_resize = iaa.Sequential([
                            iaa.Resize({"height":384,"width":384},interpolation="cubic")
                        ])


            image_aug = augmentation_resize(image=sss)
            # cv2_imshow(image_aug)
            # sometimes = lambda aug: iaa.Sometimes(0.5, aug)
            for j in (range(3)):
                augmentation_resize = iaa.Sequential([
                                    # iaa.Affine(translate_percent={"x":(-0.5,0.5),"y":(-0.5,0.5)},rotate=(-2,2),scale=(0.5,2)),
                                    iaa.LinearContrast((0.75,1.45)),
                                    iaa.GaussianBlur((0.0,1.5)),
                                    iaa.Fliplr(0.5),
                                    # iaa.Flipud(0.3),
                                    # iaa.Affine(translate_px={"x": (-20, 20), "y": (-20, 20)}),
                                    # iaa.PerspectiveTransform(scale=(0.01, 0.15)),
                                    # iaa.PiecewiseAffine(scale=(0.01, 0.05)),
                                    # iaa.Sharpen(alpha=(0, 0.3), lightness=(0.75, 1.00)),
                                    # iaa.Emboss(alpha=(0, 1.0), strength=(0, 1.5)),
                                    ])
                    
                image_augs = augmentation_resize(image=image_aug)
            # cv2_imshow(image_augs)
                # os.makedirs('/content/drive/MyDrive/이어드림/project2/data/augment{}'.format(j), exist_ok=True)
                cv2.imwrite(f'{self.data_dir}/train/{(i)+(646)+(646*j)}.png', image_augs)

In [89]:
#data augm
if __name__ == '__main__': 
    temp = Augmentation(DATA_DIR,'train', INPUT_SHAPE)
    print('이미w지를 불러오는 중입니다.')
    temp.data_loader()
    print('*' * 30)
    print('Equalization중 입니다')
    # temp.Equalization()
    print('*' * 30)
    print('Equalization 성공')
    print('Augmentation중 입니다')
    temp.augmentation()

    print('*' * 30)
    del temp

  0% 3/646 [00:00<00:23, 27.32it/s]

Loading train dataset..
이미w지를 불러오는 중입니다.
Loading train dataset..
******************************
Equalization중 입니다
******************************
Equalization 성공
Augmentation중 입니다


100% 646/646 [00:17<00:00, 37.84it/s]

******************************





In [49]:
db = pd.read_csv(os.path.join(DATA_DIR, 'train.csv'))

In [50]:
db['COVID'][0]

0

In [76]:
645+(646*3)

2583

In [77]:
file_name = []
COVID = []
for i in range(2584):
    file_name.append(f'{i}.png')
    COVID.append(db['COVID'][i % 646])
    

In [78]:
df = pd.DataFrame({'file_name':file_name, 'COVID':COVID})
# df.sort_values(by=['file_name'], inplace=True)
df.to_csv('train1.csv', index=False)

## Model

In [112]:
import timm 

class Efficientnet(nn.Module):
    def __init__(self, num_classes):
        super(Efficientnet, self).__init__()
        self.efficientnet = timm.create_model('efficientnetv2_s', pretrained=False, num_classes=num_classes)
        self.softmax = nn.Softmax(dim=1)
        
    def forward(self, x):
        x = self.efficientnet(x)
        
        output = self.softmax(x)
        
        return output

## Utils
### EarlyStopper

In [113]:
class LossEarlyStopper():
    """Early stopper
    
    Attributes:
        patience (int): loss가 줄어들지 않아도 학습할 epoch 수
        patience_counter (int): loss 가 줄어들지 않을 때 마다 1씩 증가, 감소 시 0으로 리셋
        min_loss (float): 최소 loss
        stop (bool): True 일 때 학습 중단

    """

    def __init__(self, patience: int)-> None:
        self.patience = patience

        self.patience_counter = 0
        self.min_loss = np.Inf
        self.stop = False
        self.save_model = False

    def check_early_stopping(self, loss: float)-> None:
        """Early stopping 여부 판단"""  

        if self.min_loss == np.Inf:
            self.min_loss = loss
            return None

        elif loss > self.min_loss:
            self.patience_counter += 1
            msg = f"Early stopping counter {self.patience_counter}/{self.patience}"

            if self.patience_counter == self.patience:
                self.stop = True
                
        elif loss <= self.min_loss:
            self.patience_counter = 0
            self.save_model = True
            msg = f"Validation loss decreased {self.min_loss} -> {loss}"
            self.min_loss = loss
        
        print(msg)

### Trainer

In [114]:
class Trainer():
    """ epoch에 대한 학습 및 검증 절차 정의"""
    
    def __init__(self, loss_fn, model, device, metric_fn, optimizer=None, scheduler=None):
        """ 초기화
        """
        self.loss_fn = loss_fn
        self.model = model
        self.device = device
        self.optimizer = optimizer
        self.scheduler = scheduler
        self.metric_fn = metric_fn

    def train_epoch(self, dataloader, epoch_index):
        """ 한 epoch에서 수행되는 학습 절차"""
        
        self.model.train()
        train_total_loss = 0
        target_lst = []
        pred_lst = []
        prob_lst = []

        for batch_index, (img, label) in enumerate(dataloader):
            img = img.to(self.device)
            label = label.to(self.device).float()
            
            pred = self.model(img)
            
            loss = self.loss_fn(pred[:,1], label)
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
            self.scheduler.step()
            
            train_total_loss += loss.item()
            prob_lst.extend(pred[:, 1].cpu().tolist())
            target_lst.extend(label.cpu().tolist())
            pred_lst.extend(pred.argmax(dim=1).cpu().tolist())
        self.train_mean_loss = train_total_loss / batch_index
        self.train_score, f1 = self.metric_fn(y_pred=pred_lst, y_answer=target_lst)
        msg = f'Epoch {epoch_index}, Train loss: {self.train_mean_loss}, Acc: {self.train_score}, F1-Macro: {f1}'
        print(msg)

    def validate_epoch(self, dataloader, epoch_index):
        """ 한 epoch에서 수행되는 검증 절차
        """
        self.model.eval()
        val_total_loss = 0
        target_lst = []
        pred_lst = []
        prob_lst = []

        for batch_index, (img, label) in enumerate(dataloader):
            img = img.to(self.device)
            label = label.to(self.device).float()
            pred = self.model(img)
            
            loss = self.loss_fn(pred[:,1], label)
            val_total_loss += loss.item()
            prob_lst.extend(pred[:, 1].cpu().tolist())
            target_lst.extend(label.cpu().tolist())
            pred_lst.extend(pred.argmax(dim=1).cpu().tolist())
        self.val_mean_loss = val_total_loss / batch_index
        self.validation_score, f1 = self.metric_fn(y_pred=pred_lst, y_answer=target_lst)
        msg = f'Epoch {epoch_index}, Val loss: {self.val_mean_loss}, Acc: {self.validation_score}, F1-Macro: {f1}'
        print(msg)

### Metrics

In [115]:
from sklearn.metrics import accuracy_score, f1_score

def get_metric_fn(y_pred, y_answer):
    """ 성능을 반환하는 함수"""
    
    assert len(y_pred) == len(y_answer), 'The size of prediction and answer are not same.'
    accuracy = accuracy_score(y_answer, y_pred)
    f1 = f1_score(y_answer, y_pred, average='macro')
    return accuracy, f1

## Train
### 학습을 위한 객체 선언

#### Load Dataset & Dataloader

In [116]:
# Load dataset & dataloader
train_dataset = CustomDataset(data_dir=DATA_DIR, mode='train', input_shape=INPUT_SHAPE)
validation_dataset = CustomDataset(data_dir=DATA_DIR, mode='val', input_shape=INPUT_SHAPE)
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
validation_dataloader = DataLoader(validation_dataset, batch_size=BATCH_SIZE, shuffle=True)
print('Train set samples:',len(train_dataset),  'Val set samples:', len(validation_dataset))

Loading train dataset..
Loading val dataset..
Train set samples: 2325 Val set samples: 259


#### Load model and other utils

In [124]:
# Load Model
model = Efficientnet(NUM_CLS)
model = model.to(DEVICE)

# # Save Initial Model
# torch.save(model.state_dict(), 'initial.pt')

# Set optimizer, scheduler, loss function, metric function
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
scheduler =  optim.lr_scheduler.OneCycleLR(optimizer=optimizer, pct_start=0.1, div_factor=1e5, max_lr=0.0001, epochs=EPOCHS, steps_per_epoch=len(train_dataloader))
loss_fn = nn.BCELoss()
metric_fn = get_metric_fn


# Set trainer
trainer = Trainer(loss_fn, model, DEVICE, metric_fn, optimizer, scheduler)

# Set earlystopper
early_stopper = LossEarlyStopper(patience=EARLY_STOPPING_PATIENCE)

### epoch 단위 학습 진행

In [125]:
for epoch_index in tqdm(range(EPOCHS)):

    trainer.train_epoch(train_dataloader, epoch_index)
    trainer.validate_epoch(validation_dataloader, epoch_index)

    # early_stopping check
    early_stopper.check_early_stopping(loss=trainer.val_mean_loss)

    if early_stopper.stop:
        print('Early stopped')
        break

    if early_stopper.save_model:
        check_point = {
            'model': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'scheduler': scheduler.state_dict()
        }
        torch.save(check_point, 'best.pt')


  0% 0/60 [00:00<?, ?it/s]

Epoch 0, Train loss: 1.3299112369616826, Acc: 0.5221505376344086, F1-Macro: 0.49779633676103796


  2% 1/60 [04:13<4:09:01, 253.25s/it]

Epoch 0, Val loss: 1.021317020058632, Acc: 0.5173745173745173, F1-Macro: 0.5043403708069875
Epoch 1, Train loss: 1.447876963350508, Acc: 0.49376344086021506, F1-Macro: 0.491169600092524


  3% 2/60 [08:35<4:07:22, 255.90s/it]

Epoch 1, Val loss: 2.6491337716579437, Acc: 0.4864864864864865, F1-Macro: 0.4484685463598956
Early stopping counter 1/20
Epoch 2, Train loss: 1.7274970942073398, Acc: 0.46365591397849465, F1-Macro: 0.43586946179579633


  5% 3/60 [12:58<4:05:16, 258.18s/it]

Epoch 2, Val loss: 2.424583673477173, Acc: 0.49034749034749037, F1-Macro: 0.4436995574069253
Early stopping counter 2/20
Epoch 3, Train loss: 4.198280284802119, Acc: 0.46365591397849465, F1-Macro: 0.4157124780006082


  7% 4/60 [17:23<4:02:39, 259.99s/it]

Epoch 3, Val loss: 5.228681682841852, Acc: 0.4864864864864865, F1-Macro: 0.4228533132277792
Early stopping counter 3/20
Epoch 4, Train loss: 4.302237086825901, Acc: 0.4696774193548387, F1-Macro: 0.41547430949720054


  8% 5/60 [21:32<3:55:27, 256.87s/it]

Epoch 4, Val loss: 4.4040690660476685, Acc: 0.4942084942084942, F1-Macro: 0.4247469524084027
Early stopping counter 4/20
Epoch 5, Train loss: 4.05571730600463, Acc: 0.47139784946236557, F1-Macro: 0.41323577698554825


 10% 6/60 [25:45<3:50:05, 255.65s/it]

Epoch 5, Val loss: 4.88250333070755, Acc: 0.4980694980694981, F1-Macro: 0.42738095238095236
Early stopping counter 5/20
Epoch 6, Train loss: 3.815714564588335, Acc: 0.46580645161290324, F1-Macro: 0.40246154431922027


 12% 7/60 [29:23<3:35:46, 244.27s/it]

Epoch 6, Val loss: 3.904910087585449, Acc: 0.4980694980694981, F1-Macro: 0.42738095238095236
Early stopping counter 6/20
Epoch 7, Train loss: 3.394385861025916, Acc: 0.47053763440860213, F1-Macro: 0.4079391173807516


 13% 8/60 [33:38<3:34:38, 247.67s/it]

Epoch 7, Val loss: 3.6305618584156036, Acc: 0.49034749034749037, F1-Macro: 0.4185714285714286
Early stopping counter 7/20
Epoch 8, Train loss: 3.0016906758149466, Acc: 0.4666666666666667, F1-Macro: 0.3964548473698578


 15% 9/60 [38:06<3:35:40, 253.73s/it]

Epoch 8, Val loss: 4.270425587892532, Acc: 0.5019305019305019, F1-Macro: 0.42637136234869943
Early stopping counter 8/20
Epoch 9, Train loss: 2.637688837117619, Acc: 0.469247311827957, F1-Macro: 0.3981717662102725


 17% 10/60 [42:30<3:33:59, 256.79s/it]

Epoch 9, Val loss: 2.49759903550148, Acc: 0.4980694980694981, F1-Macro: 0.41618116243584413
Early stopping counter 9/20
Epoch 10, Train loss: 2.281357364522086, Acc: 0.4735483870967742, F1-Macro: 0.4062016435557292


 18% 11/60 [46:48<3:30:01, 257.18s/it]

Epoch 10, Val loss: 3.229373872280121, Acc: 0.5019305019305019, F1-Macro: 0.4300117713291366
Early stopping counter 10/20
Epoch 11, Train loss: 2.1037089940574436, Acc: 0.48, F1-Macro: 0.4217370938088423


 20% 12/60 [51:11<3:27:02, 258.81s/it]

Epoch 11, Val loss: 2.406519740819931, Acc: 0.4980694980694981, F1-Macro: 0.42738095238095236
Early stopping counter 11/20
Epoch 12, Train loss: 1.962478044960234, Acc: 0.48258064516129034, F1-Macro: 0.4499848483924843


 22% 13/60 [55:33<3:23:26, 259.71s/it]

Epoch 12, Val loss: 2.2889160364866257, Acc: 0.49034749034749037, F1-Macro: 0.448936170212766
Early stopping counter 12/20
Epoch 13, Train loss: 1.9364389065239165, Acc: 0.5083870967741936, F1-Macro: 0.4976441510200803


 23% 14/60 [59:55<3:19:42, 260.49s/it]

Epoch 13, Val loss: 2.7218008935451508, Acc: 0.5173745173745173, F1-Macro: 0.5173745173745173
Early stopping counter 13/20
Epoch 14, Train loss: 1.6767427954408858, Acc: 0.5105376344086021, F1-Macro: 0.5095751337901501


 25% 15/60 [1:03:38<3:06:56, 249.27s/it]

Epoch 14, Val loss: 4.7681746780872345, Acc: 0.5212355212355212, F1-Macro: 0.5186450839328538
Early stopping counter 14/20
Epoch 15, Train loss: 2.6101172500186496, Acc: 0.5131182795698924, F1-Macro: 0.5117959912327694


 27% 16/60 [1:08:02<3:06:04, 253.74s/it]

Epoch 15, Val loss: 9.141071557998657, Acc: 0.47876447876447875, F1-Macro: 0.4492052740189978
Early stopping counter 15/20
Epoch 16, Train loss: 6.789684918191698, Acc: 0.4666666666666667, F1-Macro: 0.43801169590643274


 28% 17/60 [1:12:10<3:00:33, 251.93s/it]

Epoch 16, Val loss: 8.525649428367615, Acc: 0.46332046332046334, F1-Macro: 0.4183724574696674
Early stopping counter 16/20
Epoch 17, Train loss: 6.256861183378431, Acc: 0.4726881720430108, F1-Macro: 0.4420291273538738


 30% 18/60 [1:16:32<2:58:25, 254.90s/it]

Epoch 17, Val loss: 7.579774975776672, Acc: 0.4671814671814672, F1-Macro: 0.4184131736526946
Early stopping counter 17/20
Epoch 18, Train loss: 5.567378633552128, Acc: 0.4735483870967742, F1-Macro: 0.4434126405452514


 32% 19/60 [1:21:02<2:57:23, 259.61s/it]

Epoch 18, Val loss: 6.753124475479126, Acc: 0.46332046332046334, F1-Macro: 0.4156156156156156
Early stopping counter 18/20
Epoch 19, Train loss: 4.9566852119233875, Acc: 0.47483870967741937, F1-Macro: 0.4458235181697735


 33% 20/60 [1:25:33<2:55:23, 263.08s/it]

Epoch 19, Val loss: 4.873050963506103, Acc: 0.47104247104247104, F1-Macro: 0.4212054481689911
Early stopping counter 19/20
Epoch 20, Train loss: 4.428588718175888, Acc: 0.47397849462365593, F1-Macro: 0.4439849238124569


 33% 20/60 [1:30:07<3:00:15, 270.40s/it]

Epoch 20, Val loss: 5.469532787799835, Acc: 0.4749034749034749, F1-Macro: 0.43223726627981945
Early stopping counter 20/20
Early stopped





## Inference
### 모델 로드

In [126]:
TRAINED_MODEL_PATH = 'best.pt'

### Load dataset

In [127]:
class TestDataset(Dataset):
    def __init__(self, data_dir, input_shape):
        self.data_dir = data_dir
        self.input_shape = input_shape
        
        # Loading dataset
        self.db = self.data_loader()
        
        # Transform function
        self.transform = transforms.Compose([transforms.Resize(self.input_shape),
                                             transforms.ToTensor(),
                                             transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

    def data_loader(self):
        print('Loading test dataset..')
        if not os.path.isdir(self.data_dir):
            print(f'!!! Cannot find {self.data_dir}... !!!')
            sys.exit()
        
        db = pd.read_csv(os.path.join(self.data_dir, 'sample_submission.csv'))
        return db
    
    def __len__(self):
        return len(self.db)
    
    def __getitem__(self, index):
        data = copy.deepcopy(self.db.loc[index])
        
        # Loading image
        cvimg = cv2.imread(os.path.join(self.data_dir,'test',data['file_name']), cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        if not isinstance(cvimg, np.ndarray):
            raise IOError("Fail to read %s" % data['file_name'])

        # Preprocessing images
        trans_image = self.transform(Image.fromarray(cvimg))

        return trans_image, data['file_name']

In [128]:
# Load dataset & dataloader
test_dataset = TestDataset(data_dir=DATA_DIR, input_shape=INPUT_SHAPE)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)

Loading test dataset..


### 추론 진행

In [129]:
model.load_state_dict(torch.load(TRAINED_MODEL_PATH)['model'])

# Prediction
file_lst = []
pred_lst = []
prob_lst = []
model.eval()
with torch.no_grad():
    for batch_index, (img, file_num) in tqdm(enumerate(test_dataloader)):
        img = img.to(DEVICE)
        pred = model(img)
        print(pred)
        file_lst.extend(list(file_num))
        pred_lst.extend(pred.argmax(dim=1).tolist())
        prob_lst.extend(pred[:, 1].tolist())

1it [00:06,  6.62s/it]

tensor([[9.2919e-02, 9.0708e-01],
        [4.4915e-01, 5.5085e-01],
        [1.0805e-01, 8.9195e-01],
        [2.0093e-01, 7.9907e-01],
        [5.8540e-02, 9.4146e-01],
        [1.1323e-01, 8.8677e-01],
        [2.0215e-01, 7.9785e-01],
        [1.3539e-01, 8.6461e-01],
        [1.8725e-01, 8.1275e-01],
        [3.5899e-01, 6.4101e-01],
        [1.1026e-01, 8.8974e-01],
        [1.7864e-01, 8.2136e-01],
        [1.4950e-01, 8.5050e-01],
        [8.9599e-01, 1.0401e-01],
        [1.2247e-01, 8.7753e-01],
        [1.5026e-01, 8.4974e-01],
        [1.1138e-01, 8.8862e-01],
        [1.5254e-01, 8.4746e-01],
        [2.0075e-01, 7.9925e-01],
        [1.2714e-01, 8.7286e-01],
        [2.1569e-01, 7.8431e-01],
        [1.3249e-01, 8.6751e-01],
        [1.1024e-01, 8.8976e-01],
        [1.2237e-01, 8.7763e-01],
        [9.9984e-01, 1.6165e-04],
        [1.7938e-01, 8.2062e-01],
        [1.1954e-01, 8.8046e-01],
        [2.3681e-01, 7.6319e-01],
        [9.9054e-01, 9.4551e-03],
        [1.410

2it [00:10,  5.17s/it]

tensor([[0.4210, 0.5790],
        [0.8189, 0.1811],
        [0.9514, 0.0486],
        [0.2170, 0.7830],
        [0.2356, 0.7644],
        [0.1252, 0.8748],
        [0.1045, 0.8955],
        [0.1260, 0.8740],
        [0.1887, 0.8113],
        [0.1993, 0.8007],
        [0.4402, 0.5598],
        [0.7066, 0.2934],
        [0.1330, 0.8670],
        [0.6236, 0.3764],
        [0.0816, 0.9184],
        [0.2396, 0.7604],
        [0.9286, 0.0714],
        [0.2138, 0.7862],
        [0.1890, 0.8110],
        [0.1721, 0.8279],
        [0.1084, 0.8916],
        [0.5921, 0.4079],
        [0.1301, 0.8699],
        [0.1341, 0.8659],
        [0.1522, 0.8478],
        [0.1230, 0.8770],
        [0.8489, 0.1511],
        [0.1353, 0.8647],
        [0.3471, 0.6529],
        [0.1074, 0.8926],
        [0.1474, 0.8526],
        [0.0958, 0.9042],
        [0.3897, 0.6103],
        [0.7763, 0.2237],
        [0.1601, 0.8399],
        [0.0771, 0.9229]], device='cuda:0')





### 결과 저장

In [130]:
df = pd.DataFrame({'file_name':file_lst, 'COVID':pred_lst})
# df.sort_values(by=['file_name'], inplace=True)
df.to_csv('prediction.csv', index=False)

앙상블을 위한 데이터 저장

In [28]:
model.load_state_dict(torch.load(TRAINED_MODEL_PATH)['model'])

# Prediction
file_lst = []
pred_lst = []
prob_lst = []
model.eval()
with torch.no_grad():
    for batch_index, (img, file_num) in tqdm(enumerate(test_dataloader)):
        img = img.to(DEVICE)
        pred = model(img)
        print(pred)
        file_lst.extend(list(file_num))
        pred_lst.extend(pred.tolist())
df = pd.DataFrame(pred_lst)
# df.sort_values(by=['file_name'], inplace=True)
df.to_csv('prediction111.csv', index=False)

Unnamed: 0,file_name,COVID
0,0.png,0
1,1.png,0
2,2.png,0
3,3.png,0
4,4.png,0
...,...,...
95,95.png,0
96,96.png,1
97,97.png,1
98,98.png,1
