## Import

In [1]:
import random
import pandas as pd
import numpy as np
import os
import cv2
from tqdm.auto import tqdm

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
import torchvision.transforms as transforms

import torchvision.models as models

from sklearn import metrics
from sklearn.preprocessing import StandardScaler, LabelEncoder

from sklearn.model_selection import train_test_split

import warnings
warnings.filterwarnings(action='ignore') 

In [2]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [3]:
device

device(type='cuda')

## Hyperparameter Setting

In [4]:
CFG = {
    'IMG_SIZE':224, # fixed
    'EPOCHS':10,
    'LEARNING_RATE':1e-4,
    'BATCH_SIZE':32,
    'SEED':40
}

## Fixed RandomSeed

In [5]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(CFG['SEED']) # Seed 고정

## Data Pre-processing
#### 1. Load Dataframe
#### 2. 결측치 보완
#### 3. Train / Validation Split
#### 4. Numeric Feature Scaling / Categorical Featrue Label-Encoding

In [6]:
train_df = pd.read_csv('C:/Users/sanda/Desktop/dacon/train.csv')
test_df = pd.read_csv('C:/Users/sanda/Desktop/dacon/test.csv')

In [7]:
train_df.head()

Unnamed: 0,ID,img_path,mask_path,나이,수술연월일,진단명,암의 위치,암의 개수,암의 장경,NG,...,ER_Allred_score,PR,PR_Allred_score,KI-67_LI_percent,HER2,HER2_IHC,HER2_SISH,HER2_SISH_ratio,BRCA_mutation,N_category
0,BC_01_0001,./train_imgs/BC_01_0001.png,-,63,2015-10-23,1,2,1,19.0,2.0,...,8.0,1.0,6.0,12.0,0.0,1.0,,,,0
1,BC_01_0002,./train_imgs/BC_01_0002.png,-,51,2015-10-28,1,1,1,22.0,3.0,...,,0.0,,70.0,0.0,0.0,,,,1
2,BC_01_0003,./train_imgs/BC_01_0003.png,-,37,2015-10-29,1,2,1,,2.0,...,7.0,1.0,4.0,7.0,0.0,1.0,,,0.0,0
3,BC_01_0004,./train_imgs/BC_01_0004.png,-,54,2016-03-08,1,2,1,0.0,3.0,...,,0.0,,1.0,1.0,3.0,,,,0
4,BC_01_0005,./train_imgs/BC_01_0005.png,-,57,2015-10-30,1,2,1,8.0,2.0,...,8.0,0.0,,8.0,1.0,2.0,1.0,5.44,,0


In [8]:
test_df.head()

Unnamed: 0,ID,img_path,나이,수술연월일,진단명,암의 위치,암의 개수,암의 장경,NG,HG,...,ER,ER_Allred_score,PR,PR_Allred_score,KI-67_LI_percent,HER2,HER2_IHC,HER2_SISH,HER2_SISH_ratio,BRCA_mutation
0,BC_01_0011,./test_imgs/BC_01_0011.png,55,2015-11-17,2,2,1,23.0,2.0,2.0,...,1.0,8.0,0.0,2.0,5.0,0.0,1.0,,,
1,BC_01_0220,./test_imgs/BC_01_0220.png,43,2020-06-09,4,2,1,13.0,3.0,2.0,...,1.0,4.0,1.0,8.0,8.67,0.0,0.0,,,
2,BC_01_0233,./test_imgs/BC_01_0233.png,76,2020-05-14,1,1,1,,,,...,1.0,6.0,1.0,6.0,,,2.0,,,
3,BC_01_0258,./test_imgs/BC_01_0258.png,58,2020-05-20,1,2,1,1.3,2.0,2.0,...,1.0,7.0,0.0,,21.17,1.0,3.0,,,0.0
4,BC_01_0260,./test_imgs/BC_01_0260.png,56,2020-05-20,1,2,2,15.0,3.0,3.0,...,1.0,8.0,1.0,3.0,20.57,1.0,3.0,,,


In [9]:
train_df['암의 장경'] = train_df['암의 장경'].fillna(train_df['암의 장경'].mean())
train_df = train_df.fillna(0)

test_df['암의 장경'] = test_df['암의 장경'].fillna(train_df['암의 장경'].mean())
test_df = test_df.fillna(0)

In [10]:
# train_df, val_df, train_labels, val_labels = train_test_split(
#                                                     train_df.drop(columns=['N_category']), 
#                                                     train_df['N_category'], 
#                                                     test_size=0.2, 
#                                                     random_state=CFG['SEED']
#                                                 )

In [11]:
train_df, val_df, train_labels, val_labels = train_test_split(
                                                    train_df.drop(columns=['N_category']), 
                                                    train_df['N_category'], 
                                                    test_size=0.2, 
                                                    random_state=CFG['SEED'],
                                                    stratify=train_df['N_category']
                                                    
                                                )

In [12]:
def get_values(value):
    return value.values.reshape(-1, 1)

numeric_cols = ['나이', '암의 장경', 'ER_Allred_score', 'PR_Allred_score', 'KI-67_LI_percent', 'HER2_SISH_ratio']
ignore_cols = ['ID', 'img_path', 'mask_path', '수술연월일', 'N_category']

for col in train_df.columns:
    if col in ignore_cols:
        continue
    if col in numeric_cols:
        scaler = StandardScaler()
        train_df[col] = scaler.fit_transform(get_values(train_df[col]))
        val_df[col] = scaler.transform(get_values(val_df[col]))
        test_df[col] = scaler.transform(get_values(test_df[col]))
    else:
        le = LabelEncoder()
        train_df[col] = le.fit_transform(get_values(train_df[col]))
        val_df[col] = le.transform(get_values(val_df[col]))
        test_df[col] = le.transform(get_values(test_df[col]))

## CustomDataset

In [13]:
class CustomDataset(Dataset):
    def __init__(self, medical_df, labels, transforms=None):
        self.medical_df = medical_df
        self.transforms = transforms
        self.labels = labels
        
    def __getitem__(self, index):
        img_path = self.medical_df['img_path'].iloc[index]
        img_folder = img_path.split('/')[-2]
        img_folder = img_folder + '_HnE_vector'
        img_name = img_path.split('/')[-1]
        img_name = img_name.replace('png' , 'npy')
        
        img_path = 'C:/Users/sanda/Desktop/dacon/' + img_folder + '/' + img_name
        image = np.load(img_path) # fixed
        #image = cv2.imread(img_path)
        #image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        
        if self.transforms is not None:
            image = self.transforms(image=image)['image']
                
        if self.labels is not None:
            tabular = torch.Tensor(self.medical_df.drop(columns=['ID', 'img_path', 'mask_path', '수술연월일']).iloc[index])
            label = self.labels[index]
            return image, tabular, label
        else:
            tabular = torch.Tensor(self.medical_df.drop(columns=['ID', 'img_path', '수술연월일']).iloc[index])
            return image, tabular
        
    def __len__(self):
        return len(self.medical_df)

In [14]:
# # fixed

# train_transforms = A.Compose([
#                             A.HorizontalFlip(),
#                             A.VerticalFlip(),
#                             A.Rotate(limit=90, border_mode=cv2.BORDER_CONSTANT,p=0.3),
#                             A.Resize(CFG['IMG_SIZE'],CFG['IMG_SIZE']),
#                             A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=255.0, always_apply=False, p=1.0),
#                             A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=255.0, always_apply=True, p=1.0),
#                             ToTensorV2()
#                             ])

# test_transforms = A.Compose([
#                             A.Resize(CFG['IMG_SIZE'],CFG['IMG_SIZE']),
#                             A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=255.0, always_apply=False, p=1.0),
#                             A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=255.0, always_apply=True, p=1.0),
#                             ToTensorV2()
#                             ])

In [15]:
T = transforms.Compose([
                        
                        #transforms.Grayscale(num_output_channels=1),
                        transforms.RandomRotation(degrees=90),
                        transforms.RandomHorizontalFlip(p=0.5),
                        transforms.RandomVerticalFlip(p=0.5),
                        #transforms.ColorJitter(brightness=0.1, contrast=0.2, saturation=0.2, hue=0),
                        #transforms.Resize(image_resize), # (h, w) 순서
                        transforms.ToTensor(),
                        transforms.Normalize((0.5), (0.5))
                   
                       ])

valid_T = transforms.Compose([
                        
                        #transforms.Grayscale(num_output_channels=1),
                        #transforms.Resize(image_resize), # (h, w) 순서
                        transforms.ToTensor(),
                        transforms.Normalize((0.5), (0.5))

                       ])

In [16]:
from torchvision.datasets import ImageFolder

img_path = 'C:\\Users\\sanda\\Desktop\\dacon\\traing_imgs_HnE_MIL224'

#train_dataset = CustomDataset(train_df, train_labels.values, train_transforms)
train_dataset = ImageFolder(root=os.path.join(img_path,'train'), transform=T)
train_loader = DataLoader(train_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=True, num_workers=0)


#val_dataset = CustomDataset(val_df, val_labels.values, test_transforms)
val_dataset = ImageFolder(root=os.path.join(img_path,'val'),transform =valid_T)
val_loader = DataLoader(val_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False, num_workers=0)


## Model Architecture

In [17]:
# class ImgFeatureExtractor(nn.Module):
#     def __init__(self):
#         super(ImgFeatureExtractor, self).__init__()
#         #self.backbone = models.efficientnet_b0(pretrained=True) # fixed
#         self.backbone = models.convnext_tiny(pretrained=False)
#         #self.backbone = models.convnext_base(pretrained=False)
#         self.first_conv = nn.Conv2d(96,3,kernel_size=(3,3)) #fixed
#         #self.embedding = nn.Linear(1000,512) # fixed
#         self.embedding = nn.Linear(1000,128) 
#     def forward(self, x):
#         x = self.first_conv(x) #fixed
#         x = self.backbone(x)
#         x = self.embedding(x) #fixed
#         return x

In [18]:
# class ImgFeatureExtractor(nn.Module):
#     def __init__(self):
#         super(ImgFeatureExtractor, self).__init__()
#         #self.backbone = models.efficientnet_b0(pretrained=True) # fixed
#         self.backbone = models.convnext_tiny(pretrained=True)
#         #self.backbone = models.convnext_base(pretrained=False)
#         self.first_conv = nn.Conv2d(96,3,kernel_size=(3,3)) #fixed
#         #self.embedding = nn.Linear(1000,512) # fixed
#         #self.embedding = nn.Linear(1000,128) 
#     def forward(self, x):
#         x = self.first_conv(x) #fixed
#         x = self.backbone(x)
#         #x = self.embedding(x) #fixed
#         return x

In [19]:
# class TabularFeatureExtractor(nn.Module):
#     def __init__(self):
#         super(TabularFeatureExtractor, self).__init__() #fixed
#         self.embedding = nn.Sequential(
#               #nn.Linear(in_features=23, out_features=128)  
#             nn.Linear(in_features=23, out_features=128), 
#             nn.BatchNorm1d(128),
#             nn.LeakyReLU(),
#             nn.Linear(in_features=128, out_features=256),
#             nn.BatchNorm1d(256),
#             nn.LeakyReLU(),
#             nn.Linear(in_features=256, out_features=512),
#             nn.BatchNorm1d(512),
#             nn.LeakyReLU(),
#             nn.Linear(in_features=512, out_features=512)
#         )
        
#     def forward(self, x):
#         x = self.embedding(x)
#         return x

In [20]:
# class ClassificationModel(nn.Module):
#     def __init__(self):
#         super(ClassificationModel, self).__init__()
#         self.img_feature_extractor = ImgFeatureExtractor()
#         self.tabular_feature_extractor = TabularFeatureExtractor()
#         self.classifier = nn.Sequential(
#             nn.Linear(in_features=640, out_features=1), # fixed
#             #nn.Linear(in_features=1024, out_features=1),
#             nn.Sigmoid(),
#         )
        
#     def forward(self, img, tabular):
#         img_feature = self.img_feature_extractor(img)
#         tabular_feature = self.tabular_feature_extractor(tabular)
#         feature = torch.cat([img_feature, tabular_feature], dim=-1)
#         output = self.classifier(feature)
#         return output

In [21]:
# class ClassificationModel(nn.Module):
#     def __init__(self):
#         super(ClassificationModel, self).__init__()
#         self.img_feature_extractor = ImgFeatureExtractor()
#         #self.tabular_feature_extractor = TabularFeatureExtractor()
#         self.classifier = nn.Sequential(
#             nn.Linear(in_features=1000, out_features=1), # fixed
#             #nn.Linear(in_features=1024, out_features=1),
#             nn.Sigmoid(),
#         )
        
#     def forward(self, img, tabular):
#         img_feature = self.img_feature_extractor(img)
#         #tabular_feature = self.tabular_feature_extractor(tabular)
#         #feature = torch.cat([img_feature, tabular_feature], dim=-1)
#         output = self.classifier(img_feature)
#         return output

In [22]:
# class ImgFeatureExtractor(nn.Module):
#     def __init__(self):
#         super(ImgFeatureExtractor, self).__init__()
#         #self.backbone = models.efficientnet_b0(pretrained=True) # fixed
#         self.backbone = models.convnext_tiny(pretrained=False)
#         #self.backbone = models.convnext_base(pretrained=False)
#         self.first_conv = nn.Conv2d(96,3,kernel_size=(3,3)) #fixed
#         #self.embedding = nn.Linear(1000,512) # fixed
#         #self.embedding = nn.Linear(1000,128)
#         self.classifier = nn.Sequential(
#             nn.Linear(in_features=1000, out_features=1), # fixed
#             #nn.Linear(in_features=1024, out_features=1),
#             nn.Sigmoid(),
#         )
#     def forward(self, x):
#         x = self.first_conv(x) #fixed
#         x = self.backbone(x)
#         #x = self.embedding(x) #fixed
#         x = self.classifier(x)
#         return x

In [23]:
# class ImgFeatureExtractor(nn.Module):
#     def __init__(self):
#         super(ImgFeatureExtractor, self).__init__()
#         #self.backbone = models.efficientnet_b0(pretrained=True) # fixed
#         self.backbone = models.resnet50(pretrained=True)
#         self.backbone.conv1 = nn.Conv2d(96, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
#         self.backbone.avgpool = nn.AdaptiveMaxPool2d(output_size=(1, 1))
#         self.backbone.fc = nn.Linear(in_features=2048, out_features=1)
# #         self.classifier = nn.Sequential(
# #             nn.Linear(in_features=1000, out_features=1), # fixed
# #             #nn.Linear(in_features=1024, out_features=1),
# #             nn.Sigmoid(),
# #         )
#     def forward(self, x):
#         x = self.backbone(x)
#         #x = self.embedding(x) #fixed
#         #x = self.classifier(x)
#         return x

In [24]:
import timm 

class model_MIL(nn.Module):
    def __init__(self):
        super().__init__()
        #self.backbone = models.efficientnet_b0(pretrained=True) # fixed
        self.backbone = timm.create_model('resnet50', pretrained=True , num_classes = 1)
        self.clf = nn.Sigmoid()
        
#         self.classifier = nn.Sequential(
#             nn.Linear(in_features=1000, out_features=1), # fixed
#             #nn.Linear(in_features=1024, out_features=1),
#             nn.Sigmoid(),
#         )
    def forward(self, x):
        x = self.backbone(x)
        x = self.clf(x)
        return x

## Train

In [25]:
# def train(model, optimizer, train_loader, val_loader, scheduler, device):
#     model.to(device)
#     criterion = nn.BCEWithLogitsLoss().to(device)
    
#     best_score = 0
#     best_model = None
    
#     for epoch in range(1, CFG['EPOCHS']+1):
#         model.train()
#         train_loss = []
#         for img, tabular, label in tqdm(iter(train_loader)):
#             img = img.float().to(device)
#             tabular = tabular.float().to(device)
#             label = label.float().to(device)
            
#             optimizer.zero_grad()
            
#             model_pred = model(img, tabular)
            
#             loss = criterion(model_pred, label.reshape(-1,1))
            
#             loss.backward()
#             optimizer.step()
            
#             train_loss.append(loss.item())
        
#         val_loss, val_score = validation(model, criterion, val_loader, device)
#         print(f'Epoch [{epoch}], Train Loss : [{np.mean(train_loss):.5f}] Val Loss : [{val_loss:.5f}] Val Score : [{val_score:.5f}]')
        
#         if scheduler is not None:
#             scheduler.step(val_score)
        
#         if best_score < val_score:
#             best_score = val_score
#             best_model = model
#             print("best model 저장됨")
    
#     return best_model

In [26]:
import copy

def train(model, optimizer, train_loader, val_loader, scheduler, device):
    
    model.to(device)
    criterion = nn.BCEWithLogitsLoss().to(device) # fixed
    #criterion = nn.BCELoss().to(device)
    
    best_score = 0
    best_model = None
    
    for epoch in range(1, CFG['EPOCHS']+1):
        model.train()
        
        train_loss = []
        #for img, tabular, label in tqdm(iter(train_loader)): # fixed
        for img, label in tqdm(iter(train_loader)):
            img = img.float().to(device)
            #tabular = tabular.float().to(device)
            label = label.float().to(device)
            
            optimizer.zero_grad()
            
            model_pred = model(img)
            
            loss = criterion(model_pred, label.reshape(-1,1))
            
            loss.backward()
            optimizer.step()
            
            train_loss.append(loss.item())
        
        val_loss, val_score = validation(model, criterion, val_loader, device)
        print(f'Epoch [{epoch}], Train Loss : [{np.mean(train_loss):.5f}] Val Loss : [{val_loss:.5f}] Val Score : [{val_score:.5f}]')
        
        if scheduler is not None:
            scheduler.step(val_score)
        
        if best_score < val_score:
            best_score = val_score
            best_model = model
            best_model_weights = copy.deepcopy(best_model.state_dict())
            torch.save(best_model_weights,os.path.join('./MIL_model_trained2.pt'))
            print("best model 저장됨")
        
    return best_model

In [27]:
# def validation(model, criterion, val_loader, device):
#     model.eval()
#     pred_labels = []
#     true_labels = []
#     val_loss = []
#     threshold = 0.5
#     with torch.no_grad():
#         for img, tabular, label in tqdm(iter(val_loader)):
#             true_labels += label.tolist()
            
#             img = img.float().to(device)
#             tabular = tabular.float().to(device)
#             label = label.float().to(device)
            
#             model_pred = model(img, tabular)
            
#             loss = criterion(model_pred, label.reshape(-1,1))
            
#             val_loss.append(loss.item())
            
#             model_pred = model_pred.squeeze(1).to('cpu')  
#             pred_labels += model_pred.tolist()
    
#     pred_labels = np.where(np.array(pred_labels) > threshold, 1, 0)
#     val_score = metrics.f1_score(y_true=true_labels, y_pred=pred_labels, average='macro')
#     return np.mean(val_loss), val_score

In [28]:
def validation(model, criterion, val_loader, device):
    model.eval()
    pred_labels = []
    true_labels = []
    val_loss = []
    threshold = 0.5
    with torch.no_grad():
        #for img, tabular, label in tqdm(iter(val_loader)): #fixed
        for img, label in tqdm(iter(val_loader)):    
            true_labels += label.tolist()
            
            img = img.float().to(device)
            #tabular = tabular.float().to(device)
            label = label.float().to(device)
            
            model_pred = model(img)
            
            loss = criterion(model_pred, label.reshape(-1,1))
            
            val_loss.append(loss.item())
            
            model_pred = model_pred.squeeze(1).to('cpu')  
            pred_labels += model_pred.tolist()
    
    pred_labels = np.where(np.array(pred_labels) > threshold, 1, 0)
    val_score = metrics.f1_score(y_true=true_labels, y_pred=pred_labels, average='macro')
    return np.mean(val_loss), val_score

## Run!!

In [29]:
# model = nn.DataParallel(ClassificationModel())
# model.eval()
# optimizer = torch.optim.Adam(params = model.parameters(), lr = CFG["LEARNING_RATE"])
# scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=1, threshold_mode='abs',min_lr=1e-8, verbose=True)

# infer_model = train(model, optimizer, train_loader, val_loader, scheduler, device)

In [30]:
model = model_MIL()
#model.eval()
optimizer = torch.optim.Adam(params = model.parameters(), lr = CFG["LEARNING_RATE"])
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=1, threshold_mode='abs',min_lr=1e-8, verbose=True)

infer_model = train(model, optimizer, train_loader, val_loader, scheduler, device)

  0%|          | 0/1606 [00:00<?, ?it/s]

  0%|          | 0/531 [00:00<?, ?it/s]

Epoch [1], Train Loss : [0.66822] Val Loss : [0.65552] Val Score : [0.62358]
best model 저장됨


  0%|          | 0/1606 [00:00<?, ?it/s]

  0%|          | 0/531 [00:00<?, ?it/s]

Epoch [2], Train Loss : [0.64619] Val Loss : [0.65077] Val Score : [0.57779]


  0%|          | 0/1606 [00:00<?, ?it/s]

  0%|          | 0/531 [00:00<?, ?it/s]

Epoch [3], Train Loss : [0.63862] Val Loss : [0.64085] Val Score : [0.61562]
Epoch 00003: reducing learning rate of group 0 to 5.0000e-05.


  0%|          | 0/1606 [00:00<?, ?it/s]

  0%|          | 0/531 [00:00<?, ?it/s]

Epoch [4], Train Loss : [0.63058] Val Loss : [0.63880] Val Score : [0.63517]
best model 저장됨


  0%|          | 0/1606 [00:00<?, ?it/s]

  0%|          | 0/531 [00:00<?, ?it/s]

Epoch [5], Train Loss : [0.62656] Val Loss : [0.63804] Val Score : [0.63754]
best model 저장됨


  0%|          | 0/1606 [00:00<?, ?it/s]

  0%|          | 0/531 [00:00<?, ?it/s]

Epoch [6], Train Loss : [0.62266] Val Loss : [0.63967] Val Score : [0.63920]
best model 저장됨


  0%|          | 0/1606 [00:00<?, ?it/s]

  0%|          | 0/531 [00:00<?, ?it/s]

Epoch [7], Train Loss : [0.61983] Val Loss : [0.64519] Val Score : [0.58511]


  0%|          | 0/1606 [00:00<?, ?it/s]

  0%|          | 0/531 [00:00<?, ?it/s]

Epoch [8], Train Loss : [0.61806] Val Loss : [0.64018] Val Score : [0.65504]
best model 저장됨


  0%|          | 0/1606 [00:00<?, ?it/s]

  0%|          | 0/531 [00:00<?, ?it/s]

Epoch [9], Train Loss : [0.61437] Val Loss : [0.64640] Val Score : [0.58949]


  0%|          | 0/1606 [00:00<?, ?it/s]

  0%|          | 0/531 [00:00<?, ?it/s]

Epoch [10], Train Loss : [0.61124] Val Loss : [0.64549] Val Score : [0.61170]
Epoch 00010: reducing learning rate of group 0 to 2.5000e-05.


## Inference

In [31]:
# test_dataset = CustomDataset(test_df, None, test_transforms)
# test_loader = DataLoader(test_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

In [32]:
# def inference(model, test_loader, device):
#     model.to(device)
#     model.eval()
#     preds = []
#     threshold = 0.5
    
#     with torch.no_grad():
#         for img, tabular in tqdm(iter(test_loader)):
#             img = img.float().to(device)
#             tabular = tabular.float().to(device)
            
#             model_pred = model(img, tabular)
            
#             model_pred = model_pred.squeeze(1).to('cpu')
            
#             preds += model_pred.tolist()
    
#     preds = np.where(np.array(preds) > threshold, 1, 0)
    
#     return preds

In [33]:
# def inference(model, test_loader, device):
#     model.to(device)
#     model.eval()
#     preds = []
#     threshold = 0.5
    
#     with torch.no_grad():
#         for img, tabular in tqdm(iter(test_loader)):
#             img = img.float().to(device)
#             tabular = tabular.float().to(device)
            
#             model_pred = model(img)
            
#             model_pred = model_pred.squeeze(1).to('cpu')
            
#             preds += model_pred.tolist()
    
#     preds = np.where(np.array(preds) > threshold, 1, 0)
    
#     return preds

In [34]:
# preds = inference(infer_model, test_loader, device)

## Submission

In [35]:
# submit = pd.read_csv('./sample_submission.csv')

In [36]:
# submit['N_category'] = preds
# submit.to_csv('./submit.csv', index=False)

In [37]:
# submit.head(10)

In [38]:
# torch.save(infer_model.state_dict(),os.path.join('./MIL_model_trained.pt'))

In [42]:
from PIL import Image

def inference(model, file_link ,device):
    
    model.to(device)
    model.eval()
    preds = []
    threshold = 0.5
    
    #file_link='C:\\Users\\sanda\\Desktop\\dacon\\traing_imgs_HnE_MIL224\\val\\True\\BC_01_0702_88.png'

    origin_img = Image.open(file_link)

    img = valid_T(origin_img)

    with torch.no_grad():
        
            img = img.float().to(device)
            
            #tabular = tabular.float().to(device)
            
            img = img.unsqueeze(0)
            
            model_pred = model(img)
            
            model_pred = model_pred.squeeze(1).to('cpu')
            
            preds += model_pred.tolist()
            
#         for img, tabular in tqdm(iter(test_loader)):
        
         
    #preds = np.where(np.array(preds) > threshold, 1, 0)
    
    return preds

In [43]:
import glob

file_links = glob.glob('C:\\Users\\sanda\\Desktop\\dacon\\traing_imgs_HnE_MIL224\\train\\True' + '\\*.'+ 'png')
print(len(file_links))

27448


In [44]:
dumb_list = []

for file in tqdm(file_links):
    pred = inference(infer_model,file, device)
    if (pred[0] < 0.8) and (pred[0] > 0.2) :
        #print(format(pred[0],'.8f'))
        dumb_list.append(file)

  0%|          | 0/27448 [00:00<?, ?it/s]

In [45]:
print(len(dumb_list))

3340


In [None]:
# f = ''
# pred = inference(infer_model,f, device)
# pred