## Import

In [None]:
import random
import pandas as pd
import numpy as np
import os
import cv2
from tqdm.auto import tqdm
from google.colab import drive

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

import torchvision.models as models

from sklearn import metrics
from sklearn.preprocessing import StandardScaler, LabelEncoder

from sklearn.model_selection import train_test_split

import warnings
warnings.filterwarnings(action='ignore') 

In [None]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

GPU 설정

## Hyperparameter Setting

In [None]:
CFG = {
    'IMG_SIZE':512,
    'EPOCHS':10,
    'LEARNING_RATE':1e-4,
    'BATCH_SIZE':8,
    'SEED':41
}

## Fixed RandomSeed

In [None]:
def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's `random`, the hash seed environment variable, NumPy and
    PyTorch (CPU and all CUDA devices), and configures cuDNN for
    deterministic behaviour.

    Args:
        seed: Integer seed shared by all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not only the current one.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # benchmark=True lets cuDNN auto-tune (and possibly vary) the convolution
    # algorithm between runs, which defeats the purpose of fixing the seed.
    torch.backends.cudnn.benchmark = False

seed_everything(CFG['SEED']) # Seed 고정

## Data Pre-processing
#### 1. Load Dataframe
#### 2. 결측치 보완
#### 3. Train / Validation Split
#### 4. Numeric Feature Scaling / Categorical Feature Label-Encoding

In [None]:
drive.mount('/content/drive')

Mounted at /content/drive


drive 위치로 dir 이동

In [None]:
%cd '/content/drive/Shareddrives/KUBIG22Dacon/data'

/content/drive/Shareddrives/KUBIG22Dacon/data


In [None]:
train_df = pd.read_csv('/content/drive/Shareddrives/KUBIG22Dacon/Kyungsuk/train_ks_2_pp.csv')
test_df = pd.read_csv('/content/drive/Shareddrives/KUBIG22Dacon/Kyungsuk/test_ks_2_pp.csv')

In [None]:
train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 22 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   ID                1000 non-null   object 
 1   img_path          1000 non-null   object 
 2   mask_path         1000 non-null   object 
 3   나이                1000 non-null   float64
 4   수술연월일             1000 non-null   object 
 5   진단명               1000 non-null   int64  
 6   암의 위치             1000 non-null   int64  
 7   암의 개수             1000 non-null   int64  
 8   암의 장경             1000 non-null   float64
 9   NG                1000 non-null   int64  
 10  HG                1000 non-null   int64  
 11  HG_score_1        1000 non-null   int64  
 12  HG_score_2        1000 non-null   int64  
 13  HG_score_3        1000 non-null   int64  
 14  DCIS_or_LCIS_여부   1000 non-null   int64  
 15  T_category        1000 non-null   int64  
 16  ER                1000 non-null   int64  
 

In [None]:
train_df.head()

Unnamed: 0,ID,img_path,mask_path,나이,수술연월일,진단명,암의 위치,암의 개수,암의 장경,NG,...,HG_score_1,HG_score_2,HG_score_3,DCIS_or_LCIS_여부,T_category,ER,PR,KI-67_LI_percent,HER2,HER2_IHC
976,BC_01_3277,./train_imgs/BC_01_3277.png,-,-2.549733,2010-02-05,0,1,0,-0.816809,2,...,2,2,2,0,1,1,1,-0.167857,0,0
668,BC_01_2734,./train_imgs/BC_01_2734.png,-,-1.216144,2011-10-13,3,0,0,-0.902901,0,...,0,0,0,0,1,1,1,-0.773501,1,2
836,BC_01_2962,./train_imgs/BC_01_2962.png,-,-0.593803,2007-11-13,0,1,0,-0.644626,1,...,0,0,0,0,1,1,1,-0.400797,0,0
479,BC_01_2228,./train_imgs/BC_01_2228.png,-,0.739786,2013-12-09,0,2,0,-1.247269,0,...,1,0,0,2,1,1,1,-0.73856,1,2
83,BC_01_0405,./train_imgs/BC_01_0405.png,-,-0.949427,2020-05-07,0,0,1,-0.730718,1,...,1,1,0,0,1,1,1,0.810492,0,0


### 결측치 처리

In [None]:
# Hold out 20% of the rows for validation; 'N_category' is the target column
# and is separated from the features here.
# NOTE: this rebinds `train_df` to the training split *without* 'N_category',
# so running this cell a second time without reloading the CSV raises KeyError.
train_df, val_df, train_labels, val_labels = train_test_split(
                                                    train_df.drop(columns=['N_category']), 
                                                    train_df['N_category'], 
                                                    test_size=0.2, 
                                                    random_state=CFG['SEED']
                                                )

## Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression

# Identifier / path / date columns are not model features.
meta_cols = ['ID', 'img_path', 'mask_path', '수술연월일']

X = train_df.drop(columns=meta_cols)
y = train_labels
X_val = val_df.drop(columns=meta_cols)
y_val = val_labels
# The test frame is dropped without 'mask_path'.
X_test = test_df.drop(columns=['ID','img_path','수술연월일'])

clf = LogisticRegression(random_state=CFG['SEED'])
clf.fit(X, y)

# Hard labels and positive-class probabilities for the test set.
log_label = clf.predict(X_test)
test_proba = clf.predict_proba(X_test)
log_preds = test_proba[:,1]

# Mean accuracy on the validation split, plus validation probabilities
# that the ensemble cells further down consume.
log_score = clf.score(X_val, y_val)
val_proba = clf.predict_proba(X_val)
val_log = val_proba[:,1]

print(log_score, log_label, log_preds)

0.76 [0 1 0 0 1 1 1 1 1 1 1 0 1 1 0 1 1 0 1 0 1 1 0 1 0 1 1 0 1 1 1 0 1 1 1 1 0
 0 1 0 1 1 1 1 1 1 1 1 1 0 1 0 0 1 1 1 1 0 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 0 1 1 0 1 0 1 0 0 0 1 0 1 0 0 0 1 0 1 1 0 1 1 1 1 1 1 0 0 1 0 0 1 0 1
 1 1 0 0 1 0 0 1 1 0 1 0 1 1 0 1 0 0 0 0 1 0 0 1 0 0 1 0 0 1 0 1 0 0 1 0 0
 0 0 1 1 0 0 0 0 0 0 1 1 1 1 1 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1
 1 0 0 0 0 0 1 1 1 1 0 0 1 1 1 1 0 0 0 1 0 0 0 0 1 1 0 0 1 0 1 0 1 0 1 0 1
 1 0 0 0 1 0 0 1 0 1 0 0 0 1 1 0 1 1 0 1 0 0 0 0 0 0 0 0] [0.47773241 0.6754366  0.22703535 0.4678486  0.93803599 0.85179726
 0.55909331 0.8583065  0.67564049 0.94716583 0.78603272 0.31915702
 0.89913919 0.86667708 0.30925631 0.61204413 0.96231683 0.22887649
 0.60621594 0.28148766 0.63276878 0.77012742 0.18649816 0.79344901
 0.21975116 0.80798458 0.65395855 0.36454365 0.96659798 0.72515614
 0.85910671 0.44075376 0.94617977 0.7609717  0.54780217 0.92948862
 0.12014858 0.40334162 0.94565193 0.34120495 0.9444182  0.72942196
 0.81691051 0.8983

## MLP for Tabular

In [None]:
class CustomDataset_MLP(Dataset):
  """Tabular dataset that yields one float32 feature vector per row.

  The identifier/path/date columns are removed once at construction time and
  the remaining columns are converted to a single float32 tensor, so
  `__getitem__` is a cheap row lookup instead of a per-sample DataFrame copy.

  Args:
    medical_df: DataFrame holding the bookkeeping columns plus numeric features.
    labels: Indexable collection of targets aligned with `medical_df` rows,
      or None for unlabeled (test) data.
  """

  # Columns that are not model inputs. 'mask_path' is absent from the test
  # frame, hence errors='ignore' when dropping.
  _NON_FEATURE_COLUMNS = ['ID', 'img_path', 'mask_path', '수술연월일']

  def __init__(self, medical_df, labels):
    self.medical_df = medical_df
    self.labels = labels
    feature_df = medical_df.drop(columns=self._NON_FEATURE_COLUMNS, errors='ignore')
    # Fails fast here (instead of mid-training) if a non-numeric column remains.
    self.features = torch.tensor(feature_df.to_numpy(dtype='float32'))

  def __getitem__(self, index):
    """Return `(features, label)` when labels were given, else `features`."""
    tabular = self.features[index]
    if self.labels is not None:
      return tabular, self.labels[index]
    return tabular

  def __len__(self):
    return len(self.medical_df)



In [None]:
train_dataset = CustomDataset_MLP(train_df, train_labels.values,)
train_loader = DataLoader(train_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=True, num_workers=0)

val_dataset = CustomDataset_MLP(val_df, val_labels.values,)
val_loader = DataLoader(val_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

In [None]:
class MLP(nn.Module):
    """Embed a tabular feature vector into a 512-dimensional representation.

    Three Linear -> BatchNorm1d -> LeakyReLU stages (128, 256, 512 units)
    followed by a final Linear(512, 512) projection.

    Args:
        in_features: Number of input columns. Defaults to 17, the width the
            notebook originally hard-coded.
    """

    def __init__(self, in_features=17):
        super(MLP, self).__init__()
        self.embedding = nn.Sequential(
            nn.Linear(in_features=in_features, out_features=128),
            nn.BatchNorm1d(128),
            nn.LeakyReLU(),
            nn.Linear(in_features=128, out_features=256),
            nn.BatchNorm1d(256),
            nn.LeakyReLU(),
            nn.Linear(in_features=256, out_features=512),
            nn.BatchNorm1d(512),
            nn.LeakyReLU(),
            nn.Linear(in_features=512, out_features=512)
        )

    def forward(self, x):
        """Map a `(batch, in_features)` tensor to `(batch, 512)`."""
        return self.embedding(x)

In [None]:
class Model(nn.Module):
    """Tabular classifier: MLP embedding followed by a sigmoid-terminated head.

    The head ends in nn.Sigmoid, so `forward` returns values in (0, 1) with
    shape `(batch, 1)`.
    """

    def __init__(self):
        super().__init__()
        self.MLP = MLP()

        # Same layer sequence as a literal listing: three
        # Linear -> BatchNorm1d -> LeakyReLU stages, then Linear(64, 1) + Sigmoid.
        head_layers = []
        for n_in, n_out in ((512, 256), (256, 128), (128, 64)):
            head_layers.append(nn.Linear(in_features=n_in, out_features=n_out))
            head_layers.append(nn.BatchNorm1d(n_out))
            head_layers.append(nn.LeakyReLU())
        head_layers.append(nn.Linear(in_features=64,out_features=1))
        head_layers.append(nn.Sigmoid())
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, tabular):
        """Embed the tabular batch and classify the embedding."""
        return self.classifier(self.MLP(tabular))

In [None]:
def train(model, optimizer, train_loader, val_loader, scheduler, device):
    """Train `model` for CFG['EPOCHS'] epochs and keep the best-scoring weights.

    Args:
        model: Network whose output is a sigmoid probability of shape (batch, 1).
        optimizer: Optimizer already bound to `model.parameters()`.
        train_loader: Yields `(tabular, label)` training batches.
        val_loader: Yields `(tabular, label)` validation batches.
        scheduler: Optional scheduler stepped with the validation score, or None.
        device: Device on which to run.

    Returns:
        `(model, best_score)`: the same model object with the weights of the
        epoch that achieved the highest validation score restored, and that
        score. If no epoch scored above 0 the final weights are kept.
    """
    import copy  # local import so this cell stays self-contained

    model.to(device)
    # The classifier head already ends in nn.Sigmoid, so the network emits
    # probabilities. BCELoss is the matching criterion; BCEWithLogitsLoss would
    # apply a second sigmoid and squash the training signal.
    criterion = nn.BCELoss().to(device)

    best_score = 0
    best_state = None

    for epoch in range(1, CFG['EPOCHS']+1):
        model.train()
        train_loss = []
        for tabular, label in tqdm(iter(train_loader)):
            tabular = tabular.float().to(device)
            label = label.float().to(device)

            optimizer.zero_grad()

            model_pred = model(tabular)

            # Labels arrive as (batch,); the model outputs (batch, 1).
            loss = criterion(model_pred, label.reshape(-1,1))

            loss.backward()
            optimizer.step()

            train_loss.append(loss.item())

        val_loss, val_score = validation(model, criterion, val_loader, device)
        print(f'Epoch [{epoch}], Train Loss : [{np.mean(train_loss):.5f}] Val Loss : [{val_loss:.5f}] Val Score : [{val_score:.5f}]')

        if scheduler is not None:
            scheduler.step(val_score)

        if best_score < val_score:
            best_score = val_score
            # Snapshot the weights: keeping a reference to `model` would just
            # alias the live network, which later epochs keep modifying.
            best_state = copy.deepcopy(model.state_dict())

    if best_state is not None:
        model.load_state_dict(best_state)

    return model, best_score

In [None]:
def validation(model, criterion, val_loader, device):
    """Evaluate `model` on `val_loader`.

    Args:
        model: Network returning one score per sample with shape (batch, 1).
        criterion: Loss called as `criterion(output, target)` per batch.
        val_loader: Yields `(tabular, label)` batches.
        device: Device on which to run.

    Returns:
        `(mean_batch_loss, macro_f1)`, where predictions are the model outputs
        thresholded at 0.5.
    """
    cutoff = 0.5
    batch_losses, scores, targets = [], [], []

    model.eval()
    with torch.no_grad():
        for tabular, label in tqdm(iter(val_loader)):
            # Keep the raw labels for the F1 computation before moving to device.
            targets.extend(label.tolist())

            inputs = tabular.float().to(device)
            target = label.float().to(device)

            output = model(inputs)
            batch_losses.append(criterion(output, target.reshape(-1,1)).item())

            scores.extend(output.squeeze(1).to('cpu').tolist())

    hard_preds = np.where(np.array(scores) > cutoff, 1, 0)
    macro_f1 = metrics.f1_score(y_true=targets, y_pred=hard_preds, average='macro')
    return np.mean(batch_losses), macro_f1

In [None]:
# Build the tabular model, wrapped in DataParallel.
model = nn.DataParallel(Model())
# NOTE: train() switches the model to train mode at the start of every epoch,
# so this eval() call has no lasting effect.
model.eval()
optimizer = torch.optim.Adam(params = model.parameters(), lr = CFG["LEARNING_RATE"])
# mode='max' because train() steps the scheduler with the validation score
# (higher is better); the learning rate is halved after the score stalls.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=1, threshold_mode='abs',min_lr=1e-8, verbose=True)
MLP_model, MLP_score= train(model, optimizer, train_loader, val_loader, scheduler, device)

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

Epoch [1], Train Loss : [0.69235] Val Loss : [0.68278] Val Score : [0.63093]


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

Epoch [2], Train Loss : [0.65716] Val Loss : [0.66777] Val Score : [0.68366]


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

Epoch [3], Train Loss : [0.64326] Val Loss : [0.65550] Val Score : [0.73350]


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

Epoch [4], Train Loss : [0.63432] Val Loss : [0.65440] Val Score : [0.71182]


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

Epoch [5], Train Loss : [0.63004] Val Loss : [0.64439] Val Score : [0.71909]
Epoch 00005: reducing learning rate of group 0 to 5.0000e-05.


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

Epoch [6], Train Loss : [0.62997] Val Loss : [0.65008] Val Score : [0.71820]


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

Epoch [7], Train Loss : [0.62728] Val Loss : [0.64385] Val Score : [0.72932]
Epoch 00007: reducing learning rate of group 0 to 2.5000e-05.


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

Epoch [8], Train Loss : [0.61504] Val Loss : [0.64417] Val Score : [0.74747]


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

Epoch [9], Train Loss : [0.62447] Val Loss : [0.64691] Val Score : [0.70410]


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

Epoch [10], Train Loss : [0.62036] Val Loss : [0.63888] Val Score : [0.72460]
Epoch 00010: reducing learning rate of group 0 to 1.2500e-05.


In [None]:
test_dataset = CustomDataset_MLP(test_df, None, )
test_loader = DataLoader(test_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

In [None]:
def inference_MLP(model, test_loader, device):
    """Run the tabular model over an unlabeled loader.

    Args:
        model: Network returning one score per sample with shape (batch, 1).
        test_loader: Yields feature batches only (no labels).
        device: Device on which to run.

    Returns:
        Flat list of the raw model outputs (floats), in loader order. No
        thresholding is applied here.
    """
    model.to(device)
    model.eval()
    preds = []

    with torch.no_grad():
        for tabular in tqdm(iter(test_loader)):
            tabular = tabular.float().to(device)

            model_pred = model(tabular)

            # (batch, 1) -> (batch,), moved to CPU so it can be listed.
            model_pred = model_pred.squeeze(1).to('cpu')

            preds += model_pred.tolist()

    return preds

In [None]:
MLP_preds = inference_MLP(MLP_model, test_loader, device)

  0%|          | 0/32 [00:00<?, ?it/s]

In [None]:
MLP_preds, MLP_score

([0.44308269023895264,
  0.8998644948005676,
  0.08446623384952545,
  0.15317711234092712,
  0.9012064337730408,
  0.9158560633659363,
  0.5698404908180237,
  0.8609517812728882,
  0.8430992960929871,
  0.9072239398956299,
  0.8379660844802856,
  0.2528573274612427,
  0.9549095034599304,
  0.8785894513130188,
  0.0649370476603508,
  0.31433728337287903,
  0.9738233685493469,
  0.05509939044713974,
  0.42827314138412476,
  0.22682321071624756,
  0.5807005763053894,
  0.9255242943763733,
  0.05187111720442772,
  0.28078117966651917,
  0.046798914670944214,
  0.8641433119773865,
  0.8093162178993225,
  0.08453220874071121,
  0.944776177406311,
  0.5246151685714722,
  0.8139822483062744,
  0.21789884567260742,
  0.8805183172225952,
  0.9045882821083069,
  0.6739509105682373,
  0.9583544731140137,
  0.07125649601221085,
  0.20360177755355835,
  0.9763156175613403,
  0.25586801767349243,
  0.9474079608917236,
  0.7642279863357544,
  0.8703442811965942,
  0.964888870716095,
  0.32814332842826

In [None]:
def inference_MLP_val(model, test_loader, device):
    """Run the tabular model over a labeled loader and return its raw outputs.

    Same as unlabeled inference, except that each batch is a
    `(tabular, label)` pair; the labels are ignored.

    Args:
        model: Network returning one score per sample with shape (batch, 1).
        test_loader: Yields `(tabular, label)` batches.
        device: Device on which to run.

    Returns:
        Flat list of the raw model outputs (floats), in loader order.
    """
    model.to(device)
    model.eval()
    preds = []

    with torch.no_grad():
        for tabular, _label in tqdm(iter(test_loader)):
            tabular = tabular.float().to(device)

            model_pred = model(tabular)

            # (batch, 1) -> (batch,), moved to CPU so it can be listed.
            model_pred = model_pred.squeeze(1).to('cpu')

            preds += model_pred.tolist()

    return preds

In [None]:
val_MLP = inference_MLP_val(MLP_model, val_loader,device)

  0%|          | 0/25 [00:00<?, ?it/s]

In [None]:
val_MLP

[0.8984087705612183,
 0.817233681678772,
 0.23082274198532104,
 0.06801800429821014,
 0.04433545470237732,
 0.05404161661863327,
 0.2275758981704712,
 0.9855839014053345,
 0.04063383862376213,
 0.11107294261455536,
 0.0384334921836853,
 0.9262372851371765,
 0.1401529312133789,
 0.9864178895950317,
 0.4573754668235779,
 0.15145787596702576,
 0.26582175493240356,
 0.664721667766571,
 0.7014223337173462,
 0.8696525692939758,
 0.020954754203557968,
 0.03900965303182602,
 0.18085366487503052,
 0.1314736157655716,
 0.3242985010147095,
 0.05608257278800011,
 0.08241104334592819,
 0.2193318009376526,
 0.2622698247432709,
 0.7814571261405945,
 0.051696229726076126,
 0.43278270959854126,
 0.9448561668395996,
 0.5564383268356323,
 0.47745481133461,
 0.518635630607605,
 0.700237512588501,
 0.14039939641952515,
 0.16638833284378052,
 0.7811059951782227,
 0.2869538962841034,
 0.9513005018234253,
 0.5055500864982605,
 0.9528329372406006,
 0.7031394839286804,
 0.3474964201450348,
 0.9106385111808777,


## CNN (EfficientNet-B0 backbone + MLP head) for images

In [None]:
# Train the image model for 20 epochs. The previous `==` was only a comparison
# (the cell displayed False) and left CFG['EPOCHS'] unchanged.
CFG['EPOCHS'] = 20

False

In [None]:
class CustomDataset(Dataset):
    """Image dataset that loads each row's 'img_path' from disk as RGB.

    Args:
        medical_df: DataFrame with an 'img_path' column.
        labels: Indexable collection of targets aligned with `medical_df`
            rows, or None for unlabeled (test) data.
        transforms: Optional callable invoked as `transforms(image=...)` that
            returns a mapping with an 'image' entry.
    """

    def __init__(self, medical_df, labels, transforms=None):
        self.medical_df = medical_df
        self.transforms = transforms
        self.labels = labels

    def __getitem__(self, index):
        """Return `(image, label)` when labels were given, else `image`.

        Raises:
            FileNotFoundError: If the image at 'img_path' cannot be read.
        """
        img_path = self.medical_df['img_path'].iloc[index]
        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread returns None instead of raising when the file is
            # missing or unreadable; fail with a message that names the path.
            raise FileNotFoundError(f'Could not read image: {img_path}')
        # OpenCV loads BGR; convert to RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.transforms is not None:
            image = self.transforms(image=image)['image']

        if self.labels is not None:
            return image, self.labels[index]
        return image

    def __len__(self):
        return len(self.medical_df)

In [None]:
# Training pipeline: random horizontal/vertical flips (library-default
# probability), a rotation of up to 90 degrees applied with p=0.3 and constant
# border fill, then resize to IMG_SIZE x IMG_SIZE, normalization with the
# standard ImageNet channel mean/std, and conversion to a tensor.
train_transforms = A.Compose([
                            A.HorizontalFlip(),
                            A.VerticalFlip(),
                            A.Rotate(limit=90, border_mode=cv2.BORDER_CONSTANT,p=0.3),
                            A.Resize(CFG['IMG_SIZE'],CFG['IMG_SIZE']),
                            A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=255.0, always_apply=False, p=1.0),
                            ToTensorV2()
                            ])

# Evaluation pipeline: no random augmentation, only the resize, normalization
# and tensor conversion used in the training pipeline.
test_transforms = A.Compose([
                            A.Resize(CFG['IMG_SIZE'],CFG['IMG_SIZE']),
                            A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=255.0, always_apply=False, p=1.0),
                            ToTensorV2()
                            ])

In [None]:
train_dataset = CustomDataset(train_df, train_labels.values, train_transforms)
train_loader = DataLoader(train_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=True, num_workers=0)

val_dataset = CustomDataset(val_df, val_labels.values, test_transforms)
val_loader = DataLoader(val_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

In [None]:
class ImgFeatureExtractor(nn.Module):
    """Image encoder: pretrained EfficientNet-B0 followed by a 1000 -> 512 projection."""

    def __init__(self):
        super().__init__()
        # EfficientNet-B0 is used as the base network.
        self.backbone = models.efficientnet_b0(pretrained=True)
        # Projects the backbone's 1000-wide output down to 512 features.
        self.embedding = nn.Linear(1000,512)

    def forward(self, x):
        """Return the 512-dimensional embedding of an image batch."""
        backbone_out = self.backbone(x)
        return self.embedding(backbone_out)

In [None]:
class ClassificationModel(nn.Module):
    """Image classifier: ImgFeatureExtractor followed by a sigmoid-terminated head.

    The head ends in nn.Sigmoid, so `forward` returns values in (0, 1) with
    shape `(batch, 1)`.
    """

    def __init__(self):
        super().__init__()
        self.img_feature_extractor = ImgFeatureExtractor()

        # Same layer sequence as a literal listing: three
        # Linear -> BatchNorm1d -> LeakyReLU stages, then Linear(64, 1) + Sigmoid.
        head_layers = []
        for n_in, n_out in ((512, 256), (256, 128), (128, 64)):
            head_layers.append(nn.Linear(in_features=n_in, out_features=n_out))
            head_layers.append(nn.BatchNorm1d(n_out))
            head_layers.append(nn.LeakyReLU())
        head_layers.append(nn.Linear(in_features=64,out_features=1))
        head_layers.append(nn.Sigmoid())
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, img):
        """Encode the image batch and classify the resulting features."""
        return self.classifier(self.img_feature_extractor(img))

In [None]:
def train(model, optimizer, train_loader, val_loader, scheduler, device):
    """Train the image `model` for CFG['EPOCHS'] epochs and keep the best weights.

    Args:
        model: Network whose output is a sigmoid probability of shape (batch, 1).
        optimizer: Optimizer already bound to `model.parameters()`.
        train_loader: Yields `(img, label)` training batches.
        val_loader: Yields `(img, label)` validation batches.
        scheduler: Optional scheduler stepped with the validation score, or None.
        device: Device on which to run.

    Returns:
        `(model, best_score)`: the same model object with the weights of the
        epoch that achieved the highest validation score restored, and that
        score. If no epoch scored above 0 the final weights are kept.
    """
    import copy  # local import so this cell stays self-contained

    model.to(device)
    # The classifier head already ends in nn.Sigmoid, so the network emits
    # probabilities. BCELoss is the matching criterion; BCEWithLogitsLoss would
    # apply a second sigmoid and squash the training signal.
    criterion = nn.BCELoss().to(device)

    best_score = 0
    best_state = None

    for epoch in range(1, CFG['EPOCHS']+1):
        model.train()
        train_loss = []
        for img, label in tqdm(iter(train_loader)):
            img = img.float().to(device)
            label = label.float().to(device)

            optimizer.zero_grad()

            model_pred = model(img)

            # Labels arrive as (batch,); the model outputs (batch, 1).
            loss = criterion(model_pred, label.reshape(-1,1))

            loss.backward()
            optimizer.step()

            train_loss.append(loss.item())

        val_loss, val_score = validation(model, criterion, val_loader, device)
        print(f'Epoch [{epoch}], Train Loss : [{np.mean(train_loss):.5f}] Val Loss : [{val_loss:.5f}] Val Score : [{val_score:.5f}]')

        if scheduler is not None:
            scheduler.step(val_score)

        if best_score < val_score:
            best_score = val_score
            # Snapshot the weights: keeping a reference to `model` would just
            # alias the live network, which later epochs keep modifying.
            best_state = copy.deepcopy(model.state_dict())

    if best_state is not None:
        model.load_state_dict(best_state)

    return model, best_score

In [None]:
def validation(model, criterion, val_loader, device):
    """Evaluate the image `model` on `val_loader`.

    Args:
        model: Network returning one score per sample with shape (batch, 1).
        criterion: Loss called as `criterion(output, target)` per batch.
        val_loader: Yields `(img, label)` batches.
        device: Device on which to run.

    Returns:
        `(mean_batch_loss, macro_f1)`, where predictions are the model outputs
        thresholded at 0.5.
    """
    cutoff = 0.5
    batch_losses, scores, targets = [], [], []

    model.eval()
    with torch.no_grad():
        for img, label in tqdm(iter(val_loader)):
            # Keep the raw labels for the F1 computation before moving to device.
            targets.extend(label.tolist())

            inputs = img.float().to(device)
            target = label.float().to(device)

            output = model(inputs)
            batch_losses.append(criterion(output, target.reshape(-1,1)).item())

            scores.extend(output.squeeze(1).to('cpu').tolist())

    hard_preds = np.where(np.array(scores) > cutoff, 1, 0)
    macro_f1 = metrics.f1_score(y_true=targets, y_pred=hard_preds, average='macro')
    return np.mean(batch_losses), macro_f1

In [None]:
# Build the image model, wrapped in DataParallel.
model = nn.DataParallel(ClassificationModel())
# NOTE: train() switches the model to train mode at the start of every epoch,
# so this eval() call has no lasting effect.
model.eval()
optimizer = torch.optim.Adam(params = model.parameters(), lr = CFG["LEARNING_RATE"])
# mode='max' because train() steps the scheduler with the validation score
# (higher is better); the learning rate is halved after the score stalls.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=1, threshold_mode='abs',min_lr=1e-8, verbose=True)

img_model, img_score = train(model, optimizer, train_loader, val_loader, scheduler, device)

Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-3dd342df.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-3dd342df.pth


  0%|          | 0.00/20.5M [00:00<?, ?B/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

In [None]:
test_dataset = CustomDataset(test_df, None, test_transforms)
test_loader = DataLoader(test_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

In [None]:
def inference(model, test_loader, device):
    """Run the image model over an unlabeled loader.

    Args:
        model: Network returning one score per sample with shape (batch, 1).
        test_loader: Yields image batches only (no labels).
        device: Device on which to run.

    Returns:
        Flat list of the raw model outputs (floats), in loader order. No
        thresholding is applied here.
    """
    model.to(device)
    model.eval()
    preds = []

    with torch.no_grad():
        for img in tqdm(iter(test_loader)):
            img = img.float().to(device)

            model_pred = model(img)

            # (batch, 1) -> (batch,), moved to CPU so it can be listed.
            model_pred = model_pred.squeeze(1).to('cpu')

            preds += model_pred.tolist()

    return preds

In [None]:
img_preds = inference(img_model, test_loader, device)

In [None]:
def inference_img(model, test_loader, device):
    """Run the image model over a labeled loader and return its raw outputs.

    Same as unlabeled inference, except that each batch is an
    `(img, label)` pair; the labels are ignored.

    Args:
        model: Network returning one score per sample with shape (batch, 1).
        test_loader: Yields `(img, label)` batches.
        device: Device on which to run.

    Returns:
        Flat list of the raw model outputs (floats), in loader order.
    """
    model.to(device)
    model.eval()
    preds = []

    with torch.no_grad():
        for img, _label in tqdm(iter(test_loader)):
            img = img.float().to(device)

            model_pred = model(img)

            # (batch, 1) -> (batch,), moved to CPU so it can be listed.
            model_pred = model_pred.squeeze(1).to('cpu')

            preds += model_pred.tolist()

    return preds

In [None]:
val_img = inference_img(img_model, val_loader, device)

## ensemble

In [None]:
# Confidence-based ensemble on the test set: for every sample keep the
# probability of whichever model is furthest from 0.5.
# Row order (logistic regression, tabular MLP, image model) is also the
# tie-break priority, because argmax returns the first maximum.
# Written without per-sample loop variables so the `MLP` class defined earlier
# is not overwritten by a float.
stacked_probs = np.array([log_preds, MLP_preds, img_preds], dtype=float)
most_confident = np.abs(stacked_probs - 0.5).argmax(axis=0)
prob = stacked_probs[most_confident, np.arange(stacked_probs.shape[1])].tolist()

# Threshold the selected probability into a hard label.
pred = [1 if p > 0.5 else 0 for p in prob]

print(pred)

In [None]:
print(prob)

In [None]:
submit = pd.read_csv('./sample_submission.csv')

In [None]:
submit['N_category'] = pred
submit.to_csv('./ky_ks.csv', index=False)

### val score 확인용

In [None]:
# Confidence-based ensemble on the validation split: for every sample keep the
# probability of whichever model is furthest from 0.5.
# Row order (logistic regression, tabular MLP, image model) is also the
# tie-break priority, because argmax returns the first maximum.
# Written without per-sample loop variables so the `MLP` class defined earlier
# is not overwritten by a float.
stacked_probs = np.array([val_log, val_MLP, val_img], dtype=float)
most_confident = np.abs(stacked_probs - 0.5).argmax(axis=0)
prob = stacked_probs[most_confident, np.arange(stacked_probs.shape[1])].tolist()

# Threshold the selected probability into a hard label.
pred = [1 if p > 0.5 else 0 for p in prob]

print(pred)

In [None]:
# Accuracy of the confidence-based ensemble on the validation split.
# Uses the `metrics` module imported at the top of the notebook and passes
# the arguments in scikit-learn's documented order (y_true, y_pred).
metrics.accuracy_score(val_labels, pred)


## Bagging 이용한 ensemble (사용 안함)

In [None]:
# Majority vote on the test set: each model votes 1 when its probability is
# above 0.5, and a sample is labeled 1 when at least two of the three agree.
# (The earlier `bag.append(log, MLP, img)` raised TypeError because
# list.append takes exactly one argument.)
prob = []
model_votes = np.array([log_preds, MLP_preds, img_preds], dtype=float) > 0.5
pred = (model_votes.sum(axis=0) >= 2).astype(int).tolist()

print(pred)

In [None]:
submit = pd.read_csv('./sample_submission.csv')

In [None]:
submit['N_category'] = pred
submit.to_csv('./ky_ks_bagging.csv', index=False)

Bagging 점수 확인용

In [None]:
# Majority vote on the validation split: each model votes 1 when its
# probability is above 0.5, and a sample is labeled 1 when at least two of the
# three agree.
# (The earlier `bag.append(log, MLP, img)` raised TypeError because
# list.append takes exactly one argument.)
prob = []
model_votes = np.array([val_log, val_MLP, val_img], dtype=float) > 0.5
pred = (model_votes.sum(axis=0) >= 2).astype(int).tolist()

print(pred)

In [None]:
# Accuracy of the majority-vote ensemble on the validation split.
# Uses the `metrics` module imported at the top of the notebook and passes
# the arguments in scikit-learn's documented order (y_true, y_pred).
metrics.accuracy_score(val_labels, pred)
