## pseudo labeling 제외

- RegNet040 5Fold ensemble
- Pseudo labeling

- Ubuntu 18.04, Cuda 11
- opencv-python
- numpy
- pandas
- timm
- torch==1.8.0 torchvision 0.9.0 with cuda 11.1
- natsort
- scikit-learn
- pillow
- torch_optimizer
- tqdm
- ptflops
- easydict
- matplotlib

# Library

In [1]:
import os
import cv2
import time
import random
import logging  # 로그 출력
import easydict  # 속성으로 dict 값에 access할 수 있음
import numpy as np
import pandas as pd
from tqdm import tqdm  # process bar
from os.path import join as opj
from ptflops import get_model_complexity_info
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score
# from PIL import Image

import timm
import torch
import torch.nn as nn
import torch_optimizer as optim
from torch.utils.data import Dataset, DataLoader
from torch.cuda.amp import autocast, grad_scaler
from torchvision import transforms

import warnings
warnings.filterwarnings('ignore')

# Config

Hyper-parameter 정의

In [2]:
# Global experiment configuration. EasyDict exposes every key as an attribute
# (args.img_size, ...). The driver cell later adds `args.fold` and overwrites
# `args.exp_num` for each cross-validation fold.
args = easydict.EasyDict(
    {'exp_num':'0',
     
     # Path settings
     'data_path':'./data',
     'Kfold':5,
     'model_path':'results/',

     # Model parameter settings  # tunable
#      'encoder_name':'regnety_040',  
     'encoder_name':'regnety_160',
#      'encoder_name':'efficientnet_b3',
     'drop_path_rate':0.2,
     
     # Training parameter settings
     ## Base Parameter
     'img_size':224,  # tunable
     'batch_size':16, # tunable
#      'epochs':100,  # tunable
     'epochs':150,
     'optimizer':'Lamb',  # informational only: Trainer always builds optim.Lamb
     'initial_lr':5e-6,
     'weight_decay':1e-3,

     ## Augmentation
     'aug_ver':2,

     ## Scheduler (OnecycleLR)
     'scheduler':'cycle',
     'warm_epoch':5,
     'max_lr':1e-3,

     ### Cosine Annealing (only read when 'scheduler' is 'cos')
     'min_lr':5e-6,
     'tmax':145,

     ## etc.
     'patience':20,  # early stopping: epochs without validation-loss improvement
     'clipping':None,  # max gradient norm; None disables clipping

     # Hardware settings
     'amp':True,
     'multi_gpu':False,
     'logging':False,  # NOTE(review): not read anywhere in the visible code - confirm
     'num_workers':0,  # set to 0 because DataLoader workers raised a RuntimeError
#      'num_workers':4,  # tunable
     'seed':42
    })

# Utils for training and Logging

In [3]:
# Warmup Learning rate scheduler
from torch.optim.lr_scheduler import _LRScheduler
class WarmUpLR(_LRScheduler):
    """Linear learning-rate warm-up scheduler, meant to be stepped per batch.

    Args:
        optimizer: wrapped optimizer (e.g. SGD).
        total_iters: total number of iterations in the warm-up phase.
        last_epoch: index of the last step already taken (-1 starts fresh).
    """

    def __init__(self, optimizer, total_iters, last_epoch=-1):
        # Must be stored before the base constructor runs, because get_lr()
        # reads it.
        self.total_iters = total_iters
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        """Return base_lr * m / total_iters for every group, m = steps so far."""
        steps_done = self.last_epoch
        # The small epsilon guards against division by zero when total_iters == 0.
        warmup_span = self.total_iters + 1e-8
        return [base_lr * steps_done / warmup_span for base_lr in self.base_lrs]

# Logging
def get_root_logger(logger_name='basicsr',
                    log_level=logging.INFO,
                    log_file=None):
    """Return the named logger, writing to ``log_file`` when one is given.

    Console output is configured through ``logging.basicConfig``, which does
    nothing if the root logger already has handlers.

    The named logger carries at most one file handler. Calling again with the
    same file reuses it; calling with a different file (e.g. a new fold
    directory) closes the old handler and switches to the new file. The
    previous ``logger.hasHandlers()`` check also looked at root handlers, so
    every call after the first returned early and later folds never got their
    own log file.

    Args:
        logger_name: name passed to ``logging.getLogger``.
        log_level: level for the console configuration and the file handler.
        log_file: path of the log file, or None for console-only logging.

    Returns:
        The configured ``logging.Logger``.
    """
    logger = logging.getLogger(logger_name)

    format_str = '%(asctime)s %(levelname)s: %(message)s'
    logging.basicConfig(format=format_str, level=log_level)

    if log_file is None:
        return logger

    # FileHandler stores its target as an absolute path in `baseFilename`.
    wanted = os.path.abspath(log_file)
    for handler in list(logger.handlers):
        if not isinstance(handler, logging.FileHandler):
            continue
        if handler.baseFilename == wanted:
            # Already logging to this file; do not truncate or duplicate.
            return logger
        # A different file is requested: retire the stale handler.
        logger.removeHandler(handler)
        handler.close()

    file_handler = logging.FileHandler(log_file, 'w')
    file_handler.setFormatter(logging.Formatter(format_str))
    file_handler.setLevel(log_level)
    logger.addHandler(file_handler)

    return logger

class AvgMeter(object):
    """Tracks the latest value, a weighted running average and the raw history."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear all statistics and the recorded history."""
        self.val = self.avg = self.sum = self.count = 0
        self.losses = []

    def update(self, val, n=1):
        """Record ``val`` observed over ``n`` samples and refresh the average."""
        self.val = val
        self.count += n
        self.sum += n * val
        self.avg = self.sum / self.count
        # The history keeps the raw per-update value, not the weighted one.
        self.losses.append(val)

# Data Preprocessing
- 원본 이미지 사이즈가 큰 점을 감안해 (256,256)로 resize하여 데이터를 새롭게 저장

In [4]:
# df = pd.read_csv('./data/train_df.csv')

# # Resize Train Images
# save_path = './data/train_256'  # 새로 저장할 폴더 경로
# os.makedirs(save_path, exist_ok=True)
# for img in tqdm(df['file_name']):  # train_df의 'file_name' 컬럼을 참고하여
#     name = os.path.basename(img)
#     img = cv2.imread(opj('./data/train/', img))  # 해당 경로에 있는 png 이미지 읽어서
#     img = cv2.resize(img, dsize=(256, 256))  # resize한 후
# #   img = cv2.resize(img, dsize=(256, 256), interpolation=cv2.INTER_AREA) # 고려사항
#     img = cv2.imwrite(opj(save_path, name), img)  # 새 폴더에 저장

# # Resize Test Images
# df = pd.read_csv('./data/test_df.csv')
# save_path = './data/test_256'
# os.makedirs(save_path, exist_ok=True)
# for img in tqdm(df['file_name']):
#     name = os.path.basename(img)
#     img = cv2.imread(opj('./data/test/', img))
#     img = cv2.resize(img, dsize=(256, 256))
# #   img = cv2.resize(img, dsize=(256, 256), interpolation=cv2.INTER_AREA) # 고려사항
#     img = cv2.imwrite(opj(save_path, name), img)

# Dataset & Loader

In [5]:
class Train_Dataset(Dataset):
    """Labelled image dataset backed by a DataFrame.

    The DataFrame must provide a ``file_name`` column (path relative to
    ``img_dir``) and a ``label`` column holding class names from ``LABELS``.

    Args:
        df: DataFrame with ``file_name`` and ``label`` columns. It is read
            only; the encoded labels are kept on the dataset, not written
            back into ``df``.
        transform: optional callable applied to the CHW float tensor.
        img_dir: directory that contains the (resized) training images.
    """

    # Class names in index order: the position of a name is its integer id.
    LABELS = (
        'bottle-broken_large', 'bottle-broken_small', 'bottle-contamination', 'bottle-good',
        'cable-bent_wire', 'cable-cable_swap', 'cable-combined', 'cable-cut_inner_insulation',
        'cable-cut_outer_insulation', 'cable-good', 'cable-missing_cable', 'cable-missing_wire',
        'cable-poke_insulation',
        'capsule-crack', 'capsule-faulty_imprint', 'capsule-good', 'capsule-poke',
        'capsule-scratch', 'capsule-squeeze',
        'carpet-color', 'carpet-cut', 'carpet-good', 'carpet-hole',
        'carpet-metal_contamination', 'carpet-thread',
        'grid-bent', 'grid-broken', 'grid-glue', 'grid-good',
        'grid-metal_contamination', 'grid-thread',
        'hazelnut-crack', 'hazelnut-cut', 'hazelnut-good', 'hazelnut-hole', 'hazelnut-print',
        'leather-color', 'leather-cut', 'leather-fold', 'leather-glue', 'leather-good',
        'leather-poke',
        'metal_nut-bent', 'metal_nut-color', 'metal_nut-flip', 'metal_nut-good',
        'metal_nut-scratch',
        'pill-color', 'pill-combined', 'pill-contamination', 'pill-crack',
        'pill-faulty_imprint', 'pill-good', 'pill-pill_type', 'pill-scratch',
        'screw-good', 'screw-manipulated_front', 'screw-scratch_head', 'screw-scratch_neck',
        'screw-thread_side', 'screw-thread_top',
        'tile-crack', 'tile-glue_strip', 'tile-good', 'tile-gray_stroke', 'tile-oil',
        'tile-rough',
        'toothbrush-defective', 'toothbrush-good',
        'transistor-bent_lead', 'transistor-cut_lead', 'transistor-damaged_case',
        'transistor-good', 'transistor-misplaced',
        'wood-color', 'wood-combined', 'wood-good', 'wood-hole', 'wood-liquid',
        'wood-scratch',
        'zipper-broken_teeth', 'zipper-combined', 'zipper-fabric_border',
        'zipper-fabric_interior', 'zipper-good', 'zipper-rough', 'zipper-split_teeth',
        'zipper-squeezed_teeth',
    )

    def __init__(self, df, transform=None, img_dir='./data/train_256/'):
        self.file_name = df['file_name'].values

        # Encode label strings as integer class ids. Values that are not a
        # known class name pass through unchanged (as Series.replace did), and
        # the result is kept off `df` so the caller's frame is not modified.
        label_to_index = {name: index for index, name in enumerate(self.LABELS)}
        self.target = df['label'].map(
            lambda value: label_to_index.get(value, value)).values

        self.transform = transform
        self.img_dir = img_dir

        print(f'Dataset size:{len(self.file_name)}')

    def __getitem__(self, idx):
        """Return ``(image, target)`` for row ``idx``.

        Raises:
            FileNotFoundError: if the image cannot be read from disk.
        """
        path = opj(self.img_dir, self.file_name[idx])
        image = cv2.imread(path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f'Could not read image: {path}')

        # BGR -> RGB, scaled to [0, 1] as float32.
        image = cv2.cvtColor(image.astype(np.float32), cv2.COLOR_BGR2RGB) / 255.0

        target = self.target[idx]

        if self.transform is not None:
            # HWC -> CHW layout before handing a tensor to the transform.
            image = self.transform(torch.from_numpy(image.transpose(2, 0, 1)))

        return image, target

    def __len__(self):
        return len(self.file_name)

class Test_dataset(Dataset):
    """Unlabelled image dataset used for inference.

    Args:
        df: DataFrame with a ``file_name`` column (path relative to ``img_dir``).
        transform: optional callable applied to the CHW float tensor.
        img_dir: directory that contains the (resized) test images.
    """

    def __init__(self, df, transform=None, img_dir='./data/test_256/'):
        self.test_file_name = df['file_name'].values
        self.transform = transform
        self.img_dir = img_dir

        print(f'Test Dataset size:{len(self.test_file_name)}')

    def __getitem__(self, idx):
        """Return the image for row ``idx``.

        Raises:
            FileNotFoundError: if the image cannot be read from disk.
        """
        path = opj(self.img_dir, self.test_file_name[idx])
        image = cv2.imread(path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f'Could not read image: {path}')

        # BGR -> RGB, scaled to [0, 1] as float32.
        image = cv2.cvtColor(image.astype(np.float32), cv2.COLOR_BGR2RGB) / 255.0

        if self.transform is not None:
            # HWC -> CHW layout before handing a tensor to the transform.
            image = self.transform(torch.from_numpy(image.transpose(2, 0, 1)))

        return image

    def __len__(self):
        return len(self.test_file_name)

def get_loader(df, phase: str, batch_size, shuffle,
               num_workers, transform):
    """Build a DataLoader over ``df`` for the given phase.

    ``phase == 'test'`` wraps the frame in ``Test_dataset`` (images only);
    any other phase uses ``Train_Dataset`` (image, label pairs).
    """
    # num_workers: number of subprocesses used for data loading.
    # pin_memory:  True makes the loader place tensors in CUDA pinned memory.
    loader_options = dict(batch_size=batch_size, shuffle=shuffle,
                          num_workers=num_workers, pin_memory=True)

    if phase != 'test':
        # drop_last=False keeps the final, possibly smaller, batch.
        return DataLoader(Train_Dataset(df, transform), drop_last=False, **loader_options)

    return DataLoader(Test_dataset(df, transform), **loader_options)

def get_train_augmentation(img_size, ver):
    """Return the torchvision transform pipeline for an augmentation version.

    The pipelines operate on tensors: the datasets in this file already pass
    a CHW float tensor scaled to [0, 1], so no ``ToTensor`` step is included.

    Args:
        img_size: side length of the square output image.
        ver: 1 for the deterministic validation/test pipeline (resize +
            normalize), 2 for the training pipeline with random flips and
            rotations.

    Returns:
        A ``transforms.Compose`` instance.

    Raises:
        ValueError: if ``ver`` is not a supported version. Previously this
            fell through to an UnboundLocalError on the return statement.
    """
    if ver not in (1, 2):
        raise ValueError(f'Unsupported augmentation version: {ver!r} (expected 1 or 2)')

    resize = transforms.Resize((img_size, img_size))
    # ImageNet channel statistics, matching the pretrained encoders.
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    if ver == 1:  # for validset
        return transforms.Compose([resize, normalize])

    return transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        # With only `degrees` given, RandomAffine applies a random rotation in
        # [-20, 20] degrees (no translation, scaling or shear).
        transforms.RandomAffine((20)),
        transforms.RandomRotation(90),
        resize,
        normalize,
    ])

# Network

In [6]:
class Network(nn.Module):
    """Pretrained timm encoder with its classifier replaced for this task.

    Args:
        args: namespace providing ``encoder_name`` and ``drop_path_rate``.
        num_classes: number of output classes of the new classification layer.

    Raises:
        ValueError: if ``args.encoder_name`` is neither a RegNet nor an
            EfficientNet variant. Previously such encoders silently kept
            their original pretrained classifier.
    """

    def __init__(self, args, num_classes=88):
        super().__init__()
        encoder_name = args.encoder_name
        is_regnet = 'regnet' in encoder_name
        is_efficient = 'efficient' in encoder_name
        # Validate before creating the model so an unsupported name fails
        # fast, before any pretrained weights are fetched.
        if not (is_regnet or is_efficient):
            raise ValueError(
                f'Unsupported encoder {encoder_name!r}: expected a name containing '
                f"'regnet' or 'efficient'")

        # Use a pretrained backbone.
        self.encoder = timm.create_model(encoder_name, pretrained=True,
                                         drop_path_rate=args.drop_path_rate,
                                         )

        # The final linear layer lives under a different attribute per family.
        if is_regnet:
            in_features = self.encoder.head.fc.in_features
            self.encoder.head.fc = nn.Linear(in_features, num_classes)
        else:
            in_features = self.encoder.classifier.in_features
            self.encoder.classifier = nn.Linear(in_features, num_classes)

    def forward(self, x):
        """Return the class logits produced by the encoder for batch ``x``."""
        x = self.encoder(x)
        return x

class Network_test(nn.Module):
    """Inference-time counterpart of the training network (drop path disabled).

    Args:
        encoder_name: timm model name; must contain 'regnet' or 'efficient'.
        num_classes: number of output classes of the new classification layer.

    Raises:
        ValueError: if ``encoder_name`` is neither a RegNet nor an
            EfficientNet variant. Previously such encoders silently kept
            their original pretrained classifier.
    """

    def __init__(self, encoder_name, num_classes=88):
        super().__init__()
        is_regnet = 'regnet' in encoder_name
        is_efficient = 'efficient' in encoder_name
        # Validate before creating the model so an unsupported name fails
        # fast, before any pretrained weights are fetched.
        if not (is_regnet or is_efficient):
            raise ValueError(
                f'Unsupported encoder {encoder_name!r}: expected a name containing '
                f"'regnet' or 'efficient'")

        self.encoder = timm.create_model(encoder_name, pretrained=True,
                                         drop_path_rate=0,
                                         )

        # The final linear layer lives under a different attribute per family.
        if is_regnet:
            in_features = self.encoder.head.fc.in_features
            self.encoder.head.fc = nn.Linear(in_features, num_classes)
        else:
            in_features = self.encoder.classifier.in_features
            self.encoder.classifier = nn.Linear(in_features, num_classes)

    def forward(self, x):
        """Return the class logits produced by the encoder for batch ``x``."""
        x = self.encoder(x)
        return x

# Trainer for Training & Validation

In [7]:
class Trainer():
    """Train and validate a single cross-validation fold.

    Constructing a Trainer runs the whole experiment for ``args.fold``: it
    builds the data loaders, model, optimizer and schedulers, then loops over
    the epochs, saving a checkpoint whenever the validation loss improves and
    stopping early after ``args.patience`` epochs without improvement.
    """

    def __init__(self, args, save_path):
        '''
        args: hyper-parameter namespace; ``args.fold`` selects the validation fold
        save_path: directory for the log file and the best-model checkpoint
        '''
        super(Trainer, self).__init__()
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        # Logging
        log_file = os.path.join(save_path, 'aws_log_0507_2_esb.log')
        self.logger = get_root_logger(logger_name='IR', log_level=logging.INFO, log_file=log_file)
        self.logger.info(args)

        # Train, Valid Set load
        ############################################################################
        df_train = pd.read_csv(opj(args.data_path, 'train_df.csv'))
        print('Read train_df.csv')

        # Assign every row to a stratified fold, then hold out `args.fold`.
        kf = StratifiedKFold(n_splits=args.Kfold, shuffle=True, random_state=args.seed)
        for fold, (_, val_idx) in enumerate(kf.split(range(len(df_train)), y=df_train['label'])):
            df_train.loc[val_idx, 'fold'] = fold

        df_val = df_train[df_train['fold'] == args.fold].reset_index(drop=True)
        df_train = df_train[df_train['fold'] != args.fold].reset_index(drop=True)

        # Augmentation (version 1 is the deterministic validation pipeline)
        self.train_transform = get_train_augmentation(img_size=args.img_size, ver=args.aug_ver)
        self.test_transform = get_train_augmentation(img_size=args.img_size, ver=1)

        # TrainLoader (the validation set also uses phase='train' because it has labels)
        self.train_loader = get_loader(df_train, phase='train', batch_size=args.batch_size, shuffle=True,
                                       num_workers=args.num_workers, transform=self.train_transform)
        self.val_loader = get_loader(df_val, phase='train', batch_size=args.batch_size, shuffle=False,
                                       num_workers=args.num_workers, transform=self.test_transform)

        # Network
        self.model = Network(args).to(self.device)
        macs, params = get_model_complexity_info(self.model, (3, args.img_size, args.img_size), as_strings=True,
                                                 print_per_layer_stat=False, verbose=False)
        self.logger.info('{:<30}  {:<8}'.format('Computational complexity: ', macs))
        self.logger.info('{:<30}  {:<8}'.format('Number of parameters: ', params))

        # Loss
        self.criterion = nn.CrossEntropyLoss()

        # Optimizer & Scheduler
        self.optimizer = optim.Lamb(self.model.parameters(), lr=args.initial_lr, weight_decay=args.weight_decay)

        iter_per_epoch = len(self.train_loader)
        self.warmup_scheduler = WarmUpLR(self.optimizer, iter_per_epoch * args.warm_epoch)

        if args.scheduler == 'step':
            # NOTE(review): args.milestone / args.lr_factor are not part of the
            # visible default config - they must be supplied for this branch.
            self.scheduler = torch.optim.lr_scheduler.MultiStepLR(self.optimizer, milestones=args.milestone, gamma=args.lr_factor, verbose=True)
        elif args.scheduler == 'cos':
            tmax = args.tmax # half-cycle
            self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(self.optimizer, T_max = tmax, eta_min=args.min_lr, verbose=True)
        elif args.scheduler == 'cycle':
            # NOTE(review): the cycle is sized for epochs * iter_per_epoch steps
            # but is only stepped after the warm-up epochs, so it is never run
            # to completion - confirm this is intended.
            self.scheduler = torch.optim.lr_scheduler.OneCycleLR(self.optimizer, max_lr=args.max_lr, steps_per_epoch=iter_per_epoch, epochs=args.epochs)

        if args.multi_gpu:
            self.model = nn.DataParallel(self.model).to(self.device)

        # Train / Validate
        best_loss = np.inf
        best_acc = 0
        # Initialised so the final summary cannot hit an unbound name when the
        # validation loss never improves (e.g. it is NaN from the first epoch).
        best_f1 = 0
        best_epoch = 0
        early_stopping = 0
        start = time.time()
        for epoch in range(1, args.epochs+1):
            self.epoch = epoch

            # The cosine schedule advances once per epoch, after warm-up.
            if args.scheduler == 'cos':
                if epoch > args.warm_epoch:
                    self.scheduler.step()

            # Training
            train_loss, train_acc, train_f1 = self.training(args)

            # Model weight in Multi_GPU or Single GPU
            state_dict= self.model.module.state_dict() if args.multi_gpu else self.model.state_dict()

            # Validation
            val_loss, val_acc, val_f1 = self.validate(args, phase='val')

            # Save models
            if val_loss < best_loss:
                early_stopping = 0
                best_epoch = epoch
                best_loss = val_loss
                best_acc = val_acc
                best_f1 = val_f1

                torch.save({'epoch':epoch,
                            'state_dict':state_dict,
                            'optimizer': self.optimizer.state_dict(),
                            'scheduler': self.scheduler.state_dict(),
                    }, os.path.join(save_path, 'aws_best_model_0507_2_esb.pth'))
                self.logger.info(f'-----------------SAVE:{best_epoch}epoch----------------')
            else:
                early_stopping += 1

            # Early Stopping
            if early_stopping == args.patience:
                break

        self.logger.info(f'\nBest Val Epoch:{best_epoch} | Val Loss:{best_loss:.4f} | Val Acc:{best_acc:.4f} | Val F1:{best_f1:.4f}')
        end = time.time()
        self.logger.info(f'Total Process time:{(end - start) / 60:.3f}Minute')

    # Training
    def training(self, args):
        """Run one training epoch and return ``(mean loss, accuracy, macro F1)``."""
        self.model.train()
        train_loss = AvgMeter()
        train_acc = 0
        preds_list = []
        targets_list = []

        # A fresh gradient scaler is created for every epoch.
        scaler = grad_scaler.GradScaler()
        for images, targets in tqdm(self.train_loader):
            # The loader already yields tensors; as_tensor moves/casts them
            # without the extra re-wrapping torch.tensor() would do.
            images = torch.as_tensor(images, device=self.device, dtype=torch.float32)
            targets = torch.as_tensor(targets, device=self.device, dtype=torch.long)

            # Warm-up is stepped per batch during the first warm_epoch epochs.
            if self.epoch <= args.warm_epoch:
                self.warmup_scheduler.step()

            self.model.zero_grad(set_to_none=True)
            if args.amp:
                with autocast():
                    preds = self.model(images)
                    loss = self.criterion(preds, targets)
                scaler.scale(loss).backward()

                # Gradient Clipping (gradients must be unscaled first)
                if args.clipping is not None:
                    scaler.unscale_(self.optimizer)
                    torch.nn.utils.clip_grad_norm_(self.model.parameters(), args.clipping)

                scaler.step(self.optimizer)
                scaler.update()

            else:
                preds = self.model(images)
                loss = self.criterion(preds, targets)
                loss.backward()
                # Clip only when a max norm is configured; the default config
                # sets clipping=None, which clip_grad_norm_ cannot handle.
                if args.clipping is not None:
                    nn.utils.clip_grad_norm_(self.model.parameters(), args.clipping)
                self.optimizer.step()

            # The one-cycle schedule advances per batch, after warm-up.
            if args.scheduler == 'cycle':
                if self.epoch > args.warm_epoch:
                    self.scheduler.step()

            # Metric
            train_acc += (preds.argmax(dim=1) == targets).sum().item()
            preds_list.extend(preds.argmax(dim=1).cpu().detach().numpy())
            targets_list.extend(targets.cpu().detach().numpy())
            # log
            train_loss.update(loss.item(), n=images.size(0))

        train_acc /= len(self.train_loader.dataset)
        train_f1 = f1_score(np.array(targets_list), np.array(preds_list), average='macro')

        self.logger.info(f'Epoch:[{self.epoch:03d}/{args.epochs:03d}]')
        self.logger.info(f'Train Loss:{train_loss.avg:.3f} | Acc:{train_acc:.4f} | F1:{train_f1:.4f}')
        return train_loss.avg, train_acc, train_f1

    # Validation or Dev
    def validate(self, args, phase='val'):
        """Evaluate on the validation loader and return ``(mean loss, accuracy, macro F1)``.

        ``phase`` is only used as the prefix of the log line.
        """
        self.model.eval()
        with torch.no_grad():
            val_loss = AvgMeter()
            val_acc = 0
            preds_list = []
            targets_list = []

            for images, targets in self.val_loader:
                images = torch.as_tensor(images, device=self.device, dtype=torch.float32)
                targets = torch.as_tensor(targets, device=self.device, dtype=torch.long)

                preds = self.model(images)
                loss = self.criterion(preds, targets)

                # Metric
                val_acc += (preds.argmax(dim=1) == targets).sum().item()
                preds_list.extend(preds.argmax(dim=1).cpu().detach().numpy())
                targets_list.extend(targets.cpu().detach().numpy())

                # log
                val_loss.update(loss.item(), n=images.size(0))
            val_acc /= len(self.val_loader.dataset)
            val_f1 = f1_score(np.array(targets_list), np.array(preds_list), average='macro')

            self.logger.info(f'{phase} Loss:{val_loss.avg:.3f} | Acc:{val_acc:.4f} | F1:{val_f1:.4f}')
        return val_loss.avg, val_acc, val_f1

# Main

In [8]:
def main(args):
    """Seed the RNGs, create the experiment directory and train one fold.

    Returns the directory (``<model_path>/<exp_num zero-padded to 3>``) that
    holds the fold's log and checkpoint.
    """
    print('<---- Training Params ---->')

    # Random Seed
    seed_value = args.seed
    os.environ['PYTHONHASHSEED'] = str(seed_value)
    seeders = (random.seed, np.random.seed, torch.manual_seed,
               torch.cuda.manual_seed, torch.cuda.manual_seed_all)
    for apply_seed in seeders:
        apply_seed(seed_value)
    # NOTE(review): cudnn.benchmark lets cuDNN auto-tune kernels, so runs may
    # not be bit-for-bit reproducible despite the seeding - confirm intended.
    torch.backends.cudnn.benchmark = True

    # Create model directory
    save_path = os.path.join(args.model_path, args.exp_num.zfill(3))
    os.makedirs(save_path, exist_ok=True)

    # Training runs inside the Trainer constructor.
    Trainer(args, save_path)

    return save_path

# Inference & Make pseudo label set

In [9]:
def predict(encoder_name, test_loader, device, model_path):
    """Run one fold's best checkpoint over ``test_loader``.

    Args:
        encoder_name: timm model name the checkpoint was trained with.
        test_loader: loader yielding image batches (no labels).
        device: torch device to run inference on.
        model_path: directory containing 'aws_best_model_0507_2_esb.pth'.

    Returns:
        NumPy array of softmax class probabilities, one row per test image.
    """
    model = Network_test(encoder_name).to(device)
    # map_location lets a checkpoint saved on GPU be restored on whatever
    # device inference runs on (including CPU-only machines).
    checkpoint = torch.load(opj(model_path, 'aws_best_model_0507_2_esb.pth'),
                            map_location=device)
    model.load_state_dict(checkpoint['state_dict'])
    model.eval()
    preds_list = []
    with torch.no_grad():
        for images in tqdm(test_loader):
            images = torch.as_tensor(images, device=device, dtype=torch.float32)
            preds = model(images)
            preds = torch.softmax(preds, dim=1)
            preds_list.extend(preds.cpu().tolist())

    return np.array(preds_list)

def ensemble_5fold(model_path_list, test_loader, device, encoder_name='regnety_160'):
    """Average the softmax predictions of every fold checkpoint.

    Works for any number of folds; the previous version summed exactly five
    predictions regardless of how many paths were passed.

    Args:
        model_path_list: directories, one per fold, each holding a checkpoint.
        test_loader: loader yielding image batches (no labels).
        device: torch device to run inference on.
        encoder_name: timm model name the checkpoints were trained with
            (alternatives used in this notebook: 'regnety_040',
            'efficientnet_b3').

    Returns:
        NumPy array with the mean class probabilities across folds.

    Raises:
        ValueError: if ``model_path_list`` is empty.
    """
    if len(model_path_list) == 0:
        raise ValueError('model_path_list must contain at least one model directory')

    predict_list = [
        predict(encoder_name=encoder_name, test_loader=test_loader,
                device=device, model_path=model_path)
        for model_path in model_path_list
    ]
    # sum() adds the arrays in list order, matching a manual left-to-right sum.
    ensemble = sum(predict_list) / len(predict_list)

    return ensemble


# def make_pseudo_df(train_df, test_df, ensemble, step, threshold = 0.9, z_sample = 500): 
#     train_df_copy = train_df.copy()
#     test_df_copy = test_df.copy()

#     test_df_copy['label'] = ensemble.argmax(axis=1)
#     pseudo_test_df = test_df_copy.iloc[np.where(ensemble > threshold)[0]].reset_index(drop=True)
#     z_idx  = pseudo_test_df[pseudo_test_df['label'] == 0].sample(n=z_sample, random_state=42).index.tolist()
#     ot_idx = pseudo_test_df[pseudo_test_df['label'].isin([*range(1,88)])].index.tolist()  # 고려사항
#     pseudo_test_df = pseudo_test_df.iloc[z_idx + ot_idx]

#     train_df_copy = train_df_copy.append(pseudo_test_df, ignore_index=True).reset_index(drop=True) # reset_index
#     train_df_copy.to_csv(f'./data/0505_1_train_{step}step.csv', index=False)
#     print(f'Make train_{step}step.csv')

# Train & Inference
- 5fold Training -> Inference & Ensemble -> Make or Update Pseudo label set -> Add Dataset(Trainset + Pseudo label set)
위와 같은 과정을 반복하기 때문에 Training과 Inference를 동시에 진행했습니다.

In [10]:
# Inference-time settings and the CSV files shared by the cells below.
# img_size = 256  # tunable
img_size = 224  # same value as args.img_size, which the training code uses
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
sub = pd.read_csv('./data/sample_submission.csv')
df_train = pd.read_csv('./data/train_df.csv')
df_test = pd.read_csv('./data/test_df.csv')

In [11]:
# Build the test loader once, train one model per fold, then average the
# per-fold predictions on the test set.
# df_test['file_name'] = df_test['file_name'].apply(lambda x:x.replace('test_imgs', 'test_1024'))
test_transform = get_train_augmentation(img_size=img_size, ver=1)  # ver=1: no random augmentation
test_dataset = Test_dataset(df_test, test_transform)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=0)
# test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=0)  # tunable

start = 0 # first time : Only Trainset (only referenced by the commented-out pseudo-labeling loop)
# steps = 6 # Number of pseudo labeling times 
models_path = []
for s_fold in range(5): # 5fold
    # main() reads the fold to hold out and the output sub-directory from args.
    args.fold = s_fold
    args.exp_num = str(s_fold)
    save_path = main(args)
    models_path.append(save_path)
# NOTE(review): `ensemble` holds class probabilities; nothing in the visible
# cells writes it into `sub` - confirm where the submission file is produced.
ensemble = ensemble_5fold(models_path, test_loader, device)
    
# pseudo labeling하는 경우
# for step in range(start, steps+1): 
#     models_path = []
#     args.step = step
#     for s_fold in range(5): # 5fold
#         args.fold = s_fold
#         args.exp_num = str(s_fold)
#         save_path = main(args)
#         models_path.append(save_path)
#     ensemble = ensemble_5fold(models_path, test_loader, device)
#     make_pseudo_df(df_train, df_test, ensemble, step+1)

2022-05-07 15:38:01,550 INFO: {'exp_num': '0', 'data_path': './data', 'Kfold': 5, 'model_path': 'results/', 'encoder_name': 'regnety_160', 'drop_path_rate': 0.2, 'img_size': 224, 'batch_size': 16, 'epochs': 150, 'optimizer': 'Lamb', 'initial_lr': 5e-06, 'weight_decay': 0.001, 'aug_ver': 2, 'scheduler': 'cycle', 'warm_epoch': 5, 'max_lr': 0.001, 'min_lr': 5e-06, 'tmax': 145, 'patience': 20, 'clipping': None, 'amp': True, 'multi_gpu': False, 'logging': False, 'num_workers': 0, 'seed': 42, 'fold': 0}


Test Dataset size:2154
<---- Training Params ---->
Read train_df.csv
Dataset size:3421
Dataset size:856


2022-05-07 15:38:02,360 INFO: Loading pretrained weights from url (https://dl.fbaipublicfiles.com/deit/regnety_160-a5fe301d.pth)
Downloading: "https://dl.fbaipublicfiles.com/deit/regnety_160-a5fe301d.pth" to C:\Users\user/.cache\torch\hub\checkpoints\regnety_160-a5fe301d.pth
2022-05-07 15:40:40,487 INFO: Computational complexity:       15.93 GMac
2022-05-07 15:40:40,488 INFO: Number of parameters:           80.83 M 
100%|████████████████████████████████████████████████████████████████████████████████| 214/214 [02:05<00:00,  1.71it/s]
2022-05-07 15:42:45,834 INFO: Epoch:[001/150]
2022-05-07 15:42:45,835 INFO: Train Loss:4.486 | Acc:0.0082 | F1:0.0034
2022-05-07 15:42:55,904 INFO: val Loss:4.493 | Acc:0.0023 | F1:0.0003
2022-05-07 15:42:57,663 INFO: -----------------SAVE:1epoch----------------
100%|████████████████████████████████████████████████████████████████████████████████| 214/214 [01:55<00:00,  1.85it/s]
2022-05-07 15:44:53,405 INFO: Epoch:[002/150]
2022-05-07 15:44:53,406 INFO: T

100%|████████████████████████████████████████████████████████████████████████████████| 214/214 [01:56<00:00,  1.83it/s]
2022-05-07 16:25:20,488 INFO: Epoch:[021/150]
2022-05-07 16:25:20,489 INFO: Train Loss:0.343 | Acc:0.9111 | F1:0.5328
2022-05-07 16:25:29,265 INFO: val Loss:0.277 | Acc:0.9206 | F1:0.5491
2022-05-07 16:25:31,207 INFO: -----------------SAVE:21epoch----------------
100%|████████████████████████████████████████████████████████████████████████████████| 214/214 [01:57<00:00,  1.82it/s]
2022-05-07 16:27:28,552 INFO: Epoch:[022/150]
2022-05-07 16:27:28,552 INFO: Train Loss:0.311 | Acc:0.9187 | F1:0.5745
2022-05-07 16:27:37,350 INFO: val Loss:0.266 | Acc:0.9229 | F1:0.5684
2022-05-07 16:27:39,326 INFO: -----------------SAVE:22epoch----------------
100%|████████████████████████████████████████████████████████████████████████████████| 214/214 [01:57<00:00,  1.83it/s]
2022-05-07 16:29:36,564 INFO: Epoch:[023/150]
2022-05-07 16:29:36,564 INFO: Train Loss:0.287 | Acc:0.9246 | F1:0

100%|████████████████████████████████████████████████████████████████████████████████| 214/214 [01:56<00:00,  1.83it/s]
2022-05-07 17:15:59,235 INFO: Epoch:[045/150]
2022-05-07 17:15:59,235 INFO: Train Loss:0.133 | Acc:0.9667 | F1:0.8691
2022-05-07 17:16:08,025 INFO: val Loss:0.208 | Acc:0.9556 | F1:0.7758
100%|████████████████████████████████████████████████████████████████████████████████| 214/214 [01:56<00:00,  1.83it/s]
2022-05-07 17:18:04,897 INFO: Epoch:[046/150]
2022-05-07 17:18:04,898 INFO: Train Loss:0.144 | Acc:0.9617 | F1:0.8448
2022-05-07 17:18:13,641 INFO: val Loss:0.193 | Acc:0.9498 | F1:0.7652
100%|████████████████████████████████████████████████████████████████████████████████| 214/214 [01:56<00:00,  1.83it/s]
2022-05-07 17:20:10,559 INFO: Epoch:[047/150]
2022-05-07 17:20:10,559 INFO: Train Loss:0.134 | Acc:0.9623 | F1:0.8598
2022-05-07 17:20:19,302 INFO: val Loss:0.182 | Acc:0.9568 | F1:0.8231
100%|███████████████████████████████████████████████████████████████████████

100%|████████████████████████████████████████████████████████████████████████████████| 214/214 [01:57<00:00,  1.83it/s]
2022-05-07 18:10:35,956 INFO: Epoch:[071/150]
2022-05-07 18:10:35,956 INFO: Train Loss:0.080 | Acc:0.9778 | F1:0.9174
2022-05-07 18:10:44,691 INFO: val Loss:0.220 | Acc:0.9626 | F1:0.7990
100%|████████████████████████████████████████████████████████████████████████████████| 214/214 [01:56<00:00,  1.83it/s]
2022-05-07 18:12:41,450 INFO: Epoch:[072/150]
2022-05-07 18:12:41,450 INFO: Train Loss:0.083 | Acc:0.9757 | F1:0.9123
2022-05-07 18:12:50,179 INFO: val Loss:0.150 | Acc:0.9603 | F1:0.8103
100%|████████████████████████████████████████████████████████████████████████████████| 214/214 [01:56<00:00,  1.84it/s]
2022-05-07 18:14:46,562 INFO: Epoch:[073/150]
2022-05-07 18:14:46,563 INFO: Train Loss:0.077 | Acc:0.9798 | F1:0.9273
2022-05-07 18:14:55,279 INFO: val Loss:0.166 | Acc:0.9638 | F1:0.8252
100%|███████████████████████████████████████████████████████████████████████

<---- Training Params ---->
Read train_df.csv
Dataset size:3421
Dataset size:856


2022-05-07 18:33:47,393 INFO: Loading pretrained weights from url (https://dl.fbaipublicfiles.com/deit/regnety_160-a5fe301d.pth)
2022-05-07 18:33:47,768 INFO: Computational complexity:       15.93 GMac
2022-05-07 18:33:47,768 INFO: Number of parameters:           80.83 M 
100%|████████████████████████████████████████████████████████████████████████████████| 214/214 [01:57<00:00,  1.83it/s]
2022-05-07 18:35:44,888 INFO: Epoch:[001/150]
2022-05-07 18:35:44,889 INFO: Train Loss:4.484 | Acc:0.0111 | F1:0.0041
2022-05-07 18:35:53,771 INFO: val Loss:4.502 | Acc:0.0000 | F1:0.0000
2022-05-07 18:35:55,545 INFO: -----------------SAVE:1epoch----------------
100%|████████████████████████████████████████████████████████████████████████████████| 214/214 [01:59<00:00,  1.80it/s]
2022-05-07 18:37:54,742 INFO: Epoch:[002/150]
2022-05-07 18:37:54,742 INFO: Train Loss:4.468 | Acc:0.0120 | F1:0.0040
2022-05-07 18:38:03,664 INFO: val Loss:4.486 | Acc:0.0000 | F1:0.0000
2022-05-07 18:38:05,495 INFO: ------

2022-05-07 19:18:34,506 INFO: -----------------SAVE:21epoch----------------
100%|████████████████████████████████████████████████████████████████████████████████| 214/214 [01:56<00:00,  1.83it/s]
2022-05-07 19:20:31,311 INFO: Epoch:[022/150]
2022-05-07 19:20:31,312 INFO: Train Loss:0.338 | Acc:0.9129 | F1:0.5566
2022-05-07 19:20:40,079 INFO: val Loss:0.367 | Acc:0.9159 | F1:0.5434
100%|████████████████████████████████████████████████████████████████████████████████| 214/214 [01:57<00:00,  1.82it/s]
2022-05-07 19:22:37,658 INFO: Epoch:[023/150]
2022-05-07 19:22:37,658 INFO: Train Loss:0.321 | Acc:0.9179 | F1:0.5982
2022-05-07 19:22:46,407 INFO: val Loss:0.314 | Acc:0.9229 | F1:0.5676
2022-05-07 19:22:48,270 INFO: -----------------SAVE:23epoch----------------
100%|████████████████████████████████████████████████████████████████████████████████| 214/214 [01:57<00:00,  1.83it/s]
2022-05-07 19:24:45,369 INFO: Epoch:[024/150]
2022-05-07 19:24:45,369 INFO: Train Loss:0.298 | Acc:0.9199 | F1:0

2022-05-07 20:11:14,786 INFO: -----------------SAVE:46epoch----------------
100%|████████████████████████████████████████████████████████████████████████████████| 214/214 [01:57<00:00,  1.83it/s]
2022-05-07 20:13:12,015 INFO: Epoch:[047/150]
2022-05-07 20:13:12,015 INFO: Train Loss:0.129 | Acc:0.9626 | F1:0.8555
2022-05-07 20:13:20,766 INFO: val Loss:0.207 | Acc:0.9568 | F1:0.7620
100%|████████████████████████████████████████████████████████████████████████████████| 214/214 [01:56<00:00,  1.83it/s]
2022-05-07 20:15:17,630 INFO: Epoch:[048/150]
2022-05-07 20:15:17,631 INFO: Train Loss:0.136 | Acc:0.9632 | F1:0.8662
2022-05-07 20:15:26,393 INFO: val Loss:0.254 | Acc:0.9428 | F1:0.6863
100%|████████████████████████████████████████████████████████████████████████████████| 214/214 [01:56<00:00,  1.83it/s]
2022-05-07 20:17:23,344 INFO: Epoch:[049/150]
2022-05-07 20:17:23,345 INFO: Train Loss:0.150 | Acc:0.9643 | F1:0.8542
2022-05-07 20:17:32,097 INFO: val Loss:0.224 | Acc:0.9498 | F1:0.6978


2022-05-07 21:05:50,612 INFO: Total Process time:152.047Minute
2022-05-07 21:05:50,613 INFO: {'exp_num': '2', 'data_path': './data', 'Kfold': 5, 'model_path': 'results/', 'encoder_name': 'regnety_160', 'drop_path_rate': 0.2, 'img_size': 224, 'batch_size': 16, 'epochs': 150, 'optimizer': 'Lamb', 'initial_lr': 5e-06, 'weight_decay': 0.001, 'aug_ver': 2, 'scheduler': 'cycle', 'warm_epoch': 5, 'max_lr': 0.001, 'min_lr': 5e-06, 'tmax': 145, 'patience': 20, 'clipping': None, 'amp': True, 'multi_gpu': False, 'logging': False, 'num_workers': 0, 'seed': 42, 'fold': 2}


<---- Training Params ---->
Read train_df.csv
Dataset size:3422
Dataset size:855


2022-05-07 21:05:51,448 INFO: Loading pretrained weights from url (https://dl.fbaipublicfiles.com/deit/regnety_160-a5fe301d.pth)
2022-05-07 21:05:51,817 INFO: Computational complexity:       15.93 GMac
2022-05-07 21:05:51,818 INFO: Number of parameters:           80.83 M 
100%|████████████████████████████████████████████████████████████████████████████████| 214/214 [01:59<00:00,  1.79it/s]
2022-05-07 21:07:51,364 INFO: Epoch:[001/150]
2022-05-07 21:07:51,365 INFO: Train Loss:4.482 | Acc:0.0091 | F1:0.0043
2022-05-07 21:08:01,889 INFO: val Loss:4.496 | Acc:0.0012 | F1:0.0002
2022-05-07 21:08:03,687 INFO: -----------------SAVE:1epoch----------------
100%|████████████████████████████████████████████████████████████████████████████████| 214/214 [02:00<00:00,  1.78it/s]
2022-05-07 21:10:03,880 INFO: Epoch:[002/150]
2022-05-07 21:10:03,880 INFO: Train Loss:4.470 | Acc:0.0091 | F1:0.0041
2022-05-07 21:10:13,156 INFO: val Loss:4.485 | Acc:0.0023 | F1:0.0007
2022-05-07 21:10:14,956 INFO: ------

2022-05-07 21:52:02,877 INFO: -----------------SAVE:21epoch----------------
100%|████████████████████████████████████████████████████████████████████████████████| 214/214 [02:00<00:00,  1.78it/s]
2022-05-07 21:54:03,040 INFO: Epoch:[022/150]
2022-05-07 21:54:03,041 INFO: Train Loss:0.326 | Acc:0.9138 | F1:0.5531
2022-05-07 21:54:12,269 INFO: val Loss:0.326 | Acc:0.9181 | F1:0.5639
100%|████████████████████████████████████████████████████████████████████████████████| 214/214 [02:00<00:00,  1.78it/s]
2022-05-07 21:56:12,433 INFO: Epoch:[023/150]
2022-05-07 21:56:12,434 INFO: Train Loss:0.314 | Acc:0.9141 | F1:0.5775
2022-05-07 21:56:21,692 INFO: val Loss:0.255 | Acc:0.9275 | F1:0.5768
2022-05-07 21:56:23,406 INFO: -----------------SAVE:23epoch----------------
100%|████████████████████████████████████████████████████████████████████████████████| 214/214 [01:59<00:00,  1.79it/s]
2022-05-07 21:58:23,253 INFO: Epoch:[024/150]
2022-05-07 21:58:23,254 INFO: Train Loss:0.302 | Acc:0.9220 | F1:0

100%|████████████████████████████████████████████████████████████████████████████████| 214/214 [02:02<00:00,  1.75it/s]
2022-05-07 22:48:21,136 INFO: Epoch:[047/150]
2022-05-07 22:48:21,137 INFO: Train Loss:0.140 | Acc:0.9638 | F1:0.8511
2022-05-07 22:48:30,506 INFO: val Loss:0.133 | Acc:0.9696 | F1:0.8233
2022-05-07 22:48:32,514 INFO: -----------------SAVE:47epoch----------------
100%|████████████████████████████████████████████████████████████████████████████████| 214/214 [02:01<00:00,  1.76it/s]
2022-05-07 22:50:33,844 INFO: Epoch:[048/150]
2022-05-07 22:50:33,845 INFO: Train Loss:0.140 | Acc:0.9649 | F1:0.8621
2022-05-07 22:50:43,108 INFO: val Loss:0.186 | Acc:0.9567 | F1:0.7555
100%|████████████████████████████████████████████████████████████████████████████████| 214/214 [02:01<00:00,  1.77it/s]
2022-05-07 22:52:44,144 INFO: Epoch:[049/150]
2022-05-07 22:52:44,144 INFO: Train Loss:0.129 | Acc:0.9673 | F1:0.8713
2022-05-07 22:52:53,404 INFO: val Loss:0.135 | Acc:0.9626 | F1:0.7995


100%|████████████████████████████████████████████████████████████████████████████████| 214/214 [02:10<00:00,  1.63it/s]
2022-05-07 23:46:42,528 INFO: Epoch:[073/150]
2022-05-07 23:46:42,529 INFO: Train Loss:0.075 | Acc:0.9798 | F1:0.9272
2022-05-07 23:46:52,527 INFO: val Loss:0.164 | Acc:0.9591 | F1:0.8119
100%|████████████████████████████████████████████████████████████████████████████████| 214/214 [02:12<00:00,  1.62it/s]
2022-05-07 23:49:04,751 INFO: Epoch:[074/150]
2022-05-07 23:49:04,752 INFO: Train Loss:0.079 | Acc:0.9807 | F1:0.9291
2022-05-07 23:49:14,798 INFO: val Loss:0.161 | Acc:0.9626 | F1:0.8135
100%|████████████████████████████████████████████████████████████████████████████████| 214/214 [02:14<00:00,  1.59it/s]
2022-05-07 23:51:29,381 INFO: Epoch:[075/150]
2022-05-07 23:51:29,382 INFO: Train Loss:0.087 | Acc:0.9784 | F1:0.9237
2022-05-07 23:51:39,403 INFO: val Loss:0.176 | Acc:0.9626 | F1:0.8282
100%|███████████████████████████████████████████████████████████████████████

100%|████████████████████████████████████████████████████████████████████████████████| 214/214 [02:07<00:00,  1.68it/s]
2022-05-08 00:47:29,636 INFO: Epoch:[099/150]
2022-05-08 00:47:29,636 INFO: Train Loss:0.036 | Acc:0.9898 | F1:0.9629
2022-05-08 00:47:39,258 INFO: val Loss:0.177 | Acc:0.9661 | F1:0.8482
100%|████████████████████████████████████████████████████████████████████████████████| 214/214 [02:12<00:00,  1.61it/s]
2022-05-08 00:49:51,863 INFO: Epoch:[100/150]
2022-05-08 00:49:51,864 INFO: Train Loss:0.040 | Acc:0.9892 | F1:0.9514
2022-05-08 00:50:02,051 INFO: val Loss:0.163 | Acc:0.9614 | F1:0.8165
100%|████████████████████████████████████████████████████████████████████████████████| 214/214 [02:09<00:00,  1.65it/s]
2022-05-08 00:52:11,598 INFO: Epoch:[101/150]
2022-05-08 00:52:11,598 INFO: Train Loss:0.044 | Acc:0.9895 | F1:0.9658
2022-05-08 00:52:22,212 INFO: val Loss:0.169 | Acc:0.9626 | F1:0.8131
100%|███████████████████████████████████████████████████████████████████████

<---- Training Params ---->
Read train_df.csv
Dataset size:3422
Dataset size:855


2022-05-08 01:04:21,130 INFO: Loading pretrained weights from url (https://dl.fbaipublicfiles.com/deit/regnety_160-a5fe301d.pth)
2022-05-08 01:04:21,523 INFO: Computational complexity:       15.93 GMac
2022-05-08 01:04:21,524 INFO: Number of parameters:           80.83 M 
100%|████████████████████████████████████████████████████████████████████████████████| 214/214 [02:01<00:00,  1.75it/s]
2022-05-08 01:06:23,516 INFO: Epoch:[001/150]
2022-05-08 01:06:23,516 INFO: Train Loss:4.488 | Acc:0.0067 | F1:0.0021
2022-05-08 01:06:32,789 INFO: val Loss:4.486 | Acc:0.0012 | F1:0.0003
2022-05-08 01:06:34,532 INFO: -----------------SAVE:1epoch----------------
100%|████████████████████████████████████████████████████████████████████████████████| 214/214 [02:03<00:00,  1.73it/s]
2022-05-08 01:08:38,126 INFO: Epoch:[002/150]
2022-05-08 01:08:38,126 INFO: Train Loss:4.467 | Acc:0.0108 | F1:0.0027
2022-05-08 01:08:47,567 INFO: val Loss:4.480 | Acc:0.0000 | F1:0.0000
2022-05-08 01:08:49,608 INFO: ------

100%|████████████████████████████████████████████████████████████████████████████████| 214/214 [02:03<00:00,  1.74it/s]
2022-05-08 01:53:28,597 INFO: Epoch:[022/150]
2022-05-08 01:53:28,597 INFO: Train Loss:0.346 | Acc:0.9097 | F1:0.5377
2022-05-08 01:53:37,877 INFO: val Loss:0.306 | Acc:0.9099 | F1:0.5423
2022-05-08 01:53:39,848 INFO: -----------------SAVE:22epoch----------------
100%|████████████████████████████████████████████████████████████████████████████████| 214/214 [02:02<00:00,  1.75it/s]
2022-05-08 01:55:42,247 INFO: Epoch:[023/150]
2022-05-08 01:55:42,247 INFO: Train Loss:0.300 | Acc:0.9158 | F1:0.5900
2022-05-08 01:55:51,491 INFO: val Loss:0.282 | Acc:0.9275 | F1:0.6071
2022-05-08 01:55:53,484 INFO: -----------------SAVE:23epoch----------------
100%|████████████████████████████████████████████████████████████████████████████████| 214/214 [02:02<00:00,  1.75it/s]
2022-05-08 01:57:55,631 INFO: Epoch:[024/150]
2022-05-08 01:57:55,632 INFO: Train Loss:0.289 | Acc:0.9246 | F1:0

100%|████████████████████████████████████████████████████████████████████████████████| 214/214 [02:21<00:00,  1.51it/s]
2022-05-08 02:47:10,674 INFO: Epoch:[046/150]
2022-05-08 02:47:10,674 INFO: Train Loss:0.133 | Acc:0.9641 | F1:0.8620
2022-05-08 02:47:21,915 INFO: val Loss:0.197 | Acc:0.9485 | F1:0.7109
100%|████████████████████████████████████████████████████████████████████████████████| 214/214 [02:18<00:00,  1.54it/s]
2022-05-08 02:49:40,822 INFO: Epoch:[047/150]
2022-05-08 02:49:40,823 INFO: Train Loss:0.127 | Acc:0.9638 | F1:0.8481
2022-05-08 02:49:51,051 INFO: val Loss:0.181 | Acc:0.9532 | F1:0.7417
100%|████████████████████████████████████████████████████████████████████████████████| 214/214 [02:03<00:00,  1.73it/s]
2022-05-08 02:51:54,555 INFO: Epoch:[048/150]
2022-05-08 02:51:54,556 INFO: Train Loss:0.149 | Acc:0.9594 | F1:0.8385
2022-05-08 02:52:03,926 INFO: val Loss:0.163 | Acc:0.9544 | F1:0.7770
100%|███████████████████████████████████████████████████████████████████████

100%|████████████████████████████████████████████████████████████████████████████████| 214/214 [02:06<00:00,  1.69it/s]
2022-05-08 03:48:47,117 INFO: Epoch:[072/150]
2022-05-08 03:48:47,117 INFO: Train Loss:0.071 | Acc:0.9839 | F1:0.9463
2022-05-08 03:48:57,066 INFO: val Loss:0.237 | Acc:0.9462 | F1:0.8048
100%|████████████████████████████████████████████████████████████████████████████████| 214/214 [02:11<00:00,  1.63it/s]
2022-05-08 03:51:08,155 INFO: Epoch:[073/150]
2022-05-08 03:51:08,155 INFO: Train Loss:0.082 | Acc:0.9795 | F1:0.9311
2022-05-08 03:51:18,381 INFO: val Loss:0.161 | Acc:0.9626 | F1:0.7938
100%|████████████████████████████████████████████████████████████████████████████████| 214/214 [02:05<00:00,  1.70it/s]
2022-05-08 03:53:24,208 INFO: Epoch:[074/150]
2022-05-08 03:53:24,209 INFO: Train Loss:0.069 | Acc:0.9804 | F1:0.9177
2022-05-08 03:53:33,121 INFO: val Loss:0.212 | Acc:0.9591 | F1:0.8119
2022-05-08 03:53:33,122 INFO: 
Best Val Epoch:54 | Val Loss:0.1535 | Val Acc

<---- Training Params ---->
Read train_df.csv
Dataset size:3422
Dataset size:855


2022-05-08 03:53:33,963 INFO: Loading pretrained weights from url (https://dl.fbaipublicfiles.com/deit/regnety_160-a5fe301d.pth)
2022-05-08 03:53:34,358 INFO: Computational complexity:       15.93 GMac
2022-05-08 03:53:34,358 INFO: Number of parameters:           80.83 M 
100%|████████████████████████████████████████████████████████████████████████████████| 214/214 [02:01<00:00,  1.77it/s]
2022-05-08 03:55:35,561 INFO: Epoch:[001/150]
2022-05-08 03:55:35,562 INFO: Train Loss:4.489 | Acc:0.0064 | F1:0.0024
2022-05-08 03:55:44,465 INFO: val Loss:4.496 | Acc:0.0023 | F1:0.0014
2022-05-08 03:55:46,968 INFO: -----------------SAVE:1epoch----------------
100%|████████████████████████████████████████████████████████████████████████████████| 214/214 [02:02<00:00,  1.75it/s]
2022-05-08 03:57:49,120 INFO: Epoch:[002/150]
2022-05-08 03:57:49,121 INFO: Train Loss:4.469 | Acc:0.0172 | F1:0.0044
2022-05-08 03:57:58,135 INFO: val Loss:4.483 | Acc:0.0023 | F1:0.0027
2022-05-08 03:58:00,020 INFO: ------

100%|████████████████████████████████████████████████████████████████████████████████| 214/214 [01:56<00:00,  1.84it/s]
2022-05-08 04:41:43,973 INFO: Epoch:[022/150]
2022-05-08 04:41:43,973 INFO: Train Loss:0.319 | Acc:0.9176 | F1:0.5777
2022-05-08 04:41:52,716 INFO: val Loss:0.276 | Acc:0.9216 | F1:0.5803
2022-05-08 04:41:54,578 INFO: -----------------SAVE:22epoch----------------
100%|████████████████████████████████████████████████████████████████████████████████| 214/214 [01:56<00:00,  1.83it/s]
2022-05-08 04:43:51,213 INFO: Epoch:[023/150]
2022-05-08 04:43:51,214 INFO: Train Loss:0.306 | Acc:0.9217 | F1:0.6069
2022-05-08 04:44:00,032 INFO: val Loss:0.312 | Acc:0.9216 | F1:0.5967
100%|████████████████████████████████████████████████████████████████████████████████| 214/214 [01:56<00:00,  1.84it/s]
2022-05-08 04:45:56,619 INFO: Epoch:[024/150]
2022-05-08 04:45:56,620 INFO: Train Loss:0.269 | Acc:0.9255 | F1:0.6174
2022-05-08 04:46:05,444 INFO: val Loss:0.298 | Acc:0.9216 | F1:0.5600


100%|████████████████████████████████████████████████████████████████████████████████| 214/214 [01:56<00:00,  1.84it/s]
2022-05-08 05:34:10,412 INFO: Epoch:[047/150]
2022-05-08 05:34:10,413 INFO: Train Loss:0.125 | Acc:0.9690 | F1:0.8793
2022-05-08 05:34:19,171 INFO: val Loss:0.254 | Acc:0.9462 | F1:0.7142
100%|████████████████████████████████████████████████████████████████████████████████| 214/214 [01:56<00:00,  1.84it/s]
2022-05-08 05:36:15,636 INFO: Epoch:[048/150]
2022-05-08 05:36:15,636 INFO: Train Loss:0.133 | Acc:0.9641 | F1:0.8524
2022-05-08 05:36:24,406 INFO: val Loss:0.219 | Acc:0.9509 | F1:0.7064
100%|████████████████████████████████████████████████████████████████████████████████| 214/214 [01:56<00:00,  1.83it/s]
2022-05-08 05:38:21,147 INFO: Epoch:[049/150]
2022-05-08 05:38:21,147 INFO: Train Loss:0.142 | Acc:0.9646 | F1:0.8612
2022-05-08 05:38:29,952 INFO: val Loss:0.183 | Acc:0.9626 | F1:0.7868
2022-05-08 05:38:31,932 INFO: -----------------SAVE:49epoch----------------


100%|██████████████████████████████████████████████████████████████████████████████████| 34/34 [00:21<00:00,  1.57it/s]
2022-05-08 06:22:28,450 INFO: Loading pretrained weights from url (https://dl.fbaipublicfiles.com/deit/regnety_160-a5fe301d.pth)
100%|██████████████████████████████████████████████████████████████████████████████████| 34/34 [00:21<00:00,  1.59it/s]


In [12]:
# For submission: turn the ensembled scores into class-name strings and store
# them in the 'label' column of the submission frame.
# NOTE(review): assumes `ensemble` is a 2-D (n_samples, n_classes) score array
# whose column order matches `labels` below -- confirm against the label
# encoding used at training time.
labels = [
    'bottle-broken_large', 'bottle-broken_small', 'bottle-contamination', 'bottle-good',
    'cable-bent_wire', 'cable-cable_swap', 'cable-combined', 'cable-cut_inner_insulation',
    'cable-cut_outer_insulation', 'cable-good', 'cable-missing_cable', 'cable-missing_wire',
    'cable-poke_insulation',
    'capsule-crack', 'capsule-faulty_imprint', 'capsule-good', 'capsule-poke',
    'capsule-scratch', 'capsule-squeeze',
    'carpet-color', 'carpet-cut', 'carpet-good', 'carpet-hole', 'carpet-metal_contamination',
    'carpet-thread',
    'grid-bent', 'grid-broken', 'grid-glue', 'grid-good', 'grid-metal_contamination',
    'grid-thread',
    'hazelnut-crack', 'hazelnut-cut', 'hazelnut-good', 'hazelnut-hole', 'hazelnut-print',
    'leather-color', 'leather-cut', 'leather-fold', 'leather-glue', 'leather-good',
    'leather-poke',
    'metal_nut-bent', 'metal_nut-color', 'metal_nut-flip', 'metal_nut-good',
    'metal_nut-scratch',
    'pill-color', 'pill-combined', 'pill-contamination', 'pill-crack', 'pill-faulty_imprint',
    'pill-good', 'pill-pill_type', 'pill-scratch',
    'screw-good', 'screw-manipulated_front', 'screw-scratch_head', 'screw-scratch_neck',
    'screw-thread_side', 'screw-thread_top',
    'tile-crack', 'tile-glue_strip', 'tile-good', 'tile-gray_stroke', 'tile-oil', 'tile-rough',
    'toothbrush-defective', 'toothbrush-good',
    'transistor-bent_lead', 'transistor-cut_lead', 'transistor-damaged_case',
    'transistor-good', 'transistor-misplaced',
    'wood-color', 'wood-combined', 'wood-good', 'wood-hole', 'wood-liquid', 'wood-scratch',
    'zipper-broken_teeth', 'zipper-combined', 'zipper-fabric_border',
    'zipper-fabric_interior', 'zipper-good', 'zipper-rough', 'zipper-split_teeth',
    'zipper-squeezed_teeth',
]
# Class id -> class name lookup (kept under its original name for later use).
original_labels = dict(enumerate(labels))
# Write the names straight into the column by *name*: the previous version
# wrote ids by position (iloc[:, 1]) and then replaced them via sub['label'],
# which silently depended on 'label' being the second column.
sub['label'] = [labels[class_id] for class_id in ensemble.argmax(axis=1)]
sub

Unnamed: 0,index,label
0,0,tile-glue_strip
1,1,grid-good
2,2,transistor-good
3,3,tile-gray_stroke
4,4,tile-good
...,...,...
2149,2149,tile-gray_stroke
2150,2150,screw-good
2151,2151,grid-good
2152,2152,cable-good


In [13]:
# Persist the submission frame as CSV; index=False leaves the row index out.
submission_csv = './data/0507_2_esb_submission.csv'
sub.to_csv(submission_csv, index=False)

In [14]:
# Number of samples predicted as normal, i.e. whose label ends with 'good'.
# A vectorised suffix test replaces the per-row loop over sub['label'][i]:
# that lookup was label-based, so it was only correct for a default 0..n-1
# index, and it paid Python-level overhead for every row.
good_cnt = int(sub['label'].str.endswith('good').sum())
print(good_cnt)

1116


In [15]:
# Record the batch_size, epochs and img_size the model was trained with.
for hp_name in ('batch_size', 'epochs', 'img_size'):
    print(hp_name, '=', getattr(args, hp_name))

batch_size = 16
epochs = 150
img_size = 224


In [16]:
# Record the encoder name configured in args.
print(f'model = {args.encoder_name}')

model = regnety_160
