In [1]:
!nvidia-smi

Sun Aug  1 02:32:40 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.42.01    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   35C    P0    26W / 300W |      0MiB / 16160MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

## Directory 설정, 구글 드라이브 import

In [2]:
# Base directory used by this notebook (hard-coded absolute path).
# NOTE(review): the path sits under /content/drive, so presumably Google Drive has to be
# mounted there first; no mount call is visible in this cell — confirm.
cur_dir = '/content/drive/MyDrive/KLUE_TC'

## Utils

In [3]:
!pip install adamp
!pip install git+https://github.com/GY-Jeong/transformers

Collecting git+https://github.com/GY-Jeong/transformers
  Cloning https://github.com/GY-Jeong/transformers to /tmp/pip-req-build-lv5r9mrz
  Running command git clone -q https://github.com/GY-Jeong/transformers /tmp/pip-req-build-lv5r9mrz
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
    Preparing wheel metadata ... [?25l[?25hdone


In [4]:
import os
import random
import torch
import numpy as np
from torch import nn

from torch.optim import Adam, AdamW, SGD
from adamp import AdamP
from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau, CosineAnnealingLR, ExponentialLR, CosineAnnealingWarmRestarts

from transformers import AutoConfig, AutoTokenizer, AutoModelForSequenceClassification
from transformers import get_linear_schedule_with_warmup


def set_seeds(seed=42):
    """Seed every random number generator used here so repeated runs give the same results.

    Args:
        seed: Value written to the ``PYTHONHASHSEED`` environment variable and used to
            seed Python's ``random``, NumPy and PyTorch (CPU, current and all CUDA devices).
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    # Feed the same seed to each library's generator.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed,
                    torch.cuda.manual_seed, torch.cuda.manual_seed_all):
        seed_fn(seed)

    # Request deterministic cuDNN kernels and switch off its benchmark mode.
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False


def save_checkpoint(state, model_dir, model_filename):
    """Serialize ``state`` with ``torch.save`` to ``model_dir/model_filename``.

    Args:
        state: Object to serialize (``run`` in this file passes a dict with the keys
            ``'epoch'`` and ``'state_dict'``).
        model_dir: Target directory; created together with missing parents if needed.
        model_filename: Name of the checkpoint file inside ``model_dir``.
    """
    print('saving model ...')
    # exist_ok=True replaces the separate exists() check, which could race with
    # another process creating the directory between the check and makedirs().
    os.makedirs(model_dir, exist_ok=True)
    torch.save(state, os.path.join(model_dir, model_filename))


def get_optimizer(model, args):
    """Build the optimizer selected by ``args.optimizer`` for ``model``'s parameters.

    Args:
        model: Module whose ``parameters()`` are optimized.
        args: Namespace providing ``optimizer`` (one of ``'adam'``, ``'adamW'``,
            ``'adamP'``, ``'SGD'``) and ``lr``; additionally ``weight_decay`` for the
            Adam variants or ``momentum`` for SGD.

    Returns:
        The constructed optimizer, with gradients already zeroed.

    Raises:
        ValueError: If ``args.optimizer`` is not one of the supported names.
    """
    if args.optimizer == 'adam':
        optimizer = Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    elif args.optimizer == 'adamW':
        optimizer = AdamW(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    elif args.optimizer == 'adamP':
        optimizer = AdamP(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    elif args.optimizer == 'SGD':
        # Note: this branch uses momentum and does not pass weight_decay.
        optimizer = SGD(model.parameters(), lr=args.lr, momentum=args.momentum)
    else:
        # Previously an unknown name fell through to an UnboundLocalError below.
        raise ValueError(
            f"Unsupported optimizer {args.optimizer!r}; "
            "expected one of 'adam', 'adamW', 'adamP', 'SGD'"
        )

    # Start from zeroed gradients for every parameter.
    optimizer.zero_grad()

    return optimizer


def get_scheduler(optimizer, args):
    """Build the learning-rate scheduler selected by ``args.scheduler``.

    Args:
        optimizer: Optimizer whose learning rate is scheduled.
        args: Namespace providing ``scheduler`` plus the hyper-parameters read by the
            chosen branch:
            ``'plateau'`` -> ``plateau_patience``, ``plateau_factor`` (mode is ``'max'``);
            ``'linear_warmup'`` -> ``warmup_steps``, ``total_steps``;
            ``'step_lr'`` -> ``step_size``, ``gamma``;
            ``'exp_lr'`` -> ``gamma``;
            ``'cosine_annealing'`` -> ``t_max``, ``eta_min``;
            ``'cosine_annealing_warmstart'`` -> ``T_0``, ``T_mult``, ``eta_min``.

    Returns:
        The constructed scheduler.

    Raises:
        ValueError: If ``args.scheduler`` is not one of the supported names.
    """
    if args.scheduler == 'plateau':
        scheduler = ReduceLROnPlateau(optimizer, patience=args.plateau_patience, factor=args.plateau_factor, mode='max',
                                      verbose=True)
    elif args.scheduler == 'linear_warmup':
        scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=args.warmup_steps,
                                                    num_training_steps=args.total_steps)
    elif args.scheduler == 'step_lr':
        scheduler = StepLR(optimizer, step_size=args.step_size, gamma=args.gamma)
    elif args.scheduler == 'exp_lr':
        scheduler = ExponentialLR(optimizer, gamma=args.gamma)
    elif args.scheduler == 'cosine_annealing':
        scheduler = CosineAnnealingLR(optimizer, T_max=args.t_max, eta_min=args.eta_min)
    elif args.scheduler == 'cosine_annealing_warmstart':
        scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=args.T_0, T_mult=args.T_mult, eta_min=args.eta_min,
                                                last_epoch=-1)
    else:
        # Previously an unknown name fell through to an UnboundLocalError on return.
        raise ValueError(
            f"Unsupported scheduler {args.scheduler!r}; expected one of 'plateau', "
            "'linear_warmup', 'step_lr', 'exp_lr', 'cosine_annealing', "
            "'cosine_annealing_warmstart'"
        )

    return scheduler


def update_params(loss, model, optimizer, batch_idx, max_len, args):
    """Back-propagate ``loss`` and, when due, clip gradients and step the optimizer.

    Args:
        loss: Scalar loss tensor for the current batch.
        model: Module whose gradients are clipped to ``args.clip_grad``.
        optimizer: Optimizer to step and then zero.
        batch_idx: Zero-based index of the current batch.
        max_len: Total number of batches; the last batch always triggers a step.
        args: Namespace with ``gradient_accumulation`` and ``clip_grad``; ``accum_iter``
            is read only when gradient accumulation is enabled.
    """
    if args.gradient_accumulation:
        window = args.accum_iter
        # Divide by the window size so the accumulated gradient is normalized.
        (loss / window).backward()

        batches_seen = batch_idx + 1
        window_complete = batches_seen % window == 0
        if not window_complete and batches_seen != max_len:
            # Keep accumulating; no weight update for this batch.
            return
    else:
        loss.backward()

    torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip_grad)
    optimizer.step()
    optimizer.zero_grad()


def load_tokenizer(args):
    """Load the fast tokenizer named by ``args``.

    Args:
        args: Namespace with ``tokenizer_name`` and ``model_name_or_path``; the latter
            is used when ``tokenizer_name`` is empty or ``None``.

    Returns:
        The result of ``AutoTokenizer.from_pretrained(..., use_fast=True)``.
    """
    source = args.tokenizer_name or args.model_name_or_path
    return AutoTokenizer.from_pretrained(source, use_fast=True)


def load_model(args, model_name=None, num_labels=7):
    """Rebuild the sequence-classification model and restore weights from a checkpoint.

    Args:
        args: Namespace with ``model_dir``, ``model_name``, ``config_name``,
            ``model_name_or_path`` and ``device``.
        model_name: Checkpoint file name inside ``args.model_dir``; falls back to
            ``args.model_name`` when empty or ``None``.
        num_labels: Number of output classes written into the model config
            (defaults to 7, the value that used to be hard-coded).

    Returns:
        The model on ``args.device`` with the checkpoint's ``'state_dict'`` loaded
        (``strict=True``).
    """
    if not model_name:
        model_name = args.model_name
    model_path = os.path.join(args.model_dir, model_name)
    print("Loading Model from:", model_path)
    # map_location remaps stored tensors to the target device, so a checkpoint written
    # on a GPU can still be restored on a CPU-only machine.
    load_state = torch.load(model_path, map_location=args.device)

    # Config comes from config_name when given, otherwise from the model path itself.
    config = AutoConfig.from_pretrained(args.config_name or args.model_name_or_path)
    config.num_labels = num_labels

    model = AutoModelForSequenceClassification.from_pretrained(
        args.model_name_or_path,
        from_tf=bool(".ckpt" in args.model_name_or_path),
        config=config,
    ).to(args.device)

    model.load_state_dict(load_state['state_dict'], strict=True)

    print("Loading Model from:", model_path, "...Finished.")

    return model


def get_model(args, num_labels=7):
    """Instantiate the pretrained sequence-classification model on ``args.device``.

    Args:
        args: Namespace with ``config_name``, ``model_name_or_path`` and ``device``.
        num_labels: Number of output classes written into the model config
            (defaults to 7, the value that used to be hard-coded).

    Returns:
        The model returned by ``AutoModelForSequenceClassification.from_pretrained``,
        moved to ``args.device``.
    """
    # Config comes from config_name when given, otherwise from the model path itself.
    config = AutoConfig.from_pretrained(args.config_name or args.model_name_or_path)
    config.num_labels = num_labels

    # A single device transfer is enough; the former second .to(args.device) was redundant.
    model = AutoModelForSequenceClassification.from_pretrained(
        args.model_name_or_path,
        from_tf=bool(".ckpt" in args.model_name_or_path),
        config=config,
    ).to(args.device)

    return model


def get_loaders(args, train, valid, is_inference=False):
    """Wrap the given data in ``YNAT_dataset`` objects and build their ``DataLoader``s.

    Args:
        args: Namespace with ``num_workers`` and ``batch_size``.
        train: Training data, or ``None`` to skip the training loader.
        valid: Validation data (or the test data when ``is_inference`` is true),
            or ``None`` to skip the validation loader.
        is_inference: When true, only a loader over ``valid`` is built.

    Returns:
        A single non-shuffling loader when ``is_inference`` is true; otherwise the
        tuple ``(train_loader, valid_loader)`` where either entry may be ``None``.
        Only the training loader shuffles.
    """
    def _build(data, shuffle):
        # All loaders share worker count, batch size and pinned memory.
        return torch.utils.data.DataLoader(
            YNAT_dataset(args, data, is_inference),
            num_workers=args.num_workers,
            shuffle=shuffle,
            batch_size=args.batch_size,
            pin_memory=True,
        )

    if is_inference:
        return _build(valid, False)

    train_loader = _build(train, True) if train is not None else None
    valid_loader = _build(valid, False) if valid is not None else None
    return train_loader, valid_loader


# Compute the loss for a batch (the parameter update itself is done in update_params).
def compute_loss(preds, targets, args):
    """Return the loss for ``preds`` against ``targets`` as computed by ``get_criterion``.

    Args:
        preds: Model predictions, forwarded unchanged to ``get_criterion``.
        targets: Ground-truth values, forwarded unchanged to ``get_criterion``.
        args: Namespace forwarded to ``get_criterion`` (which reads ``args.criterion``).

    Returns:
        Whatever ``get_criterion`` returns; no further reduction is applied here.
    """
    return get_criterion(preds, targets, args)


def get_criterion(pred, target, args):
    """Evaluate the loss function selected by ``args.criterion`` on ``pred`` and ``target``.

    Args:
        pred: Prediction tensor passed as the first argument of the loss.
        target: Target tensor passed as the second argument of the loss.
        args: Namespace whose ``criterion`` is one of ``'BCE'``, ``'BCELogit'``,
            ``'MSE'``, ``'L1'`` or ``'CE'``.

    Returns:
        The loss tensor. ``'CE'`` uses ``nn.CrossEntropyLoss()`` with its default
        reduction; every other choice is built with ``reduction="none"``.

    Raises:
        ValueError: If ``args.criterion`` is not one of the supported names.
    """
    # Factories rather than instances, so only the requested loss module is built.
    factories = {
        'BCE': lambda: nn.BCELoss(reduction="none"),
        'BCELogit': lambda: nn.BCEWithLogitsLoss(reduction="none"),
        'MSE': lambda: nn.MSELoss(reduction="none"),
        'L1': lambda: nn.L1Loss(reduction="none"),
        'CE': nn.CrossEntropyLoss,
    }
    try:
        make_loss = factories[args.criterion]
    except KeyError:
        # Previously an unknown name fell through to an UnboundLocalError.
        raise ValueError(
            f"Unsupported criterion {args.criterion!r}; expected one of {sorted(factories)}"
        ) from None
    return make_loss()(pred, target)


def make_vocab(args, num_categories=7):
    """Read one vocabulary file per category into a list of sets.

    The file for category ``i`` is ``args.vocab_dir + str(i) + '.txt'`` (plain string
    concatenation, so ``vocab_dir`` must already end with a separator or prefix), with
    one entry per line.

    Args:
        args: Namespace with ``vocab_dir``.
        num_categories: Number of files to read, numbered from 0 (defaults to 7,
            the value that used to be hard-coded).

    Returns:
        A list of ``num_categories`` sets of strings, indexed by category.
    """
    print("============ READ VOCABS ============")
    vocabs = []
    for i in range(num_categories):
        # `with` closes the file even if reading fails part-way through.
        with open(args.vocab_dir + str(i) + '.txt', 'r', encoding='utf-8') as f:
            # Strip only the newline: slicing with [:-1] used to cut the last character
            # off a final line that has no trailing newline.
            vocab = {line.rstrip('\n') for line in f}
        vocabs.append(vocab)
        print(f"category {i} reading end, size : {len(vocab)}")
    return vocabs

## Dataloader

In [5]:
import os
import torch
import pandas as pd


class Preprocess:
    """Reads the train and test CSV files located under ``args.data_dir``."""

    def __init__(self, args):
        """Keep ``args`` and start with no data loaded."""
        self.args = args
        # Populated by load_train_data() / load_test_data().
        self.train_data = self.test_data = None

    def load_data(self, file_name):
        """Read ``file_name`` from ``args.data_dir`` and return the frame's ``.values`` array."""
        frame = pd.read_csv(os.path.join(self.args.data_dir, file_name))
        return frame.values

    def load_train_data(self):
        """Load ``train_data.csv`` into ``self.train_data``."""
        self.train_data = self.load_data('train_data.csv')

    def load_test_data(self):
        """Load ``test_data.csv`` into ``self.test_data``."""
        self.test_data = self.load_data('test_data.csv')


class YNAT_dataset(torch.utils.data.Dataset):
    """Dataset that serves each row of ``data`` as a plain Python list of its fields."""

    def __init__(self, args, data, is_inference):
        """Store the arguments; nothing is processed up front."""
        self.args, self.data, self.is_inference = args, data, is_inference

    def __len__(self):
        """Number of rows in ``data``."""
        return len(self.data)

    def __getitem__(self, index):
        """Return the fields of row ``index`` as a list, in positional order."""
        row = self.data[index]
        element = []
        for position in range(len(row)):
            element.append(row[position])
        return element



## Trainer

In [6]:
from sklearn.metrics import accuracy_score
from torch.nn.functional import one_hot
from tqdm import tqdm
from sklearn import metrics


def run(args, tokenizer, train_data, valid_data, cv_count):
    """Train for up to ``args.n_epochs`` epochs, checkpointing on best validation accuracy.

    Args:
        args: Namespace read for ``n_epochs``, ``cv_strategy``, ``run_name``,
            ``scheduler``, ``model_dir`` and ``patience`` (plus whatever the helper
            functions called here read).
        tokenizer: Tokenizer handed to ``train`` and ``validate``.
        train_data: Training rows passed to ``get_loaders``.
        valid_data: Validation rows passed to ``get_loaders``.
        cv_count: Fold number appended to the checkpoint name when ``args.cv_strategy``
            is set.

    Returns:
        The best validation accuracy observed.
    """
    train_loader, valid_loader = get_loaders(args, train_data, valid_data)

    model = get_model(args)
    optimizer = get_optimizer(model, args)
    scheduler = get_scheduler(optimizer, args)

    best_acc = -1
    early_stopping_counter = 0
    for epoch in range(args.n_epochs):

        print(f"Start Training: Epoch {epoch + 1}")

        # With cross-validation each fold gets its own checkpoint file name.
        if args.cv_strategy:
            model_name = f"{args.run_name.split('.pt')[0]}_{cv_count}.pt"
        else:
            model_name = args.run_name

        train_acc, train_loss = train(args, model, tokenizer, train_loader, optimizer)
        acc, val_loss = validate(args, model, tokenizer, valid_loader)

        # The plateau branch reads the current rate from the optimizer instead of
        # calling scheduler.get_last_lr().
        on_plateau = args.scheduler == 'plateau'
        last_lr = optimizer.param_groups[0]['lr'] if on_plateau else scheduler.get_last_lr()[0]

        epoch_log = {"epoch": epoch, "train_loss": train_loss, "train_acc": train_acc,
                     "valid_acc": acc, "val_loss": val_loss, "learning_rate": last_lr}
        print(epoch_log)

        improved = acc > best_acc
        if not improved:
            early_stopping_counter += 1
            if early_stopping_counter >= args.patience:
                print(f'EarlyStopping counter: {early_stopping_counter} out of {args.patience}')
                break
        else:
            best_acc = acc
            # Unwrap a torch.nn.DataParallel wrapper to reach the underlying model.
            model_to_save = model.module if hasattr(model, 'module') else model
            checkpoint = {
                'epoch': epoch + 1,
                'state_dict': model_to_save.state_dict(),
            }
            save_checkpoint(checkpoint, args.model_dir, model_name)
            early_stopping_counter = 0

        # Advance the schedule; the plateau scheduler is fed the best accuracy so far.
        if on_plateau:
            scheduler.step(best_acc)
        else:
            scheduler.step()

    return best_acc


def inference(args, test_data):
    """Predict labels for ``test_data`` with each saved checkpoint and write CSV files.

    One file per checkpoint is written to ``args.output_dir`` (``output.csv``, or
    ``output_<fold>.csv`` when ``args.cv_strategy`` is set). When more than one
    checkpoint is evaluated, the per-fold logits are summed and the argmax of the sum
    is written to ``output_softvote.csv``.

    Args:
        args: Namespace read for ``cv_strategy``, ``model_name``, ``fold_num``,
            ``max_seq_len``, ``device``, ``vocab`` (indexable per category) and
            ``output_dir`` (plus whatever the helper functions called here read).
        test_data: Rows passed to ``get_loaders``; each batch is unpacked as
            ``(idx, text)``.
    """
    # The model is called with keyword inputs token_label_0_type_ids .. token_label_6_type_ids.
    num_categories = 7

    def _token_label_features(input_ids):
        """For each category, flag (1/0) every token that appears in that category's vocab."""
        per_category = [[] for _ in range(num_categories)]
        for row in input_ids:
            tokens = tokenizer.convert_ids_to_tokens(row)
            for category in range(num_categories):
                vocab = args.vocab[category]
                per_category[category].append([1 if token in vocab else 0 for token in tokens])
        return {
            f'token_label_{category}_type_ids': torch.tensor(flags).to(args.device)
            for category, flags in enumerate(per_category)
        }

    def _write_predictions(file_name, ids, labels):
        """Write ``index,topic_idx`` rows to ``args.output_dir/file_name``."""
        write_path = os.path.join(args.output_dir, file_name)
        os.makedirs(args.output_dir, exist_ok=True)
        with open(write_path, 'w', encoding='utf8') as w:
            print("writing prediction : {}".format(write_path))
            w.write("index,topic_idx\n")
            for index, p in zip(ids, labels):
                w.write('{},{}\n'.format(index, p))

    all_fold_preds = []

    if not args.cv_strategy:
        ckpt_file_names = [args.model_name]
    else:
        ckpt_file_names = [f"{args.model_name.split('.pt')[0]}_{i + 1}.pt" for i in range(args.fold_num)]

    tokenizer = load_tokenizer(args)
    # The loader does not depend on the checkpoint, so build it once for all folds.
    test_loader = get_loaders(args, None, test_data, True)

    total_ids = []
    for fold_idx, ckpt in enumerate(ckpt_file_names):
        model = load_model(args, ckpt)
        model.eval()

        total_preds = []
        total_argmax_preds = []
        total_ids = []

        for step, batch in tqdm(enumerate(test_loader), desc='Inferencing', total=len(test_loader)):
            idx, text = batch
            # NOTE(review): truncation is not requested here, so presumably texts longer
            # than max_seq_len are not shortened — confirm this is intended.
            tokenized_examples = tokenizer(
                text,
                max_length=args.max_seq_len,
                padding="max_length",
                return_tensors="pt"
            ).to(args.device)

            for key, value in _token_label_features(tokenized_examples['input_ids']).items():
                tokenized_examples[key] = value

            # No gradients are needed for prediction; skipping them avoids building
            # the autograd graph for every batch.
            with torch.no_grad():
                preds = model(**tokenized_examples)
            logits = preds['logits']
            argmax_logits = torch.argmax(logits, dim=1)

            # .cpu() works for any device value ('cuda', 'cuda:0', torch.device(...)),
            # unlike the former `args.device == 'cuda'` string comparison.
            total_preds += list(logits.detach().cpu().numpy())
            total_argmax_preds += list(argmax_logits.detach().cpu().numpy())
            total_ids += list(idx)

        all_fold_preds.append(total_preds)

        output_file_name = "output.csv" if not args.cv_strategy else f"output_{fold_idx + 1}.csv"
        _write_predictions(output_file_name, total_ids, total_argmax_preds)

    if len(all_fold_preds) > 1:
        # Soft voting ensemble: sum the logits over folds, then pick the top class.
        votes = np.sum(all_fold_preds, axis=0)
        votes = np.argmax(votes, axis=1)
        # The loader does not shuffle, so the ids of the last fold apply to every fold.
        _write_predictions("output_softvote.csv", total_ids, votes)


def train(args, model, tokenizer, train_loader, optimizer):
    """Run one training epoch and report accuracy and mean loss.

    Args:
        args: Namespace read for ``device``, ``max_seq_len``, ``vocab`` (indexable per
            category) and ``log_steps`` (plus whatever ``update_params`` reads).
        model: Model called with the tokenized batch, the per-category token flags and
            ``labels``; its output must provide ``'logits'`` and ``'loss'``.
        tokenizer: Tokenizer applied to the batch texts.
        train_loader: Iterable of batches, each unpacked as ``(idx, text, label)``.
        optimizer: Optimizer stepped through ``update_params``.

    Returns:
        Tuple ``(acc, loss_avg)``: accuracy over all batches and the mean of the
        per-batch losses.
    """
    # The model is called with keyword inputs token_label_0_type_ids .. token_label_6_type_ids.
    num_categories = 7

    def _token_label_features(input_ids):
        """For each category, flag (1/0) every token that appears in that category's vocab."""
        per_category = [[] for _ in range(num_categories)]
        for row in input_ids:
            tokens = tokenizer.convert_ids_to_tokens(row)
            for category in range(num_categories):
                vocab = args.vocab[category]
                per_category[category].append([1 if token in vocab else 0 for token in tokens])
        return {
            f'token_label_{category}_type_ids': torch.tensor(flags).to(args.device)
            for category, flags in enumerate(per_category)
        }

    model.train()

    total_preds = []
    total_targets = []
    losses = []
    for step, batch in tqdm(enumerate(train_loader), desc='Training', total=len(train_loader)):
        idx, text, label = batch
        label = label.to(args.device)
        # NOTE(review): truncation is not requested here, so presumably texts longer
        # than max_seq_len are not shortened — confirm this is intended.
        tokenized_examples = tokenizer(
            text,
            max_length=args.max_seq_len,
            padding="max_length",
            return_tensors="pt"
        ).to(args.device)

        for key, value in _token_label_features(tokenized_examples['input_ids']).items():
            tokenized_examples[key] = value

        # Passing labels makes the model return the loss alongside the logits.
        preds = model(**tokenized_examples, labels=label)
        logits = preds['logits']
        argmax_logits = torch.argmax(logits, dim=1)
        loss = preds['loss']

        update_params(loss, model, optimizer, step, len(train_loader), args)

        if step % args.log_steps == 0:
            print(f"Training steps: {step} Loss: {str(loss.item())}")

        # .cpu() works for any device value ('cuda', 'cuda:0', torch.device(...)),
        # unlike the former `args.device == 'cuda'` string comparison.
        total_preds.append(argmax_logits.detach().cpu().numpy())
        total_targets.append(label.detach().cpu().numpy())
        losses.append(loss.detach().cpu().numpy())

    total_preds = np.concatenate(total_preds)
    total_targets = np.concatenate(total_targets)

    # Epoch-level accuracy and mean batch loss.
    acc = accuracy_score(total_targets, total_preds)
    loss_avg = sum(losses) / len(losses)
    print(f'TRAIN ACC : {acc}, TRAIN LOSS : {loss_avg}')
    return acc, loss_avg


def validate(args, model, tokenizer, valid_loader):
    model.eval()

    total_preds = []
    total_targets = []
    losses = []
    for step, batch in tqdm(enumerate(valid_loader), desc='Training', total=len(valid_loader)):
        idx, text, label = batch
        label = label.to(args.device)
        tokenized_examples = tokenizer(
            text,
            max_length=args.max_seq_len,
            padding="max_length",
            return_tensors="pt"
        ).to(args.device)

        # tokenize
        # 모델의 입력으로
        # label은 one-hot?
        # loss 주고
        # argmax를 golden

        token_label_0_type_ids = []
        token_label_1_type_ids = []
        token_label_2_type_ids = []
        token_label_3_type_ids = []
        token_label_4_type_ids = []
        token_label_5_type_ids = []
        token_label_6_type_ids = []

        for row in tokenized_examples['input_ids']:
            row = tokenizer.convert_ids_to_tokens(row)
            # print(row)
            label_0 = [1 if token in args.vocab[0] else 0 for token in row]
            label_1 = [1 if token in args.vocab[1] else 0 for token in row]
            label_2 = [1 if token in args.vocab[2] else 0 for token in row]
            label_3 = [1 if token in args.vocab[3] else 0 for token in row]
            label_4 = [1 if token in args.vocab[4] else 0 for token in row]
            label_5 = [1 if token in args.vocab[5] else 0 for token in row]
            label_6 = [1 if token in args.vocab[6] else 0 for token in row]
            # print(label_2)
            token_label_0_type_ids.append(label_0)
            token_label_1_type_ids.append(label_1)
            token_label_2_type_ids.append(label_2)
            token_label_3_type_ids.append(label_3)
            token_label_4_type_ids.append(label_4)
            token_label_5_type_ids.append(label_5)
            token_label_6_type_ids.append(label_6)
            # temp = []
            # row = tokenizer.convert_ids_to_tokens(row)
            # for token in row:
            #     for i, element in enumerate(args.vocab):
            #         if token in element:
            #             temp.append(i+1)
            #             break
            #     else:
            #         temp.append(0)
            # #print(temp)
            # token_label_type_ids.append(temp)

        # token_label_type_ids = torch.tensor(token_label_type_ids).to(args.device)
        # tokenized_examples['token_label_type_ids'] = token_label_type_ids

        token_label_0_type_ids = torch.tensor(token_label_0_type_ids).to(args.device)
        token_label_1_type_ids = torch.tensor(token_label_1_type_ids).to(args.device)
        token_label_2_type_ids = torch.tensor(token_label_2_type_ids).to(args.device)
        token_label_3_type_ids = torch.tensor(token_label_3_type_ids).to(args.device)
        token_label_4_type_ids = torch.tensor(token_label_4_type_ids).to(args.device)
        token_label_5_type_ids = torch.tensor(token_label_5_type_ids).to(args.device)
        token_label_6_type_ids = torch.tensor(token_label_6_type_ids).to(args.device)

        tokenized_examples['token_label_0_type_ids'] = token_label_0_type_ids
        tokenized_examples['token_label_1_type_ids'] = token_label_1_type_ids
        tokenized_examples['token_label_2_type_ids'] = token_label_2_type_ids
        tokenized_examples['token_label_3_type_ids'] = token_label_3_type_ids
        tokenized_examples['token_label_4_type_ids'] = token_label_4_type_ids
        tokenized_examples['token_label_5_type_ids'] = token_label_5_type_ids
        tokenized_examples['token_label_6_type_ids'] = token_label_6_type_ids

        # token_label_type_ids = []
        # for row in tokenized_examples['input_ids']:
        #     temp = []
        #     row = tokenizer.convert_ids_to_tokens(row)
        #     for token in row:
        #         for i, element in enumerate(args.vocab):
        #             if token in element:
        #                 temp.append(i+1)
        #                 break
        #         else:
        #             temp.append(0)
        #     #print(temp)
        #     token_label_type_ids.append(temp)

        # token_label_type_ids = torch.tensor(token_label_type_ids).to(args.device)
        # tokenized_examples['token_label_type_ids'] = token_label_type_ids

        preds = model(**tokenized_examples, labels = label)
        logits = preds['logits']
        # logits = logits[:,0,:]
        softmax_logits = nn.Softmax(dim=1)(logits)
        argmax_logits = torch.argmax(logits, dim=1)

        # one_hot_logits = one_hot(argmax_logits, num_classes=7).float()
        # print(one_hot(argmax_logits, num_classes=7).type(torch.FloatTensor))
        # loss = compute_loss(logits,
        #                     label, args)
        loss = preds['loss']

        if step % args.log_steps == 0:
            print(f"Validation steps: {step} Loss: {str(loss.item())}")

        # if args.device == 'cuda':
        #     argmax_logits = argmax_logits.to('cpu').detach().numpy()
        #     label = label.to('cpu').detach().numpy()
        #     loss = loss.to('cpu').detach().numpy()
        #     token_label_type_ids = token_label_type_ids.to('cpu').detach().numpy()
        # else:  # cpu
        #     argmax_logits = argmax_logits.detach().numpy()
        #     label = label.detach().numpy()
        #     loss = loss.detach().numpy()
        #     token_label_type_ids = token_label_type_ids.detach().numpy()

        if args.device == 'cuda':
            argmax_logits = argmax_logits.to('cpu').detach().numpy()
            label = label.to('cpu').detach().numpy()
            loss = loss.to('cpu').detach().numpy()
            # token_label_type_ids = token_label_type_ids.to('cpu').detach().numpy()
            token_label_0_type_ids = token_label_0_type_ids.to('cpu').detach().numpy()
            token_label_1_type_ids = token_label_1_type_ids.to('cpu').detach().numpy()
            token_label_2_type_ids = token_label_2_type_ids.to('cpu').detach().numpy()
            token_label_3_type_ids = token_label_3_type_ids.to('cpu').detach().numpy()
            token_label_4_type_ids = token_label_4_type_ids.to('cpu').detach().numpy()
            token_label_5_type_ids = token_label_5_type_ids.to('cpu').detach().numpy()
            token_label_6_type_ids = token_label_6_type_ids.to('cpu').detach().numpy()
        else:  # cpu
            argmax_logits = argmax_logits.detach().numpy()
            label = label.detach().numpy()
            loss = loss.detach().numpy()
            # token_label_type_ids = token_label_type_ids.to('cpu').detach().numpy()
            token_label_0_type_ids = token_label_0_type_ids.detach().numpy()
            token_label_1_type_ids = token_label_1_type_ids.detach().numpy()
            token_label_2_type_ids = token_label_2_type_ids.detach().numpy()
            token_label_3_type_ids = token_label_3_type_ids.detach().numpy()
            token_label_4_type_ids = token_label_4_type_ids.detach().numpy()
            token_label_5_type_ids = token_label_5_type_ids.detach().numpy()
            token_label_6_type_ids = token_label_6_type_ids.detach().numpy()
        total_preds.append(argmax_logits)
        total_targets.append(label)
        losses.append(loss)

    total_preds = np.concatenate(total_preds)
    total_targets = np.concatenate(total_targets)

    # Train AUC / ACC
    target_names = ['IT과학', '경제', '사회', '생활문화', '세계', '스포츠', '정치']
    print(metrics.classification_report(total_targets, total_preds, target_names=target_names))
    matrix = metrics.confusion_matrix(total_targets, total_preds)
    print(matrix.diagonal()/matrix.sum(axis=1))

    acc = accuracy_score(total_targets, total_preds)
    loss_avg = sum(losses) / len(losses)
    print(f'VALID ACC : {acc}, VALID LOSS : {loss_avg}')
    return acc, loss_avg


## Train

In [7]:
import torch
from sklearn.model_selection import KFold, StratifiedKFold
from transformers import AutoConfig, AutoTokenizer, AutoModelForSequenceClassification
from datetime import datetime
from pytz import timezone


def main(args):
    """Train the classifier on one fold or on every cross-validation fold.

    Steps performed:
      1. Fill in ``args.run_name`` with a Seoul-time timestamp when it is empty.
      2. Seed the RNGs via ``set_seeds(args.seed)`` and store the selected
         device ("cuda" when available, otherwise "cpu") in ``args.device``.
      3. Load the tokenizer named by ``args.tokenizer_name`` (falling back to
         ``args.model_name_or_path``) and the training data via ``Preprocess``.
      4. Split the data with ``KFold`` when ``args.cv_strategy == 'random'``,
         otherwise with ``StratifiedKFold`` using the last element of every
         sample as the stratification label.
      5. Call ``run`` once per fold and print the mean of the returned values.

    When ``args.cv_strategy`` is falsy only the first (stratified) fold is
    trained and no average is printed.

    Args:
        args: Attribute-style configuration object. Reads ``run_name``,
            ``seed``, ``tokenizer_name``, ``model_name_or_path``,
            ``cv_strategy`` and ``fold_num``; writes ``run_name`` (if empty)
            and ``device``.

    Returns:
        None.
    """
    if not args.run_name:
        args.run_name = datetime.now(timezone("Asia/Seoul")).strftime("%Y-%m-%d-%H:%M:%S")

    set_seeds(args.seed)

    device = "cuda" if torch.cuda.is_available() else "cpu"
    args.device = device

    tokenizer = AutoTokenizer.from_pretrained(
        args.tokenizer_name
        if args.tokenizer_name
        else args.model_name_or_path,
        use_fast=True,
    )

    preprocess = Preprocess(args)
    preprocess.load_train_data()
    train_data_origin = preprocess.train_data

    print(f"Size of train data : {len(train_data_origin)}")

    # Pass the seed explicitly: with shuffle=True and no random_state the
    # splitter draws from the global NumPy RNG at first iteration, so the folds
    # would depend on whatever consumed random numbers before this point.
    if args.cv_strategy == 'random':
        kf = KFold(n_splits=args.fold_num, shuffle=True, random_state=args.seed)
        splits = kf.split(X=train_data_origin)
    else:
        # Default: stratified split.
        # TODO (from original author): this should be changed to use each label.
        train_labels = [sequence[-1] for sequence in train_data_origin]
        skf = StratifiedKFold(n_splits=args.fold_num, shuffle=True, random_state=args.seed)
        splits = skf.split(X=train_data_origin, y=train_labels)

    acc_avg = 0
    for fold_num, (train_index, valid_index) in enumerate(splits):
        # Index-array selection; presumably train_data_origin is a NumPy
        # array - verify against Preprocess.
        train_data = train_data_origin[train_index]
        valid_data = train_data_origin[valid_index]
        best_acc = run(args, tokenizer, train_data, valid_data, fold_num + 1)

        # Without a CV strategy a single fold is enough.
        if not args.cv_strategy:
            break

        acc_avg += best_acc

    if args.cv_strategy:
        acc_avg /= args.fold_num

        # The averaged quantity is what run() returns per fold (an accuracy
        # by its name here), so label it as such rather than as AUC.
        print("*" * 50, 'acc_avg', "*" * 50)
        print(acc_avg)


## Run

In [8]:
import argparse
import easydict

def parse_args():
    """Build the default experiment configuration.

    Despite the name, nothing is parsed from the command line: the function
    returns a hard-coded ``easydict.EasyDict`` so that settings can be read
    with attribute access (``args.lr``) in the rest of the notebook.

    All Google Drive locations are derived from the module-level ``cur_dir``
    so that changing the project root in one place moves every path.

    Returns:
        easydict.EasyDict: configuration with paths, cross-validation,
        training, optimizer, scheduler, loss and logging settings.
    """
    args = easydict.EasyDict({'run_name' : 'temp',
                             'seed':42,
                             'device' :'cuda',
                             'data_dir': cur_dir + '/data/open/',
                             'model_dir' : cur_dir + '/models',
                             'model_name_or_path' : 'klue/roberta-large',
                             'config_name' : None,
                             'tokenizer_name' : None,
                             'output_dir' : cur_dir + '/output/vocab20_multi_layer',
                             'vocab_dir' : cur_dir + '/data/vocab/vocab20/',

                             'accum_iter' : 8,
                             'gradient_accumulation' : True,

                             'cv_strategy' : 'stratified',
                             'fold_num' : 4,

                             'num_workers' : 1,

                             # Training
                             'n_epochs' : 20,
                             'batch_size' : 32,
                             'lr' : 5e-6,
                             'clip_grad' : 15,
                             'patience' : 3,
                             'max_seq_len' : 40,

                             # Optimizer
                             'optimizer' : 'adamP',

                             # Optimizer-parameters
                             'weight_decay' : 0.05,
                             'momentum' : 0.9,

                             # Scheduler
                             'scheduler' : 'step_lr',

                             # Scheduler-parameters
                             # plateau
                             'plateau_patience' : 10,
                             'plateau_factor' : 0.5,

                             't_max' : 10,
                             'T_0' : 10,
                             'T_mult' : 2,
                             # The original key carried an argparse-style "--"
                             # prefix, which attribute access (args.eta_min)
                             # cannot reach. Expose the proper name and keep
                             # the legacy key for any item-style lookups.
                             'eta_min' : 0.01,
                             '--eta_min' : 0.01,

                             # linear_warmup
                             'warmup_ratio' : 0.3,

                             # Step LR
                             'step_size' : 50,
                             'gamma' : 0.1,

                             'criterion' : 'CE',

                             'log_steps' : 100})

    return args

In [9]:
if __name__ == '__main__':
    # Entry point: build the configuration, attach the output of make_vocab
    # under the 'vocab' key, then hand everything to main().
    args = parse_args()
    args.vocab = make_vocab(args)
    main(args)

category 0 reading end, size : 1446
category 1 reading end, size : 1522
category 2 reading end, size : 2107
category 3 reading end, size : 1853
category 4 reading end, size : 2006
category 5 reading end, size : 1527
category 6 reading end, size : 1692


HBox(children=(FloatProgress(value=0.0, description='Downloading', max=337.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=547.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=248477.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=173.0, style=ProgressStyle(description_…


Size of train data : 45654


HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1346854671.0, style=ProgressStyle(descr…




Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.decoder.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['classifier.dense.weight', 'roberta.embeddings.token_label_2_

Start Training: Epoch 1


Training:   0%|          | 1/1070 [00:00<14:18,  1.25it/s]

Training steps: 0 Loss: 2.0062527656555176


Training:   9%|▉         | 101/1070 [00:54<07:14,  2.23it/s]

Training steps: 100 Loss: 1.8858485221862793


Training:  19%|█▉        | 201/1070 [01:39<06:51,  2.11it/s]

Training steps: 200 Loss: 1.4463015794754028


Training:  28%|██▊       | 301/1070 [02:25<05:37,  2.28it/s]

Training steps: 300 Loss: 1.1098557710647583


Training:  37%|███▋      | 401/1070 [03:10<05:13,  2.13it/s]

Training steps: 400 Loss: 0.982510507106781


Training:  47%|████▋     | 501/1070 [03:55<04:10,  2.27it/s]

Training steps: 500 Loss: 0.8340370655059814


Training:  56%|█████▌    | 601/1070 [04:40<03:38,  2.14it/s]

Training steps: 600 Loss: 0.6515785455703735


Training:  66%|██████▌   | 701/1070 [05:25<02:41,  2.28it/s]

Training steps: 700 Loss: 0.4163161814212799


Training:  75%|███████▍  | 801/1070 [06:10<02:04,  2.16it/s]

Training steps: 800 Loss: 0.22525319457054138


Training:  84%|████████▍ | 901/1070 [06:55<01:13,  2.31it/s]

Training steps: 900 Loss: 0.6563634872436523


Training:  94%|█████████▎| 1001/1070 [07:39<00:32,  2.15it/s]

Training steps: 1000 Loss: 0.7149391770362854


Training: 100%|██████████| 1070/1070 [08:10<00:00,  2.18it/s]

TRAIN ACC : 0.6934871495327103, TRAIN LOSS : 0.897064161857712



Training:   1%|          | 2/357 [00:00<01:02,  5.64it/s]

Validation steps: 0 Loss: 0.23588982224464417


Training:  29%|██▊       | 102/357 [00:17<00:43,  5.81it/s]

Validation steps: 100 Loss: 0.5470835566520691


Training:  57%|█████▋    | 202/357 [00:34<00:26,  5.76it/s]

Validation steps: 200 Loss: 0.7237041592597961


Training:  85%|████████▍ | 302/357 [00:52<00:09,  5.85it/s]

Validation steps: 300 Loss: 0.9513809680938721


Training: 100%|██████████| 357/357 [01:01<00:00,  5.83it/s]


              precision    recall  f1-score   support

        IT과학       0.78      0.85      0.81      1206
          경제       0.75      0.86      0.80      1555
          사회       0.86      0.57      0.69      1841
        생활문화       0.83      0.91      0.87      1483
          세계       0.92      0.83      0.87      1908
         스포츠       0.90      0.98      0.94      1734
          정치       0.83      0.91      0.87      1687

    accuracy                           0.84     11414
   macro avg       0.84      0.84      0.83     11414
weighted avg       0.84      0.84      0.83     11414

[0.84908789 0.85723473 0.56816947 0.91301416 0.82861635 0.98039216
 0.90989923]
VALID ACC : 0.8387068512353251, VALID LOSS : 0.5011296989996227
{'epoch': 0, 'train_loss': 0.897064161857712, 'train_acc': 0.6934871495327103, 'valid_acc': 0.8387068512353251, 'val_loss': 0.5011296989996227, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 2


Training:   0%|          | 1/1070 [00:00<12:31,  1.42it/s]

Training steps: 0 Loss: 0.4284224212169647


Training:   9%|▉         | 101/1070 [01:09<10:43,  1.51it/s]

Training steps: 100 Loss: 0.4983474910259247


Training:  19%|█▉        | 201/1070 [02:18<10:23,  1.39it/s]

Training steps: 200 Loss: 0.4965784549713135


Training:  28%|██▊       | 301/1070 [03:27<08:47,  1.46it/s]

Training steps: 300 Loss: 0.41886603832244873


Training:  37%|███▋      | 401/1070 [04:35<07:57,  1.40it/s]

Training steps: 400 Loss: 0.5957246422767639


Training:  47%|████▋     | 501/1070 [05:44<06:28,  1.47it/s]

Training steps: 500 Loss: 0.6140683889389038


Training:  56%|█████▌    | 601/1070 [06:52<05:34,  1.40it/s]

Training steps: 600 Loss: 0.5241502523422241


Training:  66%|██████▌   | 701/1070 [08:00<04:03,  1.51it/s]

Training steps: 700 Loss: 0.4633500277996063


Training:  75%|███████▍  | 801/1070 [09:09<03:06,  1.44it/s]

Training steps: 800 Loss: 0.43641820549964905


Training:  84%|████████▍ | 901/1070 [10:17<01:51,  1.51it/s]

Training steps: 900 Loss: 0.35129058361053467


Training:  94%|█████████▎| 1001/1070 [11:25<00:47,  1.45it/s]

Training steps: 1000 Loss: 0.4354659914970398


Training: 100%|██████████| 1070/1070 [12:13<00:00,  1.46it/s]

TRAIN ACC : 0.8439252336448598, TRAIN LOSS : 0.47142012342114314



Training:   0%|          | 1/357 [00:00<02:26,  2.42it/s]

Validation steps: 0 Loss: 0.18679681420326233


Training:  28%|██▊       | 101/357 [00:41<01:48,  2.37it/s]

Validation steps: 100 Loss: 0.5490589737892151


Training:  56%|█████▋    | 201/357 [01:22<01:01,  2.52it/s]

Validation steps: 200 Loss: 0.5991089940071106


Training:  84%|████████▍ | 301/357 [02:03<00:23,  2.38it/s]

Validation steps: 300 Loss: 0.6494219303131104


Training: 100%|██████████| 357/357 [02:26<00:00,  2.44it/s]


              precision    recall  f1-score   support

        IT과학       0.82      0.86      0.84      1206
          경제       0.90      0.77      0.83      1555
          사회       0.77      0.78      0.78      1841
        생활문화       0.87      0.90      0.88      1483
          세계       0.90      0.90      0.90      1908
         스포츠       0.97      0.96      0.97      1734
          정치       0.87      0.91      0.89      1687

    accuracy                           0.87     11414
   macro avg       0.87      0.87      0.87     11414
weighted avg       0.87      0.87      0.87     11414

[0.85903814 0.76655949 0.78435633 0.9008766  0.90408805 0.96078431
 0.91464138]
VALID ACC : 0.871035570352199, VALID LOSS : 0.3999063709557724
{'epoch': 1, 'train_loss': 0.47142012342114314, 'train_acc': 0.8439252336448598, 'valid_acc': 0.871035570352199, 'val_loss': 0.3999063709557724, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 3


Training:   0%|          | 1/1070 [00:00<12:20,  1.44it/s]

Training steps: 0 Loss: 0.5836256742477417


Training:   9%|▉         | 101/1070 [01:08<10:36,  1.52it/s]

Training steps: 100 Loss: 0.4273194670677185


Training:  19%|█▉        | 201/1070 [02:16<09:54,  1.46it/s]

Training steps: 200 Loss: 0.3654562830924988


Training:  28%|██▊       | 301/1070 [03:23<08:37,  1.49it/s]

Training steps: 300 Loss: 0.5640991926193237


Training:  37%|███▋      | 401/1070 [04:31<07:53,  1.41it/s]

Training steps: 400 Loss: 0.7192752361297607


Training:  47%|████▋     | 501/1070 [05:39<06:26,  1.47it/s]

Training steps: 500 Loss: 0.45923563838005066


Training:  56%|█████▌    | 601/1070 [06:47<05:31,  1.41it/s]

Training steps: 600 Loss: 0.22060640156269073


Training:  66%|██████▌   | 701/1070 [07:54<04:08,  1.49it/s]

Training steps: 700 Loss: 0.5325562953948975


Training:  75%|███████▍  | 801/1070 [09:01<03:10,  1.41it/s]

Training steps: 800 Loss: 0.35375702381134033


Training:  84%|████████▍ | 901/1070 [10:09<01:50,  1.52it/s]

Training steps: 900 Loss: 0.3396657705307007


Training:  94%|█████████▎| 1001/1070 [11:17<00:47,  1.47it/s]

Training steps: 1000 Loss: 0.07875685393810272


Training: 100%|██████████| 1070/1070 [12:04<00:00,  1.48it/s]

TRAIN ACC : 0.8644275700934579, TRAIN LOSS : 0.4082957772381395



Training:   0%|          | 1/357 [00:00<02:22,  2.49it/s]

Validation steps: 0 Loss: 0.21230825781822205


Training:  28%|██▊       | 101/357 [00:40<01:46,  2.41it/s]

Validation steps: 100 Loss: 0.5204907655715942


Training:  56%|█████▋    | 201/357 [01:21<01:01,  2.54it/s]

Validation steps: 200 Loss: 0.4329472482204437


Training:  84%|████████▍ | 301/357 [02:01<00:23,  2.40it/s]

Validation steps: 300 Loss: 0.5081349611282349


Training: 100%|██████████| 357/357 [02:23<00:00,  2.48it/s]


              precision    recall  f1-score   support

        IT과학       0.81      0.87      0.84      1206
          경제       0.89      0.79      0.84      1555
          사회       0.80      0.78      0.79      1841
        생활문화       0.89      0.89      0.89      1483
          세계       0.93      0.90      0.91      1908
         스포츠       0.93      0.99      0.96      1734
          정치       0.89      0.92      0.90      1687

    accuracy                           0.88     11414
   macro avg       0.88      0.88      0.88     11414
weighted avg       0.88      0.88      0.88     11414

[0.87396352 0.79356913 0.7821836  0.89143628 0.90041929 0.98615917
 0.92175459]
VALID ACC : 0.8790082355002629, VALID LOSS : 0.3724028216805361
{'epoch': 2, 'train_loss': 0.4082957772381395, 'train_acc': 0.8644275700934579, 'valid_acc': 0.8790082355002629, 'val_loss': 0.3724028216805361, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 4


Training:   0%|          | 1/1070 [00:00<12:37,  1.41it/s]

Training steps: 0 Loss: 0.313499391078949


Training:   9%|▉         | 101/1070 [01:08<10:49,  1.49it/s]

Training steps: 100 Loss: 0.19061997532844543


Training:  19%|█▉        | 201/1070 [02:15<10:07,  1.43it/s]

Training steps: 200 Loss: 0.31427866220474243


Training:  28%|██▊       | 301/1070 [03:22<08:32,  1.50it/s]

Training steps: 300 Loss: 0.2850017249584198


Training:  37%|███▋      | 401/1070 [04:30<07:48,  1.43it/s]

Training steps: 400 Loss: 0.2392338067293167


Training:  47%|████▋     | 501/1070 [05:36<06:19,  1.50it/s]

Training steps: 500 Loss: 0.26089632511138916


Training:  56%|█████▌    | 601/1070 [06:44<05:17,  1.48it/s]

Training steps: 600 Loss: 0.4488559663295746


Training:  66%|██████▌   | 701/1070 [07:50<04:00,  1.54it/s]

Training steps: 700 Loss: 0.5456345677375793


Training:  75%|███████▍  | 801/1070 [08:57<03:01,  1.48it/s]

Training steps: 800 Loss: 0.26291191577911377


Training:  84%|████████▍ | 901/1070 [10:04<01:49,  1.54it/s]

Training steps: 900 Loss: 0.5109648704528809


Training:  94%|█████████▎| 1001/1070 [11:12<00:47,  1.44it/s]

Training steps: 1000 Loss: 0.14111997187137604


Training: 100%|██████████| 1070/1070 [11:58<00:00,  1.49it/s]

TRAIN ACC : 0.8749123831775701, TRAIN LOSS : 0.37149112863448736



Training:   0%|          | 1/357 [00:00<02:30,  2.37it/s]

Validation steps: 0 Loss: 0.21907813847064972


Training:  28%|██▊       | 101/357 [00:40<01:40,  2.55it/s]

Validation steps: 100 Loss: 0.5204446315765381


Training:  56%|█████▋    | 201/357 [01:20<01:04,  2.43it/s]

Validation steps: 200 Loss: 0.5740158557891846


Training:  84%|████████▍ | 301/357 [01:59<00:21,  2.56it/s]

Validation steps: 300 Loss: 0.5400170683860779


Training: 100%|██████████| 357/357 [02:22<00:00,  2.50it/s]

              precision    recall  f1-score   support

        IT과학       0.83      0.85      0.84      1206
          경제       0.82      0.86      0.84      1555
          사회       0.85      0.70      0.77      1841
        생활문화       0.91      0.88      0.90      1483
          세계       0.91      0.93      0.92      1908
         스포츠       0.96      0.97      0.97      1734
          정치       0.84      0.95      0.90      1687

    accuracy                           0.88     11414
   macro avg       0.88      0.88      0.88     11414
weighted avg       0.88      0.88      0.88     11414

[0.85406302 0.86237942 0.69853341 0.8779501  0.92557652 0.97174164
 0.95376408]
VALID ACC : 0.8777816716313299, VALID LOSS : 0.371871475589459
{'epoch': 3, 'train_loss': 0.37149112863448736, 'train_acc': 0.8749123831775701, 'valid_acc': 0.8777816716313299, 'val_loss': 0.371871475589459, 'learning_rate': 5e-06}
Start Training: Epoch 5



Training:   0%|          | 1/1070 [00:00<12:00,  1.48it/s]

Training steps: 0 Loss: 0.5245177745819092


Training:   9%|▉         | 101/1070 [01:07<10:48,  1.49it/s]

Training steps: 100 Loss: 0.19317522644996643


Training:  19%|█▉        | 201/1070 [02:14<09:47,  1.48it/s]

Training steps: 200 Loss: 0.3496588170528412


Training:  28%|██▊       | 301/1070 [03:21<08:18,  1.54it/s]

Training steps: 300 Loss: 0.26922720670700073


Training:  37%|███▋      | 401/1070 [04:28<07:33,  1.47it/s]

Training steps: 400 Loss: 0.15170584619045258


Training:  47%|████▋     | 501/1070 [05:35<06:09,  1.54it/s]

Training steps: 500 Loss: 0.3140542805194855


Training:  56%|█████▌    | 601/1070 [06:41<05:17,  1.48it/s]

Training steps: 600 Loss: 0.4181772768497467


Training:  66%|██████▌   | 701/1070 [07:49<04:05,  1.50it/s]

Training steps: 700 Loss: 0.2707649767398834


Training:  75%|███████▍  | 801/1070 [08:56<03:07,  1.43it/s]

Training steps: 800 Loss: 0.551018476486206


Training:  84%|████████▍ | 901/1070 [10:03<01:51,  1.51it/s]

Training steps: 900 Loss: 0.3206818103790283


Training:  94%|█████████▎| 1001/1070 [11:09<00:47,  1.45it/s]

Training steps: 1000 Loss: 0.33103448152542114


Training: 100%|██████████| 1070/1070 [11:55<00:00,  1.49it/s]

TRAIN ACC : 0.8840245327102804, TRAIN LOSS : 0.34535162237899325



Training:   0%|          | 1/357 [00:00<02:30,  2.36it/s]

Validation steps: 0 Loss: 0.3049152195453644


Training:  28%|██▊       | 101/357 [00:41<01:46,  2.41it/s]

Validation steps: 100 Loss: 0.4937536120414734


Training:  56%|█████▋    | 201/357 [01:20<01:00,  2.56it/s]

Validation steps: 200 Loss: 0.5382625460624695


Training:  84%|████████▍ | 301/357 [02:01<00:22,  2.54it/s]

Validation steps: 300 Loss: 0.4125556945800781


Training: 100%|██████████| 357/357 [02:23<00:00,  2.50it/s]

              precision    recall  f1-score   support

        IT과학       0.80      0.89      0.84      1206
          경제       0.80      0.86      0.83      1555
          사회       0.86      0.69      0.77      1841
        생활문화       0.88      0.90      0.89      1483
          세계       0.93      0.91      0.92      1908
         스포츠       0.96      0.98      0.97      1734
          정치       0.89      0.93      0.91      1687

    accuracy                           0.88     11414
   macro avg       0.87      0.88      0.88     11414
weighted avg       0.88      0.88      0.88     11414

[0.89303483 0.86430868 0.69038566 0.90424815 0.9072327  0.97520185
 0.93479549]
VALID ACC : 0.8789206237953391, VALID LOSS : 0.3544741595889173
{'epoch': 4, 'train_loss': 0.34535162237899325, 'train_acc': 0.8840245327102804, 'valid_acc': 0.8789206237953391, 'val_loss': 0.3544741595889173, 'learning_rate': 5e-06}
Start Training: Epoch 6



Training:   0%|          | 1/1070 [00:00<11:39,  1.53it/s]

Training steps: 0 Loss: 0.4436620771884918


Training:   9%|▉         | 101/1070 [01:07<10:28,  1.54it/s]

Training steps: 100 Loss: 0.38090282678604126


Training:  19%|█▉        | 201/1070 [02:14<09:48,  1.48it/s]

Training steps: 200 Loss: 0.214277446269989


Training:  28%|██▊       | 301/1070 [03:21<08:19,  1.54it/s]

Training steps: 300 Loss: 0.3941366672515869


Training:  37%|███▋      | 401/1070 [04:28<07:45,  1.44it/s]

Training steps: 400 Loss: 0.09964198619127274


Training:  47%|████▋     | 501/1070 [05:35<06:19,  1.50it/s]

Training steps: 500 Loss: 0.22389835119247437


Training:  56%|█████▌    | 601/1070 [06:42<05:26,  1.44it/s]

Training steps: 600 Loss: 0.20672152936458588


Training:  66%|██████▌   | 701/1070 [07:49<04:05,  1.50it/s]

Training steps: 700 Loss: 0.3129611611366272


Training:  75%|███████▍  | 801/1070 [08:56<03:08,  1.43it/s]

Training steps: 800 Loss: 0.16928714513778687


Training:  84%|████████▍ | 901/1070 [10:03<01:49,  1.54it/s]

Training steps: 900 Loss: 0.20032908022403717


Training:  94%|█████████▎| 1001/1070 [11:09<00:46,  1.48it/s]

Training steps: 1000 Loss: 0.3525541126728058


Training: 100%|██████████| 1070/1070 [11:56<00:00,  1.49it/s]

TRAIN ACC : 0.8896028037383178, TRAIN LOSS : 0.3251725169403531



Training:   0%|          | 1/357 [00:00<02:22,  2.50it/s]

Validation steps: 0 Loss: 0.2529447674751282


Training:  28%|██▊       | 101/357 [00:39<01:44,  2.46it/s]

Validation steps: 100 Loss: 0.5497485399246216


Training:  56%|█████▋    | 201/357 [01:19<01:00,  2.58it/s]

Validation steps: 200 Loss: 0.4362175166606903


Training:  84%|████████▍ | 301/357 [01:59<00:22,  2.47it/s]

Validation steps: 300 Loss: 0.32926371693611145


Training: 100%|██████████| 357/357 [02:21<00:00,  2.53it/s]


              precision    recall  f1-score   support

        IT과학       0.78      0.91      0.84      1206
          경제       0.90      0.78      0.84      1555
          사회       0.85      0.73      0.79      1841
        생활문화       0.88      0.91      0.89      1483
          세계       0.92      0.93      0.92      1908
         스포츠       0.96      0.98      0.97      1734
          정치       0.87      0.95      0.91      1687

    accuracy                           0.88     11414
   macro avg       0.88      0.88      0.88     11414
weighted avg       0.88      0.88      0.88     11414

[0.90713101 0.78327974 0.73003802 0.908294   0.93238994 0.97750865
 0.95139301]
VALID ACC : 0.883301209041528, VALID LOSS : 0.3601157509510507
{'epoch': 5, 'train_loss': 0.3251725169403531, 'train_acc': 0.8896028037383178, 'valid_acc': 0.883301209041528, 'val_loss': 0.3601157509510507, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 7


Training:   0%|          | 1/1070 [00:00<11:57,  1.49it/s]

Training steps: 0 Loss: 0.28990626335144043


Training:   9%|▉         | 101/1070 [01:08<10:42,  1.51it/s]

Training steps: 100 Loss: 0.33620521426200867


Training:  19%|█▉        | 201/1070 [02:15<09:59,  1.45it/s]

Training steps: 200 Loss: 0.584373414516449


Training:  28%|██▊       | 301/1070 [02:58<05:29,  2.33it/s]

Training steps: 300 Loss: 0.4800716042518616


Training:  37%|███▋      | 401/1070 [03:42<05:04,  2.20it/s]

Training steps: 400 Loss: 0.19311773777008057


Training:  47%|████▋     | 501/1070 [04:26<04:03,  2.34it/s]

Training steps: 500 Loss: 0.18055196106433868


Training:  56%|█████▌    | 601/1070 [05:10<03:35,  2.18it/s]

Training steps: 600 Loss: 0.20577850937843323


Training:  66%|██████▌   | 701/1070 [05:54<02:38,  2.33it/s]

Training steps: 700 Loss: 0.3373119831085205


Training:  75%|███████▍  | 801/1070 [06:38<02:02,  2.20it/s]

Training steps: 800 Loss: 0.3601161241531372


Training:  84%|████████▍ | 901/1070 [07:22<01:11,  2.35it/s]

Training steps: 900 Loss: 0.537907063961029


Training:  94%|█████████▎| 1001/1070 [08:06<00:31,  2.18it/s]

Training steps: 1000 Loss: 0.15367284417152405


Training: 100%|██████████| 1070/1070 [08:36<00:00,  2.07it/s]

TRAIN ACC : 0.8932827102803739, TRAIN LOSS : 0.31245394144729477



Training:   1%|          | 2/357 [00:00<01:01,  5.78it/s]

Validation steps: 0 Loss: 0.43630608916282654


Training:  29%|██▊       | 102/357 [00:17<00:42,  5.96it/s]

Validation steps: 100 Loss: 0.5662625432014465


Training:  57%|█████▋    | 202/357 [00:34<00:26,  5.84it/s]

Validation steps: 200 Loss: 0.41664090752601624


Training:  85%|████████▍ | 302/357 [00:51<00:09,  5.99it/s]

Validation steps: 300 Loss: 0.3461570739746094


Training: 100%|██████████| 357/357 [01:00<00:00,  5.93it/s]


              precision    recall  f1-score   support

        IT과학       0.79      0.92      0.85      1206
          경제       0.85      0.83      0.84      1555
          사회       0.87      0.70      0.78      1841
        생활문화       0.87      0.91      0.89      1483
          세계       0.93      0.92      0.93      1908
         스포츠       0.96      0.98      0.97      1734
          정치       0.89      0.94      0.91      1687

    accuracy                           0.88     11414
   macro avg       0.88      0.89      0.88     11414
weighted avg       0.89      0.88      0.88     11414

[0.9212272  0.83472669 0.7017925  0.90964262 0.92348008 0.98096886
 0.93953764]
VALID ACC : 0.8847029963203084, VALID LOSS : 0.34342300960225386
{'epoch': 6, 'train_loss': 0.31245394144729477, 'train_acc': 0.8932827102803739, 'valid_acc': 0.8847029963203084, 'val_loss': 0.34342300960225386, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 8


Training:   0%|          | 1/1070 [00:00<08:08,  2.19it/s]

Training steps: 0 Loss: 0.2728296220302582


Training:   9%|▉         | 101/1070 [00:44<06:56,  2.33it/s]

Training steps: 100 Loss: 0.3085060715675354


Training:  19%|█▉        | 201/1070 [01:28<06:34,  2.20it/s]

Training steps: 200 Loss: 0.2464016228914261


Training:  28%|██▊       | 301/1070 [02:12<05:29,  2.34it/s]

Training steps: 300 Loss: 0.13491278886795044


Training:  37%|███▋      | 401/1070 [02:56<05:04,  2.20it/s]

Training steps: 400 Loss: 0.2769268751144409


Training:  47%|████▋     | 501/1070 [03:40<04:02,  2.35it/s]

Training steps: 500 Loss: 0.5734915137290955


Training:  56%|█████▌    | 601/1070 [04:24<03:34,  2.18it/s]

Training steps: 600 Loss: 0.391431987285614


Training:  66%|██████▌   | 701/1070 [05:07<02:37,  2.34it/s]

Training steps: 700 Loss: 0.32085832953453064


Training:  75%|███████▍  | 801/1070 [05:52<02:02,  2.20it/s]

Training steps: 800 Loss: 0.2839888334274292


Training:  84%|████████▍ | 901/1070 [06:35<01:12,  2.33it/s]

Training steps: 900 Loss: 0.16952289640903473


Training:  94%|█████████▎| 1001/1070 [07:19<00:31,  2.18it/s]

Training steps: 1000 Loss: 0.3944845199584961


Training: 100%|██████████| 1070/1070 [07:50<00:00,  2.28it/s]

TRAIN ACC : 0.898481308411215, TRAIN LOSS : 0.29207635057445996



Training:   1%|          | 2/357 [00:00<01:01,  5.80it/s]

Validation steps: 0 Loss: 0.23599755764007568


Training:  29%|██▊       | 102/357 [00:17<00:43,  5.91it/s]

Validation steps: 100 Loss: 0.5582170486450195


Training:  57%|█████▋    | 202/357 [00:34<00:26,  5.89it/s]

Validation steps: 200 Loss: 0.425739049911499


Training:  85%|████████▍ | 302/357 [00:50<00:09,  5.96it/s]

Validation steps: 300 Loss: 0.3725145757198334


Training: 100%|██████████| 357/357 [01:00<00:00,  5.94it/s]


              precision    recall  f1-score   support

        IT과학       0.85      0.86      0.86      1206
          경제       0.82      0.86      0.84      1555
          사회       0.85      0.74      0.79      1841
        생활문화       0.88      0.91      0.89      1483
          세계       0.91      0.94      0.92      1908
         스포츠       0.97      0.97      0.97      1734
          정치       0.91      0.93      0.92      1687

    accuracy                           0.89     11414
   macro avg       0.88      0.89      0.88     11414
weighted avg       0.89      0.89      0.89     11414

[0.86318408 0.85980707 0.73601304 0.90761969 0.93658281 0.97462514
 0.9253112 ]
VALID ACC : 0.8863676187138602, VALID LOSS : 0.3462930342459044
{'epoch': 7, 'train_loss': 0.29207635057445996, 'train_acc': 0.898481308411215, 'valid_acc': 0.8863676187138602, 'val_loss': 0.3462930342459044, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 9


Training:   0%|          | 1/1070 [00:00<12:09,  1.46it/s]

Training steps: 0 Loss: 0.34936413168907166


Training:   9%|▉         | 101/1070 [01:07<10:21,  1.56it/s]

Training steps: 100 Loss: 0.14423169195652008


Training:  19%|█▉        | 201/1070 [02:14<09:43,  1.49it/s]

Training steps: 200 Loss: 0.22473078966140747


Training:  28%|██▊       | 301/1070 [03:20<08:14,  1.56it/s]

Training steps: 300 Loss: 0.5770344138145447


Training:  37%|███▋      | 401/1070 [04:26<07:40,  1.45it/s]

Training steps: 400 Loss: 0.3151771128177643


Training:  47%|████▋     | 501/1070 [05:32<06:17,  1.51it/s]

Training steps: 500 Loss: 0.05983012169599533


Training:  56%|█████▌    | 601/1070 [06:39<05:24,  1.45it/s]

Training steps: 600 Loss: 0.23971110582351685


Training:  66%|██████▌   | 701/1070 [07:45<04:02,  1.52it/s]

Training steps: 700 Loss: 0.43308067321777344


Training:  75%|███████▍  | 801/1070 [08:51<03:05,  1.45it/s]

Training steps: 800 Loss: 0.3646637201309204


Training:  84%|████████▍ | 901/1070 [09:57<01:48,  1.56it/s]

Training steps: 900 Loss: 0.20706692337989807


Training:  94%|█████████▎| 1001/1070 [11:03<00:46,  1.49it/s]

Training steps: 1000 Loss: 0.3438853621482849


Training: 100%|██████████| 1070/1070 [11:49<00:00,  1.51it/s]

TRAIN ACC : 0.9021028037383177, TRAIN LOSS : 0.2790588005576457



Training:   0%|          | 1/357 [00:00<02:20,  2.54it/s]

Validation steps: 0 Loss: 0.14671741425991058


Training:  28%|██▊       | 101/357 [00:38<01:43,  2.47it/s]

Validation steps: 100 Loss: 0.5362423658370972


Training:  56%|█████▋    | 201/357 [01:18<01:00,  2.60it/s]

Validation steps: 200 Loss: 0.377191424369812


Training:  84%|████████▍ | 301/357 [01:57<00:22,  2.51it/s]

Validation steps: 300 Loss: 0.36410337686538696


Training: 100%|██████████| 357/357 [02:19<00:00,  2.57it/s]


              precision    recall  f1-score   support

        IT과학       0.84      0.86      0.85      1206
          경제       0.86      0.83      0.84      1555
          사회       0.85      0.74      0.79      1841
        생활문화       0.89      0.90      0.89      1483
          세계       0.89      0.95      0.92      1908
         스포츠       0.96      0.98      0.97      1734
          정치       0.90      0.93      0.92      1687

    accuracy                           0.89     11414
   macro avg       0.88      0.89      0.88     11414
weighted avg       0.89      0.89      0.89     11414

[0.86401327 0.83151125 0.74416078 0.89683075 0.94916143 0.98212226
 0.93360996]
VALID ACC : 0.8869809006483266, VALID LOSS : 0.3521357977173242
{'epoch': 8, 'train_loss': 0.2790588005576457, 'train_acc': 0.9021028037383177, 'valid_acc': 0.8869809006483266, 'val_loss': 0.3521357977173242, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 10


Training:   0%|          | 1/1070 [00:00<12:33,  1.42it/s]

Training steps: 0 Loss: 0.4840072989463806


Training:   9%|▉         | 101/1070 [01:07<10:36,  1.52it/s]

Training steps: 100 Loss: 0.3829553723335266


Training:  19%|█▉        | 201/1070 [02:13<09:59,  1.45it/s]

Training steps: 200 Loss: 0.2516052722930908


Training:  28%|██▊       | 301/1070 [03:18<08:25,  1.52it/s]

Training steps: 300 Loss: 0.1842389553785324


Training:  37%|███▋      | 401/1070 [04:24<07:39,  1.46it/s]

Training steps: 400 Loss: 0.25129517912864685


Training:  47%|████▋     | 501/1070 [05:30<06:12,  1.53it/s]

Training steps: 500 Loss: 0.08037810772657394


Training:  56%|█████▌    | 601/1070 [06:36<05:15,  1.49it/s]

Training steps: 600 Loss: 0.22826837003231049


Training:  66%|██████▌   | 701/1070 [07:42<03:57,  1.55it/s]

Training steps: 700 Loss: 0.589212954044342


Training:  75%|███████▍  | 801/1070 [08:48<02:59,  1.50it/s]

Training steps: 800 Loss: 0.2173081338405609


Training:  84%|████████▍ | 901/1070 [09:54<01:48,  1.56it/s]

Training steps: 900 Loss: 0.2878960072994232


Training:  94%|█████████▎| 1001/1070 [11:00<00:47,  1.46it/s]

Training steps: 1000 Loss: 0.352294921875


Training: 100%|██████████| 1070/1070 [11:45<00:00,  1.52it/s]

TRAIN ACC : 0.9076810747663552, TRAIN LOSS : 0.265912357456634



Training:   0%|          | 1/357 [00:00<02:18,  2.56it/s]

Validation steps: 0 Loss: 0.21069833636283875


Training:  28%|██▊       | 101/357 [00:39<01:37,  2.62it/s]

Validation steps: 100 Loss: 0.5101799964904785


Training:  56%|█████▋    | 201/357 [01:17<01:01,  2.55it/s]

Validation steps: 200 Loss: 0.36169320344924927


Training:  84%|████████▍ | 301/357 [01:57<00:21,  2.57it/s]

Validation steps: 300 Loss: 0.3605315685272217


Training: 100%|██████████| 357/357 [02:18<00:00,  2.58it/s]

              precision    recall  f1-score   support

        IT과학       0.83      0.88      0.85      1206
          경제       0.87      0.83      0.85      1555
          사회       0.83      0.76      0.79      1841
        생활문화       0.91      0.88      0.90      1483
          세계       0.91      0.93      0.92      1908
         스포츠       0.96      0.98      0.97      1734
          정치       0.87      0.95      0.91      1687

    accuracy                           0.89     11414
   macro avg       0.88      0.89      0.88     11414
weighted avg       0.89      0.89      0.89     11414

[0.87562189 0.82508039 0.75882672 0.88132165 0.93134172 0.97750865
 0.95139301]
VALID ACC : 0.8866304538286315, VALID LOSS : 0.35075018219701964
{'epoch': 9, 'train_loss': 0.265912357456634, 'train_acc': 0.9076810747663552, 'valid_acc': 0.8866304538286315, 'val_loss': 0.35075018219701964, 'learning_rate': 5e-06}
Start Training: Epoch 11



Training:   0%|          | 1/1070 [00:00<11:31,  1.55it/s]

Training steps: 0 Loss: 0.2015635073184967


Training:   9%|▉         | 101/1070 [01:06<10:18,  1.57it/s]

Training steps: 100 Loss: 0.24361209571361542


Training:  19%|█▉        | 201/1070 [02:12<09:40,  1.50it/s]

Training steps: 200 Loss: 0.1616172045469284


Training:  28%|██▊       | 301/1070 [03:18<08:12,  1.56it/s]

Training steps: 300 Loss: 0.18001899123191833


Training:  37%|███▋      | 401/1070 [04:24<07:36,  1.47it/s]

Training steps: 400 Loss: 0.27509742975234985


Training:  47%|████▋     | 501/1070 [05:30<06:05,  1.56it/s]

Training steps: 500 Loss: 0.09887200593948364


Training:  56%|█████▌    | 601/1070 [06:36<05:14,  1.49it/s]

Training steps: 600 Loss: 0.5824267268180847


Training:  66%|██████▌   | 701/1070 [07:42<03:57,  1.56it/s]

Training steps: 700 Loss: 0.1135253757238388


Training:  75%|███████▍  | 801/1070 [08:48<03:01,  1.48it/s]

Training steps: 800 Loss: 0.09403153508901596


Training:  84%|████████▍ | 901/1070 [09:54<01:47,  1.57it/s]

Training steps: 900 Loss: 0.37117937207221985


Training:  94%|█████████▎| 1001/1070 [11:00<00:46,  1.49it/s]

Training steps: 1000 Loss: 0.5170069932937622


Training: 100%|██████████| 1070/1070 [11:46<00:00,  1.51it/s]

TRAIN ACC : 0.9116530373831776, TRAIN LOSS : 0.2513808918681657



Training:   0%|          | 1/357 [00:00<02:23,  2.48it/s]

Validation steps: 0 Loss: 0.12535271048545837


Training:  28%|██▊       | 101/357 [00:39<01:42,  2.50it/s]

Validation steps: 100 Loss: 0.6040952205657959


Training:  56%|█████▋    | 201/357 [01:17<00:59,  2.63it/s]

Validation steps: 200 Loss: 0.38206905126571655


Training:  84%|████████▍ | 301/357 [01:57<00:21,  2.59it/s]

Validation steps: 300 Loss: 0.3652174174785614


Training: 100%|██████████| 357/357 [02:19<00:00,  2.56it/s]

              precision    recall  f1-score   support

        IT과학       0.82      0.89      0.85      1206
          경제       0.87      0.83      0.85      1555
          사회       0.85      0.74      0.79      1841
        생활문화       0.90      0.89      0.89      1483
          세계       0.89      0.95      0.92      1908
         스포츠       0.97      0.97      0.97      1734
          정치       0.89      0.94      0.91      1687

    accuracy                           0.89     11414
   macro avg       0.88      0.89      0.88     11414
weighted avg       0.89      0.89      0.88     11414

[0.88723051 0.83086817 0.7370994  0.88604181 0.94968553 0.97174164
 0.94013041]
VALID ACC : 0.8862800070089364, VALID LOSS : 0.3528164430137943
{'epoch': 10, 'train_loss': 0.2513808918681657, 'train_acc': 0.9116530373831776, 'valid_acc': 0.8862800070089364, 'val_loss': 0.3528164430137943, 'learning_rate': 5e-06}
Start Training: Epoch 12



Training:   0%|          | 1/1070 [00:00<11:49,  1.51it/s]

Training steps: 0 Loss: 0.13081608712673187


Training:   9%|▉         | 101/1070 [01:06<10:39,  1.52it/s]

Training steps: 100 Loss: 0.3478683531284332


Training:  19%|█▉        | 201/1070 [02:12<09:55,  1.46it/s]

Training steps: 200 Loss: 0.29599234461784363


Training:  28%|██▊       | 301/1070 [03:18<08:22,  1.53it/s]

Training steps: 300 Loss: 0.27204468846321106


Training:  37%|███▋      | 401/1070 [04:24<07:39,  1.45it/s]

Training steps: 400 Loss: 0.2728050649166107


Training:  47%|████▋     | 501/1070 [05:30<06:03,  1.57it/s]

Training steps: 500 Loss: 0.33558347821235657


Training:  56%|█████▌    | 601/1070 [06:36<05:12,  1.50it/s]

Training steps: 600 Loss: 0.3831370174884796


Training:  66%|██████▌   | 701/1070 [07:42<03:57,  1.55it/s]

Training steps: 700 Loss: 0.21072068810462952


Training:  75%|███████▍  | 801/1070 [08:47<03:00,  1.49it/s]

Training steps: 800 Loss: 0.19819559156894684


Training:  84%|████████▍ | 901/1070 [09:53<01:47,  1.57it/s]

Training steps: 900 Loss: 0.07483223080635071


Training:  94%|█████████▎| 1001/1070 [11:00<00:47,  1.45it/s]

Training steps: 1000 Loss: 0.15266376733779907


Training: 100%|██████████| 1070/1070 [11:45<00:00,  1.52it/s]

TRAIN ACC : 0.9177570093457944, TRAIN LOSS : 0.23618571606995625



Training:   0%|          | 1/357 [00:00<02:20,  2.53it/s]

Validation steps: 0 Loss: 0.28101277351379395


Training:  28%|██▊       | 101/357 [00:39<01:38,  2.60it/s]

Validation steps: 100 Loss: 0.5448737144470215


Training:  56%|█████▋    | 201/357 [01:18<01:03,  2.46it/s]

Validation steps: 200 Loss: 0.491438090801239


Training:  84%|████████▍ | 301/357 [01:57<00:21,  2.60it/s]

Validation steps: 300 Loss: 0.36258289217948914


Training: 100%|██████████| 357/357 [02:19<00:00,  2.56it/s]


              precision    recall  f1-score   support

        IT과학       0.82      0.89      0.85      1206
          경제       0.85      0.84      0.85      1555
          사회       0.87      0.66      0.75      1841
        생활문화       0.87      0.91      0.89      1483
          세계       0.92      0.92      0.92      1908
         스포츠       0.97      0.98      0.97      1734
          정치       0.84      0.96      0.89      1687

    accuracy                           0.88     11414
   macro avg       0.88      0.88      0.88     11414
weighted avg       0.88      0.88      0.88     11414

[0.89054726 0.84308682 0.66159696 0.91301416 0.92295597 0.97635525
 0.9608773 ]
VALID ACC : 0.8789206237953391, VALID LOSS : 0.3763319416081204
{'epoch': 11, 'train_loss': 0.23618571606995625, 'train_acc': 0.9177570093457944, 'valid_acc': 0.8789206237953391, 'val_loss': 0.3763319416081204, 'learning_rate': 5e-06}
EarlyStopping counter: 3 out of 3


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.decoder.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['classifier.dense.weight', 'roberta.embeddings.token_label_2_

Start Training: Epoch 1


Training:   0%|          | 1/1070 [00:00<07:42,  2.31it/s]

Training steps: 0 Loss: 1.9498846530914307


Training:   9%|▉         | 101/1070 [00:44<06:56,  2.32it/s]

Training steps: 100 Loss: 1.8756359815597534


Training:  19%|█▉        | 201/1070 [01:29<06:40,  2.17it/s]

Training steps: 200 Loss: 1.553446888923645


Training:  28%|██▊       | 301/1070 [02:13<05:31,  2.32it/s]

Training steps: 300 Loss: 1.2766300439834595


Training:  37%|███▋      | 401/1070 [02:58<05:10,  2.15it/s]

Training steps: 400 Loss: 1.004758358001709


Training:  47%|████▋     | 501/1070 [03:42<04:05,  2.32it/s]

Training steps: 500 Loss: 0.7444673776626587


Training:  56%|█████▌    | 601/1070 [04:41<04:55,  1.59it/s]

Training steps: 600 Loss: 0.7680782675743103


Training:  66%|██████▌   | 701/1070 [05:42<03:40,  1.67it/s]

Training steps: 700 Loss: 0.6626066565513611


Training:  75%|███████▍  | 801/1070 [06:44<02:49,  1.58it/s]

Training steps: 800 Loss: 0.6247090697288513


Training:  84%|████████▍ | 901/1070 [07:45<01:42,  1.66it/s]

Training steps: 900 Loss: 0.39895036816596985


Training:  94%|█████████▎| 1001/1070 [08:47<00:44,  1.56it/s]

Training steps: 1000 Loss: 0.7472143173217773


Training: 100%|██████████| 1070/1070 [09:29<00:00,  1.88it/s]

TRAIN ACC : 0.6664135514018692, TRAIN LOSS : 0.948553235982066



Training:   0%|          | 1/357 [00:00<02:05,  2.84it/s]

Validation steps: 0 Loss: 0.44886329770088196


Training:  28%|██▊       | 101/357 [00:34<01:28,  2.88it/s]

Validation steps: 100 Loss: 0.8192430734634399


Training:  56%|█████▋    | 201/357 [01:08<00:51,  3.02it/s]

Validation steps: 200 Loss: 0.38309991359710693


Training:  85%|████████▍ | 302/357 [01:40<00:09,  5.75it/s]

Validation steps: 300 Loss: 0.8098907470703125


Training: 100%|██████████| 357/357 [01:50<00:00,  3.23it/s]


              precision    recall  f1-score   support

        IT과학       0.67      0.94      0.78      1206
          경제       0.87      0.76      0.81      1556
          사회       0.84      0.62      0.72      1841
        생활문화       0.88      0.88      0.88      1483
          세계       0.87      0.86      0.86      1907
         스포츠       0.97      0.94      0.95      1733
          정치       0.81      0.91      0.86      1688

    accuracy                           0.84     11414
   macro avg       0.84      0.85      0.84     11414
weighted avg       0.85      0.84      0.84     11414

[0.94195688 0.76413882 0.62466051 0.88199595 0.86103828 0.94460473
 0.90936019]
VALID ACC : 0.8408095321534957, VALID LOSS : 0.5084720530054149
{'epoch': 0, 'train_loss': 0.948553235982066, 'train_acc': 0.6664135514018692, 'valid_acc': 0.8408095321534957, 'val_loss': 0.5084720530054149, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 2


Training:   0%|          | 1/1070 [00:00<10:55,  1.63it/s]

Training steps: 0 Loss: 0.7576025724411011


Training:   9%|▉         | 101/1070 [01:01<09:35,  1.68it/s]

Training steps: 100 Loss: 0.688998281955719


Training:  19%|█▉        | 201/1070 [02:01<08:51,  1.63it/s]

Training steps: 200 Loss: 0.37793636322021484


Training:  28%|██▊       | 301/1070 [03:01<07:31,  1.70it/s]

Training steps: 300 Loss: 0.4916444718837738


Training:  37%|███▋      | 401/1070 [04:01<06:52,  1.62it/s]

Training steps: 400 Loss: 0.4045524597167969


Training:  47%|████▋     | 501/1070 [05:01<05:34,  1.70it/s]

Training steps: 500 Loss: 0.4686063230037689


Training:  56%|█████▌    | 601/1070 [06:01<04:50,  1.61it/s]

Training steps: 600 Loss: 0.5732858180999756


Training:  66%|██████▌   | 701/1070 [07:01<03:33,  1.73it/s]

Training steps: 700 Loss: 0.3616333305835724


Training:  75%|███████▍  | 801/1070 [08:01<02:44,  1.63it/s]

Training steps: 800 Loss: 0.4575946033000946


Training:  84%|████████▍ | 901/1070 [09:01<01:39,  1.70it/s]

Training steps: 900 Loss: 0.6573113799095154


Training:  94%|█████████▎| 1001/1070 [10:01<00:42,  1.64it/s]

Training steps: 1000 Loss: 0.5851954221725464


Training: 100%|██████████| 1070/1070 [10:42<00:00,  1.66it/s]

TRAIN ACC : 0.8498831775700935, TRAIN LOSS : 0.46050295831165583



Training:   1%|          | 2/357 [00:00<01:02,  5.64it/s]

Validation steps: 0 Loss: 0.3718087375164032


Training:  29%|██▊       | 102/357 [00:17<00:43,  5.91it/s]

Validation steps: 100 Loss: 0.660688579082489


Training:  57%|█████▋    | 202/357 [00:34<00:26,  5.89it/s]

Validation steps: 200 Loss: 0.295560747385025


Training:  85%|████████▍ | 302/357 [00:51<00:09,  5.88it/s]

Validation steps: 300 Loss: 0.8788026571273804


Training: 100%|██████████| 357/357 [01:00<00:00,  5.87it/s]


              precision    recall  f1-score   support

        IT과학       0.75      0.90      0.82      1206
          경제       0.81      0.86      0.83      1556
          사회       0.83      0.72      0.77      1841
        생활문화       0.91      0.87      0.89      1483
          세계       0.90      0.89      0.89      1907
         스포츠       0.97      0.95      0.96      1733
          정치       0.89      0.90      0.90      1688

    accuracy                           0.87     11414
   macro avg       0.87      0.87      0.87     11414
weighted avg       0.87      0.87      0.87     11414

[0.90298507 0.85732648 0.71591526 0.87457856 0.88778186 0.95326024
 0.9028436 ]
VALID ACC : 0.8679691606798668, VALID LOSS : 0.41307212986281605
{'epoch': 1, 'train_loss': 0.46050295831165583, 'train_acc': 0.8498831775700935, 'valid_acc': 0.8679691606798668, 'val_loss': 0.41307212986281605, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 3


Training:   0%|          | 1/1070 [00:00<11:11,  1.59it/s]

Training steps: 0 Loss: 0.27708858251571655


Training:   9%|▉         | 101/1070 [01:01<09:24,  1.72it/s]

Training steps: 100 Loss: 0.4160956144332886


Training:  19%|█▉        | 201/1070 [02:01<08:50,  1.64it/s]

Training steps: 200 Loss: 0.4880337715148926


Training:  28%|██▊       | 301/1070 [03:01<07:31,  1.70it/s]

Training steps: 300 Loss: 0.6369554996490479


Training:  37%|███▋      | 401/1070 [04:01<06:57,  1.60it/s]

Training steps: 400 Loss: 0.6066946983337402


Training:  47%|████▋     | 501/1070 [05:01<05:36,  1.69it/s]

Training steps: 500 Loss: 0.47018906474113464


Training:  56%|█████▌    | 601/1070 [06:01<04:47,  1.63it/s]

Training steps: 600 Loss: 0.3989415466785431


Training:  66%|██████▌   | 701/1070 [07:01<03:36,  1.70it/s]

Training steps: 700 Loss: 0.45762184262275696


Training:  75%|███████▍  | 801/1070 [08:02<02:50,  1.58it/s]

Training steps: 800 Loss: 0.46261245012283325


Training:  84%|████████▍ | 901/1070 [09:02<01:40,  1.68it/s]

Training steps: 900 Loss: 0.5439658761024475


Training:  94%|█████████▎| 1001/1070 [10:02<00:42,  1.63it/s]

Training steps: 1000 Loss: 0.46801406145095825


Training: 100%|██████████| 1070/1070 [10:43<00:00,  1.66it/s]

TRAIN ACC : 0.8681950934579439, TRAIN LOSS : 0.39714963956971033



Training:   1%|          | 2/357 [00:00<01:01,  5.75it/s]

Validation steps: 0 Loss: 0.29327288269996643


Training:  29%|██▊       | 102/357 [00:17<00:43,  5.85it/s]

Validation steps: 100 Loss: 0.4464813470840454


Training:  57%|█████▋    | 202/357 [00:34<00:26,  5.91it/s]

Validation steps: 200 Loss: 0.20132499933242798


Training:  85%|████████▍ | 302/357 [00:51<00:09,  5.82it/s]

Validation steps: 300 Loss: 0.856895387172699


Training: 100%|██████████| 357/357 [01:01<00:00,  5.84it/s]


              precision    recall  f1-score   support

        IT과학       0.80      0.84      0.82      1206
          경제       0.81      0.85      0.83      1556
          사회       0.85      0.72      0.78      1841
        생활문화       0.90      0.88      0.89      1483
          세계       0.88      0.93      0.90      1907
         스포츠       0.96      0.96      0.96      1733
          정치       0.89      0.92      0.91      1688

    accuracy                           0.87     11414
   macro avg       0.87      0.87      0.87     11414
weighted avg       0.87      0.87      0.87     11414

[0.84162521 0.84575835 0.72460619 0.88132165 0.93078133 0.96306982
 0.92417062]
VALID ACC : 0.8740143683196075, VALID LOSS : 0.3774157197107704
{'epoch': 2, 'train_loss': 0.39714963956971033, 'train_acc': 0.8681950934579439, 'valid_acc': 0.8740143683196075, 'val_loss': 0.3774157197107704, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 4


Training:   0%|          | 1/1070 [00:00<10:57,  1.63it/s]

Training steps: 0 Loss: 0.38019654154777527


Training:   9%|▉         | 101/1070 [01:01<09:35,  1.68it/s]

Training steps: 100 Loss: 0.19752153754234314


Training:  19%|█▉        | 201/1070 [02:01<08:57,  1.62it/s]

Training steps: 200 Loss: 0.3785014748573303


Training:  28%|██▊       | 301/1070 [03:01<07:39,  1.67it/s]

Training steps: 300 Loss: 0.7383772134780884


Training:  37%|███▋      | 401/1070 [04:01<06:56,  1.60it/s]

Training steps: 400 Loss: 0.29165732860565186


Training:  47%|████▋     | 501/1070 [05:01<05:32,  1.71it/s]

Training steps: 500 Loss: 0.4562930464744568


Training:  56%|█████▌    | 601/1070 [06:01<04:48,  1.62it/s]

Training steps: 600 Loss: 0.41822320222854614


Training:  66%|██████▌   | 701/1070 [07:01<03:38,  1.69it/s]

Training steps: 700 Loss: 0.1889665126800537


Training:  75%|███████▍  | 801/1070 [08:01<02:48,  1.60it/s]

Training steps: 800 Loss: 0.15713223814964294


Training:  84%|████████▍ | 901/1070 [09:02<01:40,  1.68it/s]

Training steps: 900 Loss: 0.18196256458759308


Training:  94%|█████████▎| 1001/1070 [10:02<00:43,  1.60it/s]

Training steps: 1000 Loss: 0.490281879901886


Training: 100%|██████████| 1070/1070 [10:44<00:00,  1.66it/s]

TRAIN ACC : 0.8793516355140187, TRAIN LOSS : 0.3594122146961288



Training:   0%|          | 1/357 [00:00<02:05,  2.84it/s]

Validation steps: 0 Loss: 0.34973466396331787


Training:  28%|██▊       | 101/357 [00:33<01:23,  3.06it/s]

Validation steps: 100 Loss: 0.49869728088378906


Training:  56%|█████▋    | 201/357 [01:06<00:52,  2.97it/s]

Validation steps: 200 Loss: 0.24819952249526978


Training:  84%|████████▍ | 301/357 [01:39<00:18,  3.06it/s]

Validation steps: 300 Loss: 0.8996202349662781


Training: 100%|██████████| 357/357 [01:57<00:00,  3.03it/s]


              precision    recall  f1-score   support

        IT과학       0.78      0.90      0.84      1206
          경제       0.81      0.85      0.83      1556
          사회       0.88      0.69      0.77      1841
        생활문화       0.91      0.89      0.90      1483
          세계       0.89      0.93      0.91      1907
         스포츠       0.94      0.98      0.96      1733
          정치       0.91      0.91      0.91      1688

    accuracy                           0.88     11414
   macro avg       0.87      0.88      0.87     11414
weighted avg       0.88      0.88      0.88     11414

[0.90381426 0.84832905 0.68712656 0.89480782 0.93025695 0.98384305
 0.90817536]
VALID ACC : 0.877343613106711, VALID LOSS : 0.3784404396436879
{'epoch': 3, 'train_loss': 0.3594122146961288, 'train_acc': 0.8793516355140187, 'valid_acc': 0.877343613106711, 'val_loss': 0.3784404396436879, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 5


Training:   0%|          | 1/1070 [00:00<11:18,  1.57it/s]

Training steps: 0 Loss: 0.5107194781303406


Training:   9%|▉         | 101/1070 [01:01<09:28,  1.71it/s]

Training steps: 100 Loss: 0.23409076035022736


Training:  19%|█▉        | 201/1070 [02:01<08:56,  1.62it/s]

Training steps: 200 Loss: 0.38228219747543335


Training:  28%|██▊       | 301/1070 [03:02<07:37,  1.68it/s]

Training steps: 300 Loss: 0.5010541677474976


Training:  37%|███▋      | 401/1070 [04:02<06:57,  1.60it/s]

Training steps: 400 Loss: 0.19973517954349518


Training:  47%|████▋     | 501/1070 [05:02<05:40,  1.67it/s]

Training steps: 500 Loss: 0.5607427954673767


Training:  56%|█████▌    | 601/1070 [06:02<04:51,  1.61it/s]

Training steps: 600 Loss: 0.31429824233055115


Training:  66%|██████▌   | 701/1070 [07:02<03:36,  1.71it/s]

Training steps: 700 Loss: 0.3386854827404022


Training:  75%|███████▍  | 801/1070 [08:02<02:44,  1.63it/s]

Training steps: 800 Loss: 0.3948332965373993


Training:  84%|████████▍ | 901/1070 [09:03<01:41,  1.67it/s]

Training steps: 900 Loss: 0.42832687497138977


Training:  94%|█████████▎| 1001/1070 [10:03<00:43,  1.60it/s]

Training steps: 1000 Loss: 0.4248258173465729


Training: 100%|██████████| 1070/1070 [10:45<00:00,  1.66it/s]

TRAIN ACC : 0.8872079439252336, TRAIN LOSS : 0.33282546695565507



Training:   0%|          | 1/357 [00:00<02:03,  2.89it/s]

Validation steps: 0 Loss: 0.28551003336906433


Training:  28%|██▊       | 101/357 [00:33<01:25,  2.98it/s]

Validation steps: 100 Loss: 0.44076433777809143


Training:  56%|█████▋    | 201/357 [01:06<00:50,  3.08it/s]

Validation steps: 200 Loss: 0.16213054955005646


Training:  84%|████████▍ | 301/357 [01:39<00:18,  3.01it/s]

Validation steps: 300 Loss: 0.8637170791625977


Training: 100%|██████████| 357/357 [01:58<00:00,  3.02it/s]


              precision    recall  f1-score   support

        IT과학       0.82      0.86      0.84      1206
          경제       0.86      0.82      0.84      1556
          사회       0.81      0.76      0.79      1841
        생활문화       0.91      0.90      0.90      1483
          세계       0.92      0.92      0.92      1907
         스포츠       0.97      0.97      0.97      1733
          정치       0.88      0.95      0.91      1688

    accuracy                           0.88     11414
   macro avg       0.88      0.88      0.88     11414
weighted avg       0.88      0.88      0.88     11414

[0.86069652 0.82197943 0.76099946 0.89615644 0.9155742  0.96710906
 0.94727488]
VALID ACC : 0.882074645172595, VALID LOSS : 0.35209790760186876
{'epoch': 4, 'train_loss': 0.33282546695565507, 'train_acc': 0.8872079439252336, 'valid_acc': 0.882074645172595, 'val_loss': 0.35209790760186876, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 6


Training:   0%|          | 1/1070 [00:00<10:42,  1.66it/s]

Training steps: 0 Loss: 0.421199232339859


Training:   9%|▉         | 101/1070 [01:01<09:30,  1.70it/s]

Training steps: 100 Loss: 0.3571414649486542


Training:  19%|█▉        | 201/1070 [02:01<08:53,  1.63it/s]

Training steps: 200 Loss: 0.3466971516609192


Training:  28%|██▊       | 301/1070 [03:00<07:27,  1.72it/s]

Training steps: 300 Loss: 0.39722180366516113


Training:  37%|███▋      | 401/1070 [04:00<06:46,  1.65it/s]

Training steps: 400 Loss: 0.1612597554922104


Training:  47%|████▋     | 501/1070 [05:00<05:29,  1.73it/s]

Training steps: 500 Loss: 0.5989037752151489


Training:  56%|█████▌    | 601/1070 [06:00<04:47,  1.63it/s]

Training steps: 600 Loss: 0.5125499367713928


Training:  66%|██████▌   | 701/1070 [07:00<03:36,  1.71it/s]

Training steps: 700 Loss: 0.35207846760749817


Training:  75%|███████▍  | 801/1070 [08:00<02:43,  1.64it/s]

Training steps: 800 Loss: 0.30003464221954346


Training:  84%|████████▍ | 901/1070 [08:59<01:38,  1.72it/s]

Training steps: 900 Loss: 0.0759863406419754


Training:  94%|█████████▎| 1001/1070 [09:59<00:41,  1.64it/s]

Training steps: 1000 Loss: 0.21912163496017456


Training: 100%|██████████| 1070/1070 [10:41<00:00,  1.67it/s]

TRAIN ACC : 0.8929030373831776, TRAIN LOSS : 0.31422748774220455



Training:   0%|          | 1/357 [00:00<01:59,  2.97it/s]

Validation steps: 0 Loss: 0.32977038621902466


Training:  28%|██▊       | 101/357 [00:32<01:23,  3.08it/s]

Validation steps: 100 Loss: 0.4979334771633148


Training:  56%|█████▋    | 201/357 [01:06<00:52,  2.96it/s]

Validation steps: 200 Loss: 0.23527728021144867


Training:  84%|████████▍ | 301/357 [01:39<00:18,  2.95it/s]

Validation steps: 300 Loss: 0.8125373721122742


Training: 100%|██████████| 357/357 [01:57<00:00,  3.04it/s]


              precision    recall  f1-score   support

        IT과학       0.78      0.92      0.84      1206
          경제       0.86      0.83      0.84      1556
          사회       0.87      0.71      0.78      1841
        생활문화       0.91      0.90      0.90      1483
          세계       0.89      0.94      0.91      1907
         스포츠       0.95      0.98      0.97      1733
          정치       0.90      0.92      0.91      1688

    accuracy                           0.88     11414
   macro avg       0.88      0.89      0.88     11414
weighted avg       0.88      0.88      0.88     11414

[0.91873964 0.8277635  0.71428571 0.89885367 0.93759832 0.97922677
 0.92298578]
VALID ACC : 0.8837392675661468, VALID LOSS : 0.35057719633066686
{'epoch': 5, 'train_loss': 0.31422748774220455, 'train_acc': 0.8929030373831776, 'valid_acc': 0.8837392675661468, 'val_loss': 0.35057719633066686, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 7


Training:   0%|          | 1/1070 [00:00<10:51,  1.64it/s]

Training steps: 0 Loss: 0.3285263180732727


Training:   9%|▉         | 101/1070 [01:01<09:28,  1.71it/s]

Training steps: 100 Loss: 0.4098421335220337


Training:  19%|█▉        | 201/1070 [02:00<08:49,  1.64it/s]

Training steps: 200 Loss: 0.27941280603408813


Training:  28%|██▊       | 301/1070 [03:00<07:27,  1.72it/s]

Training steps: 300 Loss: 0.40324145555496216


Training:  37%|███▋      | 401/1070 [04:00<06:53,  1.62it/s]

Training steps: 400 Loss: 0.13405241072177887


Training:  47%|████▋     | 501/1070 [05:00<05:30,  1.72it/s]

Training steps: 500 Loss: 0.47918859124183655


Training:  56%|█████▌    | 601/1070 [05:59<04:45,  1.64it/s]

Training steps: 600 Loss: 0.2892848253250122


Training:  66%|██████▌   | 701/1070 [06:59<03:33,  1.73it/s]

Training steps: 700 Loss: 0.38140198588371277


Training:  75%|███████▍  | 801/1070 [07:59<02:44,  1.64it/s]

Training steps: 800 Loss: 0.3657226860523224


Training:  84%|████████▍ | 901/1070 [08:58<01:39,  1.69it/s]

Training steps: 900 Loss: 0.30492350459098816


Training:  94%|█████████▎| 1001/1070 [09:58<00:42,  1.62it/s]

Training steps: 1000 Loss: 0.359546422958374


Training: 100%|██████████| 1070/1070 [10:39<00:00,  1.67it/s]

TRAIN ACC : 0.8985105140186916, TRAIN LOSS : 0.29523947528241395



Training:   0%|          | 1/357 [00:00<01:56,  3.04it/s]

Validation steps: 0 Loss: 0.3789258897304535


Training:  28%|██▊       | 101/357 [00:32<01:22,  3.10it/s]

Validation steps: 100 Loss: 0.525657594203949


Training:  56%|█████▋    | 201/357 [01:05<00:50,  3.07it/s]

Validation steps: 200 Loss: 0.3324982821941376


Training:  84%|████████▍ | 301/357 [01:37<00:17,  3.14it/s]

Validation steps: 300 Loss: 0.7653083801269531


Training: 100%|██████████| 357/357 [01:55<00:00,  3.08it/s]

              precision    recall  f1-score   support

        IT과학       0.83      0.85      0.84      1206
          경제       0.86      0.83      0.85      1556
          사회       0.84      0.75      0.79      1841
        생활문화       0.86      0.93      0.90      1483
          세계       0.87      0.94      0.91      1907
         스포츠       0.95      0.98      0.97      1733
          정치       0.92      0.89      0.91      1688

    accuracy                           0.88     11414
   macro avg       0.88      0.88      0.88     11414
weighted avg       0.88      0.88      0.88     11414

[0.84660033 0.82904884 0.7501358  0.92919757 0.94336654 0.97807271
 0.88803318]
VALID ACC : 0.8816365866479762, VALID LOSS : 0.3586924296780294
{'epoch': 6, 'train_loss': 0.29523947528241395, 'train_acc': 0.8985105140186916, 'valid_acc': 0.8816365866479762, 'val_loss': 0.3586924296780294, 'learning_rate': 5e-06}
Start Training: Epoch 8



Training:   0%|          | 1/1070 [00:00<10:38,  1.67it/s]

Training steps: 0 Loss: 0.06927135586738586


Training:   9%|▉         | 101/1070 [01:00<09:31,  1.70it/s]

Training steps: 100 Loss: 0.1423359513282776


Training:  19%|█▉        | 201/1070 [01:59<08:50,  1.64it/s]

Training steps: 200 Loss: 0.4532192051410675


Training:  28%|██▊       | 301/1070 [02:59<07:29,  1.71it/s]

Training steps: 300 Loss: 0.19783329963684082


Training:  37%|███▋      | 401/1070 [03:59<06:54,  1.61it/s]

Training steps: 400 Loss: 0.3053426444530487


Training:  47%|████▋     | 501/1070 [04:58<05:34,  1.70it/s]

Training steps: 500 Loss: 0.1643657386302948


Training:  56%|█████▌    | 601/1070 [05:57<04:51,  1.61it/s]

Training steps: 600 Loss: 0.198136568069458


Training:  66%|██████▌   | 701/1070 [06:57<03:33,  1.73it/s]

Training steps: 700 Loss: 0.14996075630187988


Training:  75%|███████▍  | 801/1070 [07:56<02:43,  1.65it/s]

Training steps: 800 Loss: 0.31040555238723755


Training:  84%|████████▍ | 901/1070 [08:56<01:39,  1.70it/s]

Training steps: 900 Loss: 0.13315030932426453


Training:  94%|█████████▎| 1001/1070 [09:56<00:42,  1.62it/s]

Training steps: 1000 Loss: 0.06828966736793518


Training: 100%|██████████| 1070/1070 [10:37<00:00,  1.68it/s]

TRAIN ACC : 0.9031834112149533, TRAIN LOSS : 0.28159564328945685



Training:   0%|          | 1/357 [00:00<01:56,  3.06it/s]

Validation steps: 0 Loss: 0.2872639298439026


Training:  28%|██▊       | 101/357 [00:32<01:25,  2.99it/s]

Validation steps: 100 Loss: 0.4151753783226013


Training:  56%|█████▋    | 201/357 [01:04<00:49,  3.15it/s]

Validation steps: 200 Loss: 0.29729709029197693


Training:  84%|████████▍ | 301/357 [01:37<00:18,  3.06it/s]

Validation steps: 300 Loss: 0.7212229371070862


Training: 100%|██████████| 357/357 [01:55<00:00,  3.09it/s]

              precision    recall  f1-score   support

        IT과학       0.82      0.87      0.84      1206
          경제       0.84      0.85      0.85      1556
          사회       0.87      0.71      0.78      1841
        생활문화       0.91      0.91      0.91      1483
          세계       0.87      0.95      0.91      1907
         스포츠       0.94      0.99      0.97      1733
          정치       0.90      0.92      0.91      1688

    accuracy                           0.88     11414
   macro avg       0.88      0.88      0.88     11414
weighted avg       0.88      0.88      0.88     11414

[0.86650083 0.84832905 0.70613797 0.908294   0.94703723 0.98903635
 0.91587678]
VALID ACC : 0.8829507622218329, VALID LOSS : 0.358487459447454
{'epoch': 7, 'train_loss': 0.28159564328945685, 'train_acc': 0.9031834112149533, 'valid_acc': 0.8829507622218329, 'val_loss': 0.358487459447454, 'learning_rate': 5e-06}
Start Training: Epoch 9



Training:   0%|          | 1/1070 [00:00<07:35,  2.35it/s]

Training steps: 0 Loss: 0.2957310080528259


Training:   9%|▉         | 101/1070 [00:44<06:58,  2.32it/s]

Training steps: 100 Loss: 0.25488027930259705


Training:  19%|█▉        | 201/1070 [01:28<06:38,  2.18it/s]

Training steps: 200 Loss: 0.09574513137340546


Training:  28%|██▊       | 301/1070 [02:12<05:32,  2.31it/s]

Training steps: 300 Loss: 0.21130216121673584


Training:  37%|███▋      | 401/1070 [02:57<05:06,  2.18it/s]

Training steps: 400 Loss: 0.659852921962738


Training:  47%|████▋     | 501/1070 [03:41<04:03,  2.33it/s]

Training steps: 500 Loss: 0.21666882932186127


Training:  56%|█████▌    | 601/1070 [04:25<03:36,  2.17it/s]

Training steps: 600 Loss: 0.29768839478492737


Training:  66%|██████▌   | 701/1070 [05:09<02:38,  2.33it/s]

Training steps: 700 Loss: 0.3808073401451111


Training:  75%|███████▍  | 801/1070 [05:54<02:03,  2.18it/s]

Training steps: 800 Loss: 0.6487235426902771


Training:  84%|████████▍ | 901/1070 [06:38<01:12,  2.32it/s]

Training steps: 900 Loss: 0.3060755431652069


Training:  94%|█████████▎| 1001/1070 [07:22<00:31,  2.17it/s]

Training steps: 1000 Loss: 0.2046748846769333


Training: 100%|██████████| 1070/1070 [07:53<00:00,  2.26it/s]

TRAIN ACC : 0.9070385514018692, TRAIN LOSS : 0.2701942329930368



Training:   0%|          | 1/357 [00:00<01:59,  2.98it/s]

Validation steps: 0 Loss: 0.27586185932159424


Training:  28%|██▊       | 101/357 [00:32<01:24,  3.05it/s]

Validation steps: 100 Loss: 0.5646651387214661


Training:  56%|█████▋    | 201/357 [01:05<00:49,  3.13it/s]

Validation steps: 200 Loss: 0.18169403076171875


Training:  84%|████████▍ | 301/357 [01:37<00:18,  3.09it/s]

Validation steps: 300 Loss: 0.7435078024864197


Training: 100%|██████████| 357/357 [01:55<00:00,  3.09it/s]


              precision    recall  f1-score   support

        IT과학       0.77      0.93      0.84      1206
          경제       0.88      0.81      0.84      1556
          사회       0.84      0.76      0.80      1841
        생활문화       0.92      0.91      0.91      1483
          세계       0.91      0.92      0.92      1907
         스포츠       0.97      0.97      0.97      1733
          정치       0.91      0.92      0.91      1688

    accuracy                           0.89     11414
   macro avg       0.88      0.89      0.89     11414
weighted avg       0.89      0.89      0.89     11414

[0.93117745 0.80719794 0.76425856 0.90761969 0.92186681 0.9728794
 0.92061611]
VALID ACC : 0.8875065708778693, VALID LOSS : 0.3347176994726646
{'epoch': 8, 'train_loss': 0.2701942329930368, 'train_acc': 0.9070385514018692, 'valid_acc': 0.8875065708778693, 'val_loss': 0.3347176994726646, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 10


Training:   0%|          | 1/1070 [00:00<10:41,  1.67it/s]

Training steps: 0 Loss: 0.17295169830322266


Training:   9%|▉         | 101/1070 [01:00<09:15,  1.74it/s]

Training steps: 100 Loss: 0.2617556154727936


Training:  19%|█▉        | 201/1070 [01:59<08:49,  1.64it/s]

Training steps: 200 Loss: 0.513604998588562


Training:  28%|██▊       | 301/1070 [02:57<07:18,  1.76it/s]

Training steps: 300 Loss: 0.27975958585739136


Training:  37%|███▋      | 401/1070 [03:56<06:43,  1.66it/s]

Training steps: 400 Loss: 0.18200616538524628


Training:  47%|████▋     | 501/1070 [04:55<05:26,  1.74it/s]

Training steps: 500 Loss: 0.34352079033851624


Training:  56%|█████▌    | 601/1070 [05:54<04:41,  1.67it/s]

Training steps: 600 Loss: 0.24703913927078247


Training:  66%|██████▌   | 701/1070 [06:52<03:33,  1.73it/s]

Training steps: 700 Loss: 0.40469130873680115


Training:  75%|███████▍  | 801/1070 [07:51<02:44,  1.64it/s]

Training steps: 800 Loss: 0.22289079427719116


Training:  84%|████████▍ | 901/1070 [08:50<01:37,  1.74it/s]

Training steps: 900 Loss: 0.15656113624572754


Training:  94%|█████████▎| 1001/1070 [09:49<00:41,  1.66it/s]

Training steps: 1000 Loss: 0.1843540221452713


Training: 100%|██████████| 1070/1070 [10:30<00:00,  1.70it/s]

TRAIN ACC : 0.9103387850467289, TRAIN LOSS : 0.25670427522300004



Training:   0%|          | 1/357 [00:00<02:00,  2.95it/s]

Validation steps: 0 Loss: 0.3202721178531647


Training:  28%|██▊       | 101/357 [00:32<01:21,  3.16it/s]

Validation steps: 100 Loss: 0.45411768555641174


Training:  56%|█████▋    | 201/357 [01:03<00:50,  3.10it/s]

Validation steps: 200 Loss: 0.1960834413766861


Training:  84%|████████▍ | 301/357 [01:35<00:17,  3.12it/s]

Validation steps: 300 Loss: 0.8959819674491882


Training: 100%|██████████| 357/357 [01:53<00:00,  3.14it/s]


              precision    recall  f1-score   support

        IT과학       0.79      0.90      0.85      1206
          경제       0.84      0.86      0.85      1556
          사회       0.87      0.73      0.79      1841
        생활문화       0.91      0.91      0.91      1483
          세계       0.90      0.94      0.92      1907
         스포츠       0.95      0.98      0.97      1733
          정치       0.92      0.91      0.92      1688

    accuracy                           0.89     11414
   macro avg       0.88      0.89      0.89     11414
weighted avg       0.89      0.89      0.89     11414

[0.90381426 0.85796915 0.72786529 0.90559676 0.94074463 0.98038084
 0.90817536]
VALID ACC : 0.8878570176975644, VALID LOSS : 0.35105844568826405
{'epoch': 9, 'train_loss': 0.25670427522300004, 'train_acc': 0.9103387850467289, 'valid_acc': 0.8878570176975644, 'val_loss': 0.35105844568826405, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 11


Training:   0%|          | 1/1070 [00:00<10:42,  1.66it/s]

Training steps: 0 Loss: 0.1380174160003662


Training:   9%|▉         | 101/1070 [01:00<09:22,  1.72it/s]

Training steps: 100 Loss: 0.09849776327610016


Training:  19%|█▉        | 201/1070 [01:58<08:48,  1.64it/s]

Training steps: 200 Loss: 0.3062841594219208


Training:  28%|██▊       | 301/1070 [02:57<07:27,  1.72it/s]

Training steps: 300 Loss: 0.12880097329616547


Training:  37%|███▋      | 401/1070 [03:56<06:42,  1.66it/s]

Training steps: 400 Loss: 0.26786360144615173


Training:  47%|████▋     | 501/1070 [04:54<05:24,  1.75it/s]

Training steps: 500 Loss: 0.22792676091194153


Training:  56%|█████▌    | 601/1070 [05:53<04:41,  1.66it/s]

Training steps: 600 Loss: 0.14213667809963226


Training:  66%|██████▌   | 701/1070 [06:52<03:33,  1.73it/s]

Training steps: 700 Loss: 0.21060997247695923


Training:  75%|███████▍  | 801/1070 [07:50<02:42,  1.65it/s]

Training steps: 800 Loss: 0.3106360137462616


Training:  84%|████████▍ | 901/1070 [08:49<01:36,  1.76it/s]

Training steps: 900 Loss: 0.46720370650291443


Training:  94%|█████████▎| 1001/1070 [09:48<00:41,  1.67it/s]

Training steps: 1000 Loss: 0.3335445821285248


Training: 100%|██████████| 1070/1070 [10:28<00:00,  1.70it/s]

TRAIN ACC : 0.916588785046729, TRAIN LOSS : 0.24024324228238558



Training:   0%|          | 1/357 [00:00<01:56,  3.06it/s]

Validation steps: 0 Loss: 0.31377503275871277


Training:  28%|██▊       | 101/357 [00:31<01:19,  3.21it/s]

Validation steps: 100 Loss: 0.4625715911388397


Training:  56%|█████▋    | 201/357 [01:03<00:49,  3.17it/s]

Validation steps: 200 Loss: 0.1558312177658081


Training:  84%|████████▍ | 301/357 [01:35<00:17,  3.12it/s]

Validation steps: 300 Loss: 0.7742204070091248


Training: 100%|██████████| 357/357 [01:52<00:00,  3.17it/s]


              precision    recall  f1-score   support

        IT과학       0.83      0.86      0.84      1206
          경제       0.83      0.86      0.85      1556
          사회       0.85      0.75      0.80      1841
        생활문화       0.88      0.93      0.90      1483
          세계       0.93      0.92      0.92      1907
         스포츠       0.96      0.98      0.97      1733
          정치       0.89      0.94      0.92      1688

    accuracy                           0.89     11414
   macro avg       0.88      0.89      0.89     11414
weighted avg       0.89      0.89      0.89     11414

[0.85820896 0.86182519 0.74579033 0.92515172 0.9155742  0.97576457
 0.93661137]
VALID ACC : 0.8882950762221833, VALID LOSS : 0.34474947016571417
{'epoch': 10, 'train_loss': 0.24024324228238558, 'train_acc': 0.916588785046729, 'valid_acc': 0.8882950762221833, 'val_loss': 0.34474947016571417, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 12


Training:   0%|          | 1/1070 [00:00<11:06,  1.60it/s]

Training steps: 0 Loss: 0.1524728238582611


Training:   9%|▉         | 101/1070 [00:59<09:11,  1.76it/s]

Training steps: 100 Loss: 0.44551169872283936


Training:  19%|█▉        | 201/1070 [01:58<08:39,  1.67it/s]

Training steps: 200 Loss: 0.24593089520931244


Training:  28%|██▊       | 301/1070 [02:56<07:24,  1.73it/s]

Training steps: 300 Loss: 0.15151579678058624


Training:  37%|███▋      | 401/1070 [03:54<06:46,  1.65it/s]

Training steps: 400 Loss: 0.2235141396522522


Training:  47%|████▋     | 501/1070 [04:53<05:24,  1.75it/s]

Training steps: 500 Loss: 0.46752557158470154


Training:  56%|█████▌    | 601/1070 [05:51<04:39,  1.68it/s]

Training steps: 600 Loss: 0.10833368450403214


Training:  66%|██████▌   | 701/1070 [06:50<03:32,  1.73it/s]

Training steps: 700 Loss: 0.16376402974128723


Training:  75%|███████▍  | 801/1070 [07:48<02:43,  1.65it/s]

Training steps: 800 Loss: 0.11685243248939514


Training:  84%|████████▍ | 901/1070 [08:46<01:37,  1.74it/s]

Training steps: 900 Loss: 0.22640280425548553


Training:  94%|█████████▎| 1001/1070 [09:44<00:41,  1.67it/s]

Training steps: 1000 Loss: 0.19719143211841583


Training: 100%|██████████| 1070/1070 [10:25<00:00,  1.71it/s]

TRAIN ACC : 0.9174941588785047, TRAIN LOSS : 0.2296293833580251



Training:   0%|          | 1/357 [00:00<01:58,  3.01it/s]

Validation steps: 0 Loss: 0.29358574748039246


Training:  28%|██▊       | 101/357 [00:31<01:18,  3.26it/s]

Validation steps: 100 Loss: 0.44859039783477783


Training:  56%|█████▋    | 201/357 [01:03<00:49,  3.16it/s]

Validation steps: 200 Loss: 0.16137593984603882


Training:  84%|████████▍ | 301/357 [01:34<00:17,  3.18it/s]

Validation steps: 300 Loss: 0.7893434166908264


Training: 100%|██████████| 357/357 [01:51<00:00,  3.19it/s]

              precision    recall  f1-score   support

        IT과학       0.81      0.89      0.85      1206
          경제       0.87      0.82      0.84      1556
          사회       0.85      0.74      0.79      1841
        생활문화       0.88      0.93      0.91      1483
          세계       0.92      0.92      0.92      1907
         스포츠       0.96      0.97      0.97      1733
          정치       0.88      0.95      0.91      1688

    accuracy                           0.89     11414
   macro avg       0.88      0.89      0.88     11414
weighted avg       0.89      0.89      0.89     11414

[0.89220564 0.81748072 0.74253123 0.92987188 0.91767174 0.9728794
 0.94549763]
VALID ACC : 0.8871561240581741, VALID LOSS : 0.3492811508789784
{'epoch': 11, 'train_loss': 0.2296293833580251, 'train_acc': 0.9174941588785047, 'valid_acc': 0.8871561240581741, 'val_loss': 0.3492811508789784, 'learning_rate': 5e-06}
Start Training: Epoch 13



Training:   0%|          | 1/1070 [00:00<10:03,  1.77it/s]

Training steps: 0 Loss: 0.5123509764671326


Training:   9%|▉         | 101/1070 [00:58<09:07,  1.77it/s]

Training steps: 100 Loss: 0.19772790372371674


Training:  19%|█▉        | 201/1070 [01:57<08:36,  1.68it/s]

Training steps: 200 Loss: 0.1863887459039688


Training:  28%|██▊       | 301/1070 [02:55<07:19,  1.75it/s]

Training steps: 300 Loss: 0.28993529081344604


Training:  37%|███▋      | 401/1070 [03:53<06:37,  1.68it/s]

Training steps: 400 Loss: 0.13962215185165405


Training:  47%|████▋     | 501/1070 [04:51<05:22,  1.76it/s]

Training steps: 500 Loss: 0.36141660809516907


Training:  56%|█████▌    | 601/1070 [05:50<04:42,  1.66it/s]

Training steps: 600 Loss: 0.43680453300476074


Training:  66%|██████▌   | 701/1070 [06:48<03:28,  1.77it/s]

Training steps: 700 Loss: 0.5111375451087952


Training:  75%|███████▍  | 801/1070 [07:46<02:40,  1.67it/s]

Training steps: 800 Loss: 0.11380844563245773


Training:  84%|████████▍ | 901/1070 [08:44<01:37,  1.73it/s]

Training steps: 900 Loss: 0.059798404574394226


Training:  94%|█████████▎| 1001/1070 [09:43<00:41,  1.68it/s]

Training steps: 1000 Loss: 0.290077269077301


Training: 100%|██████████| 1070/1070 [10:23<00:00,  1.72it/s]

TRAIN ACC : 0.9259637850467289, TRAIN LOSS : 0.21203531816114332



Training:   0%|          | 1/357 [00:00<01:58,  3.00it/s]

Validation steps: 0 Loss: 0.30106425285339355


Training:  28%|██▊       | 101/357 [00:31<01:19,  3.24it/s]

Validation steps: 100 Loss: 0.5118014216423035


Training:  56%|█████▋    | 201/357 [01:02<00:50,  3.08it/s]

Validation steps: 200 Loss: 0.2044125199317932


Training:  84%|████████▍ | 301/357 [01:34<00:17,  3.27it/s]

Validation steps: 300 Loss: 0.8787040114402771


Training: 100%|██████████| 357/357 [01:51<00:00,  3.21it/s]

              precision    recall  f1-score   support

        IT과학       0.80      0.88      0.84      1206
          경제       0.84      0.85      0.85      1556
          사회       0.86      0.73      0.79      1841
        생활문화       0.91      0.91      0.91      1483
          세계       0.91      0.93      0.92      1907
         스포츠       0.96      0.98      0.97      1733
          정치       0.89      0.93      0.91      1688

    accuracy                           0.88     11414
   macro avg       0.88      0.88      0.88     11414
weighted avg       0.88      0.88      0.88     11414

[0.87976783 0.84640103 0.72677892 0.90559676 0.92606188 0.97749567
 0.92950237]
VALID ACC : 0.8838268792710706, VALID LOSS : 0.3563131030534162
{'epoch': 12, 'train_loss': 0.21203531816114332, 'train_acc': 0.9259637850467289, 'valid_acc': 0.8838268792710706, 'val_loss': 0.3563131030534162, 'learning_rate': 5e-06}
Start Training: Epoch 14



Training:   0%|          | 1/1070 [00:00<10:24,  1.71it/s]

Training steps: 0 Loss: 0.46654990315437317


Training:   9%|▉         | 101/1070 [00:58<09:04,  1.78it/s]

Training steps: 100 Loss: 0.15338169038295746


Training:  19%|█▉        | 201/1070 [01:56<08:43,  1.66it/s]

Training steps: 200 Loss: 0.34688127040863037


Training:  28%|██▊       | 301/1070 [02:54<07:22,  1.74it/s]

Training steps: 300 Loss: 0.0512094683945179


Training:  37%|███▋      | 401/1070 [03:52<06:38,  1.68it/s]

Training steps: 400 Loss: 0.16902537643909454


Training:  47%|████▋     | 501/1070 [04:50<05:21,  1.77it/s]

Training steps: 500 Loss: 0.06595499813556671


Training:  56%|█████▌    | 601/1070 [05:48<04:39,  1.68it/s]

Training steps: 600 Loss: 0.09662597626447678


Training:  66%|██████▌   | 701/1070 [06:46<03:28,  1.77it/s]

Training steps: 700 Loss: 0.21190637350082397


Training:  75%|███████▍  | 801/1070 [07:44<02:40,  1.67it/s]

Training steps: 800 Loss: 0.1451534777879715


Training:  84%|████████▍ | 901/1070 [08:42<01:34,  1.79it/s]

Training steps: 900 Loss: 0.15719859302043915


Training:  94%|█████████▎| 1001/1070 [09:40<00:40,  1.68it/s]

Training steps: 1000 Loss: 0.20629486441612244


Training: 100%|██████████| 1070/1070 [10:20<00:00,  1.72it/s]

TRAIN ACC : 0.9283586448598131, TRAIN LOSS : 0.20211204059039042



Training:   0%|          | 1/357 [00:00<01:56,  3.06it/s]

Validation steps: 0 Loss: 0.26483041048049927


Training:  28%|██▊       | 101/357 [00:30<01:18,  3.25it/s]

Validation steps: 100 Loss: 0.4476841390132904


Training:  56%|█████▋    | 201/357 [01:02<00:49,  3.14it/s]

Validation steps: 200 Loss: 0.20465949177742004


Training:  84%|████████▍ | 301/357 [01:33<00:17,  3.28it/s]

Validation steps: 300 Loss: 0.8699198365211487


Training: 100%|██████████| 357/357 [01:50<00:00,  3.24it/s]


              precision    recall  f1-score   support

        IT과학       0.80      0.91      0.85      1206
          경제       0.85      0.85      0.85      1556
          사회       0.87      0.71      0.78      1841
        생활문화       0.90      0.92      0.91      1483
          세계       0.91      0.93      0.92      1907
         스포츠       0.96      0.98      0.97      1733
          정치       0.90      0.93      0.91      1688

    accuracy                           0.89     11414
   macro avg       0.88      0.89      0.88     11414
weighted avg       0.89      0.89      0.88     11414

[0.90547264 0.84832905 0.71102662 0.91706001 0.92973256 0.9763416
 0.9271327 ]
VALID ACC : 0.8858419484843175, VALID LOSS : 0.35721759811690706
{'epoch': 13, 'train_loss': 0.20211204059039042, 'train_acc': 0.9283586448598131, 'valid_acc': 0.8858419484843175, 'val_loss': 0.35721759811690706, 'learning_rate': 5e-06}
EarlyStopping counter: 3 out of 3


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.decoder.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['classifier.dense.weight', 'roberta.embeddings.token_label_2_

Start Training: Epoch 1


Training:   0%|          | 1/1071 [00:00<08:10,  2.18it/s]

Training steps: 0 Loss: 1.9460468292236328


Training:   9%|▉         | 101/1071 [00:46<07:14,  2.23it/s]

Training steps: 100 Loss: 1.860207200050354


Training:  19%|█▉        | 201/1071 [01:32<06:53,  2.11it/s]

Training steps: 200 Loss: 1.5388327836990356


Training:  28%|██▊       | 301/1071 [02:17<05:40,  2.26it/s]

Training steps: 300 Loss: 0.9725783467292786


Training:  37%|███▋      | 401/1071 [03:03<05:21,  2.08it/s]

Training steps: 400 Loss: 0.9749164581298828


Training:  47%|████▋     | 501/1071 [03:49<04:14,  2.24it/s]

Training steps: 500 Loss: 0.6883231401443481


Training:  56%|█████▌    | 601/1071 [04:35<03:42,  2.11it/s]

Training steps: 600 Loss: 0.3852696120738983


Training:  65%|██████▌   | 701/1071 [05:20<02:44,  2.25it/s]

Training steps: 700 Loss: 0.564311146736145


Training:  75%|███████▍  | 801/1071 [06:06<02:09,  2.09it/s]

Training steps: 800 Loss: 0.6532618999481201


Training:  84%|████████▍ | 901/1071 [06:52<01:16,  2.22it/s]

Training steps: 900 Loss: 0.8673739433288574


Training:  93%|█████████▎| 1001/1071 [07:38<00:33,  2.08it/s]

Training steps: 1000 Loss: 0.4380047023296356


Training: 100%|██████████| 1071/1071 [08:10<00:00,  2.18it/s]

TRAIN ACC : 0.6493093075552699, TRAIN LOSS : 0.9846515167438461



Training:   0%|          | 1/357 [00:00<02:17,  2.59it/s]

Validation steps: 0 Loss: 0.33978649973869324


Training:  29%|██▊       | 102/357 [00:35<00:44,  5.80it/s]

Validation steps: 100 Loss: 0.47808364033699036


Training:  57%|█████▋    | 202/357 [00:52<00:26,  5.80it/s]

Validation steps: 200 Loss: 0.23384606838226318


Training:  85%|████████▍ | 302/357 [01:09<00:09,  5.85it/s]

Validation steps: 300 Loss: 1.021722435951233


Training: 100%|██████████| 357/357 [01:18<00:00,  4.54it/s]


              precision    recall  f1-score   support

        IT과학       0.80      0.85      0.82      1206
          경제       0.82      0.84      0.83      1556
          사회       0.76      0.71      0.73      1840
        생활문화       0.91      0.83      0.87      1483
          세계       0.79      0.90      0.84      1907
         스포츠       0.97      0.93      0.95      1733
          정치       0.87      0.81      0.84      1688

    accuracy                           0.84     11413
   macro avg       0.84      0.84      0.84     11413
weighted avg       0.84      0.84      0.84     11413

[0.84742952 0.83997429 0.71141304 0.83142279 0.90351337 0.93421812
 0.81457346]
VALID ACC : 0.8400946289319198, VALID LOSS : 0.5082569915790852
{'epoch': 0, 'train_loss': 0.9846515167438461, 'train_acc': 0.6493093075552699, 'valid_acc': 0.8400946289319198, 'val_loss': 0.5082569915790852, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 2


Training:   0%|          | 1/1071 [00:00<07:55,  2.25it/s]

Training steps: 0 Loss: 0.515677273273468


Training:   9%|▉         | 101/1071 [00:45<07:00,  2.31it/s]

Training steps: 100 Loss: 0.6508539319038391


Training:  19%|█▉        | 201/1071 [01:29<06:45,  2.14it/s]

Training steps: 200 Loss: 0.2525569498538971


Training:  28%|██▊       | 301/1071 [02:14<05:35,  2.29it/s]

Training steps: 300 Loss: 0.5117329955101013


Training:  37%|███▋      | 401/1071 [02:59<05:11,  2.15it/s]

Training steps: 400 Loss: 0.3544205129146576


Training:  47%|████▋     | 501/1071 [03:43<04:06,  2.31it/s]

Training steps: 500 Loss: 0.3873271346092224


Training:  56%|█████▌    | 601/1071 [04:28<03:38,  2.15it/s]

Training steps: 600 Loss: 0.30872610211372375


Training:  65%|██████▌   | 701/1071 [05:13<02:41,  2.29it/s]

Training steps: 700 Loss: 0.3032173216342926


Training:  75%|███████▍  | 801/1071 [05:57<02:04,  2.17it/s]

Training steps: 800 Loss: 0.22143521904945374


Training:  84%|████████▍ | 901/1071 [06:42<01:13,  2.31it/s]

Training steps: 900 Loss: 0.36605072021484375


Training:  93%|█████████▎| 1001/1071 [07:26<00:32,  2.16it/s]

Training steps: 1000 Loss: 0.6695199608802795


Training: 100%|██████████| 1071/1071 [07:57<00:00,  2.24it/s]

TRAIN ACC : 0.8355188224642972, TRAIN LOSS : 0.4953064355307044



Training:   1%|          | 2/357 [00:00<01:06,  5.35it/s]

Validation steps: 0 Loss: 0.4146192967891693


Training:  29%|██▊       | 102/357 [00:18<00:45,  5.62it/s]

Validation steps: 100 Loss: 0.3494523763656616


Training:  57%|█████▋    | 202/357 [00:35<00:27,  5.73it/s]

Validation steps: 200 Loss: 0.19961005449295044


Training:  85%|████████▍ | 302/357 [00:53<00:09,  5.66it/s]

Validation steps: 300 Loss: 0.9037883281707764


Training: 100%|██████████| 357/357 [01:02<00:00,  5.68it/s]


              precision    recall  f1-score   support

        IT과학       0.72      0.94      0.81      1206
          경제       0.84      0.85      0.84      1556
          사회       0.84      0.69      0.76      1840
        생활문화       0.88      0.89      0.89      1483
          세계       0.92      0.85      0.88      1907
         스포츠       0.96      0.97      0.96      1733
          정치       0.87      0.89      0.88      1688

    accuracy                           0.86     11413
   macro avg       0.86      0.87      0.86     11413
weighted avg       0.87      0.86      0.86     11413

[0.94112769 0.8470437  0.69076087 0.88941335 0.84950184 0.96826313
 0.89395735]
VALID ACC : 0.8630509068605976, VALID LOSS : 0.42989219813829377
{'epoch': 1, 'train_loss': 0.4953064355307044, 'train_acc': 0.8355188224642972, 'valid_acc': 0.8630509068605976, 'val_loss': 0.42989219813829377, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 3


Training:   0%|          | 1/1071 [00:00<08:05,  2.21it/s]

Training steps: 0 Loss: 0.4402773976325989


Training:   9%|▉         | 101/1071 [00:45<06:59,  2.31it/s]

Training steps: 100 Loss: 0.6688761115074158


Training:  19%|█▉        | 201/1071 [01:29<06:41,  2.17it/s]

Training steps: 200 Loss: 0.4612062871456146


Training:  28%|██▊       | 301/1071 [02:14<05:35,  2.29it/s]

Training steps: 300 Loss: 0.4448024034500122


Training:  37%|███▋      | 401/1071 [02:59<05:10,  2.16it/s]

Training steps: 400 Loss: 0.40509033203125


Training:  47%|████▋     | 501/1071 [03:43<04:06,  2.31it/s]

Training steps: 500 Loss: 0.2747742235660553


Training:  56%|█████▌    | 601/1071 [04:28<03:38,  2.16it/s]

Training steps: 600 Loss: 0.6218824982643127


Training:  65%|██████▌   | 701/1071 [05:12<02:40,  2.31it/s]

Training steps: 700 Loss: 0.21960777044296265


Training:  75%|███████▍  | 801/1071 [05:57<02:04,  2.17it/s]

Training steps: 800 Loss: 0.4639433026313782


Training:  84%|████████▍ | 901/1071 [06:42<01:13,  2.31it/s]

Training steps: 900 Loss: 0.26371532678604126


Training:  93%|█████████▎| 1001/1071 [07:26<00:32,  2.15it/s]

Training steps: 1000 Loss: 0.3314189612865448


Training: 100%|██████████| 1071/1071 [07:57<00:00,  2.24it/s]

TRAIN ACC : 0.8561373791653281, TRAIN LOSS : 0.42706805244296875



Training:   1%|          | 2/357 [00:00<01:04,  5.54it/s]

Validation steps: 0 Loss: 0.36519160866737366


Training:  29%|██▊       | 102/357 [00:17<00:45,  5.63it/s]

Validation steps: 100 Loss: 0.36496293544769287


Training:  57%|█████▋    | 202/357 [00:35<00:27,  5.62it/s]

Validation steps: 200 Loss: 0.19413888454437256


Training:  85%|████████▍ | 302/357 [00:53<00:09,  5.72it/s]

Validation steps: 300 Loss: 0.8276495933532715


Training: 100%|██████████| 357/357 [01:02<00:00,  5.70it/s]


              precision    recall  f1-score   support

        IT과학       0.78      0.91      0.84      1206
          경제       0.89      0.81      0.85      1556
          사회       0.81      0.76      0.79      1840
        생활문화       0.94      0.82      0.88      1483
          세계       0.87      0.92      0.89      1907
         스포츠       0.95      0.97      0.96      1733
          정치       0.88      0.91      0.90      1688

    accuracy                           0.87     11413
   macro avg       0.87      0.87      0.87     11413
weighted avg       0.88      0.87      0.87     11413

[0.91044776 0.81491003 0.76467391 0.82333109 0.92239119 0.97403347
 0.90876777]
VALID ACC : 0.8740033295364935, VALID LOSS : 0.39321108432669266
{'epoch': 2, 'train_loss': 0.42706805244296875, 'train_acc': 0.8561373791653281, 'valid_acc': 0.8740033295364935, 'val_loss': 0.39321108432669266, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 4


Training:   0%|          | 1/1071 [00:00<08:04,  2.21it/s]

Training steps: 0 Loss: 0.781673789024353


Training:   9%|▉         | 101/1071 [00:45<06:58,  2.32it/s]

Training steps: 100 Loss: 0.4910908043384552


Training:  19%|█▉        | 201/1071 [01:29<06:40,  2.17it/s]

Training steps: 200 Loss: 0.30585405230522156


Training:  28%|██▊       | 301/1071 [02:14<05:35,  2.29it/s]

Training steps: 300 Loss: 0.50013267993927


Training:  37%|███▋      | 401/1071 [02:59<05:11,  2.15it/s]

Training steps: 400 Loss: 0.3321877717971802


Training:  47%|████▋     | 501/1071 [03:43<04:06,  2.32it/s]

Training steps: 500 Loss: 0.3887876570224762


Training:  56%|█████▌    | 601/1071 [04:28<03:37,  2.16it/s]

Training steps: 600 Loss: 0.4955589175224304


Training:  65%|██████▌   | 701/1071 [05:12<02:40,  2.30it/s]

Training steps: 700 Loss: 0.6744756102561951


Training:  75%|███████▍  | 801/1071 [05:57<02:05,  2.15it/s]

Training steps: 800 Loss: 0.4244306683540344


Training:  84%|████████▍ | 901/1071 [06:41<01:13,  2.32it/s]

Training steps: 900 Loss: 0.05366675555706024


Training:  93%|█████████▎| 1001/1071 [07:26<00:32,  2.16it/s]

Training steps: 1000 Loss: 0.3715987503528595


Training: 100%|██████████| 1071/1071 [07:56<00:00,  2.25it/s]

TRAIN ACC : 0.8723460179317193, TRAIN LOSS : 0.3800884107863452



Training:   1%|          | 2/357 [00:00<01:04,  5.49it/s]

Validation steps: 0 Loss: 0.32472994923591614


Training:  29%|██▊       | 102/357 [00:18<00:44,  5.67it/s]

Validation steps: 100 Loss: 0.3526690900325775


Training:  57%|█████▋    | 202/357 [00:35<00:26,  5.75it/s]

Validation steps: 200 Loss: 0.15108060836791992


Training:  85%|████████▍ | 302/357 [00:52<00:09,  5.76it/s]

Validation steps: 300 Loss: 0.8024616837501526


Training: 100%|██████████| 357/357 [01:02<00:00,  5.72it/s]


              precision    recall  f1-score   support

        IT과학       0.83      0.88      0.85      1206
          경제       0.86      0.85      0.86      1556
          사회       0.80      0.77      0.79      1840
        생활문화       0.92      0.86      0.89      1483
          세계       0.88      0.93      0.90      1907
         스포츠       0.96      0.98      0.97      1733
          정치       0.90      0.89      0.90      1688

    accuracy                           0.88     11413
   macro avg       0.88      0.88      0.88     11413
weighted avg       0.88      0.88      0.88     11413

[0.88059701 0.85025707 0.76956522 0.862441   0.92658626 0.9763416
 0.89454976]
VALID ACC : 0.8804871637606239, VALID LOSS : 0.3670344148405126
{'epoch': 3, 'train_loss': 0.3800884107863452, 'train_acc': 0.8723460179317193, 'valid_acc': 0.8804871637606239, 'val_loss': 0.3670344148405126, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 5


Training:   0%|          | 1/1071 [00:00<08:01,  2.22it/s]

Training steps: 0 Loss: 0.395399272441864


Training:   9%|▉         | 101/1071 [00:45<06:58,  2.32it/s]

Training steps: 100 Loss: 0.40880730748176575


Training:  19%|█▉        | 201/1071 [01:29<06:42,  2.16it/s]

Training steps: 200 Loss: 0.3352307081222534


Training:  28%|██▊       | 301/1071 [02:14<05:33,  2.31it/s]

Training steps: 300 Loss: 0.5443202257156372


Training:  37%|███▋      | 401/1071 [02:58<05:10,  2.16it/s]

Training steps: 400 Loss: 0.48667681217193604


Training:  47%|████▋     | 501/1071 [03:43<04:06,  2.31it/s]

Training steps: 500 Loss: 0.27860069274902344


Training:  56%|█████▌    | 601/1071 [04:28<03:36,  2.17it/s]

Training steps: 600 Loss: 0.15661990642547607


Training:  65%|██████▌   | 701/1071 [05:12<02:41,  2.29it/s]

Training steps: 700 Loss: 0.4117644429206848


Training:  75%|███████▍  | 801/1071 [05:57<02:04,  2.17it/s]

Training steps: 800 Loss: 0.28757473826408386


Training:  84%|████████▍ | 901/1071 [06:41<01:13,  2.31it/s]

Training steps: 900 Loss: 0.4191267788410187


Training:  93%|█████████▎| 1001/1071 [07:26<00:32,  2.15it/s]

Training steps: 1000 Loss: 0.3804872930049896


Training: 100%|██████████| 1071/1071 [07:56<00:00,  2.25it/s]

TRAIN ACC : 0.8783037878566631, TRAIN LOSS : 0.35353139540255346



Training:   1%|          | 2/357 [00:00<01:05,  5.44it/s]

Validation steps: 0 Loss: 0.40979892015457153


Training:  29%|██▊       | 102/357 [00:17<00:43,  5.82it/s]

Validation steps: 100 Loss: 0.21463432908058167


Training:  57%|█████▋    | 202/357 [00:35<00:26,  5.81it/s]

Validation steps: 200 Loss: 0.11355885118246078


Training:  85%|████████▍ | 302/357 [00:52<00:09,  5.76it/s]

Validation steps: 300 Loss: 0.6893393993377686


Training: 100%|██████████| 357/357 [01:01<00:00,  5.77it/s]


              precision    recall  f1-score   support

        IT과학       0.78      0.92      0.85      1206
          경제       0.91      0.78      0.84      1556
          사회       0.82      0.77      0.79      1840
        생활문화       0.87      0.91      0.89      1483
          세계       0.91      0.92      0.92      1907
         스포츠       0.96      0.97      0.97      1733
          정치       0.90      0.90      0.90      1688

    accuracy                           0.88     11413
   macro avg       0.88      0.88      0.88     11413
weighted avg       0.88      0.88      0.88     11413

[0.92205638 0.78470437 0.76847826 0.91436278 0.92239119 0.97172533
 0.90343602]
VALID ACC : 0.8824147901515815, VALID LOSS : 0.35704513520252806
{'epoch': 4, 'train_loss': 0.35353139540255346, 'train_acc': 0.8783037878566631, 'valid_acc': 0.8824147901515815, 'val_loss': 0.35704513520252806, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 6


Training:   0%|          | 1/1071 [00:00<07:56,  2.25it/s]

Training steps: 0 Loss: 0.31802573800086975


Training:   9%|▉         | 101/1071 [00:44<06:57,  2.33it/s]

Training steps: 100 Loss: 0.1532519906759262


Training:  19%|█▉        | 201/1071 [01:29<06:40,  2.17it/s]

Training steps: 200 Loss: 0.15483899414539337


Training:  28%|██▊       | 301/1071 [02:13<05:33,  2.31it/s]

Training steps: 300 Loss: 0.5645114779472351


Training:  37%|███▋      | 401/1071 [02:58<05:07,  2.18it/s]

Training steps: 400 Loss: 0.18119849264621735


Training:  47%|████▋     | 501/1071 [03:42<04:07,  2.30it/s]

Training steps: 500 Loss: 0.09896552562713623


Training:  56%|█████▌    | 601/1071 [04:27<03:36,  2.17it/s]

Training steps: 600 Loss: 0.3009995222091675


Training:  65%|██████▌   | 701/1071 [05:11<02:39,  2.32it/s]

Training steps: 700 Loss: 0.490319162607193


Training:  75%|███████▍  | 801/1071 [05:55<02:04,  2.17it/s]

Training steps: 800 Loss: 0.5619215369224548


Training:  84%|████████▍ | 901/1071 [06:40<01:13,  2.32it/s]

Training steps: 900 Loss: 0.28595027327537537


Training:  93%|█████████▎| 1001/1071 [07:24<00:32,  2.18it/s]

Training steps: 1000 Loss: 0.40149661898612976


Training: 100%|██████████| 1071/1071 [07:55<00:00,  2.25it/s]

TRAIN ACC : 0.8850500861540259, TRAIN LOSS : 0.33421989020401915



Training:   1%|          | 2/357 [00:00<01:03,  5.57it/s]

Validation steps: 0 Loss: 0.31102919578552246


Training:  29%|██▊       | 102/357 [00:17<00:44,  5.75it/s]

Validation steps: 100 Loss: 0.3042999505996704


Training:  57%|█████▋    | 202/357 [00:35<00:26,  5.81it/s]

Validation steps: 200 Loss: 0.08655932545661926


Training:  85%|████████▍ | 302/357 [00:52<00:09,  5.82it/s]

Validation steps: 300 Loss: 0.8600417971611023


Training: 100%|██████████| 357/357 [01:01<00:00,  5.77it/s]


              precision    recall  f1-score   support

        IT과학       0.84      0.86      0.85      1206
          경제       0.83      0.87      0.85      1556
          사회       0.86      0.73      0.79      1840
        생활문화       0.89      0.91      0.90      1483
          세계       0.88      0.94      0.91      1907
         스포츠       0.96      0.98      0.97      1733
          정치       0.92      0.90      0.91      1688

    accuracy                           0.89     11413
   macro avg       0.88      0.89      0.88     11413
weighted avg       0.89      0.89      0.88     11413

[0.86484245 0.87403599 0.7326087  0.90761969 0.93864709 0.97864974
 0.9028436 ]
VALID ACC : 0.8855690878822395, VALID LOSS : 0.35878522969594523
{'epoch': 5, 'train_loss': 0.33421989020401915, 'train_acc': 0.8850500861540259, 'valid_acc': 0.8855690878822395, 'val_loss': 0.35878522969594523, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 7


Training:   0%|          | 1/1071 [00:00<07:53,  2.26it/s]

Training steps: 0 Loss: 0.1622016578912735


Training:   9%|▉         | 101/1071 [00:45<06:58,  2.32it/s]

Training steps: 100 Loss: 0.35888904333114624


Training:  19%|█▉        | 201/1071 [01:29<06:38,  2.18it/s]

Training steps: 200 Loss: 0.17211101949214935


Training:  28%|██▊       | 301/1071 [02:13<05:31,  2.33it/s]

Training steps: 300 Loss: 0.3507706820964813


Training:  37%|███▋      | 401/1071 [02:57<05:12,  2.15it/s]

Training steps: 400 Loss: 0.2829558849334717


Training:  47%|████▋     | 501/1071 [03:42<04:04,  2.33it/s]

Training steps: 500 Loss: 0.3020176887512207


Training:  56%|█████▌    | 601/1071 [04:26<03:36,  2.17it/s]

Training steps: 600 Loss: 0.3226369619369507


Training:  65%|██████▌   | 701/1071 [05:10<02:39,  2.31it/s]

Training steps: 700 Loss: 0.14042586088180542


Training:  75%|███████▍  | 801/1071 [05:55<02:04,  2.16it/s]

Training steps: 800 Loss: 0.15958285331726074


Training:  84%|████████▍ | 901/1071 [06:39<01:13,  2.31it/s]

Training steps: 900 Loss: 0.6057343482971191


Training:  93%|█████████▎| 1001/1071 [07:23<00:32,  2.18it/s]

Training steps: 1000 Loss: 0.3007897138595581


Training: 100%|██████████| 1071/1071 [07:54<00:00,  2.26it/s]

TRAIN ACC : 0.8883794281709062, TRAIN LOSS : 0.32024388809720566



Training:   1%|          | 2/357 [00:00<01:03,  5.60it/s]

Validation steps: 0 Loss: 0.3890451192855835


Training:  29%|██▊       | 102/357 [00:17<00:43,  5.88it/s]

Validation steps: 100 Loss: 0.2212647795677185


Training:  57%|█████▋    | 202/357 [00:34<00:26,  5.82it/s]

Validation steps: 200 Loss: 0.07936372607946396


Training:  85%|████████▍ | 302/357 [00:51<00:09,  5.87it/s]

Validation steps: 300 Loss: 0.8225501179695129


Training: 100%|██████████| 357/357 [01:01<00:00,  5.84it/s]


              precision    recall  f1-score   support

        IT과학       0.76      0.95      0.84      1206
          경제       0.90      0.81      0.86      1556
          사회       0.84      0.76      0.80      1840
        생활문화       0.91      0.89      0.90      1483
          세계       0.90      0.93      0.92      1907
         스포츠       0.96      0.98      0.97      1733
          정치       0.92      0.91      0.92      1688

    accuracy                           0.89     11413
   macro avg       0.89      0.89      0.89     11413
weighted avg       0.89      0.89      0.89     11413

[0.95024876 0.81362468 0.75815217 0.88671612 0.92815941 0.97807271
 0.91291469]
VALID ACC : 0.8874090948917901, VALID LOSS : 0.3470018427109435
{'epoch': 6, 'train_loss': 0.32024388809720566, 'train_acc': 0.8883794281709062, 'valid_acc': 0.8874090948917901, 'val_loss': 0.3470018427109435, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 8


Training:   0%|          | 1/1071 [00:00<11:36,  1.54it/s]

Training steps: 0 Loss: 0.6674706935882568


Training:   9%|▉         | 101/1071 [01:03<09:55,  1.63it/s]

Training steps: 100 Loss: 0.2422080785036087


Training:  19%|█▉        | 201/1071 [02:05<09:21,  1.55it/s]

Training steps: 200 Loss: 0.31982478499412537


Training:  28%|██▊       | 301/1071 [03:08<07:42,  1.66it/s]

Training steps: 300 Loss: 0.21317166090011597


Training:  37%|███▋      | 401/1071 [04:10<07:03,  1.58it/s]

Training steps: 400 Loss: 0.22487172484397888


Training:  47%|████▋     | 501/1071 [05:12<05:49,  1.63it/s]

Training steps: 500 Loss: 0.33578088879585266


Training:  56%|█████▌    | 601/1071 [06:15<05:04,  1.54it/s]

Training steps: 600 Loss: 0.40704023838043213


Training:  65%|██████▌   | 701/1071 [07:17<03:48,  1.62it/s]

Training steps: 700 Loss: 0.12931622564792633


Training:  75%|███████▍  | 801/1071 [08:19<02:52,  1.57it/s]

Training steps: 800 Loss: 0.3542945981025696


Training:  84%|████████▍ | 901/1071 [09:21<01:43,  1.64it/s]

Training steps: 900 Loss: 0.19857855141162872


Training:  93%|█████████▎| 1001/1071 [10:24<00:44,  1.57it/s]

Training steps: 1000 Loss: 0.3122003376483917


Training: 100%|██████████| 1071/1071 [11:07<00:00,  1.60it/s]

TRAIN ACC : 0.8924680938056716, TRAIN LOSS : 0.3055900805512396



Training:   0%|          | 1/357 [00:00<02:14,  2.65it/s]

Validation steps: 0 Loss: 0.3283873200416565


Training:  28%|██▊       | 101/357 [00:35<01:29,  2.86it/s]

Validation steps: 100 Loss: 0.21896326541900635


Training:  56%|█████▋    | 201/357 [01:10<00:56,  2.77it/s]

Validation steps: 200 Loss: 0.08499287068843842


Training:  84%|████████▍ | 301/357 [01:45<00:19,  2.89it/s]

Validation steps: 300 Loss: 0.7411919832229614


Training: 100%|██████████| 357/357 [02:05<00:00,  2.85it/s]


              precision    recall  f1-score   support

        IT과학       0.83      0.88      0.86      1206
          경제       0.89      0.83      0.86      1556
          사회       0.80      0.80      0.80      1840
        생활문화       0.89      0.90      0.90      1483
          세계       0.90      0.93      0.91      1907
         스포츠       0.97      0.97      0.97      1733
          정치       0.94      0.89      0.91      1688

    accuracy                           0.89     11413
   macro avg       0.89      0.89      0.89     11413
weighted avg       0.89      0.89      0.89     11413

[0.88474295 0.83097686 0.80054348 0.90424815 0.93445202 0.9694172
 0.89277251]
VALID ACC : 0.8887233856128975, VALID LOSS : 0.3399746211908278
{'epoch': 7, 'train_loss': 0.3055900805512396, 'train_acc': 0.8924680938056716, 'valid_acc': 0.8887233856128975, 'val_loss': 0.3399746211908278, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 9


Training:   0%|          | 1/1071 [00:00<11:17,  1.58it/s]

Training steps: 0 Loss: 0.3329797089099884


Training:   9%|▉         | 101/1071 [01:03<09:51,  1.64it/s]

Training steps: 100 Loss: 0.5097506642341614


Training:  19%|█▉        | 201/1071 [02:05<09:12,  1.57it/s]

Training steps: 200 Loss: 0.20676693320274353


Training:  28%|██▊       | 301/1071 [03:07<07:46,  1.65it/s]

Training steps: 300 Loss: 0.146074116230011


Training:  37%|███▋      | 401/1071 [04:10<07:04,  1.58it/s]

Training steps: 400 Loss: 0.3753493130207062


Training:  47%|████▋     | 501/1071 [05:12<05:46,  1.65it/s]

Training steps: 500 Loss: 0.14973817765712738


Training:  56%|█████▌    | 601/1071 [06:14<04:59,  1.57it/s]

Training steps: 600 Loss: 0.3463445007801056


Training:  65%|██████▌   | 701/1071 [07:16<03:44,  1.65it/s]

Training steps: 700 Loss: 0.21084138751029968


Training:  75%|███████▍  | 801/1071 [08:18<02:51,  1.57it/s]

Training steps: 800 Loss: 0.19404107332229614


Training:  84%|████████▍ | 901/1071 [09:20<01:42,  1.65it/s]

Training steps: 900 Loss: 0.23297655582427979


Training:  93%|█████████▎| 1001/1071 [10:23<00:44,  1.58it/s]

Training steps: 1000 Loss: 0.5152341723442078


Training: 100%|██████████| 1071/1071 [11:06<00:00,  1.61it/s]

TRAIN ACC : 0.8953885692590754, TRAIN LOSS : 0.29224531303127715



Training:   0%|          | 1/357 [00:00<02:11,  2.70it/s]

Validation steps: 0 Loss: 0.5239905118942261


Training:  28%|██▊       | 101/357 [00:35<01:31,  2.79it/s]

Validation steps: 100 Loss: 0.27795058488845825


Training:  56%|█████▋    | 201/357 [01:10<00:54,  2.86it/s]

Validation steps: 200 Loss: 0.05333590507507324


Training:  84%|████████▍ | 301/357 [01:46<00:19,  2.86it/s]

Validation steps: 300 Loss: 0.9225316643714905


Training: 100%|██████████| 357/357 [02:05<00:00,  2.84it/s]


              precision    recall  f1-score   support

        IT과학       0.81      0.91      0.86      1206
          경제       0.88      0.85      0.86      1556
          사회       0.84      0.76      0.80      1840
        생활문화       0.88      0.90      0.89      1483
          세계       0.92      0.91      0.92      1907
         스포츠       0.95      0.98      0.97      1733
          정치       0.91      0.91      0.91      1688

    accuracy                           0.89     11413
   macro avg       0.89      0.89      0.89     11413
weighted avg       0.89      0.89      0.89     11413

[0.91293532 0.85089974 0.76413043 0.90492245 0.90980598 0.98211194
 0.90995261]
VALID ACC : 0.8889862437571191, VALID LOSS : 0.35158046915167185
{'epoch': 8, 'train_loss': 0.29224531303127715, 'train_acc': 0.8953885692590754, 'valid_acc': 0.8889862437571191, 'val_loss': 0.35158046915167185, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 10


Training:   0%|          | 1/1071 [00:00<11:07,  1.60it/s]

Training steps: 0 Loss: 0.28872150182724


Training:   9%|▉         | 101/1071 [01:03<09:46,  1.65it/s]

Training steps: 100 Loss: 0.3478526473045349


Training:  19%|█▉        | 201/1071 [02:05<09:10,  1.58it/s]

Training steps: 200 Loss: 0.2990553677082062


Training:  28%|██▊       | 301/1071 [03:07<07:52,  1.63it/s]

Training steps: 300 Loss: 0.08200643211603165


Training:  37%|███▋      | 401/1071 [04:09<07:10,  1.56it/s]

Training steps: 400 Loss: 0.46646198630332947


Training:  47%|████▋     | 501/1071 [05:11<05:48,  1.64it/s]

Training steps: 500 Loss: 0.1493731439113617


Training:  56%|█████▌    | 601/1071 [06:13<04:58,  1.57it/s]

Training steps: 600 Loss: 0.28390631079673767


Training:  65%|██████▌   | 701/1071 [07:14<03:43,  1.66it/s]

Training steps: 700 Loss: 0.3762114644050598


Training:  75%|███████▍  | 801/1071 [08:17<02:50,  1.58it/s]

Training steps: 800 Loss: 0.5091996192932129


Training:  84%|████████▍ | 901/1071 [09:19<01:44,  1.62it/s]

Training steps: 900 Loss: 0.2618290185928345


Training:  93%|█████████▎| 1001/1071 [10:21<00:44,  1.56it/s]

Training steps: 1000 Loss: 0.2971609830856323


Training: 100%|██████████| 1071/1071 [11:03<00:00,  1.61it/s]

TRAIN ACC : 0.9041207908647528, TRAIN LOSS : 0.27719124905803316



Training:   0%|          | 1/357 [00:00<02:05,  2.85it/s]

Validation steps: 0 Loss: 0.33687838912010193


Training:  28%|██▊       | 101/357 [00:36<01:30,  2.83it/s]

Validation steps: 100 Loss: 0.3154672384262085


Training:  56%|█████▋    | 201/357 [01:10<00:53,  2.90it/s]

Validation steps: 200 Loss: 0.038995709270238876


Training:  84%|████████▍ | 301/357 [01:45<00:19,  2.95it/s]

Validation steps: 300 Loss: 0.9030530452728271


Training: 100%|██████████| 357/357 [02:04<00:00,  2.86it/s]

              precision    recall  f1-score   support

        IT과학       0.80      0.92      0.86      1206
          경제       0.86      0.86      0.86      1556
          사회       0.88      0.72      0.79      1840
        생활문화       0.89      0.90      0.90      1483
          세계       0.89      0.94      0.91      1907
         스포츠       0.96      0.98      0.97      1733
          정치       0.91      0.92      0.92      1688

    accuracy                           0.89     11413
   macro avg       0.89      0.89      0.89     11413
weighted avg       0.89      0.89      0.89     11413

[0.91791045 0.85732648 0.72119565 0.89952798 0.93969586 0.97980381
 0.92061611]
VALID ACC : 0.8889862437571191, VALID LOSS : 0.35745706429825025
{'epoch': 9, 'train_loss': 0.27719124905803316, 'train_acc': 0.9041207908647528, 'valid_acc': 0.8889862437571191, 'val_loss': 0.35745706429825025, 'learning_rate': 5e-06}
Start Training: Epoch 11



Training:   0%|          | 1/1071 [00:00<07:44,  2.30it/s]

Training steps: 0 Loss: 0.2602362036705017


Training:   9%|▉         | 101/1071 [00:44<06:56,  2.33it/s]

Training steps: 100 Loss: 0.2717387080192566


Training:  19%|█▉        | 201/1071 [01:29<06:40,  2.17it/s]

Training steps: 200 Loss: 0.11700519174337387


Training:  28%|██▊       | 301/1071 [02:13<05:32,  2.31it/s]

Training steps: 300 Loss: 0.2649680972099304


Training:  37%|███▋      | 401/1071 [02:57<05:07,  2.18it/s]

Training steps: 400 Loss: 0.2082974910736084


Training:  47%|████▋     | 501/1071 [03:41<04:05,  2.32it/s]

Training steps: 500 Loss: 0.37343302369117737


Training:  56%|█████▌    | 601/1071 [04:26<03:36,  2.17it/s]

Training steps: 600 Loss: 0.2124125361442566


Training:  65%|██████▌   | 701/1071 [05:10<02:38,  2.33it/s]

Training steps: 700 Loss: 0.15522532165050507


Training:  75%|███████▍  | 801/1071 [06:01<02:54,  1.55it/s]

Training steps: 800 Loss: 0.41442009806632996


Training:  84%|████████▍ | 901/1071 [07:02<01:43,  1.64it/s]

Training steps: 900 Loss: 0.15833298861980438


Training:  93%|█████████▎| 1001/1071 [08:04<00:44,  1.57it/s]

Training steps: 1000 Loss: 0.24463792145252228


Training: 100%|██████████| 1071/1071 [08:47<00:00,  2.03it/s]

TRAIN ACC : 0.9083846850267223, TRAIN LOSS : 0.2633535650217024



Training:   0%|          | 1/357 [00:00<02:05,  2.85it/s]

Validation steps: 0 Loss: 0.38222599029541016


Training:  28%|██▊       | 101/357 [00:35<01:30,  2.82it/s]

Validation steps: 100 Loss: 0.3028739094734192


Training:  56%|█████▋    | 201/357 [01:09<00:54,  2.88it/s]

Validation steps: 200 Loss: 0.05040489882230759


Training:  84%|████████▍ | 301/357 [01:44<00:19,  2.83it/s]

Validation steps: 300 Loss: 0.9631662368774414


Training: 100%|██████████| 357/357 [02:04<00:00,  2.88it/s]

              precision    recall  f1-score   support

        IT과학       0.81      0.90      0.85      1206
          경제       0.87      0.85      0.86      1556
          사회       0.86      0.73      0.79      1840
        생활문화       0.89      0.91      0.90      1483
          세계       0.92      0.92      0.92      1907
         스포츠       0.96      0.98      0.97      1733
          정치       0.89      0.94      0.91      1688

    accuracy                           0.89     11413
   macro avg       0.88      0.89      0.89     11413
weighted avg       0.89      0.89      0.89     11413

[0.89883914 0.8496144  0.73423913 0.91099123 0.91976927 0.97980381
 0.93898104]
VALID ACC : 0.8888986243757119, VALID LOSS : 0.34798525531022323
{'epoch': 10, 'train_loss': 0.2633535650217024, 'train_acc': 0.9083846850267223, 'valid_acc': 0.8888986243757119, 'val_loss': 0.34798525531022323, 'learning_rate': 5e-06}
Start Training: Epoch 12



Training:   0%|          | 1/1071 [00:00<10:56,  1.63it/s]

Training steps: 0 Loss: 0.20403249561786652


Training:   9%|▉         | 101/1071 [01:02<09:52,  1.64it/s]

Training steps: 100 Loss: 0.2981666922569275


Training:  19%|█▉        | 201/1071 [02:04<09:06,  1.59it/s]

Training steps: 200 Loss: 0.060261525213718414


Training:  28%|██▊       | 301/1071 [03:05<07:39,  1.68it/s]

Training steps: 300 Loss: 0.07899855822324753


Training:  37%|███▋      | 401/1071 [04:07<06:58,  1.60it/s]

Training steps: 400 Loss: 0.36162814497947693


Training:  47%|████▋     | 501/1071 [05:08<05:41,  1.67it/s]

Training steps: 500 Loss: 0.15622812509536743


Training:  56%|█████▌    | 601/1071 [06:10<05:00,  1.56it/s]

Training steps: 600 Loss: 0.15354080498218536


Training:  65%|██████▌   | 701/1071 [07:11<03:44,  1.65it/s]

Training steps: 700 Loss: 0.17836639285087585


Training:  75%|███████▍  | 801/1071 [08:12<02:52,  1.57it/s]

Training steps: 800 Loss: 0.2772497534751892


Training:  84%|████████▍ | 901/1071 [09:14<01:42,  1.65it/s]

Training steps: 900 Loss: 0.1744992882013321


Training:  93%|█████████▎| 1001/1071 [10:15<00:43,  1.60it/s]

Training steps: 1000 Loss: 0.4780955910682678


Training: 100%|██████████| 1071/1071 [10:58<00:00,  1.63it/s]

TRAIN ACC : 0.9099909465260945, TRAIN LOSS : 0.25280968843499263



Training:   0%|          | 1/357 [00:00<02:05,  2.83it/s]

Validation steps: 0 Loss: 0.4742114245891571


Training:  28%|██▊       | 101/357 [00:34<01:28,  2.90it/s]

Validation steps: 100 Loss: 0.21922296285629272


Training:  56%|█████▋    | 201/357 [01:09<00:54,  2.86it/s]

Validation steps: 200 Loss: 0.06340979784727097


Training:  84%|████████▍ | 301/357 [01:43<00:19,  2.93it/s]

Validation steps: 300 Loss: 0.8193223476409912


Training: 100%|██████████| 357/357 [02:02<00:00,  2.92it/s]


              precision    recall  f1-score   support

        IT과학       0.84      0.85      0.85      1206
          경제       0.85      0.86      0.86      1556
          사회       0.83      0.78      0.80      1840
        생활문화       0.87      0.92      0.90      1483
          세계       0.95      0.89      0.92      1907
         스포츠       0.97      0.98      0.97      1733
          정치       0.90      0.94      0.92      1688

    accuracy                           0.89     11413
   macro avg       0.89      0.89      0.89     11413
weighted avg       0.89      0.89      0.89     11413

[0.85240464 0.86439589 0.77717391 0.92447741 0.89040378 0.9763416
 0.93779621]
VALID ACC : 0.8890738631385262, VALID LOSS : 0.3474304358571434
{'epoch': 11, 'train_loss': 0.25280968843499263, 'train_acc': 0.9099909465260945, 'valid_acc': 0.8890738631385262, 'val_loss': 0.3474304358571434, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 13


Training:   0%|          | 1/1071 [00:00<11:24,  1.56it/s]

Training steps: 0 Loss: 0.27514827251434326


Training:   9%|▉         | 101/1071 [01:02<09:35,  1.69it/s]

Training steps: 100 Loss: 0.3712029457092285


Training:  19%|█▉        | 201/1071 [02:03<09:08,  1.59it/s]

Training steps: 200 Loss: 0.1493121087551117


Training:  28%|██▊       | 301/1071 [03:04<07:36,  1.69it/s]

Training steps: 300 Loss: 0.24794024229049683


Training:  37%|███▋      | 401/1071 [04:06<07:06,  1.57it/s]

Training steps: 400 Loss: 0.06656338274478912


Training:  47%|████▋     | 501/1071 [05:07<05:46,  1.65it/s]

Training steps: 500 Loss: 0.1449044942855835


Training:  56%|█████▌    | 601/1071 [06:08<04:59,  1.57it/s]

Training steps: 600 Loss: 0.31290557980537415


Training:  65%|██████▌   | 701/1071 [07:09<03:43,  1.66it/s]

Training steps: 700 Loss: 0.3687549829483032


Training:  75%|███████▍  | 801/1071 [08:11<02:47,  1.61it/s]

Training steps: 800 Loss: 0.2106095254421234


Training:  84%|████████▍ | 901/1071 [09:12<01:40,  1.69it/s]

Training steps: 900 Loss: 0.19629815220832825


Training:  93%|█████████▎| 1001/1071 [10:13<00:44,  1.57it/s]

Training steps: 1000 Loss: 0.19999805092811584


Training: 100%|██████████| 1071/1071 [10:55<00:00,  1.63it/s]

TRAIN ACC : 0.9156566689056979, TRAIN LOSS : 0.23892299726041927



Training:   0%|          | 1/357 [00:00<02:05,  2.83it/s]

Validation steps: 0 Loss: 0.4042469263076782


Training:  28%|██▊       | 101/357 [00:34<01:26,  2.97it/s]

Validation steps: 100 Loss: 0.32154545187950134


Training:  56%|█████▋    | 201/357 [01:08<00:52,  2.96it/s]

Validation steps: 200 Loss: 0.030080405995249748


Training:  84%|████████▍ | 301/357 [01:42<00:18,  2.98it/s]

Validation steps: 300 Loss: 0.9452452659606934


Training: 100%|██████████| 357/357 [02:01<00:00,  2.94it/s]

              precision    recall  f1-score   support

        IT과학       0.82      0.91      0.86      1206
          경제       0.86      0.86      0.86      1556
          사회       0.87      0.74      0.80      1840
        생활문화       0.90      0.89      0.89      1483
          세계       0.89      0.94      0.92      1907
         스포츠       0.95      0.98      0.97      1733
          정치       0.93      0.91      0.92      1688

    accuracy                           0.89     11413
   macro avg       0.89      0.89      0.89     11413
weighted avg       0.89      0.89      0.89     11413

[0.90630182 0.86439589 0.73858696 0.88806473 0.94284216 0.98384305
 0.90580569]
VALID ACC : 0.8889862437571191, VALID LOSS : 0.3627739997648475
{'epoch': 12, 'train_loss': 0.23892299726041927, 'train_acc': 0.9156566689056979, 'valid_acc': 0.8889862437571191, 'val_loss': 0.3627739997648475, 'learning_rate': 5e-06}
Start Training: Epoch 14



Training:   0%|          | 1/1071 [00:00<10:41,  1.67it/s]

Training steps: 0 Loss: 0.16650646924972534


Training:   9%|▉         | 101/1071 [01:01<09:32,  1.69it/s]

Training steps: 100 Loss: 0.23137150704860687


Training:  19%|█▉        | 201/1071 [02:02<09:12,  1.57it/s]

Training steps: 200 Loss: 0.5433224439620972


Training:  28%|██▊       | 301/1071 [03:03<07:41,  1.67it/s]

Training steps: 300 Loss: 0.2350325733423233


Training:  37%|███▋      | 401/1071 [04:04<06:56,  1.61it/s]

Training steps: 400 Loss: 0.19560587406158447


Training:  47%|████▋     | 501/1071 [05:05<05:36,  1.69it/s]

Training steps: 500 Loss: 0.26343846321105957


Training:  56%|█████▌    | 601/1071 [06:06<04:50,  1.62it/s]

Training steps: 600 Loss: 0.1918792426586151


Training:  65%|██████▌   | 701/1071 [07:07<03:38,  1.69it/s]

Training steps: 700 Loss: 0.28424200415611267


Training:  75%|███████▍  | 801/1071 [08:08<02:48,  1.60it/s]

Training steps: 800 Loss: 0.22892585396766663


Training:  84%|████████▍ | 901/1071 [09:09<01:42,  1.66it/s]

Training steps: 900 Loss: 0.1599157154560089


Training:  93%|█████████▎| 1001/1071 [10:10<00:43,  1.59it/s]

Training steps: 1000 Loss: 0.13206250965595245


Training: 100%|██████████| 1071/1071 [10:52<00:00,  1.64it/s]

TRAIN ACC : 0.9179930492684208, TRAIN LOSS : 0.23084726994187613



Training:   0%|          | 1/357 [00:00<02:07,  2.79it/s]

Validation steps: 0 Loss: 0.35424602031707764


Training:  28%|██▊       | 101/357 [00:34<01:27,  2.94it/s]

Validation steps: 100 Loss: 0.2999204397201538


Training:  56%|█████▋    | 201/357 [01:07<00:51,  3.06it/s]

Validation steps: 200 Loss: 0.020499201491475105


Training:  84%|████████▍ | 301/357 [01:41<00:19,  2.92it/s]

Validation steps: 300 Loss: 0.9017281532287598


Training: 100%|██████████| 357/357 [02:00<00:00,  2.96it/s]

              precision    recall  f1-score   support

        IT과학       0.83      0.89      0.86      1206
          경제       0.86      0.86      0.86      1556
          사회       0.86      0.74      0.80      1840
        생활문화       0.89      0.90      0.89      1483
          세계       0.88      0.95      0.91      1907
         스포츠       0.97      0.97      0.97      1733
          정치       0.92      0.91      0.92      1688

    accuracy                           0.89     11413
   macro avg       0.89      0.89      0.89     11413
weighted avg       0.89      0.89      0.89     11413

[0.88888889 0.86311054 0.7423913  0.90289953 0.94546408 0.96537796
 0.91232227]
VALID ACC : 0.8881100499430474, VALID LOSS : 0.36086456141374607
{'epoch': 13, 'train_loss': 0.23084726994187613, 'train_acc': 0.9179930492684208, 'valid_acc': 0.8881100499430474, 'val_loss': 0.36086456141374607, 'learning_rate': 5e-06}
Start Training: Epoch 15



Training:   0%|          | 1/1071 [00:00<10:39,  1.67it/s]

Training steps: 0 Loss: 0.16527493298053741


Training:   9%|▉         | 101/1071 [01:01<09:33,  1.69it/s]

Training steps: 100 Loss: 0.11932382732629776


Training:  19%|█▉        | 201/1071 [02:02<08:56,  1.62it/s]

Training steps: 200 Loss: 0.37249132990837097


Training:  28%|██▊       | 301/1071 [03:02<07:37,  1.68it/s]

Training steps: 300 Loss: 0.2122676521539688


Training:  37%|███▋      | 401/1071 [04:03<06:54,  1.61it/s]

Training steps: 400 Loss: 0.12071490287780762


Training:  47%|████▋     | 501/1071 [05:04<05:35,  1.70it/s]

Training steps: 500 Loss: 0.12344254553318024


Training:  56%|█████▌    | 601/1071 [06:05<04:54,  1.60it/s]

Training steps: 600 Loss: 0.3293358087539673


Training:  65%|██████▌   | 701/1071 [07:05<03:39,  1.68it/s]

Training steps: 700 Loss: 0.16742977499961853


Training:  75%|███████▍  | 801/1071 [08:06<02:45,  1.63it/s]

Training steps: 800 Loss: 0.2303342968225479


Training:  84%|████████▍ | 901/1071 [09:07<01:40,  1.70it/s]

Training steps: 900 Loss: 0.2495797723531723


Training:  93%|█████████▎| 1001/1071 [10:07<00:43,  1.63it/s]

Training steps: 1000 Loss: 0.22827179729938507


Training: 100%|██████████| 1071/1071 [10:49<00:00,  1.65it/s]

TRAIN ACC : 0.9211763675126311, TRAIN LOSS : 0.21869460856289813



Training:   0%|          | 1/357 [00:00<02:03,  2.89it/s]

Validation steps: 0 Loss: 0.35528793931007385


Training:  28%|██▊       | 101/357 [00:33<01:27,  2.93it/s]

Validation steps: 100 Loss: 0.35454273223876953


Training:  56%|█████▋    | 201/357 [01:06<00:51,  3.03it/s]

Validation steps: 200 Loss: 0.035629257559776306


Training:  84%|████████▍ | 301/357 [01:40<00:19,  2.92it/s]

Validation steps: 300 Loss: 1.061305284500122


Training: 100%|██████████| 357/357 [01:59<00:00,  3.00it/s]


              precision    recall  f1-score   support

        IT과학       0.85      0.83      0.84      1206
          경제       0.79      0.91      0.84      1556
          사회       0.88      0.70      0.78      1840
        생활문화       0.88      0.90      0.89      1483
          세계       0.91      0.93      0.92      1907
         스포츠       0.96      0.98      0.97      1733
          정치       0.91      0.93      0.92      1688

    accuracy                           0.88     11413
   macro avg       0.88      0.88      0.88     11413
weighted avg       0.89      0.88      0.88     11413

[0.83416252 0.90681234 0.70271739 0.90492245 0.92920818 0.97807271
 0.9306872 ]
VALID ACC : 0.8840795583983178, VALID LOSS : 0.3637603611609682
{'epoch': 14, 'train_loss': 0.21869460856289813, 'train_acc': 0.9211763675126311, 'valid_acc': 0.8840795583983178, 'val_loss': 0.3637603611609682, 'learning_rate': 5e-06}
EarlyStopping counter: 3 out of 3


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.decoder.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['classifier.dense.weight', 'roberta.embeddings.token_label_2_

Start Training: Epoch 1


Training:   0%|          | 1/1071 [00:00<07:52,  2.26it/s]

Training steps: 0 Loss: 1.9569305181503296


Training:   9%|▉         | 101/1071 [00:45<06:58,  2.32it/s]

Training steps: 100 Loss: 1.7713030576705933


Training:  19%|█▉        | 201/1071 [01:29<06:43,  2.16it/s]

Training steps: 200 Loss: 1.5546225309371948


Training:  28%|██▊       | 301/1071 [02:13<05:30,  2.33it/s]

Training steps: 300 Loss: 1.3831030130386353


Training:  37%|███▋      | 401/1071 [02:58<05:09,  2.17it/s]

Training steps: 400 Loss: 1.0372554063796997


Training:  47%|████▋     | 501/1071 [03:42<04:05,  2.32it/s]

Training steps: 500 Loss: 0.8414955139160156


Training:  56%|█████▌    | 601/1071 [04:27<03:36,  2.17it/s]

Training steps: 600 Loss: 1.0185719728469849


Training:  65%|██████▌   | 701/1071 [05:11<02:38,  2.33it/s]

Training steps: 700 Loss: 0.5952715277671814


Training:  75%|███████▍  | 801/1071 [05:55<02:04,  2.17it/s]

Training steps: 800 Loss: 0.8759333491325378


Training:  84%|████████▍ | 901/1071 [06:39<01:13,  2.31it/s]

Training steps: 900 Loss: 0.7824411988258362


Training:  93%|█████████▎| 1001/1071 [07:24<00:32,  2.18it/s]

Training steps: 1000 Loss: 0.6143192648887634


Training: 100%|██████████| 1071/1071 [07:55<00:00,  2.25it/s]

TRAIN ACC : 0.6238135568470548, TRAIN LOSS : 1.037768509458093



Training:   1%|          | 2/357 [00:00<01:02,  5.71it/s]

Validation steps: 0 Loss: 0.2108294516801834


Training:  29%|██▊       | 102/357 [00:17<00:44,  5.78it/s]

Validation steps: 100 Loss: 0.6482700705528259


Training:  57%|█████▋    | 202/357 [00:34<00:27,  5.69it/s]

Validation steps: 200 Loss: 0.6323533654212952


Training:  85%|████████▍ | 302/357 [00:51<00:09,  5.70it/s]

Validation steps: 300 Loss: 1.1090527772903442


Training: 100%|██████████| 357/357 [01:01<00:00,  5.80it/s]


              precision    recall  f1-score   support

        IT과학       0.77      0.81      0.79      1206
          경제       0.77      0.84      0.80      1555
          사회       0.85      0.54      0.66      1840
        생활문화       0.82      0.90      0.86      1484
          세계       0.84      0.85      0.84      1907
         스포츠       0.93      0.96      0.94      1733
          정치       0.78      0.90      0.83      1688

    accuracy                           0.82     11413
   macro avg       0.82      0.83      0.82     11413
weighted avg       0.83      0.82      0.82     11413

[0.80597015 0.83665595 0.53586957 0.90229111 0.8505506  0.96133872
 0.8957346 ]
VALID ACC : 0.823446946464558, VALID LOSS : 0.5647382300649705
{'epoch': 0, 'train_loss': 1.037768509458093, 'train_acc': 0.6238135568470548, 'valid_acc': 0.823446946464558, 'val_loss': 0.5647382300649705, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 2


Training:   0%|          | 1/1071 [00:00<07:57,  2.24it/s]

Training steps: 0 Loss: 0.49892789125442505


Training:   9%|▉         | 101/1071 [00:44<06:54,  2.34it/s]

Training steps: 100 Loss: 0.41426652669906616


Training:  19%|█▉        | 201/1071 [01:29<06:41,  2.17it/s]

Training steps: 200 Loss: 0.5329985618591309


Training:  28%|██▊       | 301/1071 [02:13<05:29,  2.34it/s]

Training steps: 300 Loss: 0.2782227098941803


Training:  37%|███▋      | 401/1071 [02:57<05:05,  2.19it/s]

Training steps: 400 Loss: 0.3826943337917328


Training:  47%|████▋     | 501/1071 [03:41<04:04,  2.33it/s]

Training steps: 500 Loss: 0.788103461265564


Training:  56%|█████▌    | 601/1071 [04:25<03:34,  2.19it/s]

Training steps: 600 Loss: 0.48855796456336975


Training:  65%|██████▌   | 701/1071 [05:09<02:38,  2.33it/s]

Training steps: 700 Loss: 0.7405698299407959


Training:  75%|███████▍  | 801/1071 [05:53<02:03,  2.19it/s]

Training steps: 800 Loss: 0.4666835069656372


Training:  84%|████████▍ | 901/1071 [06:37<01:12,  2.34it/s]

Training steps: 900 Loss: 0.36475488543510437


Training:  93%|█████████▎| 1001/1071 [07:21<00:32,  2.18it/s]

Training steps: 1000 Loss: 0.43475204706192017


Training: 100%|██████████| 1071/1071 [07:52<00:00,  2.27it/s]

TRAIN ACC : 0.8382348646359628, TRAIN LOSS : 0.4922725437026398



Training:   1%|          | 2/357 [00:00<01:02,  5.64it/s]

Validation steps: 0 Loss: 0.16821889579296112


Training:  29%|██▊       | 102/357 [00:17<00:43,  5.87it/s]

Validation steps: 100 Loss: 0.3255079984664917


Training:  57%|█████▋    | 202/357 [00:34<00:26,  5.80it/s]

Validation steps: 200 Loss: 0.5127562880516052


Training:  85%|████████▍ | 302/357 [00:51<00:09,  5.76it/s]

Validation steps: 300 Loss: 0.5989081859588623


Training: 100%|██████████| 357/357 [01:01<00:00,  5.82it/s]


              precision    recall  f1-score   support

        IT과학       0.79      0.86      0.82      1206
          경제       0.86      0.79      0.83      1555
          사회       0.79      0.75      0.77      1840
        생활문화       0.90      0.87      0.89      1484
          세계       0.84      0.92      0.88      1907
         스포츠       0.96      0.95      0.96      1733
          정치       0.89      0.89      0.89      1688

    accuracy                           0.86     11413
   macro avg       0.86      0.86      0.86     11413
weighted avg       0.86      0.86      0.86     11413

[0.85655058 0.79228296 0.74619565 0.87061995 0.92186681 0.95383728
 0.88625592]
VALID ACC : 0.8619118549023044, VALID LOSS : 0.4411315955326003
{'epoch': 1, 'train_loss': 0.4922725437026398, 'train_acc': 0.8382348646359628, 'valid_acc': 0.8619118549023044, 'val_loss': 0.4411315955326003, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 3


Training:   0%|          | 1/1071 [00:00<07:48,  2.28it/s]

Training steps: 0 Loss: 0.32835352420806885


Training:   9%|▉         | 101/1071 [00:44<06:59,  2.31it/s]

Training steps: 100 Loss: 0.6681056618690491


Training:  19%|█▉        | 201/1071 [01:29<06:37,  2.19it/s]

Training steps: 200 Loss: 0.16761131584644318


Training:  28%|██▊       | 301/1071 [02:13<05:31,  2.33it/s]

Training steps: 300 Loss: 0.4371297359466553


Training:  37%|███▋      | 401/1071 [02:57<05:07,  2.18it/s]

Training steps: 400 Loss: 0.491992712020874


Training:  47%|████▋     | 501/1071 [03:41<04:05,  2.33it/s]

Training steps: 500 Loss: 0.3771420121192932


Training:  56%|█████▌    | 601/1071 [04:25<03:37,  2.16it/s]

Training steps: 600 Loss: 0.48894137144088745


Training:  65%|██████▌   | 701/1071 [05:09<02:38,  2.33it/s]

Training steps: 700 Loss: 0.4058108329772949


Training:  75%|███████▍  | 801/1071 [05:54<02:03,  2.18it/s]

Training steps: 800 Loss: 0.5025597214698792


Training:  84%|████████▍ | 901/1071 [06:38<01:13,  2.32it/s]

Training steps: 900 Loss: 0.22877956926822662


Training:  93%|█████████▎| 1001/1071 [07:22<00:32,  2.17it/s]

Training steps: 1000 Loss: 0.8306623101234436


Training: 100%|██████████| 1071/1071 [07:53<00:00,  2.26it/s]

TRAIN ACC : 0.856809088519611, TRAIN LOSS : 0.4245314707929124



Training:   1%|          | 2/357 [00:00<01:02,  5.64it/s]

Validation steps: 0 Loss: 0.08045453578233719


Training:  29%|██▊       | 102/357 [00:17<00:43,  5.85it/s]

Validation steps: 100 Loss: 0.3597513735294342


Training:  57%|█████▋    | 202/357 [00:34<00:26,  5.80it/s]

Validation steps: 200 Loss: 0.4275251626968384


Training:  85%|████████▍ | 302/357 [00:51<00:09,  5.90it/s]

Validation steps: 300 Loss: 0.5763877034187317


Training: 100%|██████████| 357/357 [01:01<00:00,  5.84it/s]


              precision    recall  f1-score   support

        IT과학       0.83      0.81      0.82      1206
          경제       0.86      0.79      0.82      1555
          사회       0.76      0.77      0.77      1840
        생활문화       0.89      0.89      0.89      1484
          세계       0.88      0.93      0.90      1907
         스포츠       0.94      0.96      0.95      1733
          정치       0.90      0.90      0.90      1688

    accuracy                           0.87     11413
   macro avg       0.87      0.86      0.87     11413
weighted avg       0.87      0.87      0.87     11413

[0.80597015 0.79099678 0.77119565 0.89218329 0.92920818 0.96480092
 0.89514218]
VALID ACC : 0.8674318759309559, VALID LOSS : 0.40519483497037606
{'epoch': 2, 'train_loss': 0.4245314707929124, 'train_acc': 0.856809088519611, 'valid_acc': 0.8674318759309559, 'val_loss': 0.40519483497037606, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 4


Training:   0%|          | 1/1071 [00:00<10:50,  1.64it/s]

Training steps: 0 Loss: 0.4235672354698181


Training:   9%|▉         | 101/1071 [01:02<09:35,  1.69it/s]

Training steps: 100 Loss: 0.4134660065174103


Training:  19%|█▉        | 201/1071 [02:03<09:03,  1.60it/s]

Training steps: 200 Loss: 0.4654408395290375


Training:  28%|██▊       | 301/1071 [03:04<07:37,  1.68it/s]

Training steps: 300 Loss: 0.39191174507141113


Training:  37%|███▋      | 401/1071 [04:04<06:59,  1.60it/s]

Training steps: 400 Loss: 0.2940024137496948


Training:  47%|████▋     | 501/1071 [05:05<05:37,  1.69it/s]

Training steps: 500 Loss: 0.4436433017253876


Training:  56%|█████▌    | 601/1071 [06:06<04:51,  1.61it/s]

Training steps: 600 Loss: 0.2528471052646637


Training:  65%|██████▌   | 701/1071 [07:07<03:39,  1.69it/s]

Training steps: 700 Loss: 0.218708336353302


Training:  75%|███████▍  | 801/1071 [08:08<02:47,  1.61it/s]

Training steps: 800 Loss: 0.3200453221797943


Training:  84%|████████▍ | 901/1071 [09:09<01:41,  1.68it/s]

Training steps: 900 Loss: 0.2736049294471741


Training:  93%|█████████▎| 1001/1071 [10:10<00:43,  1.61it/s]

Training steps: 1000 Loss: 0.38895368576049805


Training: 100%|██████████| 1071/1071 [10:52<00:00,  1.64it/s]

TRAIN ACC : 0.870593732659677, TRAIN LOSS : 0.38264010789918074



Training:   0%|          | 1/357 [00:00<02:03,  2.87it/s]

Validation steps: 0 Loss: 0.07695116102695465


Training:  28%|██▊       | 101/357 [00:33<01:28,  2.89it/s]

Validation steps: 100 Loss: 0.27794262766838074


Training:  56%|█████▋    | 201/357 [01:07<00:52,  2.97it/s]

Validation steps: 200 Loss: 0.537394642829895


Training:  84%|████████▍ | 301/357 [01:41<00:19,  2.91it/s]

Validation steps: 300 Loss: 0.7906066179275513


Training: 100%|██████████| 357/357 [02:00<00:00,  2.96it/s]


              precision    recall  f1-score   support

        IT과학       0.79      0.88      0.83      1206
          경제       0.84      0.83      0.83      1555
          사회       0.84      0.70      0.76      1840
        생활문화       0.87      0.92      0.89      1484
          세계       0.84      0.95      0.89      1907
         스포츠       0.97      0.96      0.96      1733
          정치       0.92      0.87      0.90      1688

    accuracy                           0.87     11413
   macro avg       0.87      0.87      0.87     11413
weighted avg       0.87      0.87      0.87     11413

[0.87645108 0.82572347 0.69728261 0.91576819 0.94756162 0.95556838
 0.87026066]
VALID ACC : 0.8687461666520634, VALID LOSS : 0.4125401576881816
{'epoch': 3, 'train_loss': 0.38264010789918074, 'train_acc': 0.870593732659677, 'valid_acc': 0.8687461666520634, 'val_loss': 0.4125401576881816, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 5


Training:   0%|          | 1/1071 [00:00<10:52,  1.64it/s]

Training steps: 0 Loss: 0.526039183139801


Training:   9%|▉         | 101/1071 [01:01<09:34,  1.69it/s]

Training steps: 100 Loss: 0.39382603764533997


Training:  19%|█▉        | 201/1071 [02:02<09:06,  1.59it/s]

Training steps: 200 Loss: 0.32959410548210144


Training:  28%|██▊       | 301/1071 [03:03<07:36,  1.69it/s]

Training steps: 300 Loss: 0.326553076505661


Training:  37%|███▋      | 401/1071 [04:03<06:55,  1.61it/s]

Training steps: 400 Loss: 0.3104206919670105


Training:  47%|████▋     | 501/1071 [05:04<05:38,  1.68it/s]

Training steps: 500 Loss: 0.4315095543861389


Training:  56%|█████▌    | 601/1071 [06:05<04:52,  1.60it/s]

Training steps: 600 Loss: 0.4692404866218567


Training:  65%|██████▌   | 701/1071 [07:06<03:39,  1.69it/s]

Training steps: 700 Loss: 0.5793742537498474


Training:  75%|███████▍  | 801/1071 [08:06<02:46,  1.62it/s]

Training steps: 800 Loss: 0.4013068675994873


Training:  84%|████████▍ | 901/1071 [09:07<01:40,  1.69it/s]

Training steps: 900 Loss: 0.38161787390708923


Training:  93%|█████████▎| 1001/1071 [10:08<00:43,  1.63it/s]

Training steps: 1000 Loss: 0.5815708637237549


Training: 100%|██████████| 1071/1071 [10:50<00:00,  1.65it/s]

TRAIN ACC : 0.8799100493560351, TRAIN LOSS : 0.3562081465741096



Training:   0%|          | 1/357 [00:00<02:07,  2.79it/s]

Validation steps: 0 Loss: 0.03356332331895828


Training:  28%|██▊       | 101/357 [00:33<01:24,  3.03it/s]

Validation steps: 100 Loss: 0.29585838317871094


Training:  56%|█████▋    | 201/357 [01:07<00:53,  2.93it/s]

Validation steps: 200 Loss: 0.46592584252357483


Training:  84%|████████▍ | 301/357 [01:41<00:19,  2.90it/s]

Validation steps: 300 Loss: 0.6158206462860107


Training: 100%|██████████| 357/357 [02:00<00:00,  2.97it/s]


              precision    recall  f1-score   support

        IT과학       0.83      0.83      0.83      1206
          경제       0.83      0.84      0.84      1555
          사회       0.82      0.74      0.78      1840
        생활문화       0.90      0.90      0.90      1484
          세계       0.87      0.94      0.90      1907
         스포츠       0.95      0.97      0.96      1733
          정치       0.92      0.89      0.90      1688

    accuracy                           0.88     11413
   macro avg       0.87      0.87      0.87     11413
weighted avg       0.88      0.88      0.88     11413

[0.8291874  0.84051447 0.74402174 0.90431267 0.94389093 0.97403347
 0.88625592]
VALID ACC : 0.876369052834487, VALID LOSS : 0.38666968094054555
{'epoch': 4, 'train_loss': 0.3562081465741096, 'train_acc': 0.8799100493560351, 'valid_acc': 0.876369052834487, 'val_loss': 0.38666968094054555, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 6


Training:   0%|          | 1/1071 [00:00<10:56,  1.63it/s]

Training steps: 0 Loss: 0.20836131274700165


Training:   9%|▉         | 101/1071 [01:01<09:31,  1.70it/s]

Training steps: 100 Loss: 0.3682018220424652


Training:  19%|█▉        | 201/1071 [02:02<08:58,  1.62it/s]

Training steps: 200 Loss: 0.7541841864585876


Training:  28%|██▊       | 301/1071 [03:02<07:34,  1.70it/s]

Training steps: 300 Loss: 0.2455529421567917


Training:  37%|███▋      | 401/1071 [04:03<07:01,  1.59it/s]

Training steps: 400 Loss: 0.3651570975780487


Training:  47%|████▋     | 501/1071 [05:04<05:40,  1.67it/s]

Training steps: 500 Loss: 0.309929758310318


Training:  56%|█████▌    | 601/1071 [06:04<04:52,  1.61it/s]

Training steps: 600 Loss: 0.1553148627281189


Training:  65%|██████▌   | 701/1071 [07:05<03:38,  1.70it/s]

Training steps: 700 Loss: 0.6932926774024963


Training:  75%|███████▍  | 801/1071 [08:05<02:45,  1.63it/s]

Training steps: 800 Loss: 0.122916579246521


Training:  84%|████████▍ | 901/1071 [09:05<01:39,  1.70it/s]

Training steps: 900 Loss: 0.3037540912628174


Training:  93%|█████████▎| 1001/1071 [10:06<00:43,  1.60it/s]

Training steps: 1000 Loss: 0.13762906193733215


Training: 100%|██████████| 1071/1071 [10:48<00:00,  1.65it/s]

TRAIN ACC : 0.8852545194357642, TRAIN LOSS : 0.33870984487284245



Training:   0%|          | 1/357 [00:00<02:05,  2.85it/s]

Validation steps: 0 Loss: 0.07680770754814148


Training:  28%|██▊       | 101/357 [00:34<01:27,  2.92it/s]

Validation steps: 100 Loss: 0.2020896077156067


Training:  56%|█████▋    | 201/357 [01:08<00:52,  3.00it/s]

Validation steps: 200 Loss: 0.43108534812927246


Training:  84%|████████▍ | 301/357 [01:41<00:19,  2.91it/s]

Validation steps: 300 Loss: 0.48907777667045593


Training: 100%|██████████| 357/357 [02:00<00:00,  2.96it/s]


              precision    recall  f1-score   support

        IT과학       0.84      0.83      0.83      1206
          경제       0.84      0.84      0.84      1555
          사회       0.78      0.78      0.78      1840
        생활문화       0.90      0.89      0.89      1484
          세계       0.90      0.93      0.92      1907
         스포츠       0.96      0.96      0.96      1733
          정치       0.91      0.91      0.91      1688

    accuracy                           0.88     11413
   macro avg       0.88      0.88      0.88     11413
weighted avg       0.88      0.88      0.88     11413

[0.82587065 0.83858521 0.77934783 0.88746631 0.92973256 0.96076168
 0.90817536]
VALID ACC : 0.8781214404626303, VALID LOSS : 0.3625583505357097
{'epoch': 5, 'train_loss': 0.33870984487284245, 'train_acc': 0.8852545194357642, 'valid_acc': 0.8781214404626303, 'val_loss': 0.3625583505357097, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 7


Training:   0%|          | 1/1071 [00:00<12:50,  1.39it/s]

Training steps: 0 Loss: 0.5364122986793518


Training:   9%|▉         | 101/1071 [01:01<09:38,  1.68it/s]

Training steps: 100 Loss: 0.31128278374671936


Training:  19%|█▉        | 201/1071 [02:02<09:06,  1.59it/s]

Training steps: 200 Loss: 0.22540509700775146


Training:  28%|██▊       | 301/1071 [03:02<07:41,  1.67it/s]

Training steps: 300 Loss: 0.3559432327747345


Training:  37%|███▋      | 401/1071 [04:03<06:54,  1.62it/s]

Training steps: 400 Loss: 0.26749977469444275


Training:  47%|████▋     | 501/1071 [05:03<05:35,  1.70it/s]

Training steps: 500 Loss: 0.3221392333507538


Training:  56%|█████▌    | 601/1071 [06:03<04:51,  1.61it/s]

Training steps: 600 Loss: 0.4212326407432556


Training:  65%|██████▌   | 701/1071 [07:04<03:41,  1.67it/s]

Training steps: 700 Loss: 0.48489177227020264


Training:  75%|███████▍  | 801/1071 [08:04<02:49,  1.59it/s]

Training steps: 800 Loss: 0.34680625796318054


Training:  84%|████████▍ | 901/1071 [09:04<01:39,  1.71it/s]

Training steps: 900 Loss: 0.4713217318058014


Training:  93%|█████████▎| 1001/1071 [10:05<00:43,  1.63it/s]

Training steps: 1000 Loss: 0.3855389654636383


Training: 100%|██████████| 1071/1071 [10:47<00:00,  1.66it/s]

TRAIN ACC : 0.8903069419701527, TRAIN LOSS : 0.3204347702085861



Training:   0%|          | 1/357 [00:00<02:03,  2.89it/s]

Validation steps: 0 Loss: 0.06366974860429764


Training:  28%|██▊       | 101/357 [00:33<01:25,  3.00it/s]

Validation steps: 100 Loss: 0.23745036125183105


Training:  56%|█████▋    | 201/357 [01:07<00:52,  2.95it/s]

Validation steps: 200 Loss: 0.4216141998767853


Training:  84%|████████▍ | 301/357 [01:40<00:18,  3.03it/s]

Validation steps: 300 Loss: 0.6858425736427307


Training: 100%|██████████| 357/357 [01:58<00:00,  3.00it/s]

              precision    recall  f1-score   support

        IT과학       0.78      0.90      0.84      1206
          경제       0.82      0.84      0.83      1555
          사회       0.87      0.68      0.76      1840
        생활문화       0.90      0.89      0.90      1484
          세계       0.87      0.95      0.91      1907
         스포츠       0.97      0.96      0.96      1733
          정치       0.89      0.92      0.91      1688

    accuracy                           0.88     11413
   macro avg       0.87      0.88      0.87     11413
weighted avg       0.88      0.88      0.87     11413

[0.90215589 0.84180064 0.68097826 0.88679245 0.94598846 0.95556838
 0.92476303]
VALID ACC : 0.8750547621133795, VALID LOSS : 0.3819956027065255
{'epoch': 6, 'train_loss': 0.3204347702085861, 'train_acc': 0.8903069419701527, 'valid_acc': 0.8750547621133795, 'val_loss': 0.3819956027065255, 'learning_rate': 5e-06}
Start Training: Epoch 8



Training:   0%|          | 1/1071 [00:00<10:47,  1.65it/s]

Training steps: 0 Loss: 0.41801226139068604


Training:   9%|▉         | 101/1071 [01:01<09:33,  1.69it/s]

Training steps: 100 Loss: 0.7279711365699768


Training:  19%|█▉        | 201/1071 [02:02<09:04,  1.60it/s]

Training steps: 200 Loss: 0.3596429228782654


Training:  28%|██▊       | 301/1071 [03:02<07:41,  1.67it/s]

Training steps: 300 Loss: 0.521914005279541


Training:  37%|███▋      | 401/1071 [04:02<07:00,  1.59it/s]

Training steps: 400 Loss: 0.38288185000419617


Training:  47%|████▋     | 501/1071 [05:03<05:41,  1.67it/s]

Training steps: 500 Loss: 0.13858619332313538


Training:  56%|█████▌    | 601/1071 [06:03<04:53,  1.60it/s]

Training steps: 600 Loss: 0.5308518409729004


Training:  65%|██████▌   | 701/1071 [07:04<03:38,  1.69it/s]

Training steps: 700 Loss: 0.30230072140693665


Training:  75%|███████▍  | 801/1071 [08:04<02:49,  1.59it/s]

Training steps: 800 Loss: 0.2431778460741043


Training:  84%|████████▍ | 901/1071 [09:05<01:41,  1.68it/s]

Training steps: 900 Loss: 0.46620485186576843


Training:  93%|█████████▎| 1001/1071 [10:05<00:43,  1.60it/s]

Training steps: 1000 Loss: 0.46678826212882996


Training: 100%|██████████| 1071/1071 [10:47<00:00,  1.65it/s]

TRAIN ACC : 0.8954469787681435, TRAIN LOSS : 0.3035140518206898



Training:   0%|          | 1/357 [00:00<02:03,  2.89it/s]

Validation steps: 0 Loss: 0.06735585629940033


Training:  28%|██▊       | 101/357 [00:34<01:27,  2.94it/s]

Validation steps: 100 Loss: 0.2695814371109009


Training:  56%|█████▋    | 201/357 [01:07<00:51,  3.05it/s]

Validation steps: 200 Loss: 0.4495188295841217


Training:  84%|████████▍ | 301/357 [01:40<00:19,  2.94it/s]

Validation steps: 300 Loss: 0.712760329246521


Training: 100%|██████████| 357/357 [01:59<00:00,  2.98it/s]


              precision    recall  f1-score   support

        IT과학       0.77      0.91      0.84      1206
          경제       0.85      0.83      0.84      1555
          사회       0.86      0.71      0.78      1840
        생활문화       0.89      0.91      0.90      1484
          세계       0.88      0.95      0.91      1907
         스포츠       0.97      0.95      0.96      1733
          정치       0.91      0.90      0.91      1688

    accuracy                           0.88     11413
   macro avg       0.88      0.88      0.88     11413
weighted avg       0.88      0.88      0.88     11413

[0.91376451 0.82893891 0.71358696 0.91307278 0.94703723 0.95499134
 0.89810427]
VALID ACC : 0.8793481118023306, VALID LOSS : 0.36858797619896694
{'epoch': 7, 'train_loss': 0.3035140518206898, 'train_acc': 0.8954469787681435, 'valid_acc': 0.8793481118023306, 'val_loss': 0.36858797619896694, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 9


Training:   0%|          | 1/1071 [00:00<10:57,  1.63it/s]

Training steps: 0 Loss: 0.6103050112724304


Training:   9%|▉         | 101/1071 [00:59<09:25,  1.72it/s]

Training steps: 100 Loss: 0.3338810205459595


Training:  19%|█▉        | 201/1071 [01:58<08:41,  1.67it/s]

Training steps: 200 Loss: 0.19732609391212463


Training:  28%|██▊       | 301/1071 [02:56<07:16,  1.76it/s]

Training steps: 300 Loss: 0.22253622114658356


Training:  37%|███▋      | 401/1071 [03:55<06:43,  1.66it/s]

Training steps: 400 Loss: 0.3451637923717499


Training:  47%|████▋     | 501/1071 [04:54<05:29,  1.73it/s]

Training steps: 500 Loss: 0.3921605348587036


Training:  56%|█████▌    | 601/1071 [05:52<04:44,  1.65it/s]

Training steps: 600 Loss: 0.4011382758617401


Training:  65%|██████▌   | 701/1071 [06:51<03:30,  1.76it/s]

Training steps: 700 Loss: 0.35080012679100037


Training:  75%|███████▍  | 801/1071 [07:49<02:41,  1.67it/s]

Training steps: 800 Loss: 0.12504315376281738


Training:  84%|████████▍ | 901/1071 [08:48<01:36,  1.75it/s]

Training steps: 900 Loss: 0.1439615786075592


Training:  93%|█████████▎| 1001/1071 [09:47<00:42,  1.67it/s]

Training steps: 1000 Loss: 0.22827698290348053


Training: 100%|██████████| 1071/1071 [10:27<00:00,  1.71it/s]

TRAIN ACC : 0.8965275546859028, TRAIN LOSS : 0.29596197081818465



Training:   0%|          | 1/357 [00:00<02:00,  2.96it/s]

Validation steps: 0 Loss: 0.09646541625261307


Training:  28%|██▊       | 101/357 [00:32<01:21,  3.15it/s]

Validation steps: 100 Loss: 0.30510666966438293


Training:  56%|█████▋    | 201/357 [01:03<00:48,  3.20it/s]

Validation steps: 200 Loss: 0.4905858635902405


Training:  84%|████████▍ | 301/357 [01:34<00:17,  3.13it/s]

Validation steps: 300 Loss: 0.4493461847305298


Training: 100%|██████████| 357/357 [01:52<00:00,  3.19it/s]

              precision    recall  f1-score   support

        IT과학       0.82      0.87      0.84      1206
          경제       0.86      0.82      0.84      1555
          사회       0.81      0.78      0.79      1840
        생활문화       0.91      0.89      0.90      1484
          세계       0.85      0.96      0.90      1907
         스포츠       0.96      0.96      0.96      1733
          정치       0.94      0.86      0.90      1688

    accuracy                           0.88     11413
   macro avg       0.88      0.88      0.88     11413
weighted avg       0.88      0.88      0.88     11413

[0.8681592  0.82186495 0.775      0.89150943 0.95962244 0.96364686
 0.85900474]
VALID ACC : 0.8782966792254446, VALID LOSS : 0.3759065597821601
{'epoch': 8, 'train_loss': 0.29596197081818465, 'train_acc': 0.8965275546859028, 'valid_acc': 0.8782966792254446, 'val_loss': 0.3759065597821601, 'learning_rate': 5e-06}
Start Training: Epoch 10



Training:   0%|          | 1/1071 [00:00<10:12,  1.75it/s]

Training steps: 0 Loss: 0.205905944108963


Training:   9%|▉         | 101/1071 [00:58<09:17,  1.74it/s]

Training steps: 100 Loss: 0.10139839351177216


Training:  19%|█▉        | 201/1071 [01:56<08:46,  1.65it/s]

Training steps: 200 Loss: 0.26267924904823303


Training:  28%|██▊       | 301/1071 [02:54<07:21,  1.74it/s]

Training steps: 300 Loss: 0.28175273537635803


Training:  37%|███▋      | 401/1071 [03:53<06:40,  1.67it/s]

Training steps: 400 Loss: 0.3608699142932892


Training:  47%|████▋     | 501/1071 [04:50<05:26,  1.75it/s]

Training steps: 500 Loss: 0.23437736928462982


Training:  56%|█████▌    | 601/1071 [05:49<04:43,  1.66it/s]

Training steps: 600 Loss: 0.1793433129787445


Training:  65%|██████▌   | 701/1071 [06:46<03:31,  1.75it/s]

Training steps: 700 Loss: 0.13701900839805603


Training:  75%|███████▍  | 801/1071 [07:45<02:40,  1.69it/s]

Training steps: 800 Loss: 0.44974538683891296


Training:  84%|████████▍ | 901/1071 [08:43<01:36,  1.75it/s]

Training steps: 900 Loss: 0.35666123032569885


Training:  93%|█████████▎| 1001/1071 [09:41<00:42,  1.66it/s]

Training steps: 1000 Loss: 0.25438740849494934


Training: 100%|██████████| 1071/1071 [10:21<00:00,  1.72it/s]

TRAIN ACC : 0.9010834963932128, TRAIN LOSS : 0.28158282245797156



Training:   0%|          | 1/357 [00:00<01:55,  3.08it/s]

Validation steps: 0 Loss: 0.08266721665859222


Training:  28%|██▊       | 101/357 [00:31<01:20,  3.16it/s]

Validation steps: 100 Loss: 0.22253593802452087


Training:  56%|█████▋    | 201/357 [01:02<00:48,  3.25it/s]

Validation steps: 200 Loss: 0.44492632150650024


Training:  84%|████████▍ | 301/357 [01:34<00:17,  3.16it/s]

Validation steps: 300 Loss: 0.5673410296440125


Training: 100%|██████████| 357/357 [01:51<00:00,  3.19it/s]


              precision    recall  f1-score   support

        IT과학       0.78      0.92      0.84      1206
          경제       0.84      0.84      0.84      1555
          사회       0.83      0.75      0.79      1840
        생활문화       0.92      0.88      0.90      1484
          세계       0.91      0.93      0.92      1907
         스포츠       0.97      0.96      0.96      1733
          정치       0.92      0.92      0.92      1688

    accuracy                           0.88     11413
   macro avg       0.88      0.88      0.88     11413
weighted avg       0.88      0.88      0.88     11413

[0.91542289 0.84437299 0.7548913  0.87533693 0.92553749 0.96076168
 0.91528436]
VALID ACC : 0.8832033645842461, VALID LOSS : 0.35163645643521757
{'epoch': 9, 'train_loss': 0.28158282245797156, 'train_acc': 0.9010834963932128, 'valid_acc': 0.8832033645842461, 'val_loss': 0.35163645643521757, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 11


Training:   0%|          | 1/1071 [00:00<10:58,  1.63it/s]

Training steps: 0 Loss: 0.35304826498031616


Training:   9%|▉         | 101/1071 [00:59<09:14,  1.75it/s]

Training steps: 100 Loss: 0.27383893728256226


Training:  19%|█▉        | 201/1071 [01:58<08:46,  1.65it/s]

Training steps: 200 Loss: 0.15100529789924622


Training:  28%|██▊       | 301/1071 [02:57<07:20,  1.75it/s]

Training steps: 300 Loss: 0.4221010208129883


Training:  37%|███▋      | 401/1071 [03:55<06:39,  1.68it/s]

Training steps: 400 Loss: 0.25755685567855835


Training:  47%|████▋     | 501/1071 [04:54<05:22,  1.77it/s]

Training steps: 500 Loss: 0.3886969983577728


Training:  56%|█████▌    | 601/1071 [05:52<04:41,  1.67it/s]

Training steps: 600 Loss: 0.32849740982055664


Training:  65%|██████▌   | 701/1071 [06:50<03:34,  1.73it/s]

Training steps: 700 Loss: 0.36262816190719604


Training:  75%|███████▍  | 801/1071 [07:49<02:41,  1.67it/s]

Training steps: 800 Loss: 0.29272645711898804


Training:  84%|████████▍ | 901/1071 [08:48<01:36,  1.76it/s]

Training steps: 900 Loss: 0.0848953053355217


Training:  93%|█████████▎| 1001/1071 [09:46<00:41,  1.67it/s]

Training steps: 1000 Loss: 0.3371136486530304


Training: 100%|██████████| 1071/1071 [10:27<00:00,  1.71it/s]

TRAIN ACC : 0.9076253614088373, TRAIN LOSS : 0.2665508754662717



Training:   0%|          | 1/357 [00:00<01:57,  3.04it/s]

Validation steps: 0 Loss: 0.05792277678847313


Training:  28%|██▊       | 101/357 [00:31<01:19,  3.20it/s]

Validation steps: 100 Loss: 0.209504634141922


Training:  56%|█████▋    | 201/357 [01:02<00:49,  3.15it/s]

Validation steps: 200 Loss: 0.4282291829586029


Training:  84%|████████▍ | 301/357 [01:34<00:17,  3.19it/s]

Validation steps: 300 Loss: 0.6751582026481628


Training: 100%|██████████| 357/357 [01:51<00:00,  3.20it/s]

              precision    recall  f1-score   support

        IT과학       0.85      0.83      0.84      1206
          경제       0.80      0.88      0.84      1555
          사회       0.85      0.72      0.78      1840
        생활문화       0.88      0.92      0.90      1484
          세계       0.92      0.92      0.92      1907
         스포츠       0.96      0.97      0.96      1733
          정치       0.90      0.93      0.92      1688

    accuracy                           0.88     11413
   macro avg       0.88      0.88      0.88     11413
weighted avg       0.88      0.88      0.88     11413

[0.8291874  0.87588424 0.72445652 0.92183288 0.91976927 0.96768609
 0.93246445]
VALID ACC : 0.88215193200736, VALID LOSS : 0.3638805835729554
{'epoch': 10, 'train_loss': 0.2665508754662717, 'train_acc': 0.9076253614088373, 'valid_acc': 0.88215193200736, 'val_loss': 0.3638805835729554, 'learning_rate': 5e-06}
Start Training: Epoch 12



Training:   0%|          | 1/1071 [00:00<09:59,  1.78it/s]

Training steps: 0 Loss: 0.3162069320678711


Training:   9%|▉         | 101/1071 [00:58<09:15,  1.74it/s]

Training steps: 100 Loss: 0.2801479995250702


Training:  19%|█▉        | 201/1071 [01:56<08:40,  1.67it/s]

Training steps: 200 Loss: 0.3461258113384247


Training:  28%|██▊       | 301/1071 [02:54<07:13,  1.78it/s]

Training steps: 300 Loss: 0.23695588111877441


Training:  37%|███▋      | 401/1071 [03:52<06:38,  1.68it/s]

Training steps: 400 Loss: 0.4009895622730255


Training:  47%|████▋     | 501/1071 [04:50<05:22,  1.77it/s]

Training steps: 500 Loss: 0.31255003809928894


Training:  56%|█████▌    | 601/1071 [05:49<04:43,  1.66it/s]

Training steps: 600 Loss: 0.4202066957950592


Training:  65%|██████▌   | 701/1071 [06:47<03:30,  1.76it/s]

Training steps: 700 Loss: 0.2850100100040436


Training:  75%|███████▍  | 801/1071 [07:45<02:40,  1.68it/s]

Training steps: 800 Loss: 0.16833588480949402


Training:  84%|████████▍ | 901/1071 [08:43<01:36,  1.76it/s]

Training steps: 900 Loss: 0.10952523350715637


Training:  93%|█████████▎| 1001/1071 [09:41<00:41,  1.68it/s]

Training steps: 1000 Loss: 0.10044267028570175


Training: 100%|██████████| 1071/1071 [10:21<00:00,  1.72it/s]

TRAIN ACC : 0.9112467509710581, TRAIN LOSS : 0.2562241248654273



Training:   0%|          | 1/357 [00:00<01:59,  2.97it/s]

Validation steps: 0 Loss: 0.039959169924259186


Training:  28%|██▊       | 101/357 [00:31<01:17,  3.29it/s]

Validation steps: 100 Loss: 0.2968493700027466


Training:  56%|█████▋    | 201/357 [01:02<00:48,  3.20it/s]

Validation steps: 200 Loss: 0.5129906535148621


Training:  84%|████████▍ | 301/357 [01:33<00:17,  3.16it/s]

Validation steps: 300 Loss: 0.550713837146759


Training: 100%|██████████| 357/357 [01:51<00:00,  3.21it/s]

              precision    recall  f1-score   support

        IT과학       0.83      0.84      0.84      1206
          경제       0.83      0.85      0.84      1555
          사회       0.83      0.74      0.78      1840
        생활문화       0.89      0.91      0.90      1484
          세계       0.90      0.93      0.92      1907
         스포츠       0.95      0.98      0.96      1733
          정치       0.92      0.91      0.91      1688

    accuracy                           0.88     11413
   macro avg       0.88      0.88      0.88     11413
weighted avg       0.88      0.88      0.88     11413

[0.84411277 0.85209003 0.74456522 0.90633423 0.93340325 0.97691864
 0.90758294]
VALID ACC : 0.8817138351003242, VALID LOSS : 0.3647000917210048
{'epoch': 11, 'train_loss': 0.2562241248654273, 'train_acc': 0.9112467509710581, 'valid_acc': 0.8817138351003242, 'val_loss': 0.3647000917210048, 'learning_rate': 5e-06}
Start Training: Epoch 13



Training:   0%|          | 1/1071 [00:00<10:04,  1.77it/s]

Training steps: 0 Loss: 0.072475865483284


Training:   9%|▉         | 101/1071 [00:58<09:12,  1.76it/s]

Training steps: 100 Loss: 0.2835122346878052


Training:  19%|█▉        | 201/1071 [01:56<08:46,  1.65it/s]

Training steps: 200 Loss: 0.24189279973506927


Training:  28%|██▊       | 301/1071 [02:54<07:18,  1.76it/s]

Training steps: 300 Loss: 0.1137741208076477


Training:  37%|███▋      | 401/1071 [03:52<06:36,  1.69it/s]

Training steps: 400 Loss: 0.20035910606384277


Training:  47%|████▋     | 501/1071 [04:50<05:26,  1.74it/s]

Training steps: 500 Loss: 0.2990553379058838


Training:  56%|█████▌    | 601/1071 [05:48<04:42,  1.66it/s]

Training steps: 600 Loss: 0.0333264023065567


Training:  65%|██████▌   | 701/1071 [06:46<03:28,  1.77it/s]

Training steps: 700 Loss: 0.29953131079673767


Training:  75%|███████▍  | 801/1071 [07:43<02:39,  1.70it/s]

Training steps: 800 Loss: 0.0922568142414093


Training:  84%|████████▍ | 901/1071 [08:41<01:35,  1.78it/s]

Training steps: 900 Loss: 0.2942379415035248


Training:  93%|█████████▎| 1001/1071 [09:39<00:41,  1.68it/s]

Training steps: 1000 Loss: 0.31208744645118713


Training: 100%|██████████| 1071/1071 [10:19<00:00,  1.73it/s]

TRAIN ACC : 0.9154230308694256, TRAIN LOSS : 0.24281824840342298



Training:   0%|          | 1/357 [00:00<02:00,  2.96it/s]

Validation steps: 0 Loss: 0.0505281463265419


Training:  28%|██▊       | 101/357 [00:31<01:19,  3.21it/s]

Validation steps: 100 Loss: 0.2589377164840698


Training:  56%|█████▋    | 201/357 [01:01<00:47,  3.29it/s]

Validation steps: 200 Loss: 0.5479438900947571


Training:  84%|████████▍ | 301/357 [01:32<00:17,  3.24it/s]

Validation steps: 300 Loss: 0.785088300704956


Training: 100%|██████████| 357/357 [01:49<00:00,  3.26it/s]

              precision    recall  f1-score   support

        IT과학       0.81      0.87      0.84      1206
          경제       0.81      0.86      0.83      1555
          사회       0.87      0.70      0.77      1840
        생활문화       0.88      0.92      0.90      1484
          세계       0.91      0.93      0.92      1907
         스포츠       0.95      0.98      0.96      1733
          정치       0.91      0.92      0.91      1688

    accuracy                           0.88     11413
   macro avg       0.88      0.88      0.88     11413
weighted avg       0.88      0.88      0.88     11413

[0.86981758 0.85980707 0.6951087  0.91576819 0.93340325 0.97864974
 0.91528436]
VALID ACC : 0.8801366862349952, VALID LOSS : 0.382019029909364
{'epoch': 12, 'train_loss': 0.24281824840342298, 'train_acc': 0.9154230308694256, 'valid_acc': 0.8801366862349952, 'val_loss': 0.382019029909364, 'learning_rate': 5e-06}
EarlyStopping counter: 3 out of 3
************************************************** auc_a




In [9]:
# Hand cached-but-unused GPU memory held by PyTorch's caching allocator back
# to the driver now that the training cell above has finished. This does not
# free tensors that are still referenced from the kernel namespace.
torch.cuda.empty_cache()

## Inference

In [10]:
def inference_main(model_name="temp"):
    """Run test-set inference end to end.

    Builds the run configuration with ``parse_args``, attaches the vocabulary
    from ``make_vocab``, loads the test split through ``Preprocess`` and hands
    the result to ``inference``.

    Args:
        model_name: Value stored in ``args.model_name`` before inference runs.
            Defaults to ``"temp"``, the value that used to be hard-coded, so
            existing ``inference_main()`` calls behave exactly as before.
            NOTE(review): the run log shows checkpoints named ``temp_1.pt`` ..
            ``temp_4.pt``, so this looks like the checkpoint file prefix --
            confirm against ``inference``.

    Returns:
        None. The size of the test set is printed; anything else is produced
        by ``inference``.
    """
    args = parse_args()
    args['vocab'] = make_vocab(args)
    args.model_name = model_name

    preprocess = Preprocess(args)
    preprocess.load_test_data()
    test_data = preprocess.test_data
    print(f"size of test data : {len(test_data)}")

    # Hand unused cached GPU memory back to the driver before inference starts.
    torch.cuda.empty_cache()
    inference(args, test_data)

# Run inference on the test set. Per this cell's logged output, the run writes
# one prediction CSV per checkpoint plus a combined soft-vote CSV.
inference_main()

category 0 reading end, size : 1446
category 1 reading end, size : 1522
category 2 reading end, size : 2107
category 3 reading end, size : 1853
category 4 reading end, size : 2006
category 5 reading end, size : 1527
category 6 reading end, size : 1692
size of test data : 9131
Loading Model from: /content/drive/MyDrive/KLUE_TC/models/temp_1.pt


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'lm_head.decoder.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['roberta.embeddings.token_label_0_type_embeddings.weight', 'c

Loading Model from: /content/drive/MyDrive/KLUE_TC/models/temp_1.pt ...Finished.


Inferencing: 100%|██████████| 286/286 [01:41<00:00,  2.81it/s]


writing prediction : /content/drive/MyDrive/KLUE_TC/output/vocab20_multi_layer/output_1.csv
Loading Model from: /content/drive/MyDrive/KLUE_TC/models/temp_2.pt


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'lm_head.decoder.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['roberta.embeddings.token_label_0_type_embeddings.weight', 'c

Loading Model from: /content/drive/MyDrive/KLUE_TC/models/temp_2.pt ...Finished.


Inferencing: 100%|██████████| 286/286 [01:32<00:00,  3.10it/s]


writing prediction : /content/drive/MyDrive/KLUE_TC/output/vocab20_multi_layer/output_2.csv
Loading Model from: /content/drive/MyDrive/KLUE_TC/models/temp_3.pt


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'lm_head.decoder.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['roberta.embeddings.token_label_0_type_embeddings.weight', 'c

Loading Model from: /content/drive/MyDrive/KLUE_TC/models/temp_3.pt ...Finished.


Inferencing: 100%|██████████| 286/286 [01:18<00:00,  3.63it/s]


writing prediction : /content/drive/MyDrive/KLUE_TC/output/vocab20_multi_layer/output_3.csv
Loading Model from: /content/drive/MyDrive/KLUE_TC/models/temp_4.pt


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'lm_head.decoder.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['roberta.embeddings.token_label_0_type_embeddings.weight', 'c

Loading Model from: /content/drive/MyDrive/KLUE_TC/models/temp_4.pt ...Finished.


Inferencing: 100%|██████████| 286/286 [00:30<00:00,  9.34it/s]


writing prediction : /content/drive/MyDrive/KLUE_TC/output/vocab20_multi_layer/output_4.csv
writing prediction : /content/drive/MyDrive/KLUE_TC/output/vocab20_multi_layer/output_softvote.csv
