In [1]:
!nvidia-smi

Wed Jul 28 03:03:23 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.42.01    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   38C    P0    25W / 300W |      0MiB / 16160MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

## Directory 설정, 구글 드라이브 import

In [2]:
# Project root directory; parse_args() derives data_dir from it.
# NOTE(review): presumably a mounted Google Drive path on Colab -- confirm the drive is mounted before running.
cur_dir = '/content/drive/MyDrive/KLUE_TC'

## Utils

In [3]:
!pip install adamp
!pip install transformers

Collecting adamp
  Downloading adamp-0.3.0.tar.gz (5.1 kB)
Building wheels for collected packages: adamp
  Building wheel for adamp (setup.py) ... [?25l[?25hdone
  Created wheel for adamp: filename=adamp-0.3.0-py3-none-any.whl size=5998 sha256=19fd61fd15fee4f273f053078361c15f6829e14513e73e91c01c853e4fedbc1e
  Stored in directory: /root/.cache/pip/wheels/bb/95/21/ced2d2cb9944e3a72e58fece7958973eed3fd8d0aeb6e2e450
Successfully built adamp
Installing collected packages: adamp
Successfully installed adamp-0.3.0
Collecting transformers
  Downloading transformers-4.9.1-py3-none-any.whl (2.6 MB)
[K     |████████████████████████████████| 2.6 MB 15.6 MB/s 
[?25hCollecting tokenizers<0.11,>=0.10.1
  Downloading tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3 MB)
[K     |████████████████████████████████| 3.3 MB 65.1 MB/s 
Collecting sacremoses
  Downloading sacremoses-0.0.45-py3-none-any.whl (895 kB)
[K     |███████████

In [4]:
import os
import random
import torch
import numpy as np
from torch import nn

from torch.optim import Adam, AdamW, SGD
from adamp import AdamP
from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau, CosineAnnealingLR, ExponentialLR, \
    CosineAnnealingWarmRestarts
from transformers import get_linear_schedule_with_warmup
from transformers import AutoConfig, AutoTokenizer, AutoModelForSequenceClassification


def set_seeds(seed=42):
    """Seed every random number generator used by the notebook.

    Seeds Python's hash seed environment variable, the `random` module,
    NumPy and PyTorch (CPU and all CUDA devices), then switches cuDNN to
    deterministic mode with benchmarking disabled so that repeated runs
    produce the same results.

    Args:
        seed: Integer seed applied to all generators.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,  # covers the multi-GPU case
    )
    for apply_seed in seeders:
        apply_seed(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False


def save_checkpoint(state, model_dir, model_filename):
    """Serialize a checkpoint dictionary into `model_dir`.

    The previous implementation created `model_dir` but then wrote the file
    relative to the current working directory, so `load_model` (which reads
    from `args.model_dir`) could not find it. The file is now written inside
    `model_dir`.

    Args:
        state: Object to serialize with `torch.save` (here a dict holding
            'epoch' and 'state_dict').
        model_dir: Directory that receives the checkpoint; created if missing.
        model_filename: File name of the checkpoint. An absolute path is
            honoured as-is because `os.path.join` discards `model_dir` then.
    """
    print('saving model ...')
    os.makedirs(model_dir, exist_ok=True)
    torch.save(state, os.path.join(model_dir, model_filename))


def get_optimizer(model, args):
    """Build the optimizer selected by `args.optimizer`.

    Args:
        model: Module whose `parameters()` are optimized.
        args: Configuration object. Reads `optimizer` (one of 'adam',
            'adamW', 'adamP', 'SGD'), `lr`, and either `weight_decay`
            (Adam variants) or `momentum` (SGD).

    Returns:
        The constructed optimizer with its gradients zeroed.

    Raises:
        ValueError: If `args.optimizer` is not one of the supported names
            (previously this surfaced as an UnboundLocalError).
    """
    if args.optimizer == 'adam':
        optimizer = Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    elif args.optimizer == 'adamW':
        optimizer = AdamW(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    elif args.optimizer == 'adamP':
        optimizer = AdamP(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    elif args.optimizer == 'SGD':
        optimizer = SGD(model.parameters(), lr=args.lr, momentum=args.momentum)
    else:
        raise ValueError(
            f"Unsupported optimizer: {args.optimizer!r} "
            "(expected one of 'adam', 'adamW', 'adamP', 'SGD')"
        )

    # Start from zeroed gradients for every parameter.
    optimizer.zero_grad()

    return optimizer


def get_scheduler(optimizer, args):
    """Build the learning-rate scheduler selected by `args.scheduler`.

    Args:
        optimizer: Optimizer whose learning rate is scheduled.
        args: Configuration object. Reads `scheduler` plus the
            hyper-parameters of the chosen scheduler:
            'plateau' -> `plateau_patience`, `plateau_factor`;
            'linear_warmup' -> `warmup_steps`, `total_steps`;
            'step_lr' -> `step_size`, `gamma`;
            'exp_lr' -> `gamma`;
            'cosine_annealing' -> `t_max`, `eta_min`;
            'cosine_annealing_warmstart' -> `T_0`, `T_mult`, `eta_min`.

    Returns:
        The constructed scheduler.

    Raises:
        ValueError: If `args.scheduler` is not one of the supported names
            (previously this surfaced as an UnboundLocalError).
    """
    if args.scheduler == 'plateau':
        # mode='max' because the monitored quantity in run() is an accuracy.
        scheduler = ReduceLROnPlateau(optimizer, patience=args.plateau_patience, factor=args.plateau_factor, mode='max',
                                      verbose=True)
    elif args.scheduler == 'linear_warmup':
        scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=args.warmup_steps,
                                                    num_training_steps=args.total_steps)
    elif args.scheduler == 'step_lr':
        scheduler = StepLR(optimizer, step_size=args.step_size, gamma=args.gamma)
    elif args.scheduler == 'exp_lr':
        scheduler = ExponentialLR(optimizer, gamma=args.gamma)
    elif args.scheduler == 'cosine_annealing':
        scheduler = CosineAnnealingLR(optimizer, T_max=args.t_max, eta_min=args.eta_min)
    elif args.scheduler == 'cosine_annealing_warmstart':
        scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=args.T_0, T_mult=args.T_mult, eta_min=args.eta_min,
                                                last_epoch=-1)
    else:
        raise ValueError(
            f"Unsupported scheduler: {args.scheduler!r} "
            "(expected one of 'plateau', 'linear_warmup', 'step_lr', 'exp_lr', "
            "'cosine_annealing', 'cosine_annealing_warmstart')"
        )

    return scheduler


def update_params(loss, model, optimizer, batch_idx, max_len, args):
    """Back-propagate `loss` and, when due, apply one optimizer step.

    With `args.gradient_accumulation` enabled the loss is scaled by
    `1 / args.accum_iter` and gradients accumulate across calls; the
    optimizer only steps on every `accum_iter`-th batch or on the final
    batch. Otherwise every call performs a full step. Gradients are clipped
    to `args.clip_grad` before each step and zeroed after it.

    Args:
        loss: Scalar loss tensor for the current batch.
        model: Module whose gradients are clipped.
        optimizer: Optimizer to step.
        batch_idx: Zero-based index of the current batch.
        max_len: Total number of batches in the epoch.
        args: Configuration with `gradient_accumulation`, `accum_iter`
            and `clip_grad`.
    """
    accumulating = bool(args.gradient_accumulation)

    if accumulating:
        # Scale so the accumulated gradient matches one large batch.
        loss = loss / args.accum_iter

    loss.backward()

    if accumulating:
        batches_seen = batch_idx + 1
        window_complete = batches_seen % args.accum_iter == 0
        final_batch = batches_seen == max_len
        if not (window_complete or final_batch):
            # Keep accumulating; no weight update yet.
            return

    torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip_grad)
    optimizer.step()
    optimizer.zero_grad()


def load_tokenizer(args):
    """Load the fast tokenizer for the configured model.

    Args:
        args: Configuration with `tokenizer_name` and `model_name_or_path`;
            `tokenizer_name` wins when it is truthy.

    Returns:
        The tokenizer returned by `AutoTokenizer.from_pretrained`.
    """
    tokenizer_source = args.tokenizer_name or args.model_name_or_path
    return AutoTokenizer.from_pretrained(tokenizer_source, use_fast=True)


def load_model(args, model_name=None):
    """Rebuild the classifier and restore its weights from a checkpoint.

    The architecture mirrors `get_model` (pretrained backbone with the
    classifier replaced by a three-layer head), after which the checkpoint's
    'state_dict' entry is loaded strictly.

    Args:
        args: Configuration with `model_dir`, `model_name`, `config_name`,
            `model_name_or_path` and `device`.
        model_name: Checkpoint file name inside `args.model_dir`; falls back
            to `args.model_name` when falsy.

    Returns:
        The model moved to `args.device`.
    """
    if not model_name:
        model_name = args.model_name
    model_path = os.path.join(args.model_dir, model_name)
    print("Loading Model from:", model_path)
    # map_location remaps stored tensors onto the active device, so a
    # checkpoint written on GPU can still be restored on a CPU-only runtime.
    # NOTE(review): torch.load unpickles the file -- only load trusted checkpoints.
    load_state = torch.load(model_path, map_location=args.device)

    # Load pretrained model configuration
    config = AutoConfig.from_pretrained(
        args.config_name
        if args.config_name
        else args.model_name_or_path,
    )

    config.num_labels = 7

    model = AutoModelForSequenceClassification.from_pretrained(
        args.model_name_or_path,
        from_tf=bool(".ckpt" in args.model_name_or_path),
        config=config,
    )

    # Must have the same layer layout as the head built in get_model so the
    # strict state-dict load below matches key for key (dropout has no weights).
    model.classifier = nn.Sequential(
        nn.Linear(1024, 1024),
        nn.Dropout(p=0.3, inplace=False),
        nn.Linear(1024, 512),
        nn.Dropout(p=0.3, inplace=False),
        nn.Linear(512, 7),
    )

    model.load_state_dict(load_state['state_dict'], strict=True)

    model = model.to(args.device)

    print("Loading Model from:", model_path, "...Finished.")

    return model


def get_model(args):
    """Create a fresh sequence-classification model for training.

    Loads the pretrained configuration and weights, forces seven output
    labels, and swaps the stock classifier for a 1024 -> 1024 -> 512 -> 7
    linear stack with dropout (p=0.4) between the layers.

    Args:
        args: Configuration with `config_name`, `model_name_or_path`
            and `device`.

    Returns:
        The model moved to `args.device`.
    """
    config_source = args.config_name or args.model_name_or_path
    config = AutoConfig.from_pretrained(config_source)
    config.num_labels = 7

    checkpoint = args.model_name_or_path
    model = AutoModelForSequenceClassification.from_pretrained(
        checkpoint,
        from_tf=".ckpt" in checkpoint,
        config=config,
    )

    # Callers in this file index the resulting logits as logits[:, 0, :].
    head_layers = [
        nn.Linear(1024, 1024),
        nn.Dropout(p=0.4, inplace=False),
        nn.Linear(1024, 512),
        nn.Dropout(p=0.4, inplace=False),
        nn.Linear(512, 7),
    ]
    model.classifier = nn.Sequential(*head_layers)

    return model.to(args.device)


def get_loaders(args, train, valid, is_inference=False):
    """Wrap raw data arrays in `YNAT_dataset` objects and DataLoaders.

    Args:
        args: Configuration with `num_workers` and `batch_size`.
        train: Training rows, or None to skip the training loader.
        valid: Validation rows (or the test rows when `is_inference` is
            true), or None to skip the validation loader.
        is_inference: When true, only `valid` is used and a single
            unshuffled loader is returned.

    Returns:
        A single test DataLoader when `is_inference` is true, otherwise a
        `(train_loader, valid_loader)` tuple whose entries are None for
        missing inputs. Only the training loader shuffles.
    """
    def _make_loader(rows, shuffle):
        dataset = YNAT_dataset(args, rows, is_inference)
        return torch.utils.data.DataLoader(dataset, num_workers=args.num_workers, shuffle=shuffle,
                                           batch_size=args.batch_size, pin_memory=True)

    if is_inference:
        return _make_loader(valid, False)

    train_loader = _make_loader(train, True) if train is not None else None
    valid_loader = _make_loader(valid, False) if valid is not None else None

    return train_loader, valid_loader


# Compute the loss for a batch; the parameter update itself happens in update_params().
def compute_loss(preds, targets, args):
    """Return the loss of `preds` against `targets`.

    Thin wrapper that forwards to `get_criterion`, which picks the loss
    function from `args.criterion`.

    Args:
        preds: Model outputs. train/validate in this file pass
            `logits[:, 0, :]`, i.e. presumably (batch_size, num_classes).
        targets: Ground-truth labels for the batch (presumably integer
            class indices of shape (batch_size,) -- confirm against the data).
        args: Configuration carrying `criterion`.

    Returns:
        Whatever the selected criterion returns.
    """
    return get_criterion(preds, targets, args)


def get_criterion(pred, target, args):
    """Evaluate the loss function named by `args.criterion`.

    Args:
        pred: Predictions passed to the loss module.
        target: Targets passed to the loss module.
        args: Configuration whose `criterion` is one of 'BCE', 'BCELogit',
            'MSE', 'L1' or 'CE'.

    Returns:
        The loss tensor. 'CE' uses the default reduction and yields a
        scalar; the other criteria are built with reduction="none" and
        return an unreduced, element-wise tensor.

    Raises:
        ValueError: If `args.criterion` is not one of the supported names
            (previously this surfaced as an UnboundLocalError).
    """
    if args.criterion == 'BCE':
        loss = nn.BCELoss(reduction="none")
    elif args.criterion == "BCELogit":
        loss = nn.BCEWithLogitsLoss(reduction="none")
    elif args.criterion == "MSE":
        loss = nn.MSELoss(reduction="none")
    elif args.criterion == "L1":
        loss = nn.L1Loss(reduction="none")
    elif args.criterion == "CE":
        # Unweighted cross-entropy; pass weight=<per-class tensor> here to
        # compensate for class imbalance if needed.
        loss = nn.CrossEntropyLoss()
    else:
        raise ValueError(
            f"Unsupported criterion: {args.criterion!r} "
            "(expected one of 'BCE', 'BCELogit', 'MSE', 'L1', 'CE')"
        )
    return loss(pred, target)


## Dataloader

In [5]:
import os
import torch
import pandas as pd


class Preprocess:
    """Loads the train/test CSV files from `args.data_dir` as NumPy arrays."""

    def __init__(self, args):
        """Store the configuration; no data is read until a load_* call."""
        self.args = args
        self.train_data = self.test_data = None

    def load_data(self, file_name):
        """Read `file_name` from `args.data_dir` and return its rows.

        Returns:
            The `.values` array of the DataFrame parsed from the CSV.
        """
        csv_path = os.path.join(self.args.data_dir, file_name)
        return pd.read_csv(csv_path).values

    def load_train_data(self):
        """Populate `self.train_data` from 'train_data.csv'."""
        self.train_data = self.load_data('train_data.csv')

    def load_test_data(self):
        """Populate `self.test_data` from 'test_data.csv'."""
        self.test_data = self.load_data('test_data.csv')


class YNAT_dataset(torch.utils.data.Dataset):
    """Map-style dataset that exposes each data row as a plain list."""

    def __init__(self, args, data, is_inference):
        """Keep references to the configuration, the rows and the mode flag."""
        self.args = args
        self.data = data
        self.is_inference = is_inference

    def __len__(self):
        """Number of rows in the wrapped data."""
        return len(self.data)

    def __getitem__(self, index):
        """Return the fields of row `index` as a list.

        No tensor conversion is done here; the fields are returned as stored.
        """
        return list(self.data[index])



## Trainer

In [6]:
from sklearn.metrics import accuracy_score
from torch.nn.functional import one_hot
from tqdm import tqdm
from sklearn import metrics


def run(args, tokenizer, train_data, valid_data, cv_count):
    """Train one model on a train/validation split and keep the best checkpoint.

    Each epoch trains, validates, prints a metrics dict, saves a checkpoint
    whenever validation accuracy improves, and stops early after
    `args.patience` consecutive epochs without improvement. The scheduler is
    stepped at the end of every epoch that does not trigger early stopping.

    Args:
        args: Configuration (reads `n_epochs`, `cv_strategy`, `run_name`,
            `scheduler`, `patience`, `model_dir`, plus whatever the helper
            functions read).
        tokenizer: Tokenizer forwarded to `train` and `validate`.
        train_data: Rows used for training.
        valid_data: Rows used for validation.
        cv_count: Fold number, used as checkpoint suffix in CV mode.

    Returns:
        The best validation accuracy observed.
    """
    train_loader, valid_loader = get_loaders(args, train_data, valid_data)

    # When using the warmup scheduler, args.total_steps / args.warmup_steps
    # must be set by the caller before get_scheduler is invoked.
    model = get_model(args)
    optimizer = get_optimizer(model, args)
    scheduler = get_scheduler(optimizer, args)

    best_acc = -1
    epochs_without_gain = 0
    for epoch in range(args.n_epochs):

        print(f"Start Training: Epoch {epoch + 1}")

        # One checkpoint file per fold when cross-validating.
        if args.cv_strategy:
            model_name = f"{args.run_name.split('.pt')[0]}_{cv_count}.pt"
        else:
            model_name = args.run_name

        train_acc, train_loss = train(args, model, tokenizer, train_loader, optimizer)
        acc, val_loss = validate(args, model, tokenizer, valid_loader)

        # ReduceLROnPlateau is queried through the optimizer instead of get_last_lr().
        plateau_mode = args.scheduler == 'plateau'
        last_lr = optimizer.param_groups[0]['lr'] if plateau_mode else scheduler.get_last_lr()[0]

        epoch_report = {"epoch": epoch, "train_loss": train_loss, "train_acc": train_acc,
                        "valid_acc": acc, "val_loss": val_loss, "learning_rate": last_lr}
        print(epoch_report)

        if acc > best_acc:
            best_acc = acc
            # Unwrap torch.nn.DataParallel (or similar) to get the underlying model.
            model_to_save = getattr(model, 'module', model)
            checkpoint = {
                'epoch': epoch + 1,
                'state_dict': model_to_save.state_dict(),
            }
            save_checkpoint(checkpoint, args.model_dir, model_name)
            epochs_without_gain = 0
        else:
            epochs_without_gain += 1
            if epochs_without_gain >= args.patience:
                print(f'EarlyStopping counter: {epochs_without_gain} out of {args.patience}')
                break

        if plateau_mode:
            scheduler.step(best_acc)
        else:
            scheduler.step()

    return best_acc


def inference(args, test_data):
    """Predict labels for `test_data` with every saved checkpoint and write CSVs.

    One CSV per checkpoint is written to `args.output_dir` ('output.csv', or
    'output_<fold>.csv' in CV mode). When more than one checkpoint is used,
    the per-fold logits are summed and the argmax is written to
    'output_softvote.csv'.

    Changes from the previous version:
      * the forward pass runs under `torch.no_grad()`, so no autograd graph
        is kept during prediction;
      * the tokenizer is called with `truncation=True`; padding to
        `max_length` without truncation cannot build a rectangular tensor
        once any text exceeds `args.max_seq_len` tokens;
      * tensors are moved with `.cpu()` regardless of the device string,
        so values such as 'cuda:0' work as well.

    Args:
        args: Configuration (reads `cv_strategy`, `model_name`, `fold_num`,
            `max_seq_len`, `device`, `output_dir`, plus helper settings).
        test_data: Rows to predict; each batch is unpacked as `(idx, text)`.
    """
    all_fold_preds = []

    if not args.cv_strategy:
        ckpt_file_names = [args.model_name]
    else:
        ckpt_file_names = [f"{args.model_name.split('.pt')[0]}_{i + 1}.pt" for i in range(args.fold_num)]

    tokenizer = load_tokenizer(args)

    for fold_idx, ckpt in enumerate(ckpt_file_names):
        model = load_model(args, ckpt)
        model.eval()
        test_loader = get_loaders(args, None, test_data, True)

        total_preds = []
        total_argmax_preds = []
        total_ids = []

        with torch.no_grad():
            for batch in tqdm(test_loader, desc='Inferencing', total=len(test_loader)):
                idx, text = batch
                tokenized_examples = tokenizer(
                    text,
                    max_length=args.max_seq_len,
                    padding="max_length",
                    truncation=True,
                    return_tensors="pt"
                ).to(args.device)

                outputs = model(**tokenized_examples)

                # Keep the logits at sequence position 0.
                logits = outputs['logits'][:, 0, :]
                argmax_logits = torch.argmax(logits, dim=1)

                total_preds += list(logits.detach().cpu().numpy())
                total_argmax_preds += list(argmax_logits.detach().cpu().numpy())
                total_ids += list(idx)

        all_fold_preds.append(total_preds)

        output_file_name = "output.csv" if not args.cv_strategy else f"output_{fold_idx + 1}.csv"
        write_path = os.path.join(args.output_dir, output_file_name)
        os.makedirs(args.output_dir, exist_ok=True)
        with open(write_path, 'w', encoding='utf8') as w:
            print("writing prediction : {}".format(write_path))
            w.write("index,topic_idx\n")
            for index, p in zip(total_ids, total_argmax_preds):
                w.write('{},{}\n'.format(index, p))

    if len(all_fold_preds) > 1:
        # Soft-voting ensemble: sum the raw logits of all folds, then argmax.
        votes = np.sum(all_fold_preds, axis=0)
        votes = np.argmax(votes, axis=1)

        write_path = os.path.join(args.output_dir, "output_softvote.csv")
        os.makedirs(args.output_dir, exist_ok=True)
        with open(write_path, 'w', encoding='utf8') as w:
            print("writing prediction : {}".format(write_path))
            w.write("index,topic_idx\n")
            # total_ids comes from the last fold; the test loader is not
            # shuffled, so the order is the same for every fold.
            for index, p in zip(total_ids, votes):
                w.write('{},{}\n'.format(index, p))


def train(args, model, tokenizer, train_loader, optimizer):
    """Run one training epoch.

    Changes from the previous version:
      * the tokenizer is called with `truncation=True`; padding to
        `max_length` without truncation cannot build a rectangular tensor
        once any text exceeds `args.max_seq_len` tokens;
      * the unused softmax computation was removed;
      * results are moved with `.cpu()` regardless of the device string.

    Args:
        args: Configuration (reads `device`, `max_seq_len`, `log_steps`
            and the settings consumed by `compute_loss`/`update_params`).
        model: Model to train; switched to train mode.
        tokenizer: Tokenizer applied to each batch of texts.
        train_loader: Loader whose batches unpack as `(idx, text, label)`.
        optimizer: Optimizer forwarded to `update_params`.

    Returns:
        `(accuracy, mean_loss)` over the epoch.
    """
    model.train()

    total_preds = []
    total_targets = []
    losses = []
    for step, batch in tqdm(enumerate(train_loader), desc='Training', total=len(train_loader)):
        idx, text, label = batch
        label = label.to(args.device)
        tokenized_examples = tokenizer(
            text,
            max_length=args.max_seq_len,
            padding="max_length",
            truncation=True,
            return_tensors="pt"
        ).to(args.device)

        preds = model(**tokenized_examples)
        # Keep the logits at sequence position 0.
        logits = preds['logits'][:, 0, :]
        argmax_logits = torch.argmax(logits, dim=1)

        loss = compute_loss(logits, label, args)

        update_params(loss, model, optimizer, step, len(train_loader), args)

        if step % args.log_steps == 0:
            print(f"Training steps: {step} Loss: {str(loss.item())}")

        total_preds.append(argmax_logits.detach().cpu().numpy())
        total_targets.append(label.detach().cpu().numpy())
        losses.append(loss.detach().cpu().numpy())

    total_preds = np.concatenate(total_preds)
    total_targets = np.concatenate(total_targets)

    # Epoch-level training accuracy and mean loss.
    acc = accuracy_score(total_targets, total_preds)
    loss_avg = sum(losses) / len(losses)
    print(f'TRAIN ACC : {acc}, TRAIN LOSS : {loss_avg}')
    return acc, loss_avg


def validate(args, model, tokenizer, valid_loader):
    """Evaluate the model on the validation split and print a report.

    Changes from the previous version:
      * the loop runs under `torch.no_grad()`, so no autograd graph is
        built during evaluation;
      * the progress bar is labelled 'Validation' instead of 'Training';
      * the tokenizer is called with `truncation=True`; padding to
        `max_length` without truncation cannot build a rectangular tensor
        once any text exceeds `args.max_seq_len` tokens;
      * the unused softmax computation was removed.

    Args:
        args: Configuration (reads `device`, `max_seq_len`, `log_steps`
            and the settings consumed by `compute_loss`).
        model: Model to evaluate; switched to eval mode.
        tokenizer: Tokenizer applied to each batch of texts.
        valid_loader: Loader whose batches unpack as `(idx, text, label)`.

    Returns:
        `(accuracy, mean_loss)` over the validation set.
    """
    model.eval()

    total_preds = []
    total_targets = []
    losses = []
    with torch.no_grad():
        for step, batch in tqdm(enumerate(valid_loader), desc='Validation', total=len(valid_loader)):
            idx, text, label = batch
            label = label.to(args.device)
            tokenized_examples = tokenizer(
                text,
                max_length=args.max_seq_len,
                padding="max_length",
                truncation=True,
                return_tensors="pt"
            ).to(args.device)

            preds = model(**tokenized_examples)
            # Keep the logits at sequence position 0.
            logits = preds['logits'][:, 0, :]
            argmax_logits = torch.argmax(logits, dim=1)

            loss = compute_loss(logits, label, args)

            if step % args.log_steps == 0:
                print(f"Validation steps: {step} Loss: {str(loss.item())}")

            total_preds.append(argmax_logits.detach().cpu().numpy())
            total_targets.append(label.detach().cpu().numpy())
            losses.append(loss.detach().cpu().numpy())

    total_preds = np.concatenate(total_preds)
    total_targets = np.concatenate(total_targets)

    # Per-class report and per-class recall (confusion-matrix diagonal / row sums).
    target_names = ['IT과학', '경제', '사회', '생활문화', '세계', '스포츠', '정치']
    print(metrics.classification_report(total_targets, total_preds, target_names=target_names))
    matrix = metrics.confusion_matrix(total_targets, total_preds)
    print(matrix.diagonal()/matrix.sum(axis=1))

    acc = accuracy_score(total_targets, total_preds)
    loss_avg = sum(losses) / len(losses)
    print(f'VALID ACC : {acc}, VALID LOSS : {loss_avg}')
    return acc, loss_avg


## Train

In [7]:
import torch
from sklearn.model_selection import KFold, StratifiedKFold
from transformers import AutoConfig, AutoTokenizer, AutoModelForSequenceClassification
from datetime import datetime
from pytz import timezone


def main(args):
    """Train the classifier, optionally with k-fold cross-validation.

    Seeds the RNGs, picks the device, loads tokenizer and training data,
    builds the fold splits and calls `run` once per fold. With a falsy
    `args.cv_strategy` only the first split is trained. In CV mode the mean
    of the per-fold best accuracies is printed.

    Changes from the previous version: the summary banner said 'auc_avg'
    although the averaged metric is accuracy, and the tokenizer is now
    obtained through `load_tokenizer` instead of a duplicated call.

    Args:
        args: Configuration (reads `run_name`, `seed`, `cv_strategy`,
            `fold_num`, plus everything the helpers consume). `run_name`
            and `device` are overwritten in place.
    """
    if not args.run_name:
        args.run_name = datetime.now(timezone("Asia/Seoul")).strftime("%Y-%m-%d-%H:%M:%S")

    set_seeds(args.seed)

    device = "cuda" if torch.cuda.is_available() else "cpu"
    args.device = device

    tokenizer = load_tokenizer(args)

    preprocess = Preprocess(args)
    preprocess.load_train_data()
    train_data_origin = preprocess.train_data

    print(f"Size of train data : {len(train_data_origin)}")

    if args.cv_strategy == 'random':
        kf = KFold(n_splits=args.fold_num, shuffle=True)
        splits = kf.split(X=train_data_origin)
    else:
        # Default: stratify on the last column of each row.
        # NOTE(review): assumes the last column holds the class label -- confirm against the CSV.
        train_labels = [sequence[-1] for sequence in train_data_origin]
        skf = StratifiedKFold(n_splits=args.fold_num, shuffle=True)
        splits = skf.split(X=train_data_origin, y=train_labels)

    acc_avg = 0
    for fold_num, (train_index, valid_index) in enumerate(splits):
        train_data = train_data_origin[train_index]
        valid_data = train_data_origin[valid_index]
        best_acc = run(args, tokenizer, train_data, valid_data, fold_num + 1)

        # Without a CV strategy only the first split is trained.
        if not args.cv_strategy:
            break

        acc_avg += best_acc

    if args.cv_strategy:
        acc_avg /= args.fold_num

        print("*" * 50, 'acc_avg', "*" * 50)
        print(acc_avg)


## Run

In [8]:
import argparse
import easydict

def parse_args():
    """Return the run configuration as an attribute-accessible EasyDict.

    Fix: the cosine-annealing minimum learning rate was stored under the
    key '--eta_min', so `args.eta_min` (read by `get_scheduler` for the
    'cosine_annealing' and 'cosine_annealing_warmstart' schedulers) did not
    exist. The key is now 'eta_min'.

    Returns:
        EasyDict with paths, training hyper-parameters, optimizer,
        scheduler and logging settings. `device` is overwritten by `main`.
    """
    args = easydict.EasyDict({'run_name' : 'temp',
                             'seed':42,
                             'device' :'cuda',
                             'data_dir': cur_dir + '/data/open/',
                             'model_dir' : '/content/drive/MyDrive/KLUE_TC/models/',
                             'model_name_or_path' : 'klue/roberta-large',
                             'config_name' : None,
                             'tokenizer_name' : None,
                             'output_dir' : '/content/drive/MyDrive/KLUE_TC/output/loss',

                             # Gradient accumulation
                             'accum_iter' : 8,
                             'gradient_accumulation' : True,

                             # Cross-validation
                             'cv_strategy' : 'stratified',
                             'fold_num' : 4,

                             'num_workers' : 1,

                             # Training
                             'n_epochs' : 5,
                             'batch_size' : 32,
                             'lr' : 5e-6,
                             'clip_grad' : 10,
                             'patience' : 5,
                             'max_seq_len' : 40,

                             # Optimizer
                             'optimizer' : 'adamP',

                             # Optimizer-parameters
                             'weight_decay' : 0.05,
                             'momentum' : 0.9,

                             # Scheduler
                             'scheduler' : 'step_lr',

                             # Scheduler-parameters
                             # plateau
                             'plateau_patience' : 10,
                             'plateau_factor' : 0.5,

                             # cosine annealing (with and without warm restarts)
                             't_max' : 10,
                             'T_0' : 10,
                             'T_mult' : 2,
                             'eta_min' : 0.01,

                             # linear_warmup
                             'warmup_ratio' : 0.3,

                             # Step LR
                             'step_size' : 50,
                             'gamma' : 0.1,

                             'criterion' : 'CE',

                             'log_steps' : 100})

    return args

In [9]:
# Entry point: build the configuration and start training via main().
if __name__ == '__main__':
    args = parse_args()
    main(args)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=337.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=547.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=248477.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=173.0, style=ProgressStyle(description_…


Size of train data : 45654


HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1346854671.0, style=ProgressStyle(descr…




Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.decoder.bias', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classif

Start Training: Epoch 1


Training:   0%|          | 1/1070 [00:00<06:15,  2.84it/s]

Training steps: 0 Loss: 1.9070595502853394


Training:   9%|▉         | 101/1070 [00:23<03:32,  4.55it/s]

Training steps: 100 Loss: 1.9672377109527588


Training:  19%|█▉        | 201/1070 [00:46<03:30,  4.13it/s]

Training steps: 200 Loss: 1.6293144226074219


Training:  28%|██▊       | 301/1070 [01:09<02:50,  4.52it/s]

Training steps: 300 Loss: 1.4402148723602295


Training:  37%|███▋      | 401/1070 [01:32<02:42,  4.11it/s]

Training steps: 400 Loss: 1.0429401397705078


Training:  47%|████▋     | 501/1070 [01:55<02:05,  4.52it/s]

Training steps: 500 Loss: 0.9223495125770569


Training:  56%|█████▌    | 601/1070 [02:18<01:54,  4.09it/s]

Training steps: 600 Loss: 0.4544483423233032


Training:  66%|██████▌   | 701/1070 [02:41<01:21,  4.53it/s]

Training steps: 700 Loss: 0.6042417883872986


Training:  75%|███████▍  | 801/1070 [03:04<01:05,  4.11it/s]

Training steps: 800 Loss: 0.49643972516059875


Training:  84%|████████▍ | 901/1070 [03:27<00:37,  4.54it/s]

Training steps: 900 Loss: 0.5173304677009583


Training:  94%|█████████▎| 1001/1070 [03:50<00:16,  4.09it/s]

Training steps: 1000 Loss: 0.45720022916793823


Training: 100%|██████████| 1070/1070 [04:06<00:00,  4.34it/s]

TRAIN ACC : 0.6664719626168224, TRAIN LOSS : 0.9696927337863734



Training:   0%|          | 0/357 [00:00<?, ?it/s]

Validation steps: 0 Loss: 0.29885202646255493


Training:  29%|██▊       | 102/357 [00:07<00:17, 14.24it/s]

Validation steps: 100 Loss: 0.5412469506263733


Training:  57%|█████▋    | 202/357 [00:14<00:10, 14.17it/s]

Validation steps: 200 Loss: 0.455304890871048


Training:  85%|████████▍ | 302/357 [00:21<00:03, 14.30it/s]

Validation steps: 300 Loss: 0.19735848903656006


Training: 100%|██████████| 357/357 [00:25<00:00, 14.21it/s]


              precision    recall  f1-score   support

        IT과학       0.81      0.89      0.85      1206
          경제       0.87      0.80      0.83      1555
          사회       0.79      0.79      0.79      1841
        생활문화       0.90      0.89      0.90      1483
          세계       0.93      0.92      0.93      1908
         스포츠       0.96      0.99      0.98      1734
          정치       0.92      0.92      0.92      1687

    accuracy                           0.89     11414
   macro avg       0.88      0.89      0.88     11414
weighted avg       0.89      0.89      0.89     11414

[0.88971808 0.79807074 0.78544269 0.89480782 0.92348008 0.99192618
 0.91938352]
VALID ACC : 0.8866304538286315, VALID LOSS : 0.40321529328990047
{'epoch': 0, 'train_loss': 0.9696927337863734, 'train_acc': 0.6664719626168224, 'valid_acc': 0.8866304538286315, 'val_loss': 0.40321529328990047, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 2


Training:   0%|          | 1/1070 [00:00<04:23,  4.06it/s]

Training steps: 0 Loss: 0.23874521255493164


Training:   9%|▉         | 101/1070 [00:23<03:34,  4.53it/s]

Training steps: 100 Loss: 0.46513769030570984


Training:  19%|█▉        | 201/1070 [00:46<03:32,  4.10it/s]

Training steps: 200 Loss: 0.4840695559978485


Training:  28%|██▊       | 301/1070 [01:09<02:49,  4.53it/s]

Training steps: 300 Loss: 0.2654828727245331


Training:  37%|███▋      | 401/1070 [01:32<02:44,  4.08it/s]

Training steps: 400 Loss: 0.3273584842681885


Training:  47%|████▋     | 501/1070 [01:55<02:05,  4.55it/s]

Training steps: 500 Loss: 0.22063785791397095


Training:  56%|█████▌    | 601/1070 [02:18<01:55,  4.07it/s]

Training steps: 600 Loss: 0.586391031742096


Training:  66%|██████▌   | 701/1070 [02:41<01:21,  4.53it/s]

Training steps: 700 Loss: 0.15054024755954742


Training:  75%|███████▍  | 801/1070 [03:04<01:05,  4.08it/s]

Training steps: 800 Loss: 0.4524767994880676


Training:  84%|████████▍ | 901/1070 [03:27<00:37,  4.52it/s]

Training steps: 900 Loss: 0.1467886120080948


Training:  94%|█████████▎| 1001/1070 [03:50<00:16,  4.10it/s]

Training steps: 1000 Loss: 0.3047342002391815


Training: 100%|██████████| 1070/1070 [04:06<00:00,  4.34it/s]

TRAIN ACC : 0.8850759345794392, TRAIN LOSS : 0.38593963074071386



Training:   0%|          | 0/357 [00:00<?, ?it/s]

Validation steps: 0 Loss: 0.23665550351142883


Training:  29%|██▊       | 102/357 [00:07<00:17, 14.19it/s]

Validation steps: 100 Loss: 0.5071125030517578


Training:  57%|█████▋    | 202/357 [00:14<00:10, 14.19it/s]

Validation steps: 200 Loss: 0.3790842294692993


Training:  85%|████████▍ | 302/357 [00:21<00:03, 14.21it/s]

Validation steps: 300 Loss: 0.16493235528469086


Training: 100%|██████████| 357/357 [00:25<00:00, 14.21it/s]


              precision    recall  f1-score   support

        IT과학       0.83      0.87      0.85      1206
          경제       0.87      0.81      0.84      1555
          사회       0.77      0.82      0.80      1841
        생활문화       0.92      0.88      0.90      1483
          세계       0.93      0.93      0.93      1908
         스포츠       0.95      0.99      0.97      1734
          정치       0.94      0.88      0.91      1687

    accuracy                           0.89     11414
   macro avg       0.89      0.88      0.89     11414
weighted avg       0.89      0.89      0.89     11414

[0.86981758 0.81286174 0.82074959 0.88334457 0.93396226 0.99192618
 0.88085359]
VALID ACC : 0.886805677238479, VALID LOSS : 0.3601946984665568
{'epoch': 1, 'train_loss': 0.38593963074071386, 'train_acc': 0.8850759345794392, 'valid_acc': 0.886805677238479, 'val_loss': 0.3601946984665568, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 3


Training:   0%|          | 1/1070 [00:00<04:36,  3.86it/s]

Training steps: 0 Loss: 0.39415451884269714


Training:   9%|▉         | 101/1070 [00:23<03:33,  4.53it/s]

Training steps: 100 Loss: 0.2581581771373749


Training:  19%|█▉        | 201/1070 [00:46<03:32,  4.08it/s]

Training steps: 200 Loss: 0.14057351648807526


Training:  28%|██▊       | 301/1070 [01:09<02:50,  4.52it/s]

Training steps: 300 Loss: 0.24799057841300964


Training:  37%|███▋      | 401/1070 [01:32<02:43,  4.08it/s]

Training steps: 400 Loss: 0.28767576813697815


Training:  47%|████▋     | 501/1070 [01:55<02:05,  4.52it/s]

Training steps: 500 Loss: 0.4987742304801941


Training:  56%|█████▌    | 601/1070 [02:18<01:54,  4.10it/s]

Training steps: 600 Loss: 0.16908057034015656


Training:  66%|██████▌   | 701/1070 [02:41<01:21,  4.50it/s]

Training steps: 700 Loss: 0.23588311672210693


Training:  75%|███████▍  | 801/1070 [03:04<01:05,  4.08it/s]

Training steps: 800 Loss: 0.394882470369339


Training:  84%|████████▍ | 901/1070 [03:27<00:37,  4.52it/s]

Training steps: 900 Loss: 0.16385585069656372


Training:  94%|█████████▎| 1001/1070 [03:51<00:16,  4.07it/s]

Training steps: 1000 Loss: 0.24489568173885345


Training: 100%|██████████| 1070/1070 [04:07<00:00,  4.33it/s]

TRAIN ACC : 0.8984228971962617, TRAIN LOSS : 0.32701705129834535



Training:   0%|          | 0/357 [00:00<?, ?it/s]

Validation steps: 0 Loss: 0.24858541786670685


Training:  29%|██▊       | 102/357 [00:07<00:18, 14.14it/s]

Validation steps: 100 Loss: 0.4818074405193329


Training:  57%|█████▋    | 202/357 [00:14<00:11, 13.99it/s]

Validation steps: 200 Loss: 0.32036200165748596


Training:  85%|████████▍ | 302/357 [00:21<00:03, 14.12it/s]

Validation steps: 300 Loss: 0.1610119491815567


Training: 100%|██████████| 357/357 [00:25<00:00, 14.09it/s]


              precision    recall  f1-score   support

        IT과학       0.84      0.87      0.85      1206
          경제       0.86      0.82      0.84      1555
          사회       0.76      0.83      0.79      1841
        생활문화       0.93      0.88      0.90      1483
          세계       0.93      0.94      0.94      1908
         스포츠       0.97      0.98      0.98      1734
          정치       0.93      0.89      0.91      1687

    accuracy                           0.89     11414
   macro avg       0.89      0.89      0.89     11414
weighted avg       0.89      0.89      0.89     11414

[0.86981758 0.82186495 0.82509506 0.87525287 0.93867925 0.98154556
 0.88678127]
VALID ACC : 0.8877694059926406, VALID LOSS : 0.3494186485629706
{'epoch': 2, 'train_loss': 0.32701705129834535, 'train_acc': 0.8984228971962617, 'valid_acc': 0.8877694059926406, 'val_loss': 0.3494186485629706, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 4


Training:   0%|          | 1/1070 [00:00<04:23,  4.05it/s]

Training steps: 0 Loss: 0.43254753947257996


Training:   9%|▉         | 101/1070 [00:23<03:33,  4.53it/s]

Training steps: 100 Loss: 0.3424892723560333


Training:  19%|█▉        | 201/1070 [00:46<03:32,  4.09it/s]

Training steps: 200 Loss: 0.10152821987867355


Training:  28%|██▊       | 301/1070 [01:09<02:50,  4.51it/s]

Training steps: 300 Loss: 0.12485767900943756


Training:  37%|███▋      | 401/1070 [01:32<02:42,  4.11it/s]

Training steps: 400 Loss: 0.2579312324523926


Training:  47%|████▋     | 501/1070 [01:55<02:06,  4.50it/s]

Training steps: 500 Loss: 0.39016425609588623


Training:  56%|█████▌    | 601/1070 [02:18<01:54,  4.10it/s]

Training steps: 600 Loss: 0.4336763620376587


Training:  66%|██████▌   | 701/1070 [02:41<01:21,  4.53it/s]

Training steps: 700 Loss: 0.07577954232692719


Training:  75%|███████▍  | 801/1070 [03:04<01:06,  4.07it/s]

Training steps: 800 Loss: 0.14407874643802643


Training:  84%|████████▍ | 901/1070 [03:27<00:37,  4.53it/s]

Training steps: 900 Loss: 0.45252725481987


Training:  94%|█████████▎| 1001/1070 [03:50<00:16,  4.11it/s]

Training steps: 1000 Loss: 0.5453522801399231


Training: 100%|██████████| 1070/1070 [04:06<00:00,  4.33it/s]

TRAIN ACC : 0.9095502336448598, TRAIN LOSS : 0.2910470582509988



Training:   0%|          | 0/357 [00:00<?, ?it/s]

Validation steps: 0 Loss: 0.1612420529127121


Training:  29%|██▊       | 102/357 [00:07<00:18, 14.08it/s]

Validation steps: 100 Loss: 0.6603726744651794


Training:  57%|█████▋    | 202/357 [00:14<00:10, 14.34it/s]

Validation steps: 200 Loss: 0.4388505518436432


Training:  85%|████████▍ | 302/357 [00:21<00:03, 14.20it/s]

Validation steps: 300 Loss: 0.12595981359481812


Training: 100%|██████████| 357/357 [00:25<00:00, 14.18it/s]


              precision    recall  f1-score   support

        IT과학       0.80      0.92      0.86      1206
          경제       0.89      0.79      0.84      1555
          사회       0.83      0.77      0.80      1841
        생활문화       0.90      0.91      0.90      1483
          세계       0.92      0.94      0.93      1908
         스포츠       0.97      0.98      0.98      1734
          정치       0.91      0.93      0.92      1687

    accuracy                           0.89     11414
   macro avg       0.89      0.89      0.89     11414
weighted avg       0.89      0.89      0.89     11414

[0.92371476 0.79356913 0.77077675 0.90694538 0.94496855 0.97923875
 0.93420273]
VALID ACC : 0.8926756614683722, VALID LOSS : 0.37555638701767147
{'epoch': 3, 'train_loss': 0.2910470582509988, 'train_acc': 0.9095502336448598, 'valid_acc': 0.8926756614683722, 'val_loss': 0.37555638701767147, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 5


Training:   0%|          | 1/1070 [00:00<04:19,  4.13it/s]

Training steps: 0 Loss: 0.2493470013141632


Training:   9%|▉         | 101/1070 [00:23<03:34,  4.52it/s]

Training steps: 100 Loss: 0.2967534363269806


Training:  19%|█▉        | 201/1070 [00:46<03:30,  4.12it/s]

Training steps: 200 Loss: 0.1732420176267624


Training:  28%|██▊       | 301/1070 [01:09<02:49,  4.53it/s]

Training steps: 300 Loss: 0.2011859118938446


Training:  37%|███▋      | 401/1070 [01:32<02:42,  4.12it/s]

Training steps: 400 Loss: 0.09436299651861191


Training:  47%|████▋     | 501/1070 [01:55<02:05,  4.53it/s]

Training steps: 500 Loss: 0.1233527734875679


Training:  56%|█████▌    | 601/1070 [02:18<01:54,  4.09it/s]

Training steps: 600 Loss: 0.11999427527189255


Training:  66%|██████▌   | 701/1070 [02:41<01:21,  4.53it/s]

Training steps: 700 Loss: 0.30417877435684204


Training:  75%|███████▍  | 801/1070 [03:04<01:05,  4.11it/s]

Training steps: 800 Loss: 0.3322409391403198


Training:  84%|████████▍ | 901/1070 [03:27<00:37,  4.52it/s]

Training steps: 900 Loss: 0.2488584965467453


Training:  94%|█████████▎| 1001/1070 [03:51<00:16,  4.11it/s]

Training steps: 1000 Loss: 0.22605355083942413


Training: 100%|██████████| 1070/1070 [04:07<00:00,  4.33it/s]

TRAIN ACC : 0.9198306074766355, TRAIN LOSS : 0.25749029746783947



Training:   0%|          | 0/357 [00:00<?, ?it/s]

Validation steps: 0 Loss: 0.21109268069267273


Training:  29%|██▊       | 102/357 [00:07<00:18, 14.09it/s]

Validation steps: 100 Loss: 0.6030737161636353


Training:  57%|█████▋    | 202/357 [00:14<00:10, 14.20it/s]

Validation steps: 200 Loss: 0.41303184628486633


Training:  85%|████████▍ | 302/357 [00:21<00:03, 14.07it/s]

Validation steps: 300 Loss: 0.12142647057771683


Training: 100%|██████████| 357/357 [00:25<00:00, 14.14it/s]

              precision    recall  f1-score   support

        IT과학       0.79      0.93      0.85      1206
          경제       0.88      0.79      0.83      1555
          사회       0.80      0.79      0.79      1841
        생활문화       0.93      0.88      0.90      1483
          세계       0.93      0.94      0.93      1908
         스포츠       0.97      0.99      0.98      1734
          정치       0.92      0.91      0.92      1687

    accuracy                           0.89     11414
   macro avg       0.89      0.89      0.89     11414
weighted avg       0.89      0.89      0.89     11414

[0.92620232 0.79228296 0.78544269 0.87592717 0.94392034 0.98558247
 0.9122703 ]
VALID ACC : 0.8886455230418784, VALID LOSS : 0.3728250814366992
{'epoch': 4, 'train_loss': 0.25749029746783947, 'train_acc': 0.9198306074766355, 'valid_acc': 0.8886455230418784, 'val_loss': 0.3728250814366992, 'learning_rate': 5e-06}
Start Training: Epoch 6



Training:   0%|          | 1/1070 [00:00<04:04,  4.37it/s]

Training steps: 0 Loss: 0.05046868324279785


Training:   9%|▉         | 101/1070 [00:23<03:34,  4.52it/s]

Training steps: 100 Loss: 0.09919217228889465


Training:  19%|█▉        | 201/1070 [00:46<03:31,  4.10it/s]

Training steps: 200 Loss: 0.24583198130130768


Training:  28%|██▊       | 301/1070 [01:09<02:51,  4.50it/s]

Training steps: 300 Loss: 0.4392736256122589


Training:  37%|███▋      | 401/1070 [01:32<02:43,  4.08it/s]

Training steps: 400 Loss: 0.07726852595806122


Training:  47%|████▋     | 501/1070 [01:55<02:06,  4.51it/s]

Training steps: 500 Loss: 0.24126587808132172


Training:  56%|█████▌    | 601/1070 [02:18<01:54,  4.10it/s]

Training steps: 600 Loss: 0.3184376657009125


Training:  66%|██████▌   | 701/1070 [02:41<01:21,  4.52it/s]

Training steps: 700 Loss: 0.14672622084617615


Training:  75%|███████▍  | 801/1070 [03:05<01:06,  4.06it/s]

Training steps: 800 Loss: 0.2393413484096527


Training:  84%|████████▍ | 901/1070 [03:28<00:37,  4.53it/s]

Training steps: 900 Loss: 0.1705465465784073


Training:  94%|█████████▎| 1001/1070 [03:51<00:16,  4.09it/s]

Training steps: 1000 Loss: 0.12326186895370483


Training: 100%|██████████| 1070/1070 [04:07<00:00,  4.32it/s]

TRAIN ACC : 0.9294976635514018, TRAIN LOSS : 0.22704953359805535



Training:   0%|          | 0/357 [00:00<?, ?it/s]

Validation steps: 0 Loss: 0.24542847275733948


Training:  29%|██▊       | 102/357 [00:07<00:18, 14.15it/s]

Validation steps: 100 Loss: 0.5655587315559387


Training:  57%|█████▋    | 202/357 [00:14<00:10, 14.12it/s]

Validation steps: 200 Loss: 0.3514283001422882


Training:  85%|████████▍ | 302/357 [00:21<00:03, 14.22it/s]

Validation steps: 300 Loss: 0.13224026560783386


Training: 100%|██████████| 357/357 [00:25<00:00, 14.17it/s]

              precision    recall  f1-score   support

        IT과학       0.81      0.89      0.85      1206
          경제       0.88      0.79      0.83      1555
          사회       0.78      0.81      0.79      1841
        생활문화       0.92      0.89      0.90      1483
          세계       0.92      0.95      0.93      1908
         스포츠       0.97      0.98      0.98      1734
          정치       0.93      0.89      0.91      1687

    accuracy                           0.89     11414
   macro avg       0.89      0.89      0.89     11414
weighted avg       0.89      0.89      0.89     11414

[0.8946932  0.79292605 0.8077132  0.88941335 0.94706499 0.98096886
 0.89152341]
VALID ACC : 0.8875065708778693, VALID LOSS : 0.378910873820927
{'epoch': 5, 'train_loss': 0.22704953359805535, 'train_acc': 0.9294976635514018, 'valid_acc': 0.8875065708778693, 'val_loss': 0.378910873820927, 'learning_rate': 5e-06}
Start Training: Epoch 7



Training:   0%|          | 1/1070 [00:00<03:53,  4.58it/s]

Training steps: 0 Loss: 0.12091080099344254


Training:   9%|▉         | 101/1070 [00:23<03:34,  4.51it/s]

Training steps: 100 Loss: 0.2502775490283966


Training:  19%|█▉        | 201/1070 [00:46<03:31,  4.10it/s]

Training steps: 200 Loss: 0.0955376848578453


Training:  28%|██▊       | 301/1070 [01:09<02:50,  4.52it/s]

Training steps: 300 Loss: 0.2281850129365921


Training:  37%|███▋      | 401/1070 [01:32<02:43,  4.10it/s]

Training steps: 400 Loss: 0.09135963022708893


Training:  47%|████▋     | 501/1070 [01:55<02:06,  4.51it/s]

Training steps: 500 Loss: 0.11995910108089447


Training:  56%|█████▌    | 601/1070 [02:18<01:54,  4.09it/s]

Training steps: 600 Loss: 0.21486032009124756


Training:  66%|██████▌   | 701/1070 [02:41<01:22,  4.50it/s]

Training steps: 700 Loss: 0.19523918628692627


Training:  75%|███████▍  | 801/1070 [03:05<01:06,  4.04it/s]

Training steps: 800 Loss: 0.09168976545333862


Training:  84%|████████▍ | 901/1070 [03:28<00:37,  4.51it/s]

Training steps: 900 Loss: 0.20240065455436707


Training:  94%|█████████▎| 1001/1070 [03:51<00:16,  4.11it/s]

Training steps: 1000 Loss: 0.12466086447238922


Training: 100%|██████████| 1070/1070 [04:07<00:00,  4.33it/s]

TRAIN ACC : 0.9366822429906542, TRAIN LOSS : 0.20313762853914333



Training:   0%|          | 0/357 [00:00<?, ?it/s]

Validation steps: 0 Loss: 0.3907235562801361


Training:  29%|██▊       | 102/357 [00:07<00:18, 14.09it/s]

Validation steps: 100 Loss: 0.6435546875


Training:  57%|█████▋    | 202/357 [00:14<00:10, 14.12it/s]

Validation steps: 200 Loss: 0.4847709834575653


Training:  85%|████████▍ | 302/357 [00:21<00:03, 14.05it/s]

Validation steps: 300 Loss: 0.1389276534318924


Training: 100%|██████████| 357/357 [00:25<00:00, 14.10it/s]

              precision    recall  f1-score   support

        IT과학       0.79      0.92      0.85      1206
          경제       0.86      0.82      0.84      1555
          사회       0.81      0.77      0.79      1841
        생활문화       0.90      0.90      0.90      1483
          세계       0.95      0.92      0.93      1908
         스포츠       0.97      0.98      0.98      1734
          정치       0.92      0.91      0.92      1687

    accuracy                           0.89     11414
   macro avg       0.88      0.89      0.89     11414
weighted avg       0.89      0.89      0.89     11414

[0.91791045 0.81864952 0.7691472  0.90222522 0.92295597 0.98096886
 0.91345584]
VALID ACC : 0.8881198528123357, VALID LOSS : 0.4111378034368074
{'epoch': 6, 'train_loss': 0.20313762853914333, 'train_acc': 0.9366822429906542, 'valid_acc': 0.8881198528123357, 'val_loss': 0.4111378034368074, 'learning_rate': 5e-06}
Start Training: Epoch 8



Training:   0%|          | 1/1070 [00:00<04:02,  4.42it/s]

Training steps: 0 Loss: 0.04966077208518982


Training:   9%|▉         | 101/1070 [00:23<03:34,  4.52it/s]

Training steps: 100 Loss: 0.3576187193393707


Training:  19%|█▉        | 201/1070 [00:46<03:33,  4.06it/s]

Training steps: 200 Loss: 0.10359911620616913


Training:  28%|██▊       | 301/1070 [01:09<02:50,  4.51it/s]

Training steps: 300 Loss: 0.3439175486564636


Training:  37%|███▋      | 401/1070 [01:32<02:43,  4.10it/s]

Training steps: 400 Loss: 0.08698894828557968


Training:  47%|████▋     | 501/1070 [01:55<02:06,  4.51it/s]

Training steps: 500 Loss: 0.31097212433815


Training:  56%|█████▌    | 601/1070 [02:19<01:55,  4.06it/s]

Training steps: 600 Loss: 0.12032029777765274


Training:  66%|██████▌   | 701/1070 [02:42<01:21,  4.51it/s]

Training steps: 700 Loss: 0.1362837553024292


Training:  75%|███████▍  | 801/1070 [03:05<01:05,  4.09it/s]

Training steps: 800 Loss: 0.08479658514261246


Training:  84%|████████▍ | 901/1070 [03:28<00:37,  4.50it/s]

Training steps: 900 Loss: 0.2685154974460602


Training:  94%|█████████▎| 1001/1070 [03:51<00:16,  4.11it/s]

Training steps: 1000 Loss: 0.17566119134426117


Training: 100%|██████████| 1070/1070 [04:07<00:00,  4.32it/s]

TRAIN ACC : 0.9452978971962617, TRAIN LOSS : 0.17657488099882535



Training:   0%|          | 0/357 [00:00<?, ?it/s]

Validation steps: 0 Loss: 0.357563853263855


Training:  29%|██▊       | 102/357 [00:07<00:18, 14.08it/s]

Validation steps: 100 Loss: 0.6256101727485657


Training:  57%|█████▋    | 202/357 [00:14<00:10, 14.13it/s]

Validation steps: 200 Loss: 0.43541496992111206


Training:  85%|████████▍ | 302/357 [00:21<00:03, 14.14it/s]

Validation steps: 300 Loss: 0.15528567135334015


Training: 100%|██████████| 357/357 [00:25<00:00, 14.11it/s]

              precision    recall  f1-score   support

        IT과학       0.82      0.88      0.85      1206
          경제       0.84      0.83      0.83      1555
          사회       0.81      0.77      0.79      1841
        생활문화       0.90      0.90      0.90      1483
          세계       0.93      0.94      0.93      1908
         스포츠       0.96      0.99      0.97      1734
          정치       0.92      0.90      0.91      1687

    accuracy                           0.89     11414
   macro avg       0.88      0.89      0.88     11414
weighted avg       0.89      0.89      0.89     11414

[0.87645108 0.83022508 0.76806084 0.89615644 0.93501048 0.99019608
 0.90397155]
VALID ACC : 0.8863676187138602, VALID LOSS : 0.42576463396350545
{'epoch': 7, 'train_loss': 0.17657488099882535, 'train_acc': 0.9452978971962617, 'valid_acc': 0.8863676187138602, 'val_loss': 0.42576463396350545, 'learning_rate': 5e-06}
Start Training: Epoch 9



Training:   0%|          | 1/1070 [00:00<04:00,  4.44it/s]

Training steps: 0 Loss: 0.07329244166612625


Training:   9%|▉         | 101/1070 [00:23<03:33,  4.53it/s]

Training steps: 100 Loss: 0.16135449707508087


Training:  19%|█▉        | 201/1070 [00:46<03:31,  4.10it/s]

Training steps: 200 Loss: 0.1739240437746048


Training:  28%|██▊       | 301/1070 [01:09<02:51,  4.49it/s]

Training steps: 300 Loss: 0.028817584738135338


Training:  37%|███▋      | 401/1070 [01:32<02:44,  4.07it/s]

Training steps: 400 Loss: 0.03791144862771034


Training:  47%|████▋     | 501/1070 [01:55<02:06,  4.51it/s]

Training steps: 500 Loss: 0.1476648449897766


Training:  56%|█████▌    | 601/1070 [02:19<01:53,  4.12it/s]

Training steps: 600 Loss: 0.07145726680755615


Training:  66%|██████▌   | 701/1070 [02:42<01:21,  4.50it/s]

Training steps: 700 Loss: 0.026806343346834183


Training:  75%|███████▍  | 801/1070 [03:05<01:05,  4.09it/s]

Training steps: 800 Loss: 0.28152239322662354


Training:  84%|████████▍ | 901/1070 [03:28<00:37,  4.51it/s]

Training steps: 900 Loss: 0.14709889888763428


Training:  94%|█████████▎| 1001/1070 [03:51<00:16,  4.06it/s]

Training steps: 1000 Loss: 0.20045045018196106


Training: 100%|██████████| 1070/1070 [04:07<00:00,  4.32it/s]

TRAIN ACC : 0.9551693925233645, TRAIN LOSS : 0.1524286817646124



Training:   0%|          | 0/357 [00:00<?, ?it/s]

Validation steps: 0 Loss: 0.38561636209487915


Training:  29%|██▊       | 102/357 [00:07<00:17, 14.28it/s]

Validation steps: 100 Loss: 0.749171793460846


Training:  57%|█████▋    | 202/357 [00:14<00:10, 14.20it/s]

Validation steps: 200 Loss: 0.3451906740665436


Training:  85%|████████▍ | 302/357 [00:21<00:03, 14.18it/s]

Validation steps: 300 Loss: 0.2650204598903656


Training: 100%|██████████| 357/357 [00:25<00:00, 14.19it/s]


              precision    recall  f1-score   support

        IT과학       0.84      0.82      0.83      1206
          경제       0.85      0.82      0.83      1555
          사회       0.77      0.78      0.78      1841
        생활문화       0.88      0.90      0.89      1483
          세계       0.94      0.92      0.93      1908
         스포츠       0.96      0.99      0.98      1734
          정치       0.91      0.92      0.91      1687

    accuracy                           0.88     11414
   macro avg       0.88      0.88      0.88     11414
weighted avg       0.88      0.88      0.88     11414

[0.81674959 0.81800643 0.7789245  0.89885367 0.92348008 0.99019608
 0.91582691]
VALID ACC : 0.8803224110741195, VALID LOSS : 0.45291299404477287
{'epoch': 8, 'train_loss': 0.1524286817646124, 'train_acc': 0.9551693925233645, 'valid_acc': 0.8803224110741195, 'val_loss': 0.45291299404477287, 'learning_rate': 5e-06}
EarlyStopping counter: 5 out of 5


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.decoder.bias', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classif

Start Training: Epoch 1


Training:   0%|          | 1/1070 [00:00<04:13,  4.21it/s]

Training steps: 0 Loss: 2.0323307514190674


Training:   9%|▉         | 101/1070 [00:23<03:34,  4.51it/s]

Training steps: 100 Loss: 1.8949542045593262


Training:  19%|█▉        | 201/1070 [00:46<03:31,  4.10it/s]

Training steps: 200 Loss: 1.6697219610214233


Training:  28%|██▊       | 301/1070 [01:09<02:50,  4.52it/s]

Training steps: 300 Loss: 1.4464308023452759


Training:  37%|███▋      | 401/1070 [01:32<02:44,  4.08it/s]

Training steps: 400 Loss: 0.9977882504463196


Training:  47%|████▋     | 501/1070 [01:55<02:06,  4.50it/s]

Training steps: 500 Loss: 0.903636634349823


Training:  56%|█████▌    | 601/1070 [02:18<01:54,  4.10it/s]

Training steps: 600 Loss: 0.6200249195098877


Training:  66%|██████▌   | 701/1070 [02:42<01:22,  4.49it/s]

Training steps: 700 Loss: 0.8784829378128052


Training:  75%|███████▍  | 801/1070 [03:05<01:05,  4.09it/s]

Training steps: 800 Loss: 0.5687240362167358


Training:  84%|████████▍ | 901/1070 [03:28<00:37,  4.51it/s]

Training steps: 900 Loss: 0.5224358439445496


Training:  94%|█████████▎| 1001/1070 [03:51<00:16,  4.08it/s]

Training steps: 1000 Loss: 0.48777395486831665


Training: 100%|██████████| 1070/1070 [04:07<00:00,  4.32it/s]

TRAIN ACC : 0.6402161214953271, TRAIN LOSS : 1.0067489100393847



Training:   0%|          | 0/357 [00:00<?, ?it/s]

Validation steps: 0 Loss: 0.3294868767261505


Training:  29%|██▊       | 102/357 [00:07<00:17, 14.21it/s]

Validation steps: 100 Loss: 0.6934536695480347


Training:  57%|█████▋    | 202/357 [00:14<00:10, 14.16it/s]

Validation steps: 200 Loss: 0.4307495355606079


Training:  85%|████████▍ | 302/357 [00:21<00:03, 14.17it/s]

Validation steps: 300 Loss: 0.5096657872200012


Training: 100%|██████████| 357/357 [00:25<00:00, 14.18it/s]


              precision    recall  f1-score   support

        IT과학       0.81      0.86      0.83      1206
          경제       0.86      0.80      0.83      1556
          사회       0.70      0.83      0.76      1841
        생활문화       0.95      0.84      0.89      1483
          세계       0.92      0.92      0.92      1907
         스포츠       0.96      0.99      0.97      1733
          정치       0.94      0.83      0.88      1688

    accuracy                           0.87     11414
   macro avg       0.88      0.87      0.87     11414
weighted avg       0.88      0.87      0.87     11414

[0.85903814 0.79820051 0.8332428  0.83546864 0.91924489 0.98672822
 0.83234597]
VALID ACC : 0.8690205011389521, VALID LOSS : 0.422456832326093
{'epoch': 0, 'train_loss': 1.0067489100393847, 'train_acc': 0.6402161214953271, 'valid_acc': 0.8690205011389521, 'val_loss': 0.422456832326093, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 2


Training:   0%|          | 1/1070 [00:00<04:16,  4.17it/s]

Training steps: 0 Loss: 0.44724041223526


Training:   9%|▉         | 101/1070 [00:23<03:34,  4.51it/s]

Training steps: 100 Loss: 0.47023022174835205


Training:  19%|█▉        | 201/1070 [00:46<03:32,  4.08it/s]

Training steps: 200 Loss: 0.43269678950309753


Training:  28%|██▊       | 301/1070 [01:09<02:50,  4.52it/s]

Training steps: 300 Loss: 0.33121228218078613


Training:  37%|███▋      | 401/1070 [01:32<02:43,  4.10it/s]

Training steps: 400 Loss: 0.18627715110778809


Training:  47%|████▋     | 501/1070 [01:55<02:06,  4.50it/s]

Training steps: 500 Loss: 0.5932410955429077


Training:  56%|█████▌    | 601/1070 [02:19<01:54,  4.11it/s]

Training steps: 600 Loss: 0.15328608453273773


Training:  66%|██████▌   | 701/1070 [02:42<01:21,  4.50it/s]

Training steps: 700 Loss: 0.21198982000350952


Training:  75%|███████▍  | 801/1070 [03:05<01:06,  4.06it/s]

Training steps: 800 Loss: 0.106922946870327


Training:  84%|████████▍ | 901/1070 [03:28<00:37,  4.51it/s]

Training steps: 900 Loss: 0.25530239939689636


Training:  94%|█████████▎| 1001/1070 [03:51<00:16,  4.09it/s]

Training steps: 1000 Loss: 0.4133252501487732


Training: 100%|██████████| 1070/1070 [04:07<00:00,  4.32it/s]

TRAIN ACC : 0.8865362149532711, TRAIN LOSS : 0.38590807804715965



Training:   0%|          | 0/357 [00:00<?, ?it/s]

Validation steps: 0 Loss: 0.29242566227912903


Training:  29%|██▊       | 102/357 [00:07<00:17, 14.20it/s]

Validation steps: 100 Loss: 0.5871220827102661


Training:  57%|█████▋    | 202/357 [00:14<00:10, 14.19it/s]

Validation steps: 200 Loss: 0.3192673921585083


Training:  85%|████████▍ | 302/357 [00:21<00:03, 14.08it/s]

Validation steps: 300 Loss: 0.5442298054695129


Training: 100%|██████████| 357/357 [00:25<00:00, 14.10it/s]


              precision    recall  f1-score   support

        IT과학       0.80      0.90      0.85      1206
          경제       0.90      0.77      0.83      1556
          사회       0.75      0.82      0.78      1841
        생활문화       0.92      0.90      0.91      1483
          세계       0.92      0.92      0.92      1907
         스포츠       0.96      0.99      0.97      1733
          정치       0.93      0.88      0.90      1688

    accuracy                           0.88     11414
   macro avg       0.88      0.88      0.88     11414
weighted avg       0.89      0.88      0.88     11414

[0.90215589 0.77249357 0.82074959 0.89817937 0.91767174 0.98672822
 0.87618483]
VALID ACC : 0.8824250919922901, VALID LOSS : 0.36835945548922744
{'epoch': 1, 'train_loss': 0.38590807804715965, 'train_acc': 0.8865362149532711, 'valid_acc': 0.8824250919922901, 'val_loss': 0.36835945548922744, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 3


Training:   0%|          | 1/1070 [00:00<04:22,  4.07it/s]

Training steps: 0 Loss: 0.32078754901885986


Training:   9%|▉         | 101/1070 [00:23<03:34,  4.51it/s]

Training steps: 100 Loss: 0.33889704942703247


Training:  19%|█▉        | 201/1070 [00:46<03:32,  4.08it/s]

Training steps: 200 Loss: 0.5606036186218262


Training:  28%|██▊       | 301/1070 [01:09<02:50,  4.50it/s]

Training steps: 300 Loss: 0.1870296597480774


Training:  37%|███▋      | 401/1070 [01:32<02:42,  4.11it/s]

Training steps: 400 Loss: 0.2770157754421234


Training:  47%|████▋     | 501/1070 [01:55<02:06,  4.51it/s]

Training steps: 500 Loss: 0.19687581062316895


Training:  56%|█████▌    | 601/1070 [02:19<01:55,  4.07it/s]

Training steps: 600 Loss: 0.4100654125213623


Training:  66%|██████▌   | 701/1070 [02:42<01:21,  4.51it/s]

Training steps: 700 Loss: 0.42889392375946045


Training:  75%|███████▍  | 801/1070 [03:05<01:05,  4.08it/s]

Training steps: 800 Loss: 0.3634478747844696


Training:  84%|████████▍ | 901/1070 [03:28<00:37,  4.51it/s]

Training steps: 900 Loss: 0.25794780254364014


Training:  94%|█████████▎| 1001/1070 [03:51<00:16,  4.09it/s]

Training steps: 1000 Loss: 0.46067047119140625


Training: 100%|██████████| 1070/1070 [04:07<00:00,  4.32it/s]

TRAIN ACC : 0.9012558411214954, TRAIN LOSS : 0.32225761923625645



Training:   0%|          | 0/357 [00:00<?, ?it/s]

Validation steps: 0 Loss: 0.3084445893764496


Training:  29%|██▊       | 102/357 [00:07<00:18, 13.98it/s]

Validation steps: 100 Loss: 0.5821377038955688


Training:  57%|█████▋    | 202/357 [00:14<00:11, 13.98it/s]

Validation steps: 200 Loss: 0.3169921338558197


Training:  85%|████████▍ | 302/357 [00:21<00:03, 14.04it/s]

Validation steps: 300 Loss: 0.5610206723213196


Training: 100%|██████████| 357/357 [00:25<00:00, 14.01it/s]


              precision    recall  f1-score   support

        IT과학       0.84      0.85      0.85      1206
          경제       0.84      0.83      0.84      1556
          사회       0.78      0.80      0.79      1841
        생활문화       0.93      0.90      0.91      1483
          세계       0.92      0.93      0.92      1907
         스포츠       0.95      0.99      0.97      1733
          정치       0.93      0.89      0.91      1688

    accuracy                           0.89     11414
   macro avg       0.89      0.88      0.88     11414
weighted avg       0.89      0.89      0.89     11414

[0.84908789 0.83419023 0.801195   0.89750506 0.92815941 0.98845932
 0.88744076]
VALID ACC : 0.88566672507447, VALID LOSS : 0.367733107384096
{'epoch': 2, 'train_loss': 0.32225761923625645, 'train_acc': 0.9012558411214954, 'valid_acc': 0.88566672507447, 'val_loss': 0.367733107384096, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 4


Training:   0%|          | 1/1070 [00:00<04:18,  4.13it/s]

Training steps: 0 Loss: 0.16602982580661774


Training:   9%|▉         | 101/1070 [00:23<03:35,  4.50it/s]

Training steps: 100 Loss: 0.34807634353637695


Training:  19%|█▉        | 201/1070 [00:46<03:32,  4.08it/s]

Training steps: 200 Loss: 0.13393761217594147


Training:  28%|██▊       | 301/1070 [01:09<02:50,  4.50it/s]

Training steps: 300 Loss: 0.05484873428940773


Training:  37%|███▋      | 401/1070 [01:32<02:44,  4.07it/s]

Training steps: 400 Loss: 0.3406929075717926


Training:  47%|████▋     | 501/1070 [01:55<02:06,  4.49it/s]

Training steps: 500 Loss: 0.38387331366539


Training:  56%|█████▌    | 601/1070 [02:19<01:54,  4.09it/s]

Training steps: 600 Loss: 0.3756865859031677


Training:  66%|██████▌   | 701/1070 [02:42<01:22,  4.50it/s]

Training steps: 700 Loss: 0.14898161590099335


Training:  75%|███████▍  | 801/1070 [03:05<01:05,  4.10it/s]

Training steps: 800 Loss: 0.3245209753513336


Training:  84%|████████▍ | 901/1070 [03:28<00:37,  4.49it/s]

Training steps: 900 Loss: 0.3115713596343994


Training:  94%|█████████▎| 1001/1070 [03:51<00:17,  4.05it/s]

Training steps: 1000 Loss: 0.31482112407684326


Training: 100%|██████████| 1070/1070 [04:07<00:00,  4.32it/s]

TRAIN ACC : 0.9109228971962616, TRAIN LOSS : 0.28443932357444385



Training:   0%|          | 0/357 [00:00<?, ?it/s]

Validation steps: 0 Loss: 0.30396369099617004


Training:  29%|██▊       | 102/357 [00:07<00:18, 14.07it/s]

Validation steps: 100 Loss: 0.6336658000946045


Training:  57%|█████▋    | 202/357 [00:14<00:10, 14.13it/s]

Validation steps: 200 Loss: 0.2051793485879898


Training:  85%|████████▍ | 302/357 [00:21<00:03, 14.09it/s]

Validation steps: 300 Loss: 0.5417990684509277


Training: 100%|██████████| 357/357 [00:25<00:00, 14.09it/s]


              precision    recall  f1-score   support

        IT과학       0.82      0.89      0.85      1206
          경제       0.86      0.83      0.84      1556
          사회       0.77      0.81      0.79      1841
        생활문화       0.92      0.90      0.91      1483
          세계       0.94      0.90      0.92      1907
         스포츠       0.96      0.98      0.97      1733
          정치       0.93      0.89      0.91      1688

    accuracy                           0.89     11414
   macro avg       0.89      0.89      0.89     11414
weighted avg       0.89      0.89      0.89     11414

[0.88888889 0.83097686 0.8142314  0.90155091 0.9024646  0.98095788
 0.88744076]
VALID ACC : 0.8866304538286315, VALID LOSS : 0.36243788377285335
{'epoch': 3, 'train_loss': 0.28443932357444385, 'train_acc': 0.9109228971962616, 'valid_acc': 0.8866304538286315, 'val_loss': 0.36243788377285335, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 5


Training:   0%|          | 1/1070 [00:00<04:23,  4.05it/s]

Training steps: 0 Loss: 0.23242415487766266


Training:   9%|▉         | 101/1070 [00:23<03:35,  4.49it/s]

Training steps: 100 Loss: 0.3953257203102112


Training:  19%|█▉        | 201/1070 [00:46<03:32,  4.10it/s]

Training steps: 200 Loss: 0.2631278932094574


Training:  28%|██▊       | 301/1070 [01:09<02:51,  4.49it/s]

Training steps: 300 Loss: 0.4050326347351074


Training:  37%|███▋      | 401/1070 [01:32<02:43,  4.09it/s]

Training steps: 400 Loss: 0.2599172592163086


Training:  47%|████▋     | 501/1070 [01:55<02:06,  4.50it/s]

Training steps: 500 Loss: 0.14091749489307404


Training:  56%|█████▌    | 601/1070 [02:19<01:55,  4.07it/s]

Training steps: 600 Loss: 0.16116048395633698


Training:  66%|██████▌   | 701/1070 [02:42<01:22,  4.47it/s]

Training steps: 700 Loss: 0.1281466782093048


Training:  75%|███████▍  | 801/1070 [03:05<01:06,  4.06it/s]

Training steps: 800 Loss: 0.452654093503952


Training:  84%|████████▍ | 901/1070 [03:28<00:37,  4.52it/s]

Training steps: 900 Loss: 0.4968860149383545


Training:  94%|█████████▎| 1001/1070 [03:51<00:17,  4.05it/s]

Training steps: 1000 Loss: 0.19426381587982178


Training: 100%|██████████| 1070/1070 [04:08<00:00,  4.31it/s]

TRAIN ACC : 0.9190420560747663, TRAIN LOSS : 0.2565409727882002



Training:   0%|          | 0/357 [00:00<?, ?it/s]

Validation steps: 0 Loss: 0.3043569326400757


Training:  29%|██▊       | 102/357 [00:07<00:18, 14.10it/s]

Validation steps: 100 Loss: 0.6095123291015625


Training:  57%|█████▋    | 202/357 [00:14<00:10, 14.16it/s]

Validation steps: 200 Loss: 0.33574819564819336


Training:  85%|████████▍ | 302/357 [00:21<00:03, 14.09it/s]

Validation steps: 300 Loss: 0.7490357756614685


Training: 100%|██████████| 357/357 [00:25<00:00, 14.06it/s]


              precision    recall  f1-score   support

        IT과학       0.81      0.89      0.85      1206
          경제       0.88      0.79      0.84      1556
          사회       0.81      0.78      0.79      1841
        생활문화       0.89      0.93      0.91      1483
          세계       0.93      0.92      0.93      1907
         스포츠       0.96      0.98      0.97      1733
          정치       0.91      0.93      0.92      1688

    accuracy                           0.89     11414
   macro avg       0.88      0.89      0.89     11414
weighted avg       0.89      0.89      0.89     11414

[0.88723051 0.79498715 0.77512222 0.93189481 0.9187205  0.98442008
 0.92535545]
VALID ACC : 0.888032241107412, VALID LOSS : 0.385745698001747
{'epoch': 4, 'train_loss': 0.2565409727882002, 'train_acc': 0.9190420560747663, 'valid_acc': 0.888032241107412, 'val_loss': 0.385745698001747, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 6


Training:   0%|          | 1/1070 [00:00<04:22,  4.07it/s]

Training steps: 0 Loss: 0.12282495945692062


Training:   9%|▉         | 101/1070 [00:23<03:34,  4.52it/s]

Training steps: 100 Loss: 0.2254314422607422


Training:  19%|█▉        | 201/1070 [00:46<03:31,  4.10it/s]

Training steps: 200 Loss: 0.31676486134529114


Training:  28%|██▊       | 301/1070 [01:09<02:49,  4.52it/s]

Training steps: 300 Loss: 0.11377377063035965


Training:  37%|███▋      | 401/1070 [01:32<02:43,  4.09it/s]

Training steps: 400 Loss: 0.13143612444400787


Training:  47%|████▋     | 501/1070 [01:55<02:06,  4.51it/s]

Training steps: 500 Loss: 0.11025604605674744


Training:  56%|█████▌    | 601/1070 [02:18<01:54,  4.11it/s]

Training steps: 600 Loss: 0.40179285407066345


Training:  66%|██████▌   | 701/1070 [02:41<01:21,  4.51it/s]

Training steps: 700 Loss: 0.08818934857845306


Training:  75%|███████▍  | 801/1070 [03:04<01:05,  4.08it/s]

Training steps: 800 Loss: 0.22508925199508667


Training:  84%|████████▍ | 901/1070 [03:27<00:37,  4.53it/s]

Training steps: 900 Loss: 0.39169278740882874


Training:  94%|█████████▎| 1001/1070 [03:50<00:16,  4.12it/s]

Training steps: 1000 Loss: 0.3230728507041931


Training: 100%|██████████| 1070/1070 [04:06<00:00,  4.34it/s]

TRAIN ACC : 0.9288259345794393, TRAIN LOSS : 0.2264948653527231



Training:   0%|          | 0/357 [00:00<?, ?it/s]

Validation steps: 0 Loss: 0.32111528515815735


Training:  29%|██▊       | 102/357 [00:07<00:18, 14.04it/s]

Validation steps: 100 Loss: 0.5459609031677246


Training:  57%|█████▋    | 202/357 [00:14<00:11, 14.02it/s]

Validation steps: 200 Loss: 0.23212748765945435


Training:  85%|████████▍ | 302/357 [00:21<00:03, 13.99it/s]

Validation steps: 300 Loss: 0.8456640243530273


Training: 100%|██████████| 357/357 [00:25<00:00, 14.01it/s]

              precision    recall  f1-score   support

        IT과학       0.81      0.90      0.85      1206
          경제       0.87      0.82      0.84      1556
          사회       0.79      0.79      0.79      1841
        생활문화       0.93      0.89      0.91      1483
          세계       0.93      0.92      0.92      1907
         스포츠       0.96      0.98      0.97      1733
          정치       0.92      0.91      0.91      1688

    accuracy                           0.89     11414
   macro avg       0.89      0.89      0.89     11414
weighted avg       0.89      0.89      0.89     11414

[0.9013267  0.81748072 0.79413362 0.89413351 0.92081804 0.97749567
 0.91054502]
VALID ACC : 0.8878570176975644, VALID LOSS : 0.3902691114036476
{'epoch': 5, 'train_loss': 0.2264948653527231, 'train_acc': 0.9288259345794393, 'valid_acc': 0.8878570176975644, 'val_loss': 0.3902691114036476, 'learning_rate': 5e-06}
Start Training: Epoch 7



Training:   0%|          | 1/1070 [00:00<03:59,  4.46it/s]

Training steps: 0 Loss: 0.09981212019920349


Training:   9%|▉         | 101/1070 [00:23<03:34,  4.51it/s]

Training steps: 100 Loss: 0.06308074295520782


Training:  19%|█▉        | 201/1070 [00:46<03:34,  4.06it/s]

Training steps: 200 Loss: 0.2526775896549225


Training:  28%|██▊       | 301/1070 [01:09<02:50,  4.50it/s]

Training steps: 300 Loss: 0.37114036083221436


Training:  37%|███▋      | 401/1070 [01:32<02:42,  4.11it/s]

Training steps: 400 Loss: 0.2229321002960205


Training:  47%|████▋     | 501/1070 [01:55<02:06,  4.50it/s]

Training steps: 500 Loss: 0.17527471482753754


Training:  56%|█████▌    | 601/1070 [02:19<01:55,  4.07it/s]

Training steps: 600 Loss: 0.10229755192995071


Training:  66%|██████▌   | 701/1070 [02:42<01:21,  4.50it/s]

Training steps: 700 Loss: 0.4708544909954071


Training:  75%|███████▍  | 801/1070 [03:05<01:06,  4.07it/s]

Training steps: 800 Loss: 0.08095572888851166


Training:  84%|████████▍ | 901/1070 [03:28<00:37,  4.51it/s]

Training steps: 900 Loss: 0.26318010687828064


Training:  94%|█████████▎| 1001/1070 [03:51<00:16,  4.08it/s]

Training steps: 1000 Loss: 0.5833375453948975


Training: 100%|██████████| 1070/1070 [04:07<00:00,  4.32it/s]

TRAIN ACC : 0.9364778037383178, TRAIN LOSS : 0.20239292539516898



Training:   0%|          | 0/357 [00:00<?, ?it/s]

Validation steps: 0 Loss: 0.3734661340713501


Training:  29%|██▊       | 102/357 [00:07<00:18, 13.99it/s]

Validation steps: 100 Loss: 0.6296419501304626


Training:  57%|█████▋    | 202/357 [00:14<00:11, 13.99it/s]

Validation steps: 200 Loss: 0.28441789746284485


Training:  85%|████████▍ | 302/357 [00:21<00:03, 14.12it/s]

Validation steps: 300 Loss: 0.7309668064117432


Training: 100%|██████████| 357/357 [00:25<00:00, 14.01it/s]

              precision    recall  f1-score   support

        IT과학       0.81      0.90      0.85      1206
          경제       0.86      0.82      0.84      1556
          사회       0.76      0.81      0.78      1841
        생활문화       0.93      0.88      0.90      1483
          세계       0.93      0.91      0.92      1907
         스포츠       0.96      0.98      0.97      1733
          정치       0.93      0.88      0.90      1688

    accuracy                           0.88     11414
   macro avg       0.88      0.88      0.88     11414
weighted avg       0.88      0.88      0.88     11414

[0.89718076 0.82005141 0.80717002 0.88132165 0.91137913 0.97691864
 0.875     ]
VALID ACC : 0.8812861398282811, VALID LOSS : 0.4131907400562942
{'epoch': 6, 'train_loss': 0.20239292539516898, 'train_acc': 0.9364778037383178, 'valid_acc': 0.8812861398282811, 'val_loss': 0.4131907400562942, 'learning_rate': 5e-06}
Start Training: Epoch 8



Training:   0%|          | 1/1070 [00:00<04:04,  4.38it/s]

Training steps: 0 Loss: 0.09261006861925125


Training:   9%|▉         | 101/1070 [00:23<03:35,  4.49it/s]

Training steps: 100 Loss: 0.07055655121803284


Training:  19%|█▉        | 201/1070 [00:46<03:32,  4.09it/s]

Training steps: 200 Loss: 0.19159649312496185


Training:  28%|██▊       | 301/1070 [01:09<02:51,  4.48it/s]

Training steps: 300 Loss: 0.2869662642478943


Training:  37%|███▋      | 401/1070 [01:32<02:45,  4.05it/s]

Training steps: 400 Loss: 0.13153745234012604


Training:  47%|████▋     | 501/1070 [01:56<02:05,  4.52it/s]

Training steps: 500 Loss: 0.25175440311431885


Training:  56%|█████▌    | 601/1070 [02:19<01:54,  4.08it/s]

Training steps: 600 Loss: 0.1000201627612114


Training:  66%|██████▌   | 701/1070 [02:42<01:21,  4.52it/s]

Training steps: 700 Loss: 0.2762679159641266


Training:  75%|███████▍  | 801/1070 [03:05<01:04,  4.15it/s]

Training steps: 800 Loss: 0.1928168088197708


Training:  84%|████████▍ | 901/1070 [03:28<00:37,  4.51it/s]

Training steps: 900 Loss: 0.16518601775169373


Training:  94%|█████████▎| 1001/1070 [03:51<00:16,  4.08it/s]

Training steps: 1000 Loss: 0.2933264374732971


Training: 100%|██████████| 1070/1070 [04:07<00:00,  4.32it/s]

TRAIN ACC : 0.9466997663551402, TRAIN LOSS : 0.16964279359781018



Training:   0%|          | 0/357 [00:00<?, ?it/s]

Validation steps: 0 Loss: 0.25861695408821106


Training:  29%|██▊       | 102/357 [00:07<00:17, 14.17it/s]

Validation steps: 100 Loss: 0.7070347666740417


Training:  57%|█████▋    | 202/357 [00:14<00:10, 14.15it/s]

Validation steps: 200 Loss: 0.3224433660507202


Training:  85%|████████▍ | 302/357 [00:21<00:03, 14.16it/s]

Validation steps: 300 Loss: 0.8944447040557861


Training: 100%|██████████| 357/357 [00:25<00:00, 14.20it/s]

              precision    recall  f1-score   support

        IT과학       0.84      0.82      0.83      1206
          경제       0.83      0.84      0.83      1556
          사회       0.78      0.79      0.78      1841
        생활문화       0.93      0.89      0.91      1483
          세계       0.92      0.92      0.92      1907
         스포츠       0.96      0.98      0.97      1733
          정치       0.90      0.91      0.91      1688

    accuracy                           0.88     11414
   macro avg       0.88      0.88      0.88     11414
weighted avg       0.88      0.88      0.88     11414

[0.81757877 0.8374036  0.78924498 0.88739042 0.91924489 0.98268898
 0.91469194]
VALID ACC : 0.8811985281233573, VALID LOSS : 0.4563534707039678
{'epoch': 7, 'train_loss': 0.16964279359781018, 'train_acc': 0.9466997663551402, 'valid_acc': 0.8811985281233573, 'val_loss': 0.4563534707039678, 'learning_rate': 5e-06}
Start Training: Epoch 9



Training:   0%|          | 1/1070 [00:00<04:05,  4.36it/s]

Training steps: 0 Loss: 0.09902552515268326


Training:   9%|▉         | 101/1070 [00:23<03:34,  4.51it/s]

Training steps: 100 Loss: 0.1793956607580185


Training:  19%|█▉        | 201/1070 [00:46<03:32,  4.09it/s]

Training steps: 200 Loss: 0.08708552271127701


Training:  28%|██▊       | 301/1070 [01:09<02:50,  4.52it/s]

Training steps: 300 Loss: 0.18846574425697327


Training:  37%|███▋      | 401/1070 [01:32<02:44,  4.08it/s]

Training steps: 400 Loss: 0.0647859200835228


Training:  47%|████▋     | 501/1070 [01:55<02:05,  4.53it/s]

Training steps: 500 Loss: 0.09732288122177124


Training:  56%|█████▌    | 601/1070 [02:18<01:54,  4.10it/s]

Training steps: 600 Loss: 0.023553265258669853


Training:  66%|██████▌   | 701/1070 [02:41<01:21,  4.50it/s]

Training steps: 700 Loss: 0.2073773592710495


Training:  75%|███████▍  | 801/1070 [03:04<01:05,  4.11it/s]

Training steps: 800 Loss: 0.07129652053117752


Training:  84%|████████▍ | 901/1070 [03:27<00:37,  4.52it/s]

Training steps: 900 Loss: 0.08948245644569397


Training:  94%|█████████▎| 1001/1070 [03:51<00:17,  4.05it/s]

Training steps: 1000 Loss: 0.13357630372047424


Training: 100%|██████████| 1070/1070 [04:07<00:00,  4.33it/s]

TRAIN ACC : 0.9539135514018692, TRAIN LOSS : 0.1465983222995963



Training:   0%|          | 0/357 [00:00<?, ?it/s]

Validation steps: 0 Loss: 0.33749404549598694


Training:  29%|██▊       | 102/357 [00:07<00:17, 14.42it/s]

Validation steps: 100 Loss: 0.6259688138961792


Training:  57%|█████▋    | 202/357 [00:14<00:10, 14.28it/s]

Validation steps: 200 Loss: 0.26422828435897827


Training:  85%|████████▍ | 302/357 [00:21<00:03, 14.15it/s]

Validation steps: 300 Loss: 0.955329418182373


Training: 100%|██████████| 357/357 [00:25<00:00, 14.26it/s]

              precision    recall  f1-score   support

        IT과학       0.79      0.91      0.84      1206
          경제       0.85      0.83      0.84      1556
          사회       0.80      0.76      0.78      1841
        생활문화       0.91      0.90      0.90      1483
          세계       0.93      0.92      0.92      1907
         스포츠       0.96      0.98      0.97      1733
          정치       0.92      0.89      0.91      1688

    accuracy                           0.88     11414
   macro avg       0.88      0.88      0.88     11414
weighted avg       0.88      0.88      0.88     11414

[0.9079602  0.82969152 0.76045627 0.89683075 0.9155742  0.97749567
 0.89395735]
VALID ACC : 0.8818118100578237, VALID LOSS : 0.47713003885679217
{'epoch': 8, 'train_loss': 0.1465983222995963, 'train_acc': 0.9539135514018692, 'valid_acc': 0.8818118100578237, 'val_loss': 0.47713003885679217, 'learning_rate': 5e-06}
Start Training: Epoch 10



Training:   0%|          | 1/1070 [00:00<03:59,  4.46it/s]

Training steps: 0 Loss: 0.12221759557723999


Training:   9%|▉         | 101/1070 [00:23<03:35,  4.50it/s]

Training steps: 100 Loss: 0.18399322032928467


Training:  19%|█▉        | 201/1070 [00:46<03:31,  4.11it/s]

Training steps: 200 Loss: 0.3512265682220459


Training:  28%|██▊       | 301/1070 [01:09<02:50,  4.50it/s]

Training steps: 300 Loss: 0.07971198856830597


Training:  37%|███▋      | 401/1070 [01:32<02:45,  4.05it/s]

Training steps: 400 Loss: 0.04331526532769203


Training:  47%|████▋     | 501/1070 [01:55<02:06,  4.49it/s]

Training steps: 500 Loss: 0.029440537095069885


Training:  56%|█████▌    | 601/1070 [02:18<01:54,  4.10it/s]

Training steps: 600 Loss: 0.060346975922584534


Training:  66%|██████▌   | 701/1070 [02:41<01:21,  4.52it/s]

Training steps: 700 Loss: 0.25353917479515076


Training:  75%|███████▍  | 801/1070 [03:04<01:05,  4.08it/s]

Training steps: 800 Loss: 0.3925269842147827


Training:  84%|████████▍ | 901/1070 [03:27<00:37,  4.54it/s]

Training steps: 900 Loss: 0.21546192467212677


Training:  94%|█████████▎| 1001/1070 [03:51<00:16,  4.10it/s]

Training steps: 1000 Loss: 0.5843466520309448


Training: 100%|██████████| 1070/1070 [04:07<00:00,  4.33it/s]

TRAIN ACC : 0.9595794392523365, TRAIN LOSS : 0.1276731469294178



Training:   0%|          | 0/357 [00:00<?, ?it/s]

Validation steps: 0 Loss: 0.3783842921257019


Training:  29%|██▊       | 102/357 [00:07<00:18, 14.01it/s]

Validation steps: 100 Loss: 0.6664840579032898


Training:  57%|█████▋    | 202/357 [00:14<00:11, 14.07it/s]

Validation steps: 200 Loss: 0.23655909299850464


Training:  85%|████████▍ | 302/357 [00:21<00:03, 14.08it/s]

Validation steps: 300 Loss: 1.091096043586731


Training: 100%|██████████| 357/357 [00:25<00:00, 14.03it/s]


              precision    recall  f1-score   support

        IT과학       0.81      0.87      0.84      1206
          경제       0.84      0.81      0.83      1556
          사회       0.78      0.77      0.78      1841
        생활문화       0.90      0.90      0.90      1483
          세계       0.93      0.92      0.92      1907
         스포츠       0.96      0.97      0.97      1733
          정치       0.92      0.90      0.91      1688

    accuracy                           0.88     11414
   macro avg       0.88      0.88      0.88     11414
weighted avg       0.88      0.88      0.88     11414

[0.86981758 0.81426735 0.77186312 0.90424815 0.91609858 0.9746105
 0.8957346 ]
VALID ACC : 0.8783949535657964, VALID LOSS : 0.503993709845578
{'epoch': 9, 'train_loss': 0.1276731469294178, 'train_acc': 0.9595794392523365, 'valid_acc': 0.8783949535657964, 'val_loss': 0.503993709845578, 'learning_rate': 5e-06}
EarlyStopping counter: 5 out of 5


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.decoder.bias', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classif

Start Training: Epoch 1


Training:   0%|          | 1/1071 [00:00<04:08,  4.30it/s]

Training steps: 0 Loss: 1.9609253406524658


Training:   9%|▉         | 101/1071 [00:23<03:34,  4.53it/s]

Training steps: 100 Loss: 1.8344353437423706


Training:  19%|█▉        | 201/1071 [00:46<03:33,  4.07it/s]

Training steps: 200 Loss: 1.7923800945281982


Training:  28%|██▊       | 301/1071 [01:09<02:50,  4.53it/s]

Training steps: 300 Loss: 1.6047694683074951


Training:  37%|███▋      | 401/1071 [01:32<02:43,  4.11it/s]

Training steps: 400 Loss: 1.0730807781219482


Training:  47%|████▋     | 501/1071 [01:55<02:06,  4.52it/s]

Training steps: 500 Loss: 0.8804627060890198


Training:  56%|█████▌    | 601/1071 [02:18<01:54,  4.10it/s]

Training steps: 600 Loss: 0.6887491345405579


Training:  65%|██████▌   | 701/1071 [02:41<01:21,  4.55it/s]

Training steps: 700 Loss: 0.4894460439682007


Training:  75%|███████▍  | 801/1071 [03:04<01:06,  4.08it/s]

Training steps: 800 Loss: 0.6470314860343933


Training:  84%|████████▍ | 901/1071 [03:27<00:37,  4.52it/s]

Training steps: 900 Loss: 0.35304713249206543


Training:  93%|█████████▎| 1001/1071 [03:51<00:17,  4.10it/s]

Training steps: 1000 Loss: 0.6928544044494629


Training: 100%|██████████| 1071/1071 [04:07<00:00,  4.33it/s]

TRAIN ACC : 0.6238135568470548, TRAIN LOSS : 1.0466544766302537



Training:   0%|          | 0/357 [00:00<?, ?it/s]

Validation steps: 0 Loss: 0.379283607006073


Training:  29%|██▊       | 102/357 [00:07<00:17, 14.31it/s]

Validation steps: 100 Loss: 0.5311490893363953


Training:  57%|█████▋    | 202/357 [00:14<00:10, 14.38it/s]

Validation steps: 200 Loss: 0.2606584429740906


Training:  85%|████████▍ | 302/357 [00:21<00:03, 14.32it/s]

Validation steps: 300 Loss: 0.7761558890342712


Training: 100%|██████████| 357/357 [00:25<00:00, 14.26it/s]


              precision    recall  f1-score   support

        IT과학       0.79      0.90      0.84      1206
          경제       0.89      0.83      0.86      1556
          사회       0.77      0.81      0.79      1840
        생활문화       0.95      0.82      0.88      1483
          세계       0.92      0.94      0.93      1907
         스포츠       0.97      0.98      0.97      1733
          정치       0.92      0.90      0.91      1688

    accuracy                           0.89     11413
   macro avg       0.89      0.88      0.88     11413
weighted avg       0.89      0.89      0.89     11413

[0.90381426 0.8251928  0.81358696 0.8246797  0.9449397  0.97576457
 0.90165877]
VALID ACC : 0.8857443266450539, VALID LOSS : 0.40360934243482705
{'epoch': 0, 'train_loss': 1.0466544766302537, 'train_acc': 0.6238135568470548, 'valid_acc': 0.8857443266450539, 'val_loss': 0.40360934243482705, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 2


Training:   0%|          | 1/1071 [00:00<04:25,  4.02it/s]

Training steps: 0 Loss: 0.35393765568733215


Training:   9%|▉         | 101/1071 [00:23<03:34,  4.53it/s]

Training steps: 100 Loss: 0.47478699684143066


Training:  19%|█▉        | 201/1071 [00:46<03:30,  4.13it/s]

Training steps: 200 Loss: 0.5238931775093079


Training:  28%|██▊       | 301/1071 [01:09<02:51,  4.49it/s]

Training steps: 300 Loss: 0.25497522950172424


Training:  37%|███▋      | 401/1071 [01:32<02:43,  4.11it/s]

Training steps: 400 Loss: 0.17082059383392334


Training:  47%|████▋     | 501/1071 [01:55<02:06,  4.50it/s]

Training steps: 500 Loss: 0.2948898375034332


Training:  56%|█████▌    | 601/1071 [02:18<01:55,  4.08it/s]

Training steps: 600 Loss: 0.4117654263973236


Training:  65%|██████▌   | 701/1071 [02:41<01:21,  4.51it/s]

Training steps: 700 Loss: 0.4478543996810913


Training:  75%|███████▍  | 801/1071 [03:04<01:05,  4.12it/s]

Training steps: 800 Loss: 0.6248756051063538


Training:  84%|████████▍ | 901/1071 [03:27<00:37,  4.52it/s]

Training steps: 900 Loss: 0.34786003828048706


Training:  93%|█████████▎| 1001/1071 [03:50<00:17,  4.09it/s]

Training steps: 1000 Loss: 0.1761091947555542


Training: 100%|██████████| 1071/1071 [04:06<00:00,  4.34it/s]

TRAIN ACC : 0.8846996290996174, TRAIN LOSS : 0.39069746362339325



Training:   0%|          | 0/357 [00:00<?, ?it/s]

Validation steps: 0 Loss: 0.23550155758857727


Training:  29%|██▊       | 102/357 [00:07<00:17, 14.26it/s]

Validation steps: 100 Loss: 0.4204934537410736


Training:  57%|█████▋    | 202/357 [00:14<00:10, 14.24it/s]

Validation steps: 200 Loss: 0.18424566090106964


Training:  85%|████████▍ | 302/357 [00:21<00:03, 14.22it/s]

Validation steps: 300 Loss: 0.7402742505073547


Training: 100%|██████████| 357/357 [00:25<00:00, 14.22it/s]


              precision    recall  f1-score   support

        IT과학       0.86      0.78      0.82      1206
          경제       0.83      0.88      0.85      1556
          사회       0.78      0.81      0.80      1840
        생활문화       0.93      0.87      0.90      1483
          세계       0.92      0.95      0.93      1907
         스포츠       0.97      0.97      0.97      1733
          정치       0.93      0.90      0.92      1688

    accuracy                           0.89     11413
   macro avg       0.89      0.88      0.88     11413
weighted avg       0.89      0.89      0.89     11413

[0.78192371 0.88046272 0.8125     0.86918409 0.95070792 0.97057126
 0.90402844]
VALID ACC : 0.8865329010777184, VALID LOSS : 0.3610029924942666
{'epoch': 1, 'train_loss': 0.39069746362339325, 'train_acc': 0.8846996290996174, 'valid_acc': 0.8865329010777184, 'val_loss': 0.3610029924942666, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 3


Training:   0%|          | 1/1071 [00:00<04:19,  4.12it/s]

Training steps: 0 Loss: 0.29751408100128174


Training:   9%|▉         | 101/1071 [00:23<03:36,  4.49it/s]

Training steps: 100 Loss: 0.20880602300167084


Training:  19%|█▉        | 201/1071 [00:46<03:30,  4.12it/s]

Training steps: 200 Loss: 0.16410337388515472


Training:  28%|██▊       | 301/1071 [01:09<02:51,  4.50it/s]

Training steps: 300 Loss: 0.0928683951497078


Training:  37%|███▋      | 401/1071 [01:32<02:44,  4.07it/s]

Training steps: 400 Loss: 0.27344921231269836


Training:  47%|████▋     | 501/1071 [01:55<02:06,  4.51it/s]

Training steps: 500 Loss: 0.11629227548837662


Training:  56%|█████▌    | 601/1071 [02:19<01:54,  4.09it/s]

Training steps: 600 Loss: 0.5592326521873474


Training:  65%|██████▌   | 701/1071 [02:42<01:21,  4.52it/s]

Training steps: 700 Loss: 0.15035684406757355


Training:  75%|███████▍  | 801/1071 [03:05<01:06,  4.08it/s]

Training steps: 800 Loss: 0.36402931809425354


Training:  84%|████████▍ | 901/1071 [03:28<00:37,  4.50it/s]

Training steps: 900 Loss: 0.3334782123565674


Training:  93%|█████████▎| 1001/1071 [03:51<00:17,  4.09it/s]

Training steps: 1000 Loss: 0.48841920495033264


Training: 100%|██████████| 1071/1071 [04:08<00:00,  4.32it/s]

TRAIN ACC : 0.8975497210945942, TRAIN LOSS : 0.33282106768872055



Training:   0%|          | 0/357 [00:00<?, ?it/s]

Validation steps: 0 Loss: 0.3381466865539551


Training:  29%|██▊       | 102/357 [00:07<00:18, 14.14it/s]

Validation steps: 100 Loss: 0.2828441858291626


Training:  57%|█████▋    | 202/357 [00:14<00:10, 14.31it/s]

Validation steps: 200 Loss: 0.14779292047023773


Training:  85%|████████▍ | 302/357 [00:21<00:03, 14.48it/s]

Validation steps: 300 Loss: 0.7351604104042053


Training: 100%|██████████| 357/357 [00:25<00:00, 14.25it/s]


              precision    recall  f1-score   support

        IT과학       0.80      0.92      0.86      1206
          경제       0.93      0.79      0.85      1556
          사회       0.78      0.82      0.80      1840
        생활문화       0.92      0.88      0.90      1483
          세계       0.92      0.94      0.93      1907
         스포츠       0.96      0.98      0.97      1733
          정치       0.93      0.91      0.92      1688

    accuracy                           0.89     11413
   macro avg       0.89      0.89      0.89     11413
weighted avg       0.90      0.89      0.89     11413

[0.91625207 0.79048843 0.825      0.87997303 0.94389093 0.97922677
 0.90580569]
VALID ACC : 0.8923157802505914, VALID LOSS : 0.348846246924053
{'epoch': 2, 'train_loss': 0.33282106768872055, 'train_acc': 0.8975497210945942, 'valid_acc': 0.8923157802505914, 'val_loss': 0.348846246924053, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 4


Training:   0%|          | 1/1071 [00:00<04:29,  3.98it/s]

Training steps: 0 Loss: 0.2903009355068207


Training:   9%|▉         | 101/1071 [00:23<03:35,  4.50it/s]

Training steps: 100 Loss: 0.24786104261875153


Training:  19%|█▉        | 201/1071 [00:46<03:33,  4.07it/s]

Training steps: 200 Loss: 0.1290065050125122


Training:  28%|██▊       | 301/1071 [01:09<02:50,  4.52it/s]

Training steps: 300 Loss: 0.5481109023094177


Training:  37%|███▋      | 401/1071 [01:32<02:44,  4.08it/s]

Training steps: 400 Loss: 0.33268147706985474


Training:  47%|████▋     | 501/1071 [01:56<02:06,  4.51it/s]

Training steps: 500 Loss: 0.404799222946167


Training:  56%|█████▌    | 601/1071 [02:19<01:55,  4.08it/s]

Training steps: 600 Loss: 0.18372806906700134


Training:  65%|██████▌   | 701/1071 [02:42<01:22,  4.50it/s]

Training steps: 700 Loss: 0.4633829891681671


Training:  75%|███████▍  | 801/1071 [03:05<01:06,  4.08it/s]

Training steps: 800 Loss: 0.7308379411697388


Training:  84%|████████▍ | 901/1071 [03:28<00:37,  4.52it/s]

Training steps: 900 Loss: 0.6620991230010986


Training:  93%|█████████▎| 1001/1071 [03:51<00:17,  4.08it/s]

Training steps: 1000 Loss: 0.29747670888900757


Training: 100%|██████████| 1071/1071 [04:08<00:00,  4.32it/s]

TRAIN ACC : 0.9074793376361672, TRAIN LOSS : 0.29311387802984645



Training:   0%|          | 0/357 [00:00<?, ?it/s]

Validation steps: 0 Loss: 0.3032280206680298


Training:  29%|██▊       | 102/357 [00:07<00:17, 14.29it/s]

Validation steps: 100 Loss: 0.2904362380504608


Training:  57%|█████▋    | 202/357 [00:14<00:10, 14.18it/s]

Validation steps: 200 Loss: 0.13319022953510284


Training:  85%|████████▍ | 302/357 [00:21<00:03, 14.16it/s]

Validation steps: 300 Loss: 0.7360470294952393


Training: 100%|██████████| 357/357 [00:25<00:00, 14.18it/s]


              precision    recall  f1-score   support

        IT과학       0.83      0.88      0.86      1206
          경제       0.91      0.81      0.86      1556
          사회       0.79      0.82      0.80      1840
        생활문화       0.91      0.90      0.90      1483
          세계       0.93      0.94      0.93      1907
         스포츠       0.97      0.98      0.97      1733
          정치       0.92      0.92      0.92      1688

    accuracy                           0.89     11413
   macro avg       0.89      0.89      0.89     11413
weighted avg       0.90      0.89      0.89     11413

[0.88059701 0.81426735 0.81847826 0.89548213 0.94336654 0.97518754
 0.92120853]
VALID ACC : 0.8943310260229563, VALID LOSS : 0.3483430749739335
{'epoch': 3, 'train_loss': 0.29311387802984645, 'train_acc': 0.9074793376361672, 'valid_acc': 0.8943310260229563, 'val_loss': 0.3483430749739335, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 5


Training:   0%|          | 1/1071 [00:00<04:30,  3.96it/s]

Training steps: 0 Loss: 0.22365505993366241


Training:   9%|▉         | 101/1071 [00:23<03:34,  4.51it/s]

Training steps: 100 Loss: 0.13603657484054565


Training:  19%|█▉        | 201/1071 [00:46<03:32,  4.09it/s]

Training steps: 200 Loss: 0.2985054850578308


Training:  28%|██▊       | 301/1071 [01:09<02:51,  4.48it/s]

Training steps: 300 Loss: 0.16068798303604126


Training:  37%|███▋      | 401/1071 [01:32<02:44,  4.07it/s]

Training steps: 400 Loss: 0.20942994952201843


Training:  47%|████▋     | 501/1071 [01:55<02:06,  4.50it/s]

Training steps: 500 Loss: 0.24615821242332458


Training:  56%|█████▌    | 601/1071 [02:19<01:55,  4.07it/s]

Training steps: 600 Loss: 0.16631516814231873


Training:  65%|██████▌   | 701/1071 [02:42<01:22,  4.50it/s]

Training steps: 700 Loss: 0.21729230880737305


Training:  75%|███████▍  | 801/1071 [03:05<01:06,  4.08it/s]

Training steps: 800 Loss: 0.2589763104915619


Training:  84%|████████▍ | 901/1071 [03:28<00:37,  4.51it/s]

Training steps: 900 Loss: 0.2475394606590271


Training:  93%|█████████▎| 1001/1071 [03:51<00:17,  4.09it/s]

Training steps: 1000 Loss: 0.3974955379962921


Training: 100%|██████████| 1071/1071 [04:08<00:00,  4.32it/s]

TRAIN ACC : 0.9171753161414679, TRAIN LOSS : 0.25753546366511554



Training:   0%|          | 0/357 [00:00<?, ?it/s]

Validation steps: 0 Loss: 0.4360278248786926


Training:  29%|██▊       | 102/357 [00:07<00:17, 14.20it/s]

Validation steps: 100 Loss: 0.19122999906539917


Training:  57%|█████▋    | 202/357 [00:14<00:10, 14.19it/s]

Validation steps: 200 Loss: 0.1287708729505539


Training:  85%|████████▍ | 302/357 [00:21<00:03, 14.18it/s]

Validation steps: 300 Loss: 0.8741803169250488


Training: 100%|██████████| 357/357 [00:25<00:00, 14.20it/s]

              precision    recall  f1-score   support

        IT과학       0.78      0.93      0.85      1206
          경제       0.90      0.84      0.87      1556
          사회       0.80      0.80      0.80      1840
        생활문화       0.91      0.89      0.90      1483
          세계       0.95      0.92      0.94      1907
         스포츠       0.96      0.98      0.97      1733
          정치       0.93      0.90      0.92      1688

    accuracy                           0.89     11413
   macro avg       0.89      0.89      0.89     11413
weighted avg       0.89      0.89      0.89     11413

[0.92537313 0.83547558 0.79728261 0.8934592  0.92186681 0.97864974
 0.90106635]
VALID ACC : 0.8922281608691842, VALID LOSS : 0.3653007650471368
{'epoch': 4, 'train_loss': 0.25753546366511554, 'train_acc': 0.9171753161414679, 'valid_acc': 0.8922281608691842, 'val_loss': 0.3653007650471368, 'learning_rate': 5e-06}
Start Training: Epoch 6



Training:   0%|          | 1/1071 [00:00<03:58,  4.48it/s]

Training steps: 0 Loss: 0.4136578142642975


Training:   9%|▉         | 101/1071 [00:23<03:35,  4.50it/s]

Training steps: 100 Loss: 0.3063028156757355


Training:  19%|█▉        | 201/1071 [00:46<03:34,  4.06it/s]

Training steps: 200 Loss: 0.11353777348995209


Training:  28%|██▊       | 301/1071 [01:09<02:50,  4.51it/s]

Training steps: 300 Loss: 0.2983796000480652


Training:  37%|███▋      | 401/1071 [01:32<02:45,  4.06it/s]

Training steps: 400 Loss: 0.21031047403812408


Training:  47%|████▋     | 501/1071 [01:56<02:06,  4.50it/s]

Training steps: 500 Loss: 0.053772278130054474


Training:  56%|█████▌    | 601/1071 [02:19<01:55,  4.07it/s]

Training steps: 600 Loss: 0.182178795337677


Training:  65%|██████▌   | 701/1071 [02:42<01:22,  4.49it/s]

Training steps: 700 Loss: 0.19315581023693085


Training:  75%|███████▍  | 801/1071 [03:05<01:06,  4.09it/s]

Training steps: 800 Loss: 0.2253851294517517


Training:  84%|████████▍ | 901/1071 [03:28<00:37,  4.50it/s]

Training steps: 900 Loss: 0.40049096941947937


Training:  93%|█████████▎| 1001/1071 [03:51<00:17,  4.11it/s]

Training steps: 1000 Loss: 0.501948356628418


Training: 100%|██████████| 1071/1071 [04:08<00:00,  4.32it/s]

TRAIN ACC : 0.925732309219941, TRAIN LOSS : 0.2322828465060485



Training:   0%|          | 0/357 [00:00<?, ?it/s]

Validation steps: 0 Loss: 0.26906079053878784


Training:  29%|██▊       | 102/357 [00:07<00:18, 14.05it/s]

Validation steps: 100 Loss: 0.24106484651565552


Training:  57%|█████▋    | 202/357 [00:14<00:10, 14.26it/s]

Validation steps: 200 Loss: 0.093195341527462


Training:  85%|████████▍ | 302/357 [00:21<00:03, 14.31it/s]

Validation steps: 300 Loss: 0.906152606010437


Training: 100%|██████████| 357/357 [00:25<00:00, 14.20it/s]

              precision    recall  f1-score   support

        IT과학       0.81      0.89      0.85      1206
          경제       0.88      0.84      0.86      1556
          사회       0.81      0.79      0.80      1840
        생활문화       0.90      0.90      0.90      1483
          세계       0.94      0.93      0.94      1907
         스포츠       0.97      0.98      0.97      1733
          정치       0.93      0.91      0.92      1688

    accuracy                           0.89     11413
   macro avg       0.89      0.89      0.89     11413
weighted avg       0.89      0.89      0.89     11413

[0.8946932  0.84061697 0.79402174 0.9008766  0.9349764  0.9763416
 0.9063981 ]
VALID ACC : 0.8927538771576272, VALID LOSS : 0.3700796166325317
{'epoch': 5, 'train_loss': 0.2322828465060485, 'train_acc': 0.925732309219941, 'valid_acc': 0.8927538771576272, 'val_loss': 0.3700796166325317, 'learning_rate': 5e-06}
Start Training: Epoch 7



Training:   0%|          | 1/1071 [00:00<04:05,  4.36it/s]

Training steps: 0 Loss: 0.12021400034427643


Training:   9%|▉         | 101/1071 [00:23<03:35,  4.49it/s]

Training steps: 100 Loss: 0.1550673097372055


Training:  19%|█▉        | 201/1071 [00:46<03:32,  4.09it/s]

Training steps: 200 Loss: 0.16813386976718903


Training:  28%|██▊       | 301/1071 [01:09<02:50,  4.50it/s]

Training steps: 300 Loss: 0.1457785964012146


Training:  37%|███▋      | 401/1071 [01:33<02:44,  4.07it/s]

Training steps: 400 Loss: 0.25135502219200134


Training:  47%|████▋     | 501/1071 [01:56<02:07,  4.48it/s]

Training steps: 500 Loss: 0.14671318233013153


Training:  56%|█████▌    | 601/1071 [02:19<01:55,  4.07it/s]

Training steps: 600 Loss: 0.0835333839058876


Training:  65%|██████▌   | 701/1071 [02:42<01:22,  4.48it/s]

Training steps: 700 Loss: 0.23407484591007233


Training:  75%|███████▍  | 801/1071 [03:05<01:06,  4.08it/s]

Training steps: 800 Loss: 0.16471682488918304


Training:  84%|████████▍ | 901/1071 [03:28<00:37,  4.50it/s]

Training steps: 900 Loss: 0.14738474786281586


Training:  93%|█████████▎| 1001/1071 [03:52<00:17,  4.08it/s]

Training steps: 1000 Loss: 0.20422348380088806


Training: 100%|██████████| 1071/1071 [04:08<00:00,  4.31it/s]

TRAIN ACC : 0.9348733973890949, TRAIN LOSS : 0.2038200865269236



Training:   0%|          | 0/357 [00:00<?, ?it/s]

Validation steps: 0 Loss: 0.23674951493740082


Training:  29%|██▊       | 102/357 [00:07<00:17, 14.25it/s]

Validation steps: 100 Loss: 0.26350137591362


Training:  57%|█████▋    | 202/357 [00:14<00:10, 14.27it/s]

Validation steps: 200 Loss: 0.07443471997976303


Training:  85%|████████▍ | 302/357 [00:21<00:03, 14.29it/s]

Validation steps: 300 Loss: 0.899925708770752


Training: 100%|██████████| 357/357 [00:25<00:00, 14.22it/s]

              precision    recall  f1-score   support

        IT과학       0.81      0.89      0.85      1206
          경제       0.90      0.81      0.85      1556
          사회       0.79      0.80      0.80      1840
        생활문화       0.90      0.90      0.90      1483
          세계       0.92      0.94      0.93      1907
         스포츠       0.97      0.97      0.97      1733
          정치       0.92      0.91      0.92      1688

    accuracy                           0.89     11413
   macro avg       0.89      0.89      0.89     11413
weighted avg       0.89      0.89      0.89     11413

[0.89054726 0.81491003 0.79728261 0.89548213 0.93812271 0.9694172
 0.91469194]
VALID ACC : 0.8893367212827478, VALID LOSS : 0.392443310808526
{'epoch': 6, 'train_loss': 0.2038200865269236, 'train_acc': 0.9348733973890949, 'valid_acc': 0.8893367212827478, 'val_loss': 0.392443310808526, 'learning_rate': 5e-06}
Start Training: Epoch 8



Training:   0%|          | 1/1071 [00:00<04:02,  4.41it/s]

Training steps: 0 Loss: 0.24877725541591644


Training:   9%|▉         | 101/1071 [00:23<03:35,  4.50it/s]

Training steps: 100 Loss: 0.15904641151428223


Training:  19%|█▉        | 201/1071 [00:46<03:32,  4.10it/s]

Training steps: 200 Loss: 0.11049042642116547


Training:  28%|██▊       | 301/1071 [01:09<02:51,  4.50it/s]

Training steps: 300 Loss: 0.3902752995491028


Training:  37%|███▋      | 401/1071 [01:32<02:44,  4.07it/s]

Training steps: 400 Loss: 0.3678058981895447


Training:  47%|████▋     | 501/1071 [01:56<02:06,  4.50it/s]

Training steps: 500 Loss: 0.14772722125053406


Training:  56%|█████▌    | 601/1071 [02:19<01:54,  4.10it/s]

Training steps: 600 Loss: 0.07276905328035355


Training:  65%|██████▌   | 701/1071 [02:42<01:22,  4.49it/s]

Training steps: 700 Loss: 0.1979200392961502


Training:  75%|███████▍  | 801/1071 [03:05<01:06,  4.08it/s]

Training steps: 800 Loss: 0.4573931396007538


Training:  84%|████████▍ | 901/1071 [03:28<00:37,  4.49it/s]

Training steps: 900 Loss: 0.08638692647218704


Training:  93%|█████████▎| 1001/1071 [03:52<00:17,  4.06it/s]

Training steps: 1000 Loss: 0.09886468946933746


Training: 100%|██████████| 1071/1071 [04:08<00:00,  4.31it/s]

TRAIN ACC : 0.9435764142402383, TRAIN LOSS : 0.1780086933860397



Training:   0%|          | 0/357 [00:00<?, ?it/s]

Validation steps: 0 Loss: 0.3129030168056488


Training:  29%|██▊       | 102/357 [00:07<00:17, 14.24it/s]

Validation steps: 100 Loss: 0.3169747591018677


Training:  57%|█████▋    | 202/357 [00:14<00:10, 14.20it/s]

Validation steps: 200 Loss: 0.04284375533461571


Training:  85%|████████▍ | 302/357 [00:21<00:03, 14.10it/s]

Validation steps: 300 Loss: 0.9892446398735046


Training: 100%|██████████| 357/357 [00:25<00:00, 14.20it/s]

              precision    recall  f1-score   support

        IT과학       0.81      0.88      0.84      1206
          경제       0.89      0.83      0.86      1556
          사회       0.79      0.79      0.79      1840
        생활문화       0.90      0.88      0.89      1483
          세계       0.93      0.93      0.93      1907
         스포츠       0.96      0.98      0.97      1733
          정치       0.92      0.92      0.92      1688

    accuracy                           0.89     11413
   macro avg       0.89      0.89      0.89     11413
weighted avg       0.89      0.89      0.89     11413

[0.87976783 0.82904884 0.79076087 0.87592717 0.93130572 0.97980381
 0.92180095]
VALID ACC : 0.8880224305616402, VALID LOSS : 0.42753393467025264
{'epoch': 7, 'train_loss': 0.1780086933860397, 'train_acc': 0.9435764142402383, 'valid_acc': 0.8880224305616402, 'val_loss': 0.42753393467025264, 'learning_rate': 5e-06}
Start Training: Epoch 9



Training:   0%|          | 1/1071 [00:00<04:04,  4.37it/s]

Training steps: 0 Loss: 0.25259509682655334


Training:   9%|▉         | 101/1071 [00:23<03:35,  4.50it/s]

Training steps: 100 Loss: 0.10706213861703873


Training:  19%|█▉        | 201/1071 [00:46<03:33,  4.07it/s]

Training steps: 200 Loss: 0.07116107642650604


Training:  28%|██▊       | 301/1071 [01:09<02:51,  4.50it/s]

Training steps: 300 Loss: 0.19254431128501892


Training:  37%|███▋      | 401/1071 [01:33<02:43,  4.09it/s]

Training steps: 400 Loss: 0.06188201159238815


Training:  47%|████▋     | 501/1071 [01:56<02:07,  4.48it/s]

Training steps: 500 Loss: 0.06742389500141144


Training:  56%|█████▌    | 601/1071 [02:19<01:55,  4.07it/s]

Training steps: 600 Loss: 0.0383361354470253


Training:  65%|██████▌   | 701/1071 [02:42<01:22,  4.51it/s]

Training steps: 700 Loss: 0.10328739881515503


Training:  75%|███████▍  | 801/1071 [03:05<01:06,  4.06it/s]

Training steps: 800 Loss: 0.05294651538133621


Training:  84%|████████▍ | 901/1071 [03:28<00:37,  4.51it/s]

Training steps: 900 Loss: 0.19435745477676392


Training:  93%|█████████▎| 1001/1071 [03:52<00:17,  4.05it/s]

Training steps: 1000 Loss: 0.1651044338941574


Training: 100%|██████████| 1071/1071 [04:08<00:00,  4.31it/s]

TRAIN ACC : 0.9522210215823136, TRAIN LOSS : 0.15122795601490185



Training:   0%|          | 0/357 [00:00<?, ?it/s]

Validation steps: 0 Loss: 0.29626137018203735


Training:  29%|██▊       | 102/357 [00:07<00:17, 14.32it/s]

Validation steps: 100 Loss: 0.2836596667766571


Training:  57%|█████▋    | 202/357 [00:14<00:10, 14.14it/s]

Validation steps: 200 Loss: 0.053250450640916824


Training:  85%|████████▍ | 302/357 [00:21<00:03, 14.20it/s]

Validation steps: 300 Loss: 1.1256844997406006


Training: 100%|██████████| 357/357 [00:25<00:00, 14.21it/s]


              precision    recall  f1-score   support

        IT과학       0.85      0.82      0.83      1206
          경제       0.84      0.87      0.85      1556
          사회       0.78      0.79      0.79      1840
        생활문화       0.90      0.88      0.89      1483
          세계       0.93      0.93      0.93      1907
         스포츠       0.97      0.98      0.97      1733
          정치       0.92      0.90      0.91      1688

    accuracy                           0.89     11413
   macro avg       0.88      0.88      0.88     11413
weighted avg       0.89      0.89      0.89     11413

[0.82338308 0.87275064 0.78967391 0.87997303 0.93235448 0.97576457
 0.90402844]
VALID ACC : 0.8853062297380181, VALID LOSS : 0.4432061989022111
{'epoch': 8, 'train_loss': 0.15122795601490185, 'train_acc': 0.9522210215823136, 'valid_acc': 0.8853062297380181, 'val_loss': 0.4432061989022111, 'learning_rate': 5e-06}
EarlyStopping counter: 5 out of 5


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.decoder.bias', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classif

Start Training: Epoch 1


Training:   0%|          | 1/1071 [00:00<04:13,  4.23it/s]

Training steps: 0 Loss: 1.9147484302520752


Training:   9%|▉         | 101/1071 [00:23<03:36,  4.48it/s]

Training steps: 100 Loss: 1.8714510202407837


Training:  19%|█▉        | 201/1071 [00:46<03:33,  4.08it/s]

Training steps: 200 Loss: 1.8690991401672363


Training:  28%|██▊       | 301/1071 [01:09<02:51,  4.50it/s]

Training steps: 300 Loss: 1.5439491271972656


Training:  37%|███▋      | 401/1071 [01:33<02:44,  4.08it/s]

Training steps: 400 Loss: 1.1554570198059082


Training:  47%|████▋     | 501/1071 [01:56<02:06,  4.50it/s]

Training steps: 500 Loss: 0.8069637417793274


Training:  56%|█████▌    | 601/1071 [02:19<01:55,  4.07it/s]

Training steps: 600 Loss: 0.5206065773963928


Training:  65%|██████▌   | 701/1071 [02:42<01:22,  4.51it/s]

Training steps: 700 Loss: 0.38858872652053833


Training:  75%|███████▍  | 801/1071 [03:05<01:06,  4.08it/s]

Training steps: 800 Loss: 0.36589890718460083


Training:  84%|████████▍ | 901/1071 [03:28<00:37,  4.48it/s]

Training steps: 900 Loss: 0.740342378616333


Training:  93%|█████████▎| 1001/1071 [03:52<00:17,  4.09it/s]

Training steps: 1000 Loss: 0.47445058822631836


Training: 100%|██████████| 1071/1071 [04:08<00:00,  4.31it/s]

TRAIN ACC : 0.6530183113810928, TRAIN LOSS : 0.9893692746188015



Training:   0%|          | 0/357 [00:00<?, ?it/s]

Validation steps: 0 Loss: 0.06375382840633392


Training:  29%|██▊       | 102/357 [00:07<00:18, 14.08it/s]

Validation steps: 100 Loss: 0.27950742840766907


Training:  57%|█████▋    | 202/357 [00:14<00:10, 14.15it/s]

Validation steps: 200 Loss: 0.49927493929862976


Training:  85%|████████▍ | 302/357 [00:21<00:03, 14.11it/s]

Validation steps: 300 Loss: 0.42652449011802673


Training: 100%|██████████| 357/357 [00:25<00:00, 14.04it/s]


              precision    recall  f1-score   support

        IT과학       0.81      0.85      0.83      1206
          경제       0.86      0.81      0.84      1555
          사회       0.75      0.80      0.77      1840
        생활문화       0.92      0.86      0.89      1484
          세계       0.93      0.93      0.93      1907
         스포츠       0.96      0.98      0.97      1733
          정치       0.92      0.91      0.91      1688

    accuracy                           0.88     11413
   macro avg       0.88      0.88      0.88     11413
weighted avg       0.88      0.88      0.88     11413

[0.85489221 0.80836013 0.79782609 0.85579515 0.92815941 0.97922677
 0.90699052]
VALID ACC : 0.8782966792254446, VALID LOSS : 0.41162291897528647
{'epoch': 0, 'train_loss': 0.9893692746188015, 'train_acc': 0.6530183113810928, 'valid_acc': 0.8782966792254446, 'val_loss': 0.41162291897528647, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 2


Training:   0%|          | 1/1071 [00:00<04:20,  4.12it/s]

Training steps: 0 Loss: 0.4121205806732178


Training:   9%|▉         | 101/1071 [00:23<03:37,  4.46it/s]

Training steps: 100 Loss: 0.6158592104911804


Training:  19%|█▉        | 201/1071 [00:46<03:34,  4.05it/s]

Training steps: 200 Loss: 0.42226123809814453


Training:  28%|██▊       | 301/1071 [01:09<02:51,  4.50it/s]

Training steps: 300 Loss: 0.43484827876091003


Training:  37%|███▋      | 401/1071 [01:33<02:45,  4.06it/s]

Training steps: 400 Loss: 0.59819495677948


Training:  47%|████▋     | 501/1071 [01:56<02:07,  4.48it/s]

Training steps: 500 Loss: 0.37711301445961


Training:  56%|█████▌    | 601/1071 [02:19<01:54,  4.09it/s]

Training steps: 600 Loss: 0.41952812671661377


Training:  65%|██████▌   | 701/1071 [02:42<01:22,  4.48it/s]

Training steps: 700 Loss: 0.2728714942932129


Training:  75%|███████▍  | 801/1071 [03:06<01:06,  4.06it/s]

Training steps: 800 Loss: 0.5510279536247253


Training:  84%|████████▍ | 901/1071 [03:29<00:37,  4.52it/s]

Training steps: 900 Loss: 0.20062732696533203


Training:  93%|█████████▎| 1001/1071 [03:52<00:17,  4.06it/s]

Training steps: 1000 Loss: 0.3096766173839569


Training: 100%|██████████| 1071/1071 [04:08<00:00,  4.31it/s]

TRAIN ACC : 0.8869483951987384, TRAIN LOSS : 0.38388580792730487



Training:   0%|          | 0/357 [00:00<?, ?it/s]

Validation steps: 0 Loss: 0.040528591722249985


Training:  29%|██▊       | 102/357 [00:07<00:18, 14.04it/s]

Validation steps: 100 Loss: 0.22127380967140198


Training:  57%|█████▋    | 202/357 [00:14<00:11, 14.03it/s]

Validation steps: 200 Loss: 0.5084813237190247


Training:  85%|████████▍ | 302/357 [00:21<00:03, 13.93it/s]

Validation steps: 300 Loss: 0.5738239288330078


Training: 100%|██████████| 357/357 [00:25<00:00, 13.99it/s]


              precision    recall  f1-score   support

        IT과학       0.79      0.93      0.85      1206
          경제       0.90      0.79      0.84      1555
          사회       0.78      0.80      0.79      1840
        생활문화       0.90      0.89      0.90      1484
          세계       0.93      0.93      0.93      1907
         스포츠       0.96      0.98      0.97      1733
          정치       0.93      0.89      0.91      1688

    accuracy                           0.89     11413
   macro avg       0.89      0.89      0.88     11413
weighted avg       0.89      0.89      0.89     11413

[0.92951907 0.78906752 0.80163043 0.88746631 0.92711064 0.98038084
 0.88862559]
VALID ACC : 0.8855690878822395, VALID LOSS : 0.3794613568326171
{'epoch': 1, 'train_loss': 0.38388580792730487, 'train_acc': 0.8869483951987384, 'valid_acc': 0.8855690878822395, 'val_loss': 0.3794613568326171, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 3


Training:   0%|          | 1/1071 [00:00<04:16,  4.17it/s]

Training steps: 0 Loss: 0.5206873416900635


Training:   9%|▉         | 101/1071 [00:23<03:36,  4.48it/s]

Training steps: 100 Loss: 0.258022278547287


Training:  19%|█▉        | 201/1071 [00:46<03:32,  4.09it/s]

Training steps: 200 Loss: 0.24705353379249573


Training:  28%|██▊       | 301/1071 [01:09<02:51,  4.49it/s]

Training steps: 300 Loss: 0.536747395992279


Training:  37%|███▋      | 401/1071 [01:33<02:44,  4.08it/s]

Training steps: 400 Loss: 0.4189530313014984


Training:  47%|████▋     | 501/1071 [01:56<02:07,  4.48it/s]

Training steps: 500 Loss: 0.4779001474380493


Training:  56%|█████▌    | 601/1071 [02:19<01:55,  4.07it/s]

Training steps: 600 Loss: 0.1099681481719017


Training:  65%|██████▌   | 701/1071 [02:42<01:21,  4.52it/s]

Training steps: 700 Loss: 0.4645492434501648


Training:  75%|███████▍  | 801/1071 [03:05<01:06,  4.06it/s]

Training steps: 800 Loss: 0.27959227561950684


Training:  84%|████████▍ | 901/1071 [03:29<00:37,  4.51it/s]

Training steps: 900 Loss: 0.39335012435913086


Training:  93%|█████████▎| 1001/1071 [03:52<00:17,  4.06it/s]

Training steps: 1000 Loss: 0.14099754393100739


Training: 100%|██████████| 1071/1071 [04:08<00:00,  4.31it/s]

TRAIN ACC : 0.898980754066762, TRAIN LOSS : 0.3260412567254264



Training:   0%|          | 0/357 [00:00<?, ?it/s]

Validation steps: 0 Loss: 0.033807240426540375


Training:  29%|██▊       | 102/357 [00:07<00:18, 13.93it/s]

Validation steps: 100 Loss: 0.19084018468856812


Training:  57%|█████▋    | 202/357 [00:14<00:11, 13.95it/s]

Validation steps: 200 Loss: 0.47039979696273804


Training:  85%|████████▍ | 302/357 [00:21<00:03, 14.15it/s]

Validation steps: 300 Loss: 0.38143786787986755


Training: 100%|██████████| 357/357 [00:25<00:00, 14.02it/s]

              precision    recall  f1-score   support

        IT과학       0.84      0.86      0.85      1206
          경제       0.91      0.78      0.84      1555
          사회       0.74      0.84      0.78      1840
        생활문화       0.94      0.86      0.90      1484
          세계       0.91      0.95      0.93      1907
         스포츠       0.97      0.97      0.97      1733
          정치       0.93      0.91      0.92      1688

    accuracy                           0.88     11413
   macro avg       0.89      0.88      0.88     11413
weighted avg       0.89      0.88      0.88     11413

[0.85986733 0.77684887 0.83804348 0.85781671 0.948086   0.9746105
 0.90876777]
VALID ACC : 0.8841671777797249, VALID LOSS : 0.36141281631835204
{'epoch': 2, 'train_loss': 0.3260412567254264, 'train_acc': 0.898980754066762, 'valid_acc': 0.8841671777797249, 'val_loss': 0.36141281631835204, 'learning_rate': 5e-06}
Start Training: Epoch 4



Training:   0%|          | 1/1071 [00:00<04:02,  4.41it/s]

Training steps: 0 Loss: 0.5615975856781006


Training:   9%|▉         | 101/1071 [00:23<03:35,  4.50it/s]

Training steps: 100 Loss: 0.22157113254070282


Training:  19%|█▉        | 201/1071 [00:46<03:34,  4.06it/s]

Training steps: 200 Loss: 0.31963205337524414


Training:  28%|██▊       | 301/1071 [01:09<02:51,  4.48it/s]

Training steps: 300 Loss: 0.18798492848873138


Training:  37%|███▋      | 401/1071 [01:33<02:45,  4.06it/s]

Training steps: 400 Loss: 0.2529560625553131


Training:  47%|████▋     | 501/1071 [01:56<02:06,  4.50it/s]

Training steps: 500 Loss: 0.2535407245159149


Training:  56%|█████▌    | 601/1071 [02:19<01:55,  4.07it/s]

Training steps: 600 Loss: 0.32986488938331604


Training:  65%|██████▌   | 701/1071 [02:42<01:22,  4.48it/s]

Training steps: 700 Loss: 0.07475700229406357


Training:  75%|███████▍  | 801/1071 [03:06<01:06,  4.08it/s]

Training steps: 800 Loss: 0.48671332001686096


Training:  84%|████████▍ | 901/1071 [03:29<00:37,  4.48it/s]

Training steps: 900 Loss: 0.26351243257522583


Training:  93%|█████████▎| 1001/1071 [03:52<00:17,  4.08it/s]

Training steps: 1000 Loss: 0.1872742623090744


Training: 100%|██████████| 1071/1071 [04:08<00:00,  4.31it/s]

TRAIN ACC : 0.9090563943810053, TRAIN LOSS : 0.2863422163616932



Training:   0%|          | 0/357 [00:00<?, ?it/s]

Validation steps: 0 Loss: 0.017986999824643135


Training:  29%|██▊       | 102/357 [00:07<00:18, 13.97it/s]

Validation steps: 100 Loss: 0.27952009439468384


Training:  57%|█████▋    | 202/357 [00:14<00:11, 13.87it/s]

Validation steps: 200 Loss: 0.4345056116580963


Training:  85%|████████▍ | 302/357 [00:21<00:03, 14.03it/s]

Validation steps: 300 Loss: 0.5975586771965027


Training: 100%|██████████| 357/357 [00:25<00:00, 13.94it/s]


              precision    recall  f1-score   support

        IT과학       0.83      0.87      0.85      1206
          경제       0.84      0.85      0.84      1555
          사회       0.81      0.76      0.78      1840
        생활문화       0.93      0.88      0.90      1484
          세계       0.93      0.93      0.93      1907
         스포츠       0.95      0.99      0.97      1733
          정치       0.91      0.93      0.92      1688

    accuracy                           0.89     11413
   macro avg       0.88      0.89      0.88     11413
weighted avg       0.89      0.89      0.89     11413

[0.86898839 0.84694534 0.75978261 0.87803235 0.93130572 0.99134449
 0.92535545]
VALID ACC : 0.8868833786033471, VALID LOSS : 0.3882844696650986
{'epoch': 3, 'train_loss': 0.2863422163616932, 'train_acc': 0.9090563943810053, 'valid_acc': 0.8868833786033471, 'val_loss': 0.3882844696650986, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 5


Training:   0%|          | 1/1071 [00:00<04:32,  3.92it/s]

Training steps: 0 Loss: 0.16490206122398376


Training:   9%|▉         | 101/1071 [00:23<03:35,  4.50it/s]

Training steps: 100 Loss: 0.17595720291137695


Training:  19%|█▉        | 201/1071 [00:46<03:34,  4.06it/s]

Training steps: 200 Loss: 0.25505268573760986


Training:  28%|██▊       | 301/1071 [01:09<02:51,  4.50it/s]

Training steps: 300 Loss: 0.3695361018180847


Training:  37%|███▋      | 401/1071 [01:33<02:44,  4.08it/s]

Training steps: 400 Loss: 0.1281677633523941


Training:  47%|████▋     | 501/1071 [01:56<02:07,  4.49it/s]

Training steps: 500 Loss: 0.1468084752559662


Training:  56%|█████▌    | 601/1071 [02:19<01:55,  4.06it/s]

Training steps: 600 Loss: 0.3317220211029053


Training:  65%|██████▌   | 701/1071 [02:42<01:22,  4.51it/s]

Training steps: 700 Loss: 0.19157053530216217


Training:  75%|███████▍  | 801/1071 [03:05<01:06,  4.07it/s]

Training steps: 800 Loss: 0.1647428721189499


Training:  84%|████████▍ | 901/1071 [03:29<00:37,  4.50it/s]

Training steps: 900 Loss: 0.2754758894443512


Training:  93%|█████████▎| 1001/1071 [03:52<00:17,  4.07it/s]

Training steps: 1000 Loss: 0.11635410040616989


Training: 100%|██████████| 1071/1071 [04:08<00:00,  4.31it/s]

TRAIN ACC : 0.9204754534038141, TRAIN LOSS : 0.25750490373659646



Training:   0%|          | 0/357 [00:00<?, ?it/s]

Validation steps: 0 Loss: 0.02342326194047928


Training:  29%|██▊       | 102/357 [00:07<00:18, 13.84it/s]

Validation steps: 100 Loss: 0.1648535132408142


Training:  57%|█████▋    | 202/357 [00:14<00:11, 13.87it/s]

Validation steps: 200 Loss: 0.48847055435180664


Training:  85%|████████▍ | 302/357 [00:21<00:03, 14.05it/s]

Validation steps: 300 Loss: 0.5185155868530273


Training: 100%|██████████| 357/357 [00:25<00:00, 13.96it/s]

              precision    recall  f1-score   support

        IT과학       0.82      0.88      0.85      1206
          경제       0.89      0.80      0.84      1555
          사회       0.77      0.82      0.79      1840
        생활문화       0.93      0.87      0.90      1484
          세계       0.91      0.94      0.93      1907
         스포츠       0.96      0.98      0.97      1733
          정치       0.93      0.90      0.92      1688

    accuracy                           0.89     11413
   macro avg       0.89      0.88      0.88     11413
weighted avg       0.89      0.89      0.89     11413

[0.87810945 0.80385852 0.81576087 0.86590296 0.9449397  0.97807271
 0.90106635]
VALID ACC : 0.8860948041706825, VALID LOSS : 0.37411461709750715
{'epoch': 4, 'train_loss': 0.25750490373659646, 'train_acc': 0.9204754534038141, 'valid_acc': 0.8860948041706825, 'val_loss': 0.37411461709750715, 'learning_rate': 5e-06}
Start Training: Epoch 6



Training:   0%|          | 1/1071 [00:00<04:01,  4.43it/s]

Training steps: 0 Loss: 0.26241713762283325


Training:   9%|▉         | 101/1071 [00:23<03:36,  4.49it/s]

Training steps: 100 Loss: 0.2446623146533966


Training:  19%|█▉        | 201/1071 [00:46<03:33,  4.08it/s]

Training steps: 200 Loss: 0.14984676241874695


Training:  28%|██▊       | 301/1071 [01:09<02:50,  4.51it/s]

Training steps: 300 Loss: 0.2660917639732361


Training:  37%|███▋      | 401/1071 [01:32<02:44,  4.07it/s]

Training steps: 400 Loss: 0.44936490058898926


Training:  47%|████▋     | 501/1071 [01:56<02:06,  4.50it/s]

Training steps: 500 Loss: 0.1580566167831421


Training:  56%|█████▌    | 601/1071 [02:19<01:56,  4.05it/s]

Training steps: 600 Loss: 0.1278010457754135


Training:  65%|██████▌   | 701/1071 [02:42<01:22,  4.50it/s]

Training steps: 700 Loss: 0.3437550365924835


Training:  75%|███████▍  | 801/1071 [03:05<01:06,  4.07it/s]

Training steps: 800 Loss: 0.11635398864746094


Training:  84%|████████▍ | 901/1071 [03:28<00:37,  4.53it/s]

Training steps: 900 Loss: 0.25255274772644043


Training:  93%|█████████▎| 1001/1071 [03:51<00:17,  4.05it/s]

Training steps: 1000 Loss: 0.22459517419338226


Training: 100%|██████████| 1071/1071 [04:08<00:00,  4.32it/s]

TRAIN ACC : 0.9285651704097427, TRAIN LOSS : 0.2265328386055878



Training:   0%|          | 0/357 [00:00<?, ?it/s]

Validation steps: 0 Loss: 0.018157215788960457


Training:  29%|██▊       | 102/357 [00:07<00:18, 14.13it/s]

Validation steps: 100 Loss: 0.16062240302562714


Training:  57%|█████▋    | 202/357 [00:14<00:10, 14.22it/s]

Validation steps: 200 Loss: 0.5344827771186829


Training:  85%|████████▍ | 302/357 [00:21<00:03, 14.25it/s]

Validation steps: 300 Loss: 0.5312908291816711


Training: 100%|██████████| 357/357 [00:25<00:00, 14.15it/s]


              precision    recall  f1-score   support

        IT과학       0.81      0.90      0.85      1206
          경제       0.90      0.78      0.84      1555
          사회       0.78      0.82      0.80      1840
        생활문화       0.93      0.88      0.90      1484
          세계       0.92      0.95      0.93      1907
         스포츠       0.97      0.98      0.97      1733
          정치       0.93      0.91      0.92      1688

    accuracy                           0.89     11413
   macro avg       0.89      0.89      0.89     11413
weighted avg       0.89      0.89      0.89     11413

[0.89883914 0.78456592 0.81630435 0.88140162 0.94546408 0.97807271
 0.90521327]
VALID ACC : 0.8884605274686761, VALID LOSS : 0.38672795962980566
{'epoch': 5, 'train_loss': 0.2265328386055878, 'train_acc': 0.9285651704097427, 'valid_acc': 0.8884605274686761, 'val_loss': 0.38672795962980566, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 7


Training:   0%|          | 1/1071 [00:00<04:22,  4.07it/s]

Training steps: 0 Loss: 0.23713578283786774


Training:   9%|▉         | 101/1071 [00:23<03:35,  4.50it/s]

Training steps: 100 Loss: 0.06727636605501175


Training:  19%|█▉        | 201/1071 [00:46<03:33,  4.08it/s]

Training steps: 200 Loss: 0.25319376587867737


Training:  28%|██▊       | 301/1071 [01:09<02:49,  4.53it/s]

Training steps: 300 Loss: 0.3362329602241516


Training:  37%|███▋      | 401/1071 [01:32<02:43,  4.10it/s]

Training steps: 400 Loss: 0.05684280768036842


Training:  47%|████▋     | 501/1071 [01:55<02:05,  4.53it/s]

Training steps: 500 Loss: 0.15255603194236755


Training:  56%|█████▌    | 601/1071 [02:19<01:54,  4.09it/s]

Training steps: 600 Loss: 0.1507992446422577


Training:  65%|██████▌   | 701/1071 [02:42<01:22,  4.49it/s]

Training steps: 700 Loss: 0.2708419859409332


Training:  75%|███████▍  | 801/1071 [03:05<01:05,  4.10it/s]

Training steps: 800 Loss: 0.30812886357307434


Training:  84%|████████▍ | 901/1071 [03:28<00:37,  4.50it/s]

Training steps: 900 Loss: 0.26311975717544556


Training:  93%|█████████▎| 1001/1071 [03:51<00:17,  4.09it/s]

Training steps: 1000 Loss: 0.271419495344162


Training: 100%|██████████| 1071/1071 [04:07<00:00,  4.32it/s]

TRAIN ACC : 0.9367717064338075, TRAIN LOSS : 0.20201439692888384



Training:   0%|          | 0/357 [00:00<?, ?it/s]

Validation steps: 0 Loss: 0.016367144882678986


Training:  29%|██▊       | 102/357 [00:07<00:18, 14.16it/s]

Validation steps: 100 Loss: 0.183349147439003


Training:  57%|█████▋    | 202/357 [00:14<00:10, 14.09it/s]

Validation steps: 200 Loss: 0.5730568766593933


Training:  85%|████████▍ | 302/357 [00:21<00:03, 14.16it/s]

Validation steps: 300 Loss: 0.562688410282135


Training: 100%|██████████| 357/357 [00:25<00:00, 14.11it/s]

              precision    recall  f1-score   support

        IT과학       0.84      0.84      0.84      1206
          경제       0.87      0.82      0.84      1555
          사회       0.77      0.81      0.79      1840
        생활문화       0.91      0.89      0.90      1484
          세계       0.92      0.94      0.93      1907
         스포츠       0.96      0.98      0.97      1733
          정치       0.93      0.89      0.91      1688

    accuracy                           0.89     11413
   macro avg       0.89      0.88      0.88     11413
weighted avg       0.89      0.89      0.89     11413

[0.84494196 0.82122186 0.81195652 0.89150943 0.93864709 0.98211194
 0.88862559]
VALID ACC : 0.8853938491194252, VALID LOSS : 0.405772942692271
{'epoch': 6, 'train_loss': 0.20201439692888384, 'train_acc': 0.9367717064338075, 'valid_acc': 0.8853938491194252, 'val_loss': 0.405772942692271, 'learning_rate': 5e-06}
Start Training: Epoch 8



Training:   0%|          | 1/1071 [00:00<04:00,  4.46it/s]

Training steps: 0 Loss: 0.5779348015785217


Training:   9%|▉         | 101/1071 [00:23<03:33,  4.53it/s]

Training steps: 100 Loss: 0.11921137571334839


Training:  19%|█▉        | 201/1071 [00:46<03:33,  4.08it/s]

Training steps: 200 Loss: 0.039324939250946045


Training:  28%|██▊       | 301/1071 [01:09<02:50,  4.51it/s]

Training steps: 300 Loss: 0.134158194065094


Training:  37%|███▋      | 401/1071 [01:32<02:43,  4.11it/s]

Training steps: 400 Loss: 0.20008815824985504


Training:  47%|████▋     | 501/1071 [01:55<02:06,  4.52it/s]

Training steps: 500 Loss: 0.23557867109775543


Training:  56%|█████▌    | 601/1071 [02:18<01:54,  4.11it/s]

Training steps: 600 Loss: 0.3758866786956787


Training:  65%|██████▌   | 701/1071 [02:41<01:22,  4.51it/s]

Training steps: 700 Loss: 0.1400577872991562


Training:  75%|███████▍  | 801/1071 [03:05<01:05,  4.09it/s]

Training steps: 800 Loss: 0.16259793937206268


Training:  84%|████████▍ | 901/1071 [03:28<00:37,  4.52it/s]

Training steps: 900 Loss: 0.05748247355222702


Training:  93%|█████████▎| 1001/1071 [03:51<00:17,  4.10it/s]

Training steps: 1000 Loss: 0.04357524961233139


Training: 100%|██████████| 1071/1071 [04:07<00:00,  4.33it/s]

TRAIN ACC : 0.9469641657661867, TRAIN LOSS : 0.1732651938681676



Training:   0%|          | 0/357 [00:00<?, ?it/s]

Validation steps: 0 Loss: 0.010727624408900738


Training:  29%|██▊       | 102/357 [00:07<00:18, 14.16it/s]

Validation steps: 100 Loss: 0.2576729953289032


Training:  57%|█████▋    | 202/357 [00:14<00:10, 14.16it/s]

Validation steps: 200 Loss: 0.6470053195953369


Training:  85%|████████▍ | 302/357 [00:21<00:03, 14.19it/s]

Validation steps: 300 Loss: 0.6178387403488159


Training: 100%|██████████| 357/357 [00:25<00:00, 14.19it/s]

              precision    recall  f1-score   support

        IT과학       0.81      0.87      0.84      1206
          경제       0.87      0.81      0.84      1555
          사회       0.80      0.78      0.79      1840
        생활문화       0.91      0.89      0.90      1484
          세계       0.91      0.94      0.93      1907
         스포츠       0.96      0.98      0.97      1733
          정치       0.91      0.92      0.91      1688

    accuracy                           0.89     11413
   macro avg       0.88      0.88      0.88     11413
weighted avg       0.88      0.89      0.88     11413

[0.87396352 0.80707395 0.77934783 0.8861186  0.9449397  0.97576457
 0.91824645]
VALID ACC : 0.8850433715937965, VALID LOSS : 0.44875657644008055
{'epoch': 7, 'train_loss': 0.1732651938681676, 'train_acc': 0.9469641657661867, 'valid_acc': 0.8850433715937965, 'val_loss': 0.44875657644008055, 'learning_rate': 5e-06}
Start Training: Epoch 9



Training:   0%|          | 1/1071 [00:00<04:01,  4.43it/s]

Training steps: 0 Loss: 0.09715209156274796


Training:   9%|▉         | 101/1071 [00:23<03:36,  4.49it/s]

Training steps: 100 Loss: 0.12838801741600037


Training:  19%|█▉        | 201/1071 [00:46<03:32,  4.09it/s]

Training steps: 200 Loss: 0.2751874029636383


Training:  28%|██▊       | 301/1071 [01:09<02:51,  4.48it/s]

Training steps: 300 Loss: 0.23856033384799957


Training:  37%|███▋      | 401/1071 [01:32<02:43,  4.11it/s]

Training steps: 400 Loss: 0.14678213000297546


Training:  47%|████▋     | 501/1071 [01:55<02:06,  4.52it/s]

Training steps: 500 Loss: 0.23817524313926697


Training:  56%|█████▌    | 601/1071 [02:19<01:55,  4.09it/s]

Training steps: 600 Loss: 0.15011882781982422


Training:  65%|██████▌   | 701/1071 [02:42<01:21,  4.52it/s]

Training steps: 700 Loss: 0.044256940484046936


Training:  75%|███████▍  | 801/1071 [03:05<01:05,  4.10it/s]

Training steps: 800 Loss: 0.03560090437531471


Training:  84%|████████▍ | 901/1071 [03:28<00:37,  4.52it/s]

Training steps: 900 Loss: 0.020285800099372864


Training:  93%|█████████▎| 1001/1071 [03:51<00:17,  4.10it/s]

Training steps: 1000 Loss: 0.20164614915847778


Training: 100%|██████████| 1071/1071 [04:07<00:00,  4.33it/s]

TRAIN ACC : 0.9541485353815601, TRAIN LOSS : 0.14882658091291132



Training:   0%|          | 0/357 [00:00<?, ?it/s]

Validation steps: 0 Loss: 0.014863254502415657


Training:  29%|██▊       | 102/357 [00:07<00:17, 14.28it/s]

Validation steps: 100 Loss: 0.27436044812202454


Training:  57%|█████▋    | 202/357 [00:14<00:10, 14.18it/s]

Validation steps: 200 Loss: 0.6391286849975586


Training:  85%|████████▍ | 302/357 [00:21<00:03, 14.08it/s]

Validation steps: 300 Loss: 0.6753995418548584


Training: 100%|██████████| 357/357 [00:25<00:00, 14.17it/s]

              precision    recall  f1-score   support

        IT과학       0.81      0.88      0.84      1206
          경제       0.89      0.78      0.83      1555
          사회       0.77      0.80      0.79      1840
        생활문화       0.91      0.89      0.90      1484
          세계       0.92      0.94      0.93      1907
         스포츠       0.97      0.98      0.97      1733
          정치       0.91      0.92      0.92      1688

    accuracy                           0.88     11413
   macro avg       0.88      0.88      0.88     11413
weighted avg       0.88      0.88      0.88     11413

[0.87728027 0.77942122 0.79891304 0.88746631 0.93602517 0.9763416
 0.91706161]
VALID ACC : 0.8833786033470604, VALID LOSS : 0.45763753456160094
{'epoch': 8, 'train_loss': 0.14882658091291132, 'train_acc': 0.9541485353815601, 'valid_acc': 0.8833786033470604, 'val_loss': 0.45763753456160094, 'learning_rate': 5e-06}
Start Training: Epoch 10



Training:   0%|          | 1/1071 [00:00<04:02,  4.41it/s]

Training steps: 0 Loss: 0.04335492476820946


Training:   9%|▉         | 101/1071 [00:23<03:35,  4.50it/s]

Training steps: 100 Loss: 0.170024111866951


Training:  19%|█▉        | 201/1071 [00:46<03:30,  4.13it/s]

Training steps: 200 Loss: 0.1019071489572525


Training:  28%|██▊       | 301/1071 [01:09<02:51,  4.48it/s]

Training steps: 300 Loss: 0.07603970915079117


Training:  37%|███▋      | 401/1071 [01:33<02:45,  4.04it/s]

Training steps: 400 Loss: 0.0302328672260046


Training:  47%|████▋     | 501/1071 [01:56<02:06,  4.50it/s]

Training steps: 500 Loss: 0.06836792081594467


Training:  56%|█████▌    | 601/1071 [02:19<01:54,  4.10it/s]

Training steps: 600 Loss: 0.11766377836465836


Training:  65%|██████▌   | 701/1071 [02:42<01:22,  4.50it/s]

Training steps: 700 Loss: 0.18177440762519836


Training:  75%|███████▍  | 801/1071 [03:05<01:06,  4.09it/s]

Training steps: 800 Loss: 0.03289027512073517


Training:  84%|████████▍ | 901/1071 [03:29<00:38,  4.46it/s]

Training steps: 900 Loss: 0.05812717229127884


Training:  93%|█████████▎| 1001/1071 [03:52<00:17,  4.06it/s]

Training steps: 1000 Loss: 0.14105063676834106


Training: 100%|██████████| 1071/1071 [04:08<00:00,  4.31it/s]

TRAIN ACC : 0.9606319908881166, TRAIN LOSS : 0.12854487873460788



Training:   0%|          | 0/357 [00:00<?, ?it/s]

Validation steps: 0 Loss: 0.01969211734831333


Training:  29%|██▊       | 102/357 [00:07<00:18, 14.10it/s]

Validation steps: 100 Loss: 0.3885464668273926


Training:  57%|█████▋    | 202/357 [00:14<00:10, 14.15it/s]

Validation steps: 200 Loss: 0.7276132702827454


Training:  85%|████████▍ | 302/357 [00:21<00:03, 14.14it/s]

Validation steps: 300 Loss: 0.8071425557136536


Training: 100%|██████████| 357/357 [00:25<00:00, 14.07it/s]

              precision    recall  f1-score   support

        IT과학       0.82      0.85      0.84      1206
          경제       0.89      0.76      0.82      1555
          사회       0.77      0.79      0.78      1840
        생활문화       0.90      0.90      0.90      1484
          세계       0.90      0.94      0.92      1907
         스포츠       0.97      0.97      0.97      1733
          정치       0.91      0.92      0.91      1688

    accuracy                           0.88     11413
   macro avg       0.88      0.88      0.88     11413
weighted avg       0.88      0.88      0.88     11413

[0.85406302 0.76463023 0.79130435 0.89959569 0.9449397  0.96884016
 0.91706161]
VALID ACC : 0.8796109699465522, VALID LOSS : 0.4995928053243258
{'epoch': 9, 'train_loss': 0.12854487873460788, 'train_acc': 0.9606319908881166, 'valid_acc': 0.8796109699465522, 'val_loss': 0.4995928053243258, 'learning_rate': 5e-06}
Start Training: Epoch 11



Training:   0%|          | 1/1071 [00:00<03:58,  4.49it/s]

Training steps: 0 Loss: 0.021279694512486458


Training:   9%|▉         | 101/1071 [00:23<03:37,  4.47it/s]

Training steps: 100 Loss: 0.13891936838626862


Training:  19%|█▉        | 201/1071 [00:46<03:33,  4.08it/s]

Training steps: 200 Loss: 0.12529665231704712


Training:  28%|██▊       | 301/1071 [01:09<02:51,  4.50it/s]

Training steps: 300 Loss: 0.21052399277687073


Training:  37%|███▋      | 401/1071 [01:33<02:44,  4.06it/s]

Training steps: 400 Loss: 0.03123466856777668


Training:  47%|████▋     | 501/1071 [01:56<02:06,  4.49it/s]

Training steps: 500 Loss: 0.009697799570858479


Training:  56%|█████▌    | 601/1071 [02:19<01:55,  4.06it/s]

Training steps: 600 Loss: 0.2312835454940796


Training:  65%|██████▌   | 701/1071 [02:42<01:22,  4.50it/s]

Training steps: 700 Loss: 0.01627977564930916


Training:  75%|███████▍  | 801/1071 [03:05<01:06,  4.06it/s]

Training steps: 800 Loss: 0.0480910949409008


Training:  84%|████████▍ | 901/1071 [03:29<00:37,  4.52it/s]

Training steps: 900 Loss: 0.16386769711971283


Training:  93%|█████████▎| 1001/1071 [03:52<00:17,  4.10it/s]

Training steps: 1000 Loss: 0.05786262825131416


Training: 100%|██████████| 1071/1071 [04:08<00:00,  4.31it/s]

TRAIN ACC : 0.9648374755410181, TRAIN LOSS : 0.11257264527916581



Training:   0%|          | 0/357 [00:00<?, ?it/s]

Validation steps: 0 Loss: 0.08327020704746246


Training:  29%|██▊       | 102/357 [00:07<00:18, 13.88it/s]

Validation steps: 100 Loss: 0.43866580724716187


Training:  57%|█████▋    | 202/357 [00:14<00:11, 14.01it/s]

Validation steps: 200 Loss: 0.755425751209259


Training:  85%|████████▍ | 302/357 [00:21<00:03, 14.03it/s]

Validation steps: 300 Loss: 0.8925138115882874


Training: 100%|██████████| 357/357 [00:25<00:00, 13.97it/s]

              precision    recall  f1-score   support

        IT과학       0.80      0.88      0.84      1206
          경제       0.87      0.78      0.82      1555
          사회       0.77      0.79      0.78      1840
        생활문화       0.91      0.89      0.90      1484
          세계       0.91      0.93      0.92      1907
         스포츠       0.97      0.96      0.97      1733
          정치       0.91      0.91      0.91      1688

    accuracy                           0.88     11413
   macro avg       0.88      0.88      0.88     11413
weighted avg       0.88      0.88      0.88     11413

[0.8814262  0.77877814 0.79184783 0.89083558 0.93340325 0.96364686
 0.90876777]
VALID ACC : 0.8794357311837379, VALID LOSS : 0.5354688403220111
{'epoch': 10, 'train_loss': 0.11257264527916581, 'train_acc': 0.9648374755410181, 'valid_acc': 0.8794357311837379, 'val_loss': 0.5354688403220111, 'learning_rate': 5e-06}
EarlyStopping counter: 5 out of 5
************************************************** auc




In [10]:
# Ask PyTorch's CUDA caching allocator to release cached, currently unused GPU
# memory after the training run above, before the inference section starts.
# Memory still referenced by live tensors/models is not affected by this call.
torch.cuda.empty_cache()

## Inference

In [11]:
def inference_main(model_name: str = "temp") -> None:
    """Load the test split and run inference with the saved checkpoints.

    Args:
        model_name: Value assigned to ``args.model_name`` before the
            arguments are handed to ``Preprocess`` and ``inference``.
            Defaults to ``"temp"``, the value this cell originally
            hard-coded. The recorded output of this cell shows checkpoints
            being loaded from files named ``temp_<k>.pt``, so this
            presumably acts as the checkpoint filename prefix -- confirm
            against ``inference``.

    Returns:
        None. Results are produced as side effects of ``inference``; the
        recorded output shows per-checkpoint CSVs and a soft-vote CSV being
        written.
    """
    args = parse_args()
    args.model_name = model_name

    # Preprocess exposes the loaded test split via its `test_data` attribute.
    preprocess = Preprocess(args)
    preprocess.load_test_data()
    test_data = preprocess.test_data

    print(f"size of test data : {len(test_data)}")

    # Drop cached-but-unused GPU blocks before the checkpoints are loaded.
    torch.cuda.empty_cache()
    inference(args, test_data)

# Run the inference pipeline defined in this cell; progress and the paths of
# the written prediction files are logged in the cell output.
inference_main()

size of test data : 9131
Loading Model from: /content/drive/MyDrive/KLUE_TC/models/temp_1.pt


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.decoder.bias', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classif

Loading Model from: /content/drive/MyDrive/KLUE_TC/models/temp_1.pt ...Finished.


Inferencing: 100%|██████████| 286/286 [00:20<00:00, 14.05it/s]


writing prediction : /content/drive/MyDrive/KLUE_TC/output/loss/output_1.csv
Loading Model from: /content/drive/MyDrive/KLUE_TC/models/temp_2.pt


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.decoder.bias', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classif

Loading Model from: /content/drive/MyDrive/KLUE_TC/models/temp_2.pt ...Finished.


Inferencing: 100%|██████████| 286/286 [00:20<00:00, 14.09it/s]


writing prediction : /content/drive/MyDrive/KLUE_TC/output/loss/output_2.csv
Loading Model from: /content/drive/MyDrive/KLUE_TC/models/temp_3.pt


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.decoder.bias', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classif

Loading Model from: /content/drive/MyDrive/KLUE_TC/models/temp_3.pt ...Finished.


Inferencing: 100%|██████████| 286/286 [00:20<00:00, 14.10it/s]


writing prediction : /content/drive/MyDrive/KLUE_TC/output/loss/output_3.csv
Loading Model from: /content/drive/MyDrive/KLUE_TC/models/temp_4.pt


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.decoder.bias', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classif

Loading Model from: /content/drive/MyDrive/KLUE_TC/models/temp_4.pt ...Finished.


Inferencing: 100%|██████████| 286/286 [00:20<00:00, 14.05it/s]


writing prediction : /content/drive/MyDrive/KLUE_TC/output/loss/output_4.csv
writing prediction : /content/drive/MyDrive/KLUE_TC/output/loss/output_softvote.csv
