In [1]:
!nvidia-smi

Thu Jul 29 13:56:52 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.42.01    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   64C    P0    48W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

## Directory 설정, 구글 드라이브 import

In [2]:
cur_dir = '/content/drive/MyDrive/KLUE_TC'

## Utils

In [3]:
!pip install adamp
!pip install transformers



In [12]:
import os
import random
import torch
import numpy as np
from torch import nn

from torch.optim import Adam, AdamW, SGD
from adamp import AdamP
from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau, CosineAnnealingLR, ExponentialLR, \
    CosineAnnealingWarmRestarts
from transformers import get_linear_schedule_with_warmup
from transformers import AutoConfig, AutoTokenizer, AutoModelForSequenceClassification


def set_seeds(seed=42):
    # 랜덤 시드를 설정하여 매 코드를 실행할 때마다 동일한 결과를 얻게 합니다.
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.cuda.manual_seed_all(seed)  # if use multi-GPU
    torch.backends.cudnn.benchmark = False


def save_checkpoint(state, model_dir, model_filename):
    print('saving model ...')
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    #torch.save(state, os.path.join(model_dir, model_filename))
    torch.save(state, os.path.join(model_filename))


def get_optimizer(model, args):
    if args.optimizer == 'adam':
        optimizer = Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    elif args.optimizer == 'adamW':
        optimizer = AdamW(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    elif args.optimizer == 'adamP':
        optimizer = AdamP(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    elif args.optimizer == 'SGD':
        optimizer = SGD(model.parameters(), lr=args.lr, momentum=args.momentum)

    # 모든 parameter들의 grad값을 0으로 초기화
    optimizer.zero_grad()

    return optimizer


def get_scheduler(optimizer, args):
    if args.scheduler == 'plateau':
        scheduler = ReduceLROnPlateau(optimizer, patience=args.plateau_patience, factor=args.plateau_factor, mode='max',
                                      verbose=True)
    elif args.scheduler == 'linear_warmup':
        scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=args.warmup_steps,
                                                    num_training_steps=args.total_steps)
    elif args.scheduler == 'step_lr':
        scheduler = StepLR(optimizer, step_size=args.step_size, gamma=args.gamma)
    elif args.scheduler == 'exp_lr':
        scheduler = ExponentialLR(optimizer, gamma=args.gamma)
    elif args.scheduler == 'cosine_annealing':
        scheduler = CosineAnnealingLR(optimizer, T_max=args.t_max, eta_min=args.eta_min)
    elif args.scheduler == 'cosine_annealing_warmstart':
        scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=args.T_0, T_mult=args.T_mult, eta_min=args.eta_min,
                                                last_epoch=-1)

    return scheduler


def update_params(loss, model, optimizer, batch_idx, max_len, args):
    if args.gradient_accumulation:
        # normalize loss to account for batch accumulation
        loss = loss / args.accum_iter 

        # backward pass
        loss.backward()

        # weights update
        if ((batch_idx + 1) % args.accum_iter == 0) or (batch_idx + 1 == max_len):
            torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip_grad)
            optimizer.step()
            optimizer.zero_grad()
    else:
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip_grad)
        optimizer.step()
        optimizer.zero_grad()


def load_tokenizer(args):
    tokenizer = AutoTokenizer.from_pretrained(
        args.tokenizer_name
        if args.tokenizer_name
        else args.model_name_or_path,
        use_fast=True,
    )

    return tokenizer


def load_model(args, model_name=None):
    if not model_name:
        model_name = args.model_name
    model_path = os.path.join(args.model_dir, model_name)
    print("Loading Model from:", model_path)
    # load_state = torch.load(model_path)
    load_state = torch.load(model_name)

    # Load pretrained model and tokenizer
    config = AutoConfig.from_pretrained(
        args.config_name
        if args.config_name
        else args.model_name_or_path,
    )

    config.num_labels = 7

    model = AutoModelForSequenceClassification.from_pretrained(
        args.model_name_or_path,
        from_tf=bool(".ckpt" in args.model_name_or_path),
        config=config,
    ).to(args.device)

    # model.classifier = nn.Sequential(
    #     nn.Dropout(p=0.3, inplace=False),
    #     nn.Linear(1024, 1024),
    #     nn.Tanh(),
    #     nn.Dropout(p=0.3, inplace=False),
    #     nn.Linear(1024, 512),
    #     nn.Tanh(),
    #     nn.Dropout(p=0.3, inplace=False),
    #     nn.Linear(512, 7),
    # )

    model.load_state_dict(load_state['state_dict'], strict=True)

    # model = model.to(args.device)

    print("Loading Model from:", model_path, "...Finished.")

    return model


def get_model(args):
    # Load pretrained model and tokenizer
    config = AutoConfig.from_pretrained(
        args.config_name
        if args.config_name
        else args.model_name_or_path,
    )

    config.num_labels = 7
    model = AutoModelForSequenceClassification.from_pretrained(
        args.model_name_or_path,
        from_tf=bool(".ckpt" in args.model_name_or_path),
        config=config,
    ).to(args.device)

    # model.classifier = nn.Sequential(
    #     nn.Dropout(p=0.3, inplace=False),
    #     nn.Linear(1024, 1024),
    #     nn.Tanh(),
    #     nn.Dropout(p=0.3, inplace=False),
    #     nn.Linear(1024, 512),
    #     nn.Tanh(),
    #     nn.Dropout(p=0.3, inplace=False),
    #     nn.Linear(512, 7),
    # )

    # print(model)
    #model.classifier.dropout = nn.Dropout(p=0.3, inplace = False)

    model = model.to(args.device)

    return model


def get_loaders(args, train, valid, is_inference=False):
    pin_memory = True
    train_loader, valid_loader = None, None

    if is_inference:
        test_dataset = YNAT_dataset(args, valid, is_inference)
        test_loader = torch.utils.data.DataLoader(test_dataset, num_workers=args.num_workers, shuffle=False,
                                                  batch_size=args.batch_size, pin_memory=pin_memory)
        return test_loader

    if train is not None:
        train_dataset = YNAT_dataset(args, train, is_inference)
        train_loader = torch.utils.data.DataLoader(train_dataset, num_workers=args.num_workers, shuffle=True,
                                                   batch_size=args.batch_size, pin_memory=pin_memory)
    if valid is not None:
        valid_dataset = YNAT_dataset(args, valid, is_inference)
        valid_loader = torch.utils.data.DataLoader(valid_dataset, num_workers=args.num_workers, shuffle=False,
                                                   batch_size=args.batch_size, pin_memory=pin_memory)

    return train_loader, valid_loader


# loss계산하고 parameter update!
def compute_loss(preds, targets, args):
    """
    Args :
        preds   : (batch_size, max_seq_len)
        targets : (batch_size, max_seq_len)
    """
    # print(preds, targets)
    loss = get_criterion(preds, targets, args)
    # 마지막 시퀀스에 대한 값만 loss 계산
    # loss = loss[:, -1]
    # loss = torch.mean(loss)
    return loss


def get_criterion(pred, target, args):
    if args.criterion == 'BCE':
        loss = nn.BCELoss(reduction="none")
    elif args.criterion == "BCELogit":
        loss = nn.BCEWithLogitsLoss(reduction="none")
    elif args.criterion == "MSE":
        loss = nn.MSELoss(reduction="none")
    elif args.criterion == "L1":
        loss = nn.L1Loss(reduction="none")
    elif args.criterion == "CE":
        #weights = [1,1,2,1,1,1,1] #as class distribution
        #class_weights = torch.FloatTensor(weights).cuda()
        #loss = nn.CrossEntropyLoss(weight=class_weights)
        loss = nn.CrossEntropyLoss()
    # NLL, CrossEntropy not available
    return loss(pred, target)


## Dataloader

In [5]:
import os
import torch
import pandas as pd


class Preprocess:
    def __init__(self, args):
        self.args = args
        self.train_data = None
        self.test_data = None

    def load_data(self, file_name):
        csv_file_name = os.path.join(self.args.data_dir, file_name)
        df = pd.read_csv(csv_file_name)
        #del df['Unnamed: 0']
        return df.values

    def load_train_data(self):
        self.train_data = self.load_data('train_data.csv')

    def load_test_data(self):
        self.test_data = self.load_data('test_data.csv')


class YNAT_dataset(torch.utils.data.Dataset):
    def __init__(self, args, data, is_inference):
        self.args = args
        self.data = data
        self.is_inference = is_inference

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        row = self.data[index]
        element = [row[i] for i in range(len(row))]
        #print(type(row))
        # np.array -> torch.tensor 형변환
        #for i, col in enumerate(row):
        #    if type(col) == str:
        #        pass
        #    else:
        #        row[i] = torch.tensor(col)

        return element



## Trainer

In [14]:
from sklearn.metrics import accuracy_score
from torch.nn.functional import one_hot
from tqdm import tqdm
from sklearn import metrics


def run(args, tokenizer, train_data, valid_data, cv_count):
    train_loader, valid_loader = get_loaders(args, train_data, valid_data)

    # only when using warmup scheduler
    # args.total_steps = int(len(train_loader.dataset) / args.batch_size) * args.n_epochs
    # args.warmup_steps = int(args.total_steps * args.warmup_ratio)

    model = get_model(args)
    optimizer = get_optimizer(model, args)
    scheduler = get_scheduler(optimizer, args)

    best_acc = -1
    early_stopping_counter = 0
    for epoch in range(args.n_epochs):

        print(f"Start Training: Epoch {epoch + 1}")

        if not args.cv_strategy:
            model_name = args.run_name
        else:
            model_name = f"{args.run_name.split('.pt')[0]}_{cv_count}.pt"

        # TRAIN
        train_acc, train_loss = train(args, model, tokenizer, train_loader, optimizer)

        # VALID
        acc, val_loss = validate(args, model, tokenizer, valid_loader)

        # TODO: model save or early stopping
        if args.scheduler == 'plateau':
            last_lr = optimizer.param_groups[0]['lr']
        else:
            last_lr = scheduler.get_last_lr()[0]

        print({"epoch": epoch, "train_loss": train_loss, "train_acc": train_acc,
                   "valid_acc": acc, "val_loss": val_loss, "learning_rate": last_lr})

        if acc > best_acc:
            best_acc = acc
            # torch.nn.DataParallel로 감싸진 경우 원래의 model을 가져옵니다.
            model_to_save = model.module if hasattr(model, 'module') else model
            save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': model_to_save.state_dict(),
            },
                args.model_dir, model_name,
            )
            early_stopping_counter = 0
        else:
            early_stopping_counter += 1
            if early_stopping_counter >= args.patience:
                print(f'EarlyStopping counter: {early_stopping_counter} out of {args.patience}')
                break

        # scheduler
        if args.scheduler == 'plateau':
            scheduler.step(best_acc)
        else:
            scheduler.step()

    return best_acc


def inference(args, test_data):
    # ckpt_file_names = []
    all_fold_preds = []
    all_fold_argmax_preds = []

    if not args.cv_strategy:
        ckpt_file_names = [args.model_name]
    else:
        ckpt_file_names = [f"{args.model_name.split('.pt')[0]}_{i + 1}.pt" for i in range(args.fold_num)]

    tokenizer = load_tokenizer(args)

    for fold_idx, ckpt in enumerate(ckpt_file_names):
        model = load_model(args, ckpt)
        model.eval()
        test_loader = get_loaders(args, None, test_data, True)

        total_preds = []
        total_argmax_preds = []
        total_ids = []

        for step, batch in tqdm(enumerate(test_loader), desc='Inferencing', total=len(test_loader)):
            idx, text = batch
            tokenized_examples = tokenizer(
                text,
                max_length=args.max_seq_len,
                padding="max_length",
                return_tensors="pt"
            ).to(args.device)

            preds = model(**tokenized_examples)


            logits = preds['logits']
            # logits = logits[:,0,:]
            argmax_logits = torch.argmax(logits, dim=1)

            if args.device == 'cuda':
                argmax_preds = argmax_logits.to('cpu').detach().numpy()
                preds = logits.to('cpu').detach().numpy()
            else:  # cpu
                argmax_preds = argmax_logits.detach().numpy()
                preds = logits.detach().numpy()

            total_preds += list(preds)
            total_argmax_preds += list(argmax_preds)
            total_ids += list(idx)

        all_fold_preds.append(total_preds)
        all_fold_argmax_preds.append(total_argmax_preds)

        output_file_name = "output.csv" if not args.cv_strategy else f"output_{fold_idx + 1}.csv"
        write_path = os.path.join(args.output_dir, output_file_name)
        if not os.path.exists(args.output_dir):
            os.makedirs(args.output_dir)
        with open(write_path, 'w', encoding='utf8') as w:
            print("writing prediction : {}".format(write_path))
            w.write("index,topic_idx\n")
            for index, p in zip(total_ids, total_argmax_preds):
                w.write('{},{}\n'.format(index, p))

    if len(all_fold_preds) > 1:
        # Soft voting ensemble
        votes = np.sum(all_fold_preds, axis=0)
        votes = np.argmax(votes, axis=1)

        write_path = os.path.join(args.output_dir, "output_softvote.csv")
        #write_path = "output_softvote.csv"
        if not os.path.exists(args.output_dir):
            os.makedirs(args.output_dir)
        with open(write_path, 'w', encoding='utf8') as w:
            print("writing prediction : {}".format(write_path))
            w.write("index,topic_idx\n")
            for id, p in zip(total_ids, votes):
                w.write('{},{}\n'.format(id, p))


def train(args, model, tokenizer, train_loader, optimizer):
    model.train()

    total_preds = []
    total_targets = []
    losses = []
    for step, batch in tqdm(enumerate(train_loader), desc='Training', total=len(train_loader)):
        idx, text, label = batch
        label = label.to(args.device)
        # print(idx[:10])
        # print(text[:10])
        # print(label[:10])
        tokenized_examples = tokenizer(
            text,
            max_length=args.max_seq_len,
            padding="max_length",
            return_tensors="pt"
        ).to(args.device)
        
        # tokenize
        # 모델의 입력으로
        # label은 one-hot?
        # loss 주고
        # argmax를 golden

        preds = model(**tokenized_examples, labels = label)
        # print(preds)
        logits = preds['logits']
        # logits = logits[:,0,:]
        softmax_logits = nn.Softmax(dim=1)(logits)
        argmax_logits = torch.argmax(logits, dim=1)

        # one_hot_logits = one_hot(argmax_logits, num_classes=7).float()
        # print(one_hot(argmax_logits, num_classes=7).type(torch.FloatTensor))
        # loss = compute_loss(logits,
        #                     label, args)
        loss = preds['loss']
        # print(loss)

        update_params(loss, model, optimizer, step, len(train_loader), args)

        if step % args.log_steps == 0:
            print(f"Training steps: {step} Loss: {str(loss.item())}")

        if args.device == 'cuda':
            argmax_logits = argmax_logits.to('cpu').detach().numpy()
            label = label.to('cpu').detach().numpy()
            loss = loss.to('cpu').detach().numpy()
        else:  # cpu
            argmax_logits = argmax_logits.detach().numpy()
            label = label.detach().numpy()
            loss = loss.detach().numpy()

        total_preds.append(argmax_logits)
        total_targets.append(label)
        losses.append(loss)

    total_preds = np.concatenate(total_preds)
    total_targets = np.concatenate(total_targets)

    # Train AUC / ACC
    acc = accuracy_score(total_targets, total_preds)
    loss_avg = sum(losses) / len(losses)
    print(f'TRAIN ACC : {acc}, TRAIN LOSS : {loss_avg}')
    return acc, loss_avg


def validate(args, model, tokenizer, valid_loader):
    model.eval()

    total_preds = []
    total_targets = []
    losses = []
    for step, batch in tqdm(enumerate(valid_loader), desc='Training', total=len(valid_loader)):
        idx, text, label = batch
        label = label.to(args.device)
        tokenized_examples = tokenizer(
            text,
            max_length=args.max_seq_len,
            padding="max_length",
            return_tensors="pt"
        ).to(args.device)

        # tokenize
        # 모델의 입력으로
        # label은 one-hot?
        # loss 주고
        # argmax를 golden

        preds = model(**tokenized_examples, labels = label)
        logits = preds['logits']
        # logits = logits[:,0,:]
        softmax_logits = nn.Softmax(dim=1)(logits)
        argmax_logits = torch.argmax(logits, dim=1)

        # one_hot_logits = one_hot(argmax_logits, num_classes=7).float()
        # print(one_hot(argmax_logits, num_classes=7).type(torch.FloatTensor))
        # loss = compute_loss(logits,
        #                     label, args)
        loss = preds['loss']
        if step % args.log_steps == 0:
            print(f"Validation steps: {step} Loss: {str(loss.item())}")

        if args.device == 'cuda':
            argmax_logits = argmax_logits.to('cpu').detach().numpy()
            label = label.to('cpu').detach().numpy()
            loss = loss.to('cpu').detach().numpy()
        else:  # cpu
            argmax_logits = argmax_logits.detach().numpy()
            label = label.detach().numpy()
            loss = loss.detach().numpy()

        total_preds.append(argmax_logits)
        total_targets.append(label)
        losses.append(loss)

    total_preds = np.concatenate(total_preds)
    total_targets = np.concatenate(total_targets)

    # Train AUC / ACC
    target_names = ['IT과학', '경제', '사회', '생활문화', '세계', '스포츠', '정치']
    print(metrics.classification_report(total_targets, total_preds, target_names=target_names))
    matrix = metrics.confusion_matrix(total_targets, total_preds)
    print(matrix.diagonal()/matrix.sum(axis=1))

    acc = accuracy_score(total_targets, total_preds)
    loss_avg = sum(losses) / len(losses)
    print(f'VALID ACC : {acc}, VALID LOSS : {loss_avg}')
    return acc, loss_avg


## Train

In [7]:
import torch
from sklearn.model_selection import KFold, StratifiedKFold
from transformers import AutoConfig, AutoTokenizer, AutoModelForSequenceClassification
from datetime import datetime
from pytz import timezone


def main(args):
    if not args.run_name:
        args.run_name = datetime.now(timezone("Asia/Seoul")).strftime("%Y-%m-%d-%H:%M:%S")

    set_seeds(args.seed)

    device = "cuda" if torch.cuda.is_available() else "cpu"
    args.device = device

    tokenizer = AutoTokenizer.from_pretrained(
        args.tokenizer_name
        if args.tokenizer_name
        else args.model_name_or_path,
        use_fast=True,
    )

    preprocess = Preprocess(args)
    preprocess.load_train_data()
    train_data_origin = preprocess.train_data

    print(f"Size of train data : {len(train_data_origin)}")
    # print(f"size of test data : {len(test_data)}")

    if args.cv_strategy == 'random':
        kf = KFold(n_splits=args.fold_num, shuffle=True)
        splits = kf.split(X=train_data_origin)
    else:
        # default
        # 여기 각 label로 바꿔야됨
        train_labels = [sequence[-1] for sequence in train_data_origin]
        skf = StratifiedKFold(n_splits=args.fold_num, shuffle=True)
        splits = skf.split(X=train_data_origin, y=train_labels)

    acc_avg = 0
    for fold_num, (train_index, valid_index) in enumerate(splits):
        train_data = train_data_origin[train_index]
        valid_data = train_data_origin[valid_index]
        best_acc = run(args, tokenizer, train_data, valid_data, fold_num + 1)

        if not args.cv_strategy:
            break

        acc_avg += best_acc

    if args.cv_strategy:
        acc_avg /= args.fold_num

        print("*" * 50, 'auc_avg', "*" * 50)
        print(acc_avg)


## Run

In [8]:
import argparse
import easydict

def parse_args():
    args = easydict.EasyDict({'run_name' : 'temp',
                             'seed':42,
                             'device' :'cuda',
                             'data_dir': cur_dir + '/data/open/',
                             'model_dir' : '/content/drive/MyDrive/KLUE_TC/models/',
                             'model_name_or_path' : 'klue/roberta-large',
                             'config_name' : None,
                             'tokenizer_name' : None,
                             'output_dir' : '/content/drive/MyDrive/KLUE_TC/output/0730',
                             
                             'accum_iter' : 8,
                             'gradient_accumulation' : True,

                             'cv_strategy' : 'stratified',
                             'fold_num' : 4,

                             'num_workers' : 1,

                             # 훈련
                             'n_epochs' : 5,
                             'batch_size' : 32,
                             'lr' : 5e-6,
                             'clip_grad' : 15,
                             'patience' : 5,
                             'max_seq_len' : 40,

                             # Optimizer
                             'optimizer' : 'adamP',

                             # Optimizer-parameters
                             'weight_decay' : 0.05,
                             'momentum' : 0.9,

                             # Scheduler
                             'scheduler' : 'step_lr',

                             # Scheduler-parameters
                             # plateau
                             'plateau_patience' : 10,
                             'plateau_factor' : 0.5,
                              
                             't_max' : 10,
                             'T_0' : 10,
                             'T_mult' : 2,
                             '--eta_min' : 0.01,

                             # linear_warmup
                             'warmup_ratio' : 0.3,

                             # Step LR
                             'step_size' : 50,
                             'gamma' : 0.1,

                             'criterion' : 'CE',

                             'log_steps' : 100})
    
    return args

In [9]:
if __name__ == '__main__':
    args = parse_args()
    main(args)

Size of train data : 45654


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.weight', 'cla

Start Training: Epoch 1


Training:   0%|          | 1/1070 [00:00<07:20,  2.43it/s]

Training steps: 0 Loss: 1.9826524257659912


Training:   9%|▉         | 101/1070 [00:41<06:23,  2.52it/s]

Training steps: 100 Loss: 1.9169511795043945


Training:  19%|█▉        | 201/1070 [01:22<06:13,  2.33it/s]

Training steps: 200 Loss: 1.5410817861557007


Training:  28%|██▊       | 301/1070 [02:03<05:04,  2.53it/s]

Training steps: 300 Loss: 0.915111780166626


Training:  37%|███▋      | 401/1070 [02:44<04:47,  2.33it/s]

Training steps: 400 Loss: 0.31032440066337585


Training:  47%|████▋     | 501/1070 [03:25<03:45,  2.52it/s]

Training steps: 500 Loss: 0.2510310411453247


Training:  56%|█████▌    | 601/1070 [04:06<03:22,  2.32it/s]

Training steps: 600 Loss: 0.42316901683807373


Training:  66%|██████▌   | 701/1070 [04:47<02:25,  2.53it/s]

Training steps: 700 Loss: 0.46593034267425537


Training:  75%|███████▍  | 801/1070 [05:28<01:55,  2.33it/s]

Training steps: 800 Loss: 0.1391078531742096


Training:  84%|████████▍ | 901/1070 [06:09<01:06,  2.53it/s]

Training steps: 900 Loss: 0.4726301431655884


Training:  94%|█████████▎| 1001/1070 [06:50<00:29,  2.33it/s]

Training steps: 1000 Loss: 0.7261714935302734


Training: 100%|██████████| 1070/1070 [07:18<00:00,  2.44it/s]

TRAIN ACC : 0.7429906542056075, TRAIN LOSS : 0.8005842629854925



Training:   0%|          | 1/357 [00:00<00:50,  6.98it/s]

Validation steps: 0 Loss: 0.3084559440612793


Training:  29%|██▊       | 102/357 [00:13<00:34,  7.36it/s]

Validation steps: 100 Loss: 0.45152080059051514


Training:  57%|█████▋    | 202/357 [00:27<00:21,  7.35it/s]

Validation steps: 200 Loss: 0.40622636675834656


Training:  85%|████████▍ | 302/357 [00:41<00:07,  7.36it/s]

Validation steps: 300 Loss: 0.21925239264965057


Training: 100%|██████████| 357/357 [00:48<00:00,  7.36it/s]


              precision    recall  f1-score   support

        IT과학       0.79      0.91      0.85      1206
          경제       0.85      0.82      0.83      1555
          사회       0.81      0.76      0.78      1841
        생활문화       0.90      0.91      0.90      1483
          세계       0.94      0.92      0.93      1908
         스포츠       0.97      0.98      0.98      1734
          정치       0.92      0.91      0.92      1687

    accuracy                           0.89     11414
   macro avg       0.88      0.89      0.88     11414
weighted avg       0.89      0.89      0.89     11414

[0.91210614 0.81864952 0.75502444 0.90761969 0.92085954 0.98212226
 0.91286307]
VALID ACC : 0.88566672507447, VALID LOSS : 0.3424806482776874
{'epoch': 0, 'train_loss': 0.8005842629854925, 'train_acc': 0.7429906542056075, 'valid_acc': 0.88566672507447, 'val_loss': 0.3424806482776874, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 2


Training:   0%|          | 1/1070 [00:00<07:22,  2.41it/s]

Training steps: 0 Loss: 0.20460638403892517


Training:   9%|▉         | 101/1070 [00:41<06:24,  2.52it/s]

Training steps: 100 Loss: 0.15300694108009338


Training:  19%|█▉        | 201/1070 [01:22<06:13,  2.33it/s]

Training steps: 200 Loss: 0.2667609751224518


Training:  28%|██▊       | 301/1070 [02:03<05:03,  2.53it/s]

Training steps: 300 Loss: 0.12399812042713165


Training:  37%|███▋      | 401/1070 [02:44<04:47,  2.32it/s]

Training steps: 400 Loss: 0.27518990635871887


Training:  47%|████▋     | 501/1070 [03:25<03:45,  2.52it/s]

Training steps: 500 Loss: 0.28591716289520264


Training:  56%|█████▌    | 601/1070 [04:06<03:22,  2.32it/s]

Training steps: 600 Loss: 0.4926159679889679


Training:  66%|██████▌   | 701/1070 [04:47<02:25,  2.53it/s]

Training steps: 700 Loss: 0.4758014976978302


Training:  75%|███████▍  | 801/1070 [05:28<01:55,  2.33it/s]

Training steps: 800 Loss: 0.3712504506111145


Training:  84%|████████▍ | 901/1070 [06:09<01:06,  2.52it/s]

Training steps: 900 Loss: 0.22603103518486023


Training:  94%|█████████▎| 1001/1070 [06:50<00:29,  2.32it/s]

Training steps: 1000 Loss: 0.1664663851261139


Training: 100%|██████████| 1070/1070 [07:18<00:00,  2.44it/s]

TRAIN ACC : 0.8927278037383177, TRAIN LOSS : 0.3266496484594367



Training:   0%|          | 0/357 [00:00<?, ?it/s]

Validation steps: 0 Loss: 0.2061453014612198


Training:  29%|██▊       | 102/357 [00:13<00:34,  7.32it/s]

Validation steps: 100 Loss: 0.46015891432762146


Training:  57%|█████▋    | 202/357 [00:27<00:21,  7.37it/s]

Validation steps: 200 Loss: 0.3504126965999603


Training:  85%|████████▍ | 302/357 [00:41<00:07,  7.36it/s]

Validation steps: 300 Loss: 0.1949702501296997


Training: 100%|██████████| 357/357 [00:48<00:00,  7.35it/s]


              precision    recall  f1-score   support

        IT과학       0.80      0.92      0.86      1206
          경제       0.88      0.80      0.84      1555
          사회       0.83      0.76      0.79      1841
        생활문화       0.90      0.91      0.90      1483
          세계       0.93      0.94      0.93      1908
         스포츠       0.97      0.98      0.98      1734
          정치       0.91      0.94      0.92      1687

    accuracy                           0.89     11414
   macro avg       0.89      0.89      0.89     11414
weighted avg       0.89      0.89      0.89     11414

[0.92371476 0.79871383 0.75611081 0.908294   0.93867925 0.98385236
 0.94013041]
VALID ACC : 0.8917119327142106, VALID LOSS : 0.3245314266845709
{'epoch': 1, 'train_loss': 0.3266496484594367, 'train_acc': 0.8927278037383177, 'valid_acc': 0.8917119327142106, 'val_loss': 0.3245314266845709, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 3


Training:   0%|          | 1/1070 [00:00<07:25,  2.40it/s]

Training steps: 0 Loss: 0.19208388030529022


Training:   9%|▉         | 101/1070 [00:41<06:23,  2.53it/s]

Training steps: 100 Loss: 0.24500812590122223


Training:  19%|█▉        | 201/1070 [01:22<06:15,  2.31it/s]

Training steps: 200 Loss: 0.6244062185287476


Training:  28%|██▊       | 301/1070 [02:03<05:04,  2.53it/s]

Training steps: 300 Loss: 0.4134194850921631


Training:  37%|███▋      | 401/1070 [02:44<04:48,  2.32it/s]

Training steps: 400 Loss: 0.5059863924980164


Training:  47%|████▋     | 501/1070 [03:25<03:45,  2.52it/s]

Training steps: 500 Loss: 0.5987372994422913


Training:  56%|█████▌    | 601/1070 [04:06<03:21,  2.32it/s]

Training steps: 600 Loss: 0.2520946264266968


Training:  66%|██████▌   | 701/1070 [04:47<02:26,  2.53it/s]

Training steps: 700 Loss: 0.22893130779266357


Training:  75%|███████▍  | 801/1070 [05:28<01:56,  2.32it/s]

Training steps: 800 Loss: 0.20594686269760132


Training:  84%|████████▍ | 901/1070 [06:09<01:06,  2.53it/s]

Training steps: 900 Loss: 0.2604212164878845


Training:  94%|█████████▎| 1001/1070 [06:50<00:29,  2.31it/s]

Training steps: 1000 Loss: 0.2725217044353485


Training: 100%|██████████| 1070/1070 [07:19<00:00,  2.44it/s]

TRAIN ACC : 0.9042056074766355, TRAIN LOSS : 0.2842928501385673



Training:   1%|          | 2/357 [00:00<00:50,  6.97it/s]

Validation steps: 0 Loss: 0.2601091265678406


Training:  29%|██▊       | 102/357 [00:13<00:34,  7.35it/s]

Validation steps: 100 Loss: 0.48503655195236206


Training:  57%|█████▋    | 202/357 [00:27<00:21,  7.33it/s]

Validation steps: 200 Loss: 0.3914349377155304


Training:  85%|████████▍ | 302/357 [00:41<00:07,  7.34it/s]

Validation steps: 300 Loss: 0.18650202453136444


Training: 100%|██████████| 357/357 [00:48<00:00,  7.34it/s]


              precision    recall  f1-score   support

        IT과학       0.78      0.93      0.85      1206
          경제       0.87      0.81      0.84      1555
          사회       0.85      0.74      0.79      1841
        생활문화       0.89      0.92      0.91      1483
          세계       0.93      0.94      0.94      1908
         스포츠       0.97      0.98      0.98      1734
          정치       0.91      0.94      0.92      1687

    accuracy                           0.89     11414
   macro avg       0.89      0.89      0.89     11414
weighted avg       0.89      0.89      0.89     11414

[0.93283582 0.81157556 0.73546985 0.91773432 0.94077568 0.97635525
 0.94190871]
VALID ACC : 0.8917995444191344, VALID LOSS : 0.32347930327621327
{'epoch': 2, 'train_loss': 0.2842928501385673, 'train_acc': 0.9042056074766355, 'valid_acc': 0.8917995444191344, 'val_loss': 0.32347930327621327, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 4


Training:   0%|          | 1/1070 [00:00<07:18,  2.44it/s]

Training steps: 0 Loss: 0.2835976183414459


Training:   9%|▉         | 101/1070 [00:41<06:24,  2.52it/s]

Training steps: 100 Loss: 0.2679087817668915


Training:  19%|█▉        | 201/1070 [01:22<06:13,  2.33it/s]

Training steps: 200 Loss: 0.23448920249938965


Training:  28%|██▊       | 301/1070 [02:03<05:04,  2.53it/s]

Training steps: 300 Loss: 0.395081102848053


Training:  37%|███▋      | 401/1070 [02:44<04:47,  2.32it/s]

Training steps: 400 Loss: 0.23828832805156708


Training:  47%|████▋     | 501/1070 [03:25<03:45,  2.53it/s]

Training steps: 500 Loss: 0.38742485642433167


Training:  56%|█████▌    | 601/1070 [04:06<03:21,  2.32it/s]

Training steps: 600 Loss: 0.23059052228927612


Training:  66%|██████▌   | 701/1070 [04:47<02:25,  2.53it/s]

Training steps: 700 Loss: 0.4271572530269623


Training:  75%|███████▍  | 801/1070 [05:28<01:55,  2.32it/s]

Training steps: 800 Loss: 0.3597418963909149


Training:  84%|████████▍ | 901/1070 [06:09<01:06,  2.52it/s]

Training steps: 900 Loss: 0.31903210282325745


Training:  94%|█████████▎| 1001/1070 [06:50<00:29,  2.32it/s]

Training steps: 1000 Loss: 0.1840948611497879


Training: 100%|██████████| 1070/1070 [07:19<00:00,  2.44it/s]

TRAIN ACC : 0.915216121495327, TRAIN LOSS : 0.25334874741802704



Training:   1%|          | 2/357 [00:00<00:50,  6.99it/s]

Validation steps: 0 Loss: 0.34620150923728943


Training:  29%|██▊       | 102/357 [00:13<00:34,  7.37it/s]

Validation steps: 100 Loss: 0.4580662250518799


Training:  57%|█████▋    | 202/357 [00:27<00:21,  7.34it/s]

Validation steps: 200 Loss: 0.3237476050853729


Training:  85%|████████▍ | 302/357 [00:41<00:07,  7.37it/s]

Validation steps: 300 Loss: 0.20688387751579285


Training: 100%|██████████| 357/357 [00:48<00:00,  7.34it/s]


              precision    recall  f1-score   support

        IT과학       0.82      0.89      0.85      1206
          경제       0.84      0.84      0.84      1555
          사회       0.83      0.75      0.79      1841
        생활문화       0.91      0.90      0.91      1483
          세계       0.95      0.93      0.94      1908
         스포츠       0.97      0.99      0.98      1734
          정치       0.90      0.94      0.92      1687

    accuracy                           0.89     11414
   macro avg       0.89      0.89      0.89     11414
weighted avg       0.89      0.89      0.89     11414

[0.89220564 0.83987138 0.75230853 0.90492245 0.92872117 0.98904268
 0.94250148]
VALID ACC : 0.8924128263536009, VALID LOSS : 0.319632058237948
{'epoch': 3, 'train_loss': 0.25334874741802704, 'train_acc': 0.915216121495327, 'valid_acc': 0.8924128263536009, 'val_loss': 0.319632058237948, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 5


Training:   0%|          | 1/1070 [00:00<07:14,  2.46it/s]

Training steps: 0 Loss: 0.38700398802757263


Training:   9%|▉         | 101/1070 [00:41<06:23,  2.53it/s]

Training steps: 100 Loss: 0.03200706094503403


Training:  19%|█▉        | 201/1070 [01:22<06:14,  2.32it/s]

Training steps: 200 Loss: 0.32594165205955505


Training:  28%|██▊       | 301/1070 [02:03<05:05,  2.52it/s]

Training steps: 300 Loss: 0.18835771083831787


Training:  37%|███▋      | 401/1070 [02:44<04:48,  2.32it/s]

Training steps: 400 Loss: 0.49308764934539795


Training:  47%|████▋     | 501/1070 [03:25<03:45,  2.52it/s]

Training steps: 500 Loss: 0.16238491237163544


Training:  56%|█████▌    | 601/1070 [04:06<03:21,  2.32it/s]

Training steps: 600 Loss: 0.20787973701953888


Training:  66%|██████▌   | 701/1070 [04:47<02:26,  2.52it/s]

Training steps: 700 Loss: 0.18036021292209625


Training:  75%|███████▍  | 801/1070 [05:28<01:55,  2.32it/s]

Training steps: 800 Loss: 0.26834768056869507


Training:  84%|████████▍ | 901/1070 [06:09<01:06,  2.53it/s]

Training steps: 900 Loss: 0.3588578999042511


Training:  94%|█████████▎| 1001/1070 [06:50<00:29,  2.32it/s]

Training steps: 1000 Loss: 0.1464715600013733


Training: 100%|██████████| 1070/1070 [07:19<00:00,  2.44it/s]

TRAIN ACC : 0.9220794392523365, TRAIN LOSS : 0.22608850513036563



Training:   1%|          | 2/357 [00:00<00:50,  6.99it/s]

Validation steps: 0 Loss: 0.4144173562526703


Training:  29%|██▊       | 102/357 [00:13<00:34,  7.32it/s]

Validation steps: 100 Loss: 0.510734498500824


Training:  57%|█████▋    | 202/357 [00:27<00:21,  7.33it/s]

Validation steps: 200 Loss: 0.45657700300216675


Training:  85%|████████▍ | 302/357 [00:41<00:07,  7.32it/s]

Validation steps: 300 Loss: 0.21657736599445343


Training: 100%|██████████| 357/357 [00:48<00:00,  7.34it/s]


              precision    recall  f1-score   support

        IT과학       0.79      0.92      0.85      1206
          경제       0.85      0.83      0.84      1555
          사회       0.85      0.72      0.78      1841
        생활문화       0.87      0.93      0.90      1483
          세계       0.94      0.93      0.94      1908
         스포츠       0.97      0.99      0.98      1734
          정치       0.92      0.92      0.92      1687

    accuracy                           0.89     11414
   macro avg       0.88      0.89      0.89     11414
weighted avg       0.89      0.89      0.89     11414

[0.91542289 0.83151125 0.72406301 0.9312205  0.92976939 0.98904268
 0.92175459]
VALID ACC : 0.8896968635009638, VALID LOSS : 0.33713474080843087
{'epoch': 4, 'train_loss': 0.22608850513036563, 'train_acc': 0.9220794392523365, 'valid_acc': 0.8896968635009638, 'val_loss': 0.33713474080843087, 'learning_rate': 5e-06}


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.weight', 'cla

Start Training: Epoch 1


Training:   0%|          | 1/1070 [00:00<07:05,  2.51it/s]

Training steps: 0 Loss: 1.8822365999221802


Training:   9%|▉         | 101/1070 [00:41<06:24,  2.52it/s]

Training steps: 100 Loss: 1.8432544469833374


Training:  19%|█▉        | 201/1070 [01:22<06:13,  2.32it/s]

Training steps: 200 Loss: 1.4433252811431885


Training:  28%|██▊       | 301/1070 [02:03<05:04,  2.52it/s]

Training steps: 300 Loss: 0.8179411292076111


Training:  37%|███▋      | 401/1070 [02:44<04:59,  2.23it/s]

Training steps: 400 Loss: 0.5719953775405884


Training:  47%|████▋     | 501/1070 [03:25<03:45,  2.52it/s]

Training steps: 500 Loss: 0.7876120805740356


Training:  56%|█████▌    | 601/1070 [04:06<03:22,  2.32it/s]

Training steps: 600 Loss: 0.3204255998134613


Training:  66%|██████▌   | 701/1070 [04:47<02:26,  2.52it/s]

Training steps: 700 Loss: 0.49042370915412903


Training:  75%|███████▍  | 801/1070 [05:28<01:55,  2.32it/s]

Training steps: 800 Loss: 0.34302273392677307


Training:  84%|████████▍ | 901/1070 [06:09<01:06,  2.52it/s]

Training steps: 900 Loss: 0.643727719783783


Training:  94%|█████████▎| 1001/1070 [06:50<00:29,  2.33it/s]

Training steps: 1000 Loss: 0.22052137553691864


Training: 100%|██████████| 1070/1070 [07:19<00:00,  2.44it/s]

TRAIN ACC : 0.7684871495327102, TRAIN LOSS : 0.7606185519180009



Training:   1%|          | 2/357 [00:00<00:52,  6.77it/s]

Validation steps: 0 Loss: 0.3073192536830902


Training:  29%|██▊       | 102/357 [00:13<00:34,  7.35it/s]

Validation steps: 100 Loss: 0.5347920656204224


Training:  57%|█████▋    | 202/357 [00:27<00:21,  7.36it/s]

Validation steps: 200 Loss: 0.34509769082069397


Training:  85%|████████▍ | 302/357 [00:41<00:07,  7.33it/s]

Validation steps: 300 Loss: 0.5328586101531982


Training: 100%|██████████| 357/357 [00:48<00:00,  7.34it/s]


              precision    recall  f1-score   support

        IT과학       0.79      0.92      0.85      1206
          경제       0.87      0.82      0.84      1556
          사회       0.83      0.72      0.78      1841
        생활문화       0.88      0.93      0.90      1483
          세계       0.92      0.91      0.92      1907
         스포츠       0.94      0.99      0.97      1733
          정치       0.91      0.90      0.90      1688

    accuracy                           0.88     11414
   macro avg       0.88      0.88      0.88     11414
weighted avg       0.88      0.88      0.88     11414

[0.91625207 0.81619537 0.72406301 0.9312205  0.91400105 0.99192152
 0.89691943]
VALID ACC : 0.8818118100578237, VALID LOSS : 0.3698824242602627
{'epoch': 0, 'train_loss': 0.7606185519180009, 'train_acc': 0.7684871495327102, 'valid_acc': 0.8818118100578237, 'val_loss': 0.3698824242602627, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 2


Training:   0%|          | 1/1070 [00:00<07:20,  2.43it/s]

Training steps: 0 Loss: 0.24488061666488647


Training:   9%|▉         | 101/1070 [00:41<06:24,  2.52it/s]

Training steps: 100 Loss: 0.47893545031547546


Training:  19%|█▉        | 201/1070 [01:22<06:15,  2.31it/s]

Training steps: 200 Loss: 0.37551456689834595


Training:  28%|██▊       | 301/1070 [02:03<05:04,  2.52it/s]

Training steps: 300 Loss: 0.2945396602153778


Training:  37%|███▋      | 401/1070 [02:44<04:49,  2.31it/s]

Training steps: 400 Loss: 0.2913450002670288


Training:  47%|████▋     | 501/1070 [03:25<03:45,  2.53it/s]

Training steps: 500 Loss: 0.20080769062042236


Training:  56%|█████▌    | 601/1070 [04:06<03:21,  2.32it/s]

Training steps: 600 Loss: 0.31334200501441956


Training:  66%|██████▌   | 701/1070 [04:47<02:26,  2.52it/s]

Training steps: 700 Loss: 0.3721013069152832


Training:  75%|███████▍  | 801/1070 [05:28<01:55,  2.32it/s]

Training steps: 800 Loss: 0.5475923418998718


Training:  84%|████████▍ | 901/1070 [06:09<01:07,  2.52it/s]

Training steps: 900 Loss: 0.33853790163993835


Training:  94%|█████████▎| 1001/1070 [06:51<00:29,  2.33it/s]

Training steps: 1000 Loss: 0.1634395867586136


Training: 100%|██████████| 1070/1070 [07:19<00:00,  2.43it/s]

TRAIN ACC : 0.8948014018691589, TRAIN LOSS : 0.3292796160433894



Training:   1%|          | 2/357 [00:00<00:50,  7.05it/s]

Validation steps: 0 Loss: 0.3168449103832245


Training:  29%|██▊       | 102/357 [00:13<00:34,  7.31it/s]

Validation steps: 100 Loss: 0.5044248104095459


Training:  57%|█████▋    | 202/357 [00:27<00:21,  7.29it/s]

Validation steps: 200 Loss: 0.2876952588558197


Training:  85%|████████▍ | 302/357 [00:41<00:07,  7.36it/s]

Validation steps: 300 Loss: 0.49793726205825806


Training: 100%|██████████| 357/357 [00:48<00:00,  7.34it/s]


              precision    recall  f1-score   support

        IT과학       0.78      0.94      0.85      1206
          경제       0.87      0.82      0.84      1556
          사회       0.82      0.76      0.79      1841
        생활문화       0.90      0.92      0.91      1483
          세계       0.93      0.91      0.92      1907
         스포츠       0.95      0.99      0.97      1733
          정치       0.93      0.89      0.91      1688

    accuracy                           0.89     11414
   macro avg       0.88      0.89      0.89     11414
weighted avg       0.89      0.89      0.89     11414

[0.93781095 0.81748072 0.76371537 0.91706001 0.91295228 0.98615118
 0.89040284]
VALID ACC : 0.886805677238479, VALID LOSS : 0.3414451199054134
{'epoch': 1, 'train_loss': 0.3292796160433894, 'train_acc': 0.8948014018691589, 'valid_acc': 0.886805677238479, 'val_loss': 0.3414451199054134, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 3


Training:   0%|          | 1/1070 [00:00<07:18,  2.44it/s]

Training steps: 0 Loss: 0.4031710624694824


Training:   9%|▉         | 101/1070 [00:41<06:23,  2.53it/s]

Training steps: 100 Loss: 0.12272361665964127


Training:  19%|█▉        | 201/1070 [01:22<06:14,  2.32it/s]

Training steps: 200 Loss: 0.4612497389316559


Training:  28%|██▊       | 301/1070 [02:03<05:04,  2.52it/s]

Training steps: 300 Loss: 0.2292582094669342


Training:  37%|███▋      | 401/1070 [02:44<04:47,  2.33it/s]

Training steps: 400 Loss: 0.2578479051589966


Training:  47%|████▋     | 501/1070 [03:25<03:45,  2.53it/s]

Training steps: 500 Loss: 0.3315162658691406


Training:  56%|█████▌    | 601/1070 [04:06<03:21,  2.33it/s]

Training steps: 600 Loss: 0.15645276010036469


Training:  66%|██████▌   | 701/1070 [04:47<02:26,  2.52it/s]

Training steps: 700 Loss: 0.19487065076828003


Training:  75%|███████▍  | 801/1070 [05:28<01:56,  2.31it/s]

Training steps: 800 Loss: 0.24647168815135956


Training:  84%|████████▍ | 901/1070 [06:09<01:07,  2.52it/s]

Training steps: 900 Loss: 0.3704179525375366


Training:  94%|█████████▎| 1001/1070 [06:51<00:29,  2.32it/s]

Training steps: 1000 Loss: 0.3822820782661438


Training: 100%|██████████| 1070/1070 [07:19<00:00,  2.43it/s]

TRAIN ACC : 0.9057242990654205, TRAIN LOSS : 0.2864547959276449



Training:   1%|          | 2/357 [00:00<00:50,  6.97it/s]

Validation steps: 0 Loss: 0.325410932302475


Training:  29%|██▊       | 102/357 [00:13<00:34,  7.30it/s]

Validation steps: 100 Loss: 0.594261109828949


Training:  57%|█████▋    | 202/357 [00:27<00:21,  7.32it/s]

Validation steps: 200 Loss: 0.20000679790973663


Training:  85%|████████▍ | 302/357 [00:41<00:07,  7.36it/s]

Validation steps: 300 Loss: 0.5408589243888855


Training: 100%|██████████| 357/357 [00:48<00:00,  7.34it/s]


              precision    recall  f1-score   support

        IT과학       0.82      0.86      0.84      1206
          경제       0.85      0.83      0.84      1556
          사회       0.79      0.79      0.79      1841
        생활문화       0.89      0.93      0.91      1483
          세계       0.96      0.88      0.91      1907
         스포츠       0.95      0.98      0.97      1733
          정치       0.91      0.90      0.91      1688

    accuracy                           0.88     11414
   macro avg       0.88      0.88      0.88     11414
weighted avg       0.88      0.88      0.88     11414

[0.86401327 0.83226221 0.79250407 0.92515172 0.87624541 0.98499711
 0.90402844]
VALID ACC : 0.8824250919922901, VALID LOSS : 0.3476631887966678
{'epoch': 2, 'train_loss': 0.2864547959276449, 'train_acc': 0.9057242990654205, 'valid_acc': 0.8824250919922901, 'val_loss': 0.3476631887966678, 'learning_rate': 5e-06}
Start Training: Epoch 4


Training:   0%|          | 1/1070 [00:00<07:08,  2.50it/s]

Training steps: 0 Loss: 0.0971449613571167


Training:   9%|▉         | 101/1070 [00:41<06:24,  2.52it/s]

Training steps: 100 Loss: 0.32577529549598694


Training:  19%|█▉        | 201/1070 [01:22<06:15,  2.32it/s]

Training steps: 200 Loss: 0.10269142687320709


Training:  28%|██▊       | 301/1070 [02:03<05:04,  2.52it/s]

Training steps: 300 Loss: 0.40854936838150024


Training:  37%|███▋      | 401/1070 [02:44<04:48,  2.32it/s]

Training steps: 400 Loss: 0.4992009699344635


Training:  47%|████▋     | 501/1070 [03:25<03:46,  2.52it/s]

Training steps: 500 Loss: 0.2646874189376831


Training:  56%|█████▌    | 601/1070 [04:06<03:22,  2.32it/s]

Training steps: 600 Loss: 0.10390141606330872


Training:  66%|██████▌   | 701/1070 [04:47<02:26,  2.52it/s]

Training steps: 700 Loss: 0.22618980705738068


Training:  75%|███████▍  | 801/1070 [05:29<01:55,  2.32it/s]

Training steps: 800 Loss: 0.2838720381259918


Training:  84%|████████▍ | 901/1070 [06:10<01:07,  2.52it/s]

Training steps: 900 Loss: 0.3025799095630646


Training:  94%|█████████▎| 1001/1070 [06:51<00:29,  2.33it/s]

Training steps: 1000 Loss: 0.3837600350379944


Training: 100%|██████████| 1070/1070 [07:19<00:00,  2.43it/s]

TRAIN ACC : 0.9150700934579439, TRAIN LOSS : 0.25379054059631356



Training:   1%|          | 2/357 [00:00<00:51,  6.96it/s]

Validation steps: 0 Loss: 0.30197179317474365


Training:  29%|██▊       | 102/357 [00:13<00:34,  7.31it/s]

Validation steps: 100 Loss: 0.5200348496437073


Training:  57%|█████▋    | 202/357 [00:27<00:21,  7.35it/s]

Validation steps: 200 Loss: 0.28678587079048157


Training:  85%|████████▍ | 302/357 [00:41<00:07,  7.42it/s]

Validation steps: 300 Loss: 0.6281578540802002


Training: 100%|██████████| 357/357 [00:48<00:00,  7.34it/s]


              precision    recall  f1-score   support

        IT과학       0.81      0.89      0.85      1206
          경제       0.82      0.87      0.85      1556
          사회       0.83      0.75      0.79      1841
        생활문화       0.92      0.90      0.91      1483
          세계       0.91      0.94      0.92      1907
         스포츠       0.95      0.99      0.97      1733
          정치       0.93      0.88      0.91      1688

    accuracy                           0.89     11414
   macro avg       0.88      0.89      0.88     11414
weighted avg       0.89      0.89      0.89     11414

[0.88723051 0.8714653  0.74904943 0.90155091 0.93654955 0.98615118
 0.87796209]
VALID ACC : 0.8865428421237077, VALID LOSS : 0.34362890815012764
{'epoch': 3, 'train_loss': 0.25379054059631356, 'train_acc': 0.9150700934579439, 'valid_acc': 0.8865428421237077, 'val_loss': 0.34362890815012764, 'learning_rate': 5e-06}
Start Training: Epoch 5


Training:   0%|          | 1/1070 [00:00<07:00,  2.54it/s]

Training steps: 0 Loss: 0.15807291865348816


Training:   9%|▉         | 101/1070 [00:41<06:24,  2.52it/s]

Training steps: 100 Loss: 0.3187884986400604


Training:  19%|█▉        | 201/1070 [01:22<06:15,  2.32it/s]

Training steps: 200 Loss: 0.34909895062446594


Training:  28%|██▊       | 301/1070 [02:03<05:05,  2.52it/s]

Training steps: 300 Loss: 0.3023548126220703


Training:  37%|███▋      | 401/1070 [02:44<04:47,  2.33it/s]

Training steps: 400 Loss: 0.3165172338485718


Training:  47%|████▋     | 501/1070 [03:25<03:45,  2.52it/s]

Training steps: 500 Loss: 0.38104841113090515


Training:  56%|█████▌    | 601/1070 [04:06<03:21,  2.33it/s]

Training steps: 600 Loss: 0.38018372654914856


Training:  66%|██████▌   | 701/1070 [04:47<02:26,  2.52it/s]

Training steps: 700 Loss: 0.20064151287078857


Training:  75%|███████▍  | 801/1070 [05:28<01:55,  2.32it/s]

Training steps: 800 Loss: 0.21952573955059052


Training:  84%|████████▍ | 901/1070 [06:09<01:06,  2.53it/s]

Training steps: 900 Loss: 0.28092944622039795


Training:  94%|█████████▎| 1001/1070 [06:51<00:29,  2.33it/s]

Training steps: 1000 Loss: 0.3743228614330292


Training: 100%|██████████| 1070/1070 [07:19<00:00,  2.43it/s]

TRAIN ACC : 0.9228971962616822, TRAIN LOSS : 0.2297581117530571



Training:   1%|          | 2/357 [00:00<00:50,  7.02it/s]

Validation steps: 0 Loss: 0.31636708974838257


Training:  29%|██▊       | 102/357 [00:13<00:34,  7.35it/s]

Validation steps: 100 Loss: 0.510935366153717


Training:  57%|█████▋    | 202/357 [00:27<00:21,  7.33it/s]

Validation steps: 200 Loss: 0.2424040138721466


Training:  85%|████████▍ | 302/357 [00:41<00:07,  7.38it/s]

Validation steps: 300 Loss: 0.5954723954200745


Training: 100%|██████████| 357/357 [00:48<00:00,  7.35it/s]


              precision    recall  f1-score   support

        IT과학       0.80      0.92      0.86      1206
          경제       0.89      0.80      0.84      1556
          사회       0.81      0.79      0.80      1841
        생활문화       0.91      0.92      0.91      1483
          세계       0.93      0.92      0.93      1907
         스포츠       0.96      0.99      0.97      1733
          정치       0.93      0.90      0.91      1688

    accuracy                           0.89     11414
   macro avg       0.89      0.89      0.89     11414
weighted avg       0.89      0.89      0.89     11414

[0.92288557 0.80141388 0.78707224 0.9163857  0.92081804 0.98557415
 0.8992891 ]
VALID ACC : 0.8892588049763448, VALID LOSS : 0.3411592319190335
{'epoch': 4, 'train_loss': 0.2297581117530571, 'train_acc': 0.9228971962616822, 'valid_acc': 0.8892588049763448, 'val_loss': 0.3411592319190335, 'learning_rate': 5e-06}
saving model ...


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.weight', 'cla

Start Training: Epoch 1


Training:   0%|          | 1/1071 [00:00<07:16,  2.45it/s]

Training steps: 0 Loss: 1.956837773323059


Training:   9%|▉         | 101/1071 [00:41<06:24,  2.52it/s]

Training steps: 100 Loss: 1.8610273599624634


Training:  19%|█▉        | 201/1071 [01:22<06:14,  2.32it/s]

Training steps: 200 Loss: 1.5430066585540771


Training:  28%|██▊       | 301/1071 [02:03<05:04,  2.53it/s]

Training steps: 300 Loss: 0.9024349451065063


Training:  37%|███▋      | 401/1071 [02:44<04:48,  2.32it/s]

Training steps: 400 Loss: 0.6399943828582764


Training:  47%|████▋     | 501/1071 [03:25<03:46,  2.52it/s]

Training steps: 500 Loss: 0.4755367934703827


Training:  56%|█████▌    | 601/1071 [04:06<03:22,  2.32it/s]

Training steps: 600 Loss: 0.37942180037498474


Training:  65%|██████▌   | 701/1071 [04:47<02:26,  2.52it/s]

Training steps: 700 Loss: 0.5809451341629028


Training:  75%|███████▍  | 801/1071 [05:28<01:56,  2.33it/s]

Training steps: 800 Loss: 0.6944474577903748


Training:  84%|████████▍ | 901/1071 [06:09<01:07,  2.52it/s]

Training steps: 900 Loss: 0.24627360701560974


Training:  93%|█████████▎| 1001/1071 [06:50<00:30,  2.32it/s]

Training steps: 1000 Loss: 0.38105326890945435


Training: 100%|██████████| 1071/1071 [07:19<00:00,  2.44it/s]

TRAIN ACC : 0.7461522735901405, TRAIN LOSS : 0.797557666932557



Training:   1%|          | 2/357 [00:00<00:51,  6.84it/s]

Validation steps: 0 Loss: 0.4094484746456146


Training:  29%|██▊       | 102/357 [00:13<00:34,  7.31it/s]

Validation steps: 100 Loss: 0.28675708174705505


Training:  57%|█████▋    | 202/357 [00:27<00:21,  7.30it/s]

Validation steps: 200 Loss: 0.17227506637573242


Training:  85%|████████▍ | 302/357 [00:41<00:07,  7.31it/s]

Validation steps: 300 Loss: 0.8578913807868958


Training: 100%|██████████| 357/357 [00:48<00:00,  7.32it/s]


              precision    recall  f1-score   support

        IT과학       0.77      0.93      0.84      1206
          경제       0.90      0.82      0.86      1556
          사회       0.83      0.75      0.79      1840
        생활문화       0.91      0.90      0.90      1483
          세계       0.93      0.94      0.93      1907
         스포츠       0.96      0.98      0.97      1733
          정치       0.92      0.92      0.92      1688

    accuracy                           0.89     11413
   macro avg       0.89      0.89      0.89     11413
weighted avg       0.89      0.89      0.89     11413

[0.92951907 0.82197943 0.75434783 0.89548213 0.94231778 0.98095788
 0.92476303]
VALID ACC : 0.8914395864365198, VALID LOSS : 0.3384939815629931
{'epoch': 0, 'train_loss': 0.797557666932557, 'train_acc': 0.7461522735901405, 'valid_acc': 0.8914395864365198, 'val_loss': 0.3384939815629931, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 2


Training:   0%|          | 1/1071 [00:00<07:22,  2.42it/s]

Training steps: 0 Loss: 0.34398290514945984


Training:   9%|▉         | 101/1071 [00:41<06:24,  2.52it/s]

Training steps: 100 Loss: 0.5000947117805481


Training:  19%|█▉        | 201/1071 [01:22<06:15,  2.32it/s]

Training steps: 200 Loss: 0.13568612933158875


Training:  28%|██▊       | 301/1071 [02:03<05:05,  2.52it/s]

Training steps: 300 Loss: 0.2673414349555969


Training:  37%|███▋      | 401/1071 [02:44<04:48,  2.32it/s]

Training steps: 400 Loss: 0.18036095798015594


Training:  47%|████▋     | 501/1071 [03:25<03:46,  2.52it/s]

Training steps: 500 Loss: 0.4897521734237671


Training:  56%|█████▌    | 601/1071 [04:06<03:21,  2.33it/s]

Training steps: 600 Loss: 0.3931785225868225


Training:  65%|██████▌   | 701/1071 [04:47<02:26,  2.52it/s]

Training steps: 700 Loss: 0.31846028566360474


Training:  75%|███████▍  | 801/1071 [05:29<01:56,  2.32it/s]

Training steps: 800 Loss: 0.1933211386203766


Training:  84%|████████▍ | 901/1071 [06:10<01:07,  2.52it/s]

Training steps: 900 Loss: 0.15266528725624084


Training:  93%|█████████▎| 1001/1071 [06:51<00:30,  2.32it/s]

Training steps: 1000 Loss: 0.3859158158302307


Training: 100%|██████████| 1071/1071 [07:19<00:00,  2.43it/s]

TRAIN ACC : 0.8918547939604567, TRAIN LOSS : 0.328820114870087



Training:   1%|          | 2/357 [00:00<00:51,  6.86it/s]

Validation steps: 0 Loss: 0.2586648762226105


Training:  29%|██▊       | 102/357 [00:13<00:34,  7.34it/s]

Validation steps: 100 Loss: 0.21566514670848846


Training:  57%|█████▋    | 202/357 [00:27<00:21,  7.31it/s]

Validation steps: 200 Loss: 0.12321089953184128


Training:  85%|████████▍ | 302/357 [00:41<00:07,  7.33it/s]

Validation steps: 300 Loss: 0.8760500550270081


Training: 100%|██████████| 357/357 [00:48<00:00,  7.32it/s]


              precision    recall  f1-score   support

        IT과학       0.83      0.86      0.85      1206
          경제       0.86      0.86      0.86      1556
          사회       0.82      0.78      0.80      1840
        생활문화       0.91      0.90      0.91      1483
          세계       0.94      0.94      0.94      1907
         스포츠       0.97      0.98      0.97      1733
          정치       0.91      0.94      0.92      1688

    accuracy                           0.89     11413
   macro avg       0.89      0.89      0.89     11413
weighted avg       0.89      0.89      0.89     11413

[0.86318408 0.8566838  0.77934783 0.90424815 0.93707394 0.97518754
 0.93838863]
VALID ACC : 0.8945938841671778, VALID LOSS : 0.3155856727771148
{'epoch': 1, 'train_loss': 0.328820114870087, 'train_acc': 0.8918547939604567, 'valid_acc': 0.8945938841671778, 'val_loss': 0.3155856727771148, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 3


Training:   0%|          | 1/1071 [00:00<07:23,  2.41it/s]

Training steps: 0 Loss: 0.3352780342102051


Training:   9%|▉         | 101/1071 [00:41<06:25,  2.52it/s]

Training steps: 100 Loss: 0.2017354667186737


Training:  19%|█▉        | 201/1071 [01:22<06:13,  2.33it/s]

Training steps: 200 Loss: 0.2389126718044281


Training:  28%|██▊       | 301/1071 [02:03<05:04,  2.53it/s]

Training steps: 300 Loss: 0.4399172365665436


Training:  37%|███▋      | 401/1071 [02:44<04:48,  2.32it/s]

Training steps: 400 Loss: 0.3117353320121765


Training:  47%|████▋     | 501/1071 [03:25<03:45,  2.52it/s]

Training steps: 500 Loss: 0.1566610336303711


Training:  56%|█████▌    | 601/1071 [04:06<03:22,  2.32it/s]

Training steps: 600 Loss: 0.3756045401096344


Training:  65%|██████▌   | 701/1071 [04:47<02:26,  2.53it/s]

Training steps: 700 Loss: 0.4459320604801178


Training:  75%|███████▍  | 801/1071 [05:28<01:56,  2.32it/s]

Training steps: 800 Loss: 0.1758463978767395


Training:  84%|████████▍ | 901/1071 [06:09<01:07,  2.52it/s]

Training steps: 900 Loss: 0.22192829847335815


Training:  93%|█████████▎| 1001/1071 [06:51<00:30,  2.32it/s]

Training steps: 1000 Loss: 0.3042011260986328


Training: 100%|██████████| 1071/1071 [07:19<00:00,  2.44it/s]

TRAIN ACC : 0.9029233959288572, TRAIN LOSS : 0.28839215199027407



Training:   1%|          | 2/357 [00:00<00:51,  6.90it/s]

Validation steps: 0 Loss: 0.36561715602874756


Training:  29%|██▊       | 102/357 [00:13<00:34,  7.37it/s]

Validation steps: 100 Loss: 0.19656574726104736


Training:  57%|█████▋    | 202/357 [00:27<00:21,  7.34it/s]

Validation steps: 200 Loss: 0.09171932190656662


Training:  85%|████████▍ | 302/357 [00:41<00:07,  7.34it/s]

Validation steps: 300 Loss: 0.8536636233329773


Training: 100%|██████████| 357/357 [00:48<00:00,  7.34it/s]


              precision    recall  f1-score   support

        IT과학       0.80      0.92      0.86      1206
          경제       0.87      0.86      0.86      1556
          사회       0.85      0.73      0.79      1840
        생활문화       0.88      0.92      0.90      1483
          세계       0.95      0.92      0.94      1907
         스포츠       0.95      0.99      0.97      1733
          정치       0.92      0.93      0.93      1688

    accuracy                           0.89     11413
   macro avg       0.89      0.90      0.89     11413
weighted avg       0.89      0.89      0.89     11413

[0.92205638 0.85796915 0.73315217 0.91975725 0.92343996 0.98961339
 0.9271327 ]
VALID ACC : 0.8938053097345132, VALID LOSS : 0.31667900739722893
{'epoch': 2, 'train_loss': 0.28839215199027407, 'train_acc': 0.9029233959288572, 'valid_acc': 0.8938053097345132, 'val_loss': 0.31667900739722893, 'learning_rate': 5e-06}
Start Training: Epoch 4


Training:   0%|          | 1/1071 [00:00<07:04,  2.52it/s]

Training steps: 0 Loss: 0.20385053753852844


Training:   9%|▉         | 101/1071 [00:41<06:24,  2.52it/s]

Training steps: 100 Loss: 0.14621008932590485


Training:  19%|█▉        | 201/1071 [01:22<06:15,  2.31it/s]

Training steps: 200 Loss: 0.16344109177589417


Training:  28%|██▊       | 301/1071 [02:03<05:05,  2.52it/s]

Training steps: 300 Loss: 0.27151888608932495


Training:  37%|███▋      | 401/1071 [02:45<04:49,  2.32it/s]

Training steps: 400 Loss: 0.31833770871162415


Training:  47%|████▋     | 501/1071 [03:26<03:45,  2.53it/s]

Training steps: 500 Loss: 0.21716046333312988


Training:  56%|█████▌    | 601/1071 [04:07<03:22,  2.32it/s]

Training steps: 600 Loss: 0.1661689281463623


Training:  65%|██████▌   | 701/1071 [04:48<02:26,  2.52it/s]

Training steps: 700 Loss: 0.3511841297149658


Training:  75%|███████▍  | 801/1071 [05:29<01:56,  2.32it/s]

Training steps: 800 Loss: 0.25523775815963745


Training:  84%|████████▍ | 901/1071 [06:10<01:07,  2.51it/s]

Training steps: 900 Loss: 0.28002965450286865


Training:  93%|█████████▎| 1001/1071 [06:51<00:30,  2.32it/s]

Training steps: 1000 Loss: 0.21434932947158813


Training: 100%|██████████| 1071/1071 [07:20<00:00,  2.43it/s]

TRAIN ACC : 0.9127946029613621, TRAIN LOSS : 0.2558169614847134



Training:   1%|          | 2/357 [00:00<00:51,  6.89it/s]

Validation steps: 0 Loss: 0.24979761242866516


Training:  29%|██▊       | 102/357 [00:13<00:34,  7.32it/s]

Validation steps: 100 Loss: 0.21344256401062012


Training:  57%|█████▋    | 202/357 [00:27<00:21,  7.29it/s]

Validation steps: 200 Loss: 0.08401530236005783


Training:  85%|████████▍ | 302/357 [00:41<00:07,  7.30it/s]

Validation steps: 300 Loss: 0.948752760887146


Training: 100%|██████████| 357/357 [00:48<00:00,  7.32it/s]


              precision    recall  f1-score   support

        IT과학       0.82      0.89      0.85      1206
          경제       0.83      0.88      0.86      1556
          사회       0.86      0.74      0.79      1840
        생활문화       0.90      0.91      0.90      1483
          세계       0.93      0.93      0.93      1907
         스포츠       0.96      0.98      0.97      1733
          정치       0.92      0.92      0.92      1688

    accuracy                           0.89     11413
   macro avg       0.89      0.89      0.89     11413
weighted avg       0.89      0.89      0.89     11413

[0.88640133 0.88367609 0.74076087 0.91031693 0.92815941 0.98384305
 0.91943128]
VALID ACC : 0.8923157802505914, VALID LOSS : 0.3185067331308828
{'epoch': 3, 'train_loss': 0.2558169614847134, 'train_acc': 0.9127946029613621, 'valid_acc': 0.8923157802505914, 'val_loss': 0.3185067331308828, 'learning_rate': 5e-06}
Start Training: Epoch 5


Training:   0%|          | 1/1071 [00:00<07:01,  2.54it/s]

Training steps: 0 Loss: 0.35974395275115967


Training:   9%|▉         | 101/1071 [00:41<06:25,  2.52it/s]

Training steps: 100 Loss: 0.17491959035396576


Training:  19%|█▉        | 201/1071 [01:22<06:14,  2.32it/s]

Training steps: 200 Loss: 0.183623805642128


Training:  28%|██▊       | 301/1071 [02:03<05:04,  2.53it/s]

Training steps: 300 Loss: 0.1092018261551857


Training:  37%|███▋      | 401/1071 [02:44<04:49,  2.32it/s]

Training steps: 400 Loss: 0.26780226826667786


Training:  47%|████▋     | 501/1071 [03:25<03:46,  2.52it/s]

Training steps: 500 Loss: 0.7392925024032593


Training:  56%|█████▌    | 601/1071 [04:07<03:22,  2.32it/s]

Training steps: 600 Loss: 0.3607128858566284


Training:  65%|██████▌   | 701/1071 [04:48<02:26,  2.52it/s]

Training steps: 700 Loss: 0.2551710307598114


Training:  75%|███████▍  | 801/1071 [05:29<01:56,  2.31it/s]

Training steps: 800 Loss: 0.5372669100761414


Training:  84%|████████▍ | 901/1071 [06:10<01:07,  2.52it/s]

Training steps: 900 Loss: 0.15297706425189972


Training:  93%|█████████▎| 1001/1071 [06:51<00:30,  2.32it/s]

Training steps: 1000 Loss: 0.2173815220594406


Training: 100%|██████████| 1071/1071 [07:20<00:00,  2.43it/s]

TRAIN ACC : 0.9240092287024327, TRAIN LOSS : 0.2285445029695147



Training:   1%|          | 2/357 [00:00<00:51,  6.95it/s]

Validation steps: 0 Loss: 0.2539077401161194


Training:  29%|██▊       | 102/357 [00:13<00:34,  7.31it/s]

Validation steps: 100 Loss: 0.1992075890302658


Training:  57%|█████▋    | 202/357 [00:27<00:21,  7.33it/s]

Validation steps: 200 Loss: 0.07870160788297653


Training:  85%|████████▍ | 302/357 [00:41<00:07,  7.33it/s]

Validation steps: 300 Loss: 0.9122763276100159


Training: 100%|██████████| 357/357 [00:48<00:00,  7.32it/s]


              precision    recall  f1-score   support

        IT과학       0.82      0.88      0.85      1206
          경제       0.87      0.85      0.86      1556
          사회       0.83      0.78      0.80      1840
        생활문화       0.90      0.91      0.90      1483
          세계       0.95      0.92      0.93      1907
         스포츠       0.96      0.98      0.97      1733
          정치       0.90      0.94      0.92      1688

    accuracy                           0.89     11413
   macro avg       0.89      0.89      0.89     11413
weighted avg       0.89      0.89      0.89     11413

[0.88391376 0.8496144  0.77826087 0.90559676 0.91767174 0.98038084
 0.9443128 ]
VALID ACC : 0.8942434066415491, VALID LOSS : 0.3269278191614385
{'epoch': 4, 'train_loss': 0.2285445029695147, 'train_acc': 0.9240092287024327, 'valid_acc': 0.8942434066415491, 'val_loss': 0.3269278191614385, 'learning_rate': 5e-06}


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.weight', 'cla

Start Training: Epoch 1


Training:   0%|          | 1/1071 [00:00<07:12,  2.47it/s]

Training steps: 0 Loss: 1.9244520664215088


Training:   9%|▉         | 101/1071 [00:41<06:25,  2.52it/s]

Training steps: 100 Loss: 1.9053387641906738


Training:  19%|█▉        | 201/1071 [01:22<06:13,  2.33it/s]

Training steps: 200 Loss: 1.4485756158828735


Training:  28%|██▊       | 301/1071 [02:03<05:05,  2.52it/s]

Training steps: 300 Loss: 0.9036505222320557


Training:  37%|███▋      | 401/1071 [02:44<04:49,  2.32it/s]

Training steps: 400 Loss: 0.34682077169418335


Training:  47%|████▋     | 501/1071 [03:25<03:46,  2.52it/s]

Training steps: 500 Loss: 0.1988418847322464


Training:  56%|█████▌    | 601/1071 [04:07<03:22,  2.32it/s]

Training steps: 600 Loss: 0.4283638596534729


Training:  65%|██████▌   | 701/1071 [04:48<02:26,  2.52it/s]

Training steps: 700 Loss: 0.3648613393306732


Training:  75%|███████▍  | 801/1071 [05:29<01:56,  2.31it/s]

Training steps: 800 Loss: 0.13404467701911926


Training:  84%|████████▍ | 901/1071 [06:10<01:07,  2.52it/s]

Training steps: 900 Loss: 0.34546637535095215


Training:  93%|█████████▎| 1001/1071 [06:51<00:30,  2.32it/s]

Training steps: 1000 Loss: 0.16307270526885986


Training: 100%|██████████| 1071/1071 [07:20<00:00,  2.43it/s]

TRAIN ACC : 0.75497210945942, TRAIN LOSS : 0.7773755312967701



Training:   1%|          | 2/357 [00:00<00:51,  6.88it/s]

Validation steps: 0 Loss: 0.03050794079899788


Training:  29%|██▊       | 102/357 [00:13<00:34,  7.32it/s]

Validation steps: 100 Loss: 0.14788316190242767


Training:  57%|█████▋    | 202/357 [00:27<00:21,  7.29it/s]

Validation steps: 200 Loss: 0.43539151549339294


Training:  85%|████████▍ | 302/357 [00:41<00:07,  7.31it/s]

Validation steps: 300 Loss: 0.4257816970348358


Training: 100%|██████████| 357/357 [00:48<00:00,  7.32it/s]


              precision    recall  f1-score   support

        IT과학       0.83      0.85      0.84      1206
          경제       0.88      0.80      0.84      1555
          사회       0.77      0.78      0.78      1840
        생활문화       0.88      0.90      0.89      1484
          세계       0.92      0.93      0.93      1907
         스포츠       0.96      0.98      0.97      1733
          정치       0.92      0.91      0.92      1688

    accuracy                           0.88     11413
   macro avg       0.88      0.88      0.88     11413
weighted avg       0.88      0.88      0.88     11413

[0.84991708 0.79678457 0.78043478 0.90296496 0.9349764  0.97980381
 0.91291469]
VALID ACC : 0.881626215718917, VALID LOSS : 0.35952748356162667
{'epoch': 0, 'train_loss': 0.7773755312967701, 'train_acc': 0.75497210945942, 'valid_acc': 0.881626215718917, 'val_loss': 0.35952748356162667, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 2


Training:   0%|          | 1/1071 [00:00<07:20,  2.43it/s]

Training steps: 0 Loss: 0.3674084544181824


Training:   9%|▉         | 101/1071 [00:41<06:24,  2.52it/s]

Training steps: 100 Loss: 0.1969630867242813


Training:  19%|█▉        | 201/1071 [01:22<06:14,  2.32it/s]

Training steps: 200 Loss: 0.2913501560688019


Training:  28%|██▊       | 301/1071 [02:03<05:05,  2.52it/s]

Training steps: 300 Loss: 0.4512949287891388


Training:  37%|███▋      | 401/1071 [02:44<04:48,  2.33it/s]

Training steps: 400 Loss: 0.516786515712738


Training:  47%|████▋     | 501/1071 [03:25<03:46,  2.52it/s]

Training steps: 500 Loss: 0.38992249965667725


Training:  56%|█████▌    | 601/1071 [04:06<03:23,  2.31it/s]

Training steps: 600 Loss: 0.4131866693496704


Training:  65%|██████▌   | 701/1071 [04:47<02:26,  2.52it/s]

Training steps: 700 Loss: 0.17816787958145142


Training:  75%|███████▍  | 801/1071 [05:29<01:56,  2.31it/s]

Training steps: 800 Loss: 0.32755306363105774


Training:  84%|████████▍ | 901/1071 [06:10<01:07,  2.53it/s]

Training steps: 900 Loss: 0.3809996545314789


Training:  93%|█████████▎| 1001/1071 [06:51<00:30,  2.32it/s]

Training steps: 1000 Loss: 0.5812877416610718


Training: 100%|██████████| 1071/1071 [07:19<00:00,  2.43it/s]

TRAIN ACC : 0.892847755614614, TRAIN LOSS : 0.32965426054808383



Training:   1%|          | 2/357 [00:00<00:51,  6.95it/s]

Validation steps: 0 Loss: 0.02295546419918537


Training:  29%|██▊       | 102/357 [00:13<00:34,  7.32it/s]

Validation steps: 100 Loss: 0.17245839536190033


Training:  57%|█████▋    | 202/357 [00:27<00:21,  7.29it/s]

Validation steps: 200 Loss: 0.3738507628440857


Training:  85%|████████▍ | 302/357 [00:41<00:07,  7.30it/s]

Validation steps: 300 Loss: 0.43536731600761414


Training: 100%|██████████| 357/357 [00:48<00:00,  7.32it/s]


              precision    recall  f1-score   support

        IT과학       0.80      0.92      0.85      1206
          경제       0.88      0.82      0.85      1555
          사회       0.82      0.76      0.79      1840
        생활문화       0.93      0.88      0.90      1484
          세계       0.92      0.94      0.93      1907
         스포츠       0.96      0.98      0.97      1733
          정치       0.91      0.93      0.92      1688

    accuracy                           0.89     11413
   macro avg       0.89      0.89      0.89     11413
weighted avg       0.89      0.89      0.89     11413

[0.92205638 0.81736334 0.75869565 0.87938005 0.93812271 0.98384305
 0.93187204]
VALID ACC : 0.8894243406641549, VALID LOSS : 0.3313552286601117
{'epoch': 1, 'train_loss': 0.32965426054808383, 'train_acc': 0.892847755614614, 'valid_acc': 0.8894243406641549, 'val_loss': 0.3313552286601117, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 3


Training:   0%|          | 1/1071 [00:00<07:30,  2.37it/s]

Training steps: 0 Loss: 0.3435330390930176


Training:   9%|▉         | 101/1071 [00:41<06:25,  2.52it/s]

Training steps: 100 Loss: 0.14612993597984314


Training:  19%|█▉        | 201/1071 [01:22<06:15,  2.32it/s]

Training steps: 200 Loss: 0.20479238033294678


Training:  28%|██▊       | 301/1071 [02:03<05:05,  2.52it/s]

Training steps: 300 Loss: 0.15150968730449677


Training:  37%|███▋      | 401/1071 [02:44<04:49,  2.32it/s]

Training steps: 400 Loss: 0.10342796891927719


Training:  47%|████▋     | 501/1071 [03:25<03:46,  2.51it/s]

Training steps: 500 Loss: 0.07228174060583115


Training:  56%|█████▌    | 601/1071 [04:07<03:21,  2.33it/s]

Training steps: 600 Loss: 0.5348531603813171


Training:  65%|██████▌   | 701/1071 [04:48<02:27,  2.52it/s]

Training steps: 700 Loss: 0.6177611351013184


Training:  75%|███████▍  | 801/1071 [05:29<01:56,  2.32it/s]

Training steps: 800 Loss: 0.2262924760580063


Training:  84%|████████▍ | 901/1071 [06:10<01:07,  2.52it/s]

Training steps: 900 Loss: 0.16509589552879333


Training:  93%|█████████▎| 1001/1071 [06:51<00:30,  2.31it/s]

Training steps: 1000 Loss: 0.18604160845279694


Training: 100%|██████████| 1071/1071 [07:20<00:00,  2.43it/s]

TRAIN ACC : 0.9042376098828889, TRAIN LOSS : 0.2828648079918866



Training:   1%|          | 2/357 [00:00<00:51,  6.93it/s]

Validation steps: 0 Loss: 0.017895320430397987


Training:  29%|██▊       | 102/357 [00:13<00:35,  7.28it/s]

Validation steps: 100 Loss: 0.13913635909557343


Training:  57%|█████▋    | 202/357 [00:27<00:21,  7.28it/s]

Validation steps: 200 Loss: 0.4041043519973755


Training:  85%|████████▍ | 302/357 [00:41<00:07,  7.35it/s]

Validation steps: 300 Loss: 0.37089574337005615


Training: 100%|██████████| 357/357 [00:48<00:00,  7.33it/s]


              precision    recall  f1-score   support

        IT과학       0.83      0.87      0.85      1206
          경제       0.87      0.82      0.85      1555
          사회       0.80      0.79      0.79      1840
        생활문화       0.91      0.90      0.90      1484
          세계       0.92      0.94      0.93      1907
         스포츠       0.95      0.99      0.97      1733
          정치       0.92      0.92      0.92      1688

    accuracy                           0.89     11413
   macro avg       0.89      0.89      0.89     11413
weighted avg       0.89      0.89      0.89     11413

[0.87396352 0.82122186 0.78804348 0.89959569 0.93550079 0.98672822
 0.91883886]
VALID ACC : 0.8903005344782265, VALID LOSS : 0.326231292180237
{'epoch': 2, 'train_loss': 0.2828648079918866, 'train_acc': 0.9042376098828889, 'valid_acc': 0.8903005344782265, 'val_loss': 0.326231292180237, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 4


Training:   0%|          | 1/1071 [00:00<07:23,  2.41it/s]

Training steps: 0 Loss: 0.3409435749053955


Training:   9%|▉         | 101/1071 [00:41<06:25,  2.52it/s]

Training steps: 100 Loss: 0.3779410123825073


Training:  19%|█▉        | 201/1071 [01:22<06:13,  2.33it/s]

Training steps: 200 Loss: 0.14322510361671448


Training:  28%|██▊       | 301/1071 [02:03<05:06,  2.51it/s]

Training steps: 300 Loss: 0.3825286328792572


Training:  37%|███▋      | 401/1071 [02:44<04:49,  2.31it/s]

Training steps: 400 Loss: 0.2025562971830368


Training:  47%|████▋     | 501/1071 [03:25<03:46,  2.52it/s]

Training steps: 500 Loss: 0.43880319595336914


Training:  56%|█████▌    | 601/1071 [04:07<03:22,  2.32it/s]

Training steps: 600 Loss: 0.4023530185222626


Training:  65%|██████▌   | 701/1071 [04:48<02:26,  2.52it/s]

Training steps: 700 Loss: 0.22397255897521973


Training:  75%|███████▍  | 801/1071 [05:29<01:56,  2.32it/s]

Training steps: 800 Loss: 0.2634330093860626


Training:  84%|████████▍ | 901/1071 [06:10<01:07,  2.51it/s]

Training steps: 900 Loss: 0.3072729706764221


Training:  93%|█████████▎| 1001/1071 [06:51<00:30,  2.32it/s]

Training steps: 1000 Loss: 0.3383789360523224


Training: 100%|██████████| 1071/1071 [07:20<00:00,  2.43it/s]

TRAIN ACC : 0.9147513215151427, TRAIN LOSS : 0.2522619930135102



Training:   1%|          | 2/357 [00:00<00:50,  6.97it/s]

Validation steps: 0 Loss: 0.018406962975859642


Training:  29%|██▊       | 102/357 [00:13<00:34,  7.30it/s]

Validation steps: 100 Loss: 0.16137313842773438


Training:  57%|█████▋    | 202/357 [00:27<00:21,  7.27it/s]

Validation steps: 200 Loss: 0.42876356840133667


Training:  85%|████████▍ | 302/357 [00:41<00:07,  7.26it/s]

Validation steps: 300 Loss: 0.4207557439804077


Training: 100%|██████████| 357/357 [00:48<00:00,  7.31it/s]


              precision    recall  f1-score   support

        IT과학       0.81      0.89      0.85      1206
          경제       0.89      0.80      0.84      1555
          사회       0.81      0.76      0.78      1840
        생활문화       0.91      0.89      0.90      1484
          세계       0.92      0.94      0.93      1907
         스포츠       0.96      0.98      0.97      1733
          정치       0.88      0.95      0.91      1688

    accuracy                           0.89     11413
   macro avg       0.88      0.89      0.88     11413
weighted avg       0.89      0.89      0.89     11413

[0.8880597  0.79614148 0.76032609 0.89150943 0.94126901 0.98095788
 0.94727488]
VALID ACC : 0.8871462367475685, VALID LOSS : 0.3459591898827159
{'epoch': 3, 'train_loss': 0.2522619930135102, 'train_acc': 0.9147513215151427, 'valid_acc': 0.8871462367475685, 'val_loss': 0.3459591898827159, 'learning_rate': 5e-06}
Start Training: Epoch 5


Training:   0%|          | 1/1071 [00:00<07:04,  2.52it/s]

Training steps: 0 Loss: 0.3437352478504181


Training:   9%|▉         | 101/1071 [00:41<06:25,  2.52it/s]

Training steps: 100 Loss: 0.2367408126592636


Training:  19%|█▉        | 201/1071 [01:22<06:16,  2.31it/s]

Training steps: 200 Loss: 0.24518553912639618


Training:  28%|██▊       | 301/1071 [02:03<05:05,  2.52it/s]

Training steps: 300 Loss: 0.25653916597366333


Training:  37%|███▋      | 401/1071 [02:44<04:48,  2.32it/s]

Training steps: 400 Loss: 0.22038498520851135


Training:  47%|████▋     | 501/1071 [03:25<03:46,  2.52it/s]

Training steps: 500 Loss: 0.2622781991958618


Training:  56%|█████▌    | 601/1071 [04:07<03:23,  2.30it/s]

Training steps: 600 Loss: 0.15587204694747925


Training:  65%|██████▌   | 701/1071 [04:48<02:26,  2.52it/s]

Training steps: 700 Loss: 0.12769463658332825


Training:  75%|███████▍  | 801/1071 [05:29<01:56,  2.32it/s]

Training steps: 800 Loss: 0.2012621909379959


Training:  84%|████████▍ | 901/1071 [06:10<01:07,  2.51it/s]

Training steps: 900 Loss: 0.14352315664291382


Training:  93%|█████████▎| 1001/1071 [06:51<00:30,  2.32it/s]

Training steps: 1000 Loss: 0.26258981227874756


Training: 100%|██████████| 1071/1071 [07:20<00:00,  2.43it/s]

TRAIN ACC : 0.9235127478753541, TRAIN LOSS : 0.22718843676142889



Training:   1%|          | 2/357 [00:00<00:51,  6.94it/s]

Validation steps: 0 Loss: 0.021124575287103653


Training:  29%|██▊       | 102/357 [00:13<00:34,  7.35it/s]

Validation steps: 100 Loss: 0.14464643597602844


Training:  57%|█████▋    | 202/357 [00:27<00:21,  7.31it/s]

Validation steps: 200 Loss: 0.3866425156593323


Training:  85%|████████▍ | 302/357 [00:41<00:07,  7.28it/s]

Validation steps: 300 Loss: 0.445014089345932


Training: 100%|██████████| 357/357 [00:48<00:00,  7.31it/s]


              precision    recall  f1-score   support

        IT과학       0.82      0.90      0.86      1206
          경제       0.87      0.83      0.85      1555
          사회       0.81      0.78      0.79      1840
        생활문화       0.91      0.89      0.90      1484
          세계       0.92      0.94      0.93      1907
         스포츠       0.96      0.98      0.97      1733
          정치       0.92      0.91      0.92      1688

    accuracy                           0.89     11413
   macro avg       0.89      0.89      0.89     11413
weighted avg       0.89      0.89      0.89     11413

[0.9013267  0.83022508 0.77826087 0.89083558 0.93602517 0.97980381
 0.91113744]
VALID ACC : 0.8895995794269692, VALID LOSS : 0.34068066423826693
{'epoch': 4, 'train_loss': 0.22718843676142889, 'train_acc': 0.9235127478753541, 'valid_acc': 0.8895995794269692, 'val_loss': 0.34068066423826693, 'learning_rate': 5e-06}
************************************************** auc_avg ****************************

In [10]:
torch.cuda.empty_cache()

## Inference

In [15]:
def inference_main():
    args = parse_args()
    args.model_name = "temp"
    preprocess = Preprocess(args)
    preprocess.load_test_data()
    test_data = preprocess.test_data

    print(f"size of test data : {len(test_data)}")
    torch.cuda.empty_cache()
    # del model
    inference(args, test_data)

inference_main()

size of test data : 9131
Loading Model from: /content/drive/MyDrive/KLUE_TC/models/temp_1.pt


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.weight', 'cla

Loading Model from: /content/drive/MyDrive/KLUE_TC/models/temp_1.pt ...Finished.


Inferencing: 100%|██████████| 286/286 [00:38<00:00,  7.34it/s]


writing prediction : /content/drive/MyDrive/KLUE_TC/output/0730/output_1.csv
Loading Model from: /content/drive/MyDrive/KLUE_TC/models/temp_2.pt


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.weight', 'cla

Loading Model from: /content/drive/MyDrive/KLUE_TC/models/temp_2.pt ...Finished.


Inferencing: 100%|██████████| 286/286 [00:38<00:00,  7.33it/s]


writing prediction : /content/drive/MyDrive/KLUE_TC/output/0730/output_2.csv
Loading Model from: /content/drive/MyDrive/KLUE_TC/models/temp_3.pt


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.weight', 'cla

Loading Model from: /content/drive/MyDrive/KLUE_TC/models/temp_3.pt ...Finished.


Inferencing: 100%|██████████| 286/286 [00:39<00:00,  7.33it/s]


writing prediction : /content/drive/MyDrive/KLUE_TC/output/0730/output_3.csv
Loading Model from: /content/drive/MyDrive/KLUE_TC/models/temp_4.pt


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.weight', 'cla

Loading Model from: /content/drive/MyDrive/KLUE_TC/models/temp_4.pt ...Finished.


Inferencing: 100%|██████████| 286/286 [00:39<00:00,  7.33it/s]


writing prediction : /content/drive/MyDrive/KLUE_TC/output/0730/output_4.csv
writing prediction : /content/drive/MyDrive/KLUE_TC/output/0730/output_softvote.csv
