In [1]:
!nvidia-smi

Thu Jul 29 18:19:52 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.42.01    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   34C    P0    24W / 300W |      0MiB / 16160MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

## Directory 설정, 구글 드라이브 import

In [2]:
cur_dir = '/content/drive/MyDrive/KLUE_TC'

## Utils

In [3]:
!pip install adamp
!pip install git+https://github.com/GY-Jeong/transformers

Collecting adamp
  Downloading adamp-0.3.0.tar.gz (5.1 kB)
Building wheels for collected packages: adamp
  Building wheel for adamp (setup.py) ... [?25l[?25hdone
  Created wheel for adamp: filename=adamp-0.3.0-py3-none-any.whl size=5998 sha256=20eeea891c2dae64229f14d1b0cda458fa665d85ab04dd6a1c71af21b25443b9
  Stored in directory: /root/.cache/pip/wheels/bb/95/21/ced2d2cb9944e3a72e58fece7958973eed3fd8d0aeb6e2e450
Successfully built adamp
Installing collected packages: adamp
Successfully installed adamp-0.3.0
Collecting git+https://github.com/GY-Jeong/transformers
  Cloning https://github.com/GY-Jeong/transformers to /tmp/pip-req-build-x1ri9wiz
  Running command git clone -q https://github.com/GY-Jeong/transformers /tmp/pip-req-build-x1ri9wiz
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
    Preparing wheel metadata ... [?25l[?25hdone
Collecting sacremoses
  Downloading sacremoses-0.0.45-py3-none-any.whl (895 kB)
[K  

In [4]:
import os
import random
import torch
import numpy as np
from torch import nn

from torch.optim import Adam, AdamW, SGD
from adamp import AdamP
from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau, CosineAnnealingLR, ExponentialLR, CosineAnnealingWarmRestarts

from transformers import AutoConfig, AutoTokenizer, AutoModelForSequenceClassification
from transformers import get_linear_schedule_with_warmup


def set_seeds(seed=42):
    # 랜덤 시드를 설정하여 매 코드를 실행할 때마다 동일한 결과를 얻게 합니다.
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.cuda.manual_seed_all(seed)  # if use multi-GPU
    torch.backends.cudnn.benchmark = False


def save_checkpoint(state, model_dir, model_filename):
    print('saving model ...')
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    # torch.save(state, os.path.join(model_dir, model_filename))
    torch.save(state, os.path.join(model_filename))


def get_optimizer(model, args):
    if args.optimizer == 'adam':
        optimizer = Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    elif args.optimizer == 'adamW':
        optimizer = AdamW(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    elif args.optimizer == 'adamP':
        optimizer = AdamP(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    elif args.optimizer == 'SGD':
        optimizer = SGD(model.parameters(), lr=args.lr, momentum=args.momentum)

    # 모든 parameter들의 grad값을 0으로 초기화
    optimizer.zero_grad()

    return optimizer


def get_scheduler(optimizer, args):
    if args.scheduler == 'plateau':
        scheduler = ReduceLROnPlateau(optimizer, patience=args.plateau_patience, factor=args.plateau_factor, mode='max',
                                      verbose=True)
    elif args.scheduler == 'linear_warmup':
        scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=args.warmup_steps,
                                                    num_training_steps=args.total_steps)
    elif args.scheduler == 'step_lr':
        scheduler = StepLR(optimizer, step_size=args.step_size, gamma=args.gamma)
    elif args.scheduler == 'exp_lr':
        scheduler = ExponentialLR(optimizer, gamma=args.gamma)
    elif args.scheduler == 'cosine_annealing':
        scheduler = CosineAnnealingLR(optimizer, T_max=args.t_max, eta_min=args.eta_min)
    elif args.scheduler == 'cosine_annealing_warmstart':
        scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=args.T_0, T_mult=args.T_mult, eta_min=args.eta_min,
                                                last_epoch=-1)

    return scheduler


def update_params(loss, model, optimizer, batch_idx, max_len, args):
    if args.gradient_accumulation:
        # normalize loss to account for batch accumulation
        loss = loss / args.accum_iter 

        # backward pass
        loss.backward()

        # weights update
        if ((batch_idx + 1) % args.accum_iter == 0) or (batch_idx + 1 == max_len):
            torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip_grad)
            optimizer.step()
            optimizer.zero_grad()
    else:
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip_grad)
        optimizer.step()
        optimizer.zero_grad()


def load_tokenizer(args):
    tokenizer = AutoTokenizer.from_pretrained(
        args.tokenizer_name
        if args.tokenizer_name
        else args.model_name_or_path,
        use_fast=True,
    )

    return tokenizer


def load_model(args, model_name=None):
    if not model_name:
        model_name = args.model_name
    model_path = os.path.join(args.model_dir, model_name)
    print("Loading Model from:", model_path)
    # load_state = torch.load(model_path)
    load_state = torch.load(model_name)

    # Load pretrained model and tokenizer
    config = AutoConfig.from_pretrained(
        args.config_name
        if args.config_name
        else args.model_name_or_path,
    )

    config.num_labels = 7

    model = AutoModelForSequenceClassification.from_pretrained(
        args.model_name_or_path,
        from_tf=bool(".ckpt" in args.model_name_or_path),
        config=config,
    ).to(args.device)

    # model.classifier = nn.Sequential(
    #     nn.Dropout(p=0.3, inplace=False),
    #     nn.Linear(1024, 1024),
    #     nn.Tanh(),
    #     nn.Dropout(p=0.3, inplace=False),
    #     nn.Linear(1024, 512),
    #     nn.Tanh(),
    #     nn.Dropout(p=0.3, inplace=False),
    #     nn.Linear(512, 7),
    # )

    model.load_state_dict(load_state['state_dict'], strict=True)

    # model = model.to(args.device)

    print("Loading Model from:", model_path, "...Finished.")

    return model


def get_model(args):
    # Load pretrained model and tokenizer
    config = AutoConfig.from_pretrained(
        args.config_name
        if args.config_name
        else args.model_name_or_path,
    )

    config.num_labels = 7
    model = AutoModelForSequenceClassification.from_pretrained(
        args.model_name_or_path,
        from_tf=bool(".ckpt" in args.model_name_or_path),
        config=config,
    ).to(args.device)

    # model.classifier = nn.Sequential(
    #     nn.Dropout(p=0.3, inplace=False),
    #     nn.Linear(1024, 1024),
    #     nn.Tanh(),
    #     nn.Dropout(p=0.3, inplace=False),
    #     nn.Linear(1024, 512),
    #     nn.Tanh(),
    #     nn.Dropout(p=0.3, inplace=False),
    #     nn.Linear(512, 7),
    # )

    # print(model)
    #model.classifier.dropout = nn.Dropout(p=0.3, inplace = False)

    model = model.to(args.device)

    return model


def get_loaders(args, train, valid, is_inference=False):
    pin_memory = True
    train_loader, valid_loader = None, None

    if is_inference:
        test_dataset = YNAT_dataset(args, valid, is_inference)
        test_loader = torch.utils.data.DataLoader(test_dataset, num_workers=args.num_workers, shuffle=False,
                                                  batch_size=args.batch_size, pin_memory=pin_memory)
        return test_loader

    if train is not None:
        train_dataset = YNAT_dataset(args, train, is_inference)
        train_loader = torch.utils.data.DataLoader(train_dataset, num_workers=args.num_workers, shuffle=True,
                                                   batch_size=args.batch_size, pin_memory=pin_memory)
    if valid is not None:
        valid_dataset = YNAT_dataset(args, valid, is_inference)
        valid_loader = torch.utils.data.DataLoader(valid_dataset, num_workers=args.num_workers, shuffle=False,
                                                   batch_size=args.batch_size, pin_memory=pin_memory)

    return train_loader, valid_loader


# loss계산하고 parameter update!
def compute_loss(preds, targets, args):
    """
    Args :
        preds   : (batch_size, max_seq_len)
        targets : (batch_size, max_seq_len)
    """
    # print(preds, targets)
    loss = get_criterion(preds, targets, args)
    # 마지막 시퀀스에 대한 값만 loss 계산
    # loss = loss[:, -1]
    # loss = torch.mean(loss)
    return loss


def get_criterion(pred, target, args):
    if args.criterion == 'BCE':
        loss = nn.BCELoss(reduction="none")
    elif args.criterion == "BCELogit":
        loss = nn.BCEWithLogitsLoss(reduction="none")
    elif args.criterion == "MSE":
        loss = nn.MSELoss(reduction="none")
    elif args.criterion == "L1":
        loss = nn.L1Loss(reduction="none")
    elif args.criterion == "CE":
        #weights = [1,1,2,1,1,1,1] #as class distribution
        #class_weights = torch.FloatTensor(weights).cuda()
        #loss = nn.CrossEntropyLoss(weight=class_weights)
        loss = nn.CrossEntropyLoss()
    # NLL, CrossEntropy not available
    return loss(pred, target)


def make_vocab(args):
    print("============ READ VOCABS ============")
    vocabs = []
    for i in range(7):
        vocab = set()
        f = open(args.vocab_dir + str(i) + '.txt', 'r')
        while True:
            line = f.readline()
            if not line: break
            vocab.add(line[:-1])
        f.close()
        vocabs.append(vocab)
        print(f"category {i} reading end, size : {len(vocab)}")
    return vocabs

## Dataloader

In [5]:
import os
import torch
import pandas as pd


class Preprocess:
    def __init__(self, args):
        self.args = args
        self.train_data = None
        self.test_data = None

    def load_data(self, file_name):
        csv_file_name = os.path.join(self.args.data_dir, file_name)
        df = pd.read_csv(csv_file_name)
        #del df['Unnamed: 0']
        return df.values

    def load_train_data(self):
        self.train_data = self.load_data('train_data.csv')

    def load_test_data(self):
        self.test_data = self.load_data('test_data.csv')


class YNAT_dataset(torch.utils.data.Dataset):
    def __init__(self, args, data, is_inference):
        self.args = args
        self.data = data
        self.is_inference = is_inference

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        row = self.data[index]
        element = [row[i] for i in range(len(row))]
        #print(type(row))
        # np.array -> torch.tensor 형변환
        #for i, col in enumerate(row):
        #    if type(col) == str:
        #        pass
        #    else:
        #        row[i] = torch.tensor(col)

        return element



## Trainer

In [6]:
from sklearn.metrics import accuracy_score
from torch.nn.functional import one_hot
from tqdm import tqdm
from sklearn import metrics


def run(args, tokenizer, train_data, valid_data, cv_count):
    train_loader, valid_loader = get_loaders(args, train_data, valid_data)

    # only when using warmup scheduler
    # args.total_steps = int(len(train_loader.dataset) / args.batch_size) * args.n_epochs
    # args.warmup_steps = int(args.total_steps * args.warmup_ratio)

    model = get_model(args)
    optimizer = get_optimizer(model, args)
    scheduler = get_scheduler(optimizer, args)

    best_acc = -1
    early_stopping_counter = 0
    for epoch in range(args.n_epochs):

        print(f"Start Training: Epoch {epoch + 1}")

        if not args.cv_strategy:
            model_name = args.run_name
        else:
            model_name = f"{args.run_name.split('.pt')[0]}_{cv_count}.pt"

        # TRAIN
        train_acc, train_loss = train(args, model, tokenizer, train_loader, optimizer)

        # VALID
        acc, val_loss = validate(args, model, tokenizer, valid_loader)

        # TODO: model save or early stopping
        if args.scheduler == 'plateau':
            last_lr = optimizer.param_groups[0]['lr']
        else:
            last_lr = scheduler.get_last_lr()[0]

        print({"epoch": epoch, "train_loss": train_loss, "train_acc": train_acc,
                   "valid_acc": acc, "val_loss": val_loss, "learning_rate": last_lr})

        if acc > best_acc:
            best_acc = acc
            # torch.nn.DataParallel로 감싸진 경우 원래의 model을 가져옵니다.
            model_to_save = model.module if hasattr(model, 'module') else model
            save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': model_to_save.state_dict(),
            },
                args.model_dir, model_name,
            )
            early_stopping_counter = 0
        else:
            early_stopping_counter += 1
            if early_stopping_counter >= args.patience:
                print(f'EarlyStopping counter: {early_stopping_counter} out of {args.patience}')
                break

        # scheduler
        if args.scheduler == 'plateau':
            scheduler.step(best_acc)
        else:
            scheduler.step()

    return best_acc


def inference(args, test_data):
    # ckpt_file_names = []
    all_fold_preds = []
    all_fold_argmax_preds = []

    if not args.cv_strategy:
        ckpt_file_names = [args.model_name]
    else:
        ckpt_file_names = [f"{args.model_name.split('.pt')[0]}_{i + 1}.pt" for i in range(args.fold_num)]

    tokenizer = load_tokenizer(args)

    for fold_idx, ckpt in enumerate(ckpt_file_names):
        model = load_model(args, ckpt)
        model.eval()
        test_loader = get_loaders(args, None, test_data, True)

        total_preds = []
        total_argmax_preds = []
        total_ids = []

        for step, batch in tqdm(enumerate(test_loader), desc='Inferencing', total=len(test_loader)):
            idx, text = batch
            tokenized_examples = tokenizer(
                text,
                max_length=args.max_seq_len,
                padding="max_length",
                return_tensors="pt"
            ).to(args.device)

            token_label_type_ids = []
            for row in tokenized_examples['input_ids']:
                temp = []
                row = tokenizer.convert_ids_to_tokens(row)
                for token in row:
                    for i, element in enumerate(args.vocab):
                        if token in element:
                            temp.append(i+1)
                            break
                    else:
                        temp.append(0)
                #print(temp)
                token_label_type_ids.append(temp)

            token_label_type_ids = torch.tensor(token_label_type_ids).to(args.device)
            tokenized_examples['token_label_type_ids'] = token_label_type_ids

            preds = model(**tokenized_examples)
            logits = preds['logits']
            #logits = logits[:,0,:]
            argmax_logits = torch.argmax(logits, dim=1)

            if args.device == 'cuda':
                argmax_preds = argmax_logits.to('cpu').detach().numpy()
                preds = logits.to('cpu').detach().numpy()
                token_label_type_ids = token_label_type_ids.to('cpu').detach().numpy()
            else:  # cpu
                argmax_preds = argmax_logits.detach().numpy()
                preds = logits.detach().numpy()
                token_label_type_ids = token_label_type_ids.detach().numpy()

            total_preds += list(preds)
            total_argmax_preds += list(argmax_preds)
            total_ids += list(idx)

        all_fold_preds.append(total_preds)
        all_fold_argmax_preds.append(total_argmax_preds)

        output_file_name = "output.csv" if not args.cv_strategy else f"output_{fold_idx + 1}.csv"
        write_path = os.path.join(args.output_dir, output_file_name)
        if not os.path.exists(args.output_dir):
            os.makedirs(args.output_dir)
        with open(write_path, 'w', encoding='utf8') as w:
            print("writing prediction : {}".format(write_path))
            w.write("index,topic_idx\n")
            for index, p in zip(total_ids, total_argmax_preds):
                w.write('{},{}\n'.format(index, p))

    if len(all_fold_preds) > 1:
        # Soft voting ensemble
        votes = np.sum(all_fold_preds, axis=0)
        votes = np.argmax(votes, axis=1)

        write_path = os.path.join(args.output_dir, "output_softvote.csv")
        #write_path = "output_softvote.csv"
        if not os.path.exists(args.output_dir):
            os.makedirs(args.output_dir)
        with open(write_path, 'w', encoding='utf8') as w:
            print("writing prediction : {}".format(write_path))
            w.write("index,topic_idx\n")
            for id, p in zip(total_ids, votes):
                w.write('{},{}\n'.format(id, p))


def train(args, model, tokenizer, train_loader, optimizer):
    model.train()

    total_preds = []
    total_targets = []
    losses = []
    for step, batch in tqdm(enumerate(train_loader), desc='Training', total=len(train_loader)):
        idx, text, label = batch
        label = label.to(args.device)
        # print(idx[:10])
        # print(text[:10])
        # print(label[:10])
        tokenized_examples = tokenizer(
            text,
            max_length=args.max_seq_len,
            padding="max_length",
            return_tensors="pt"
        ).to(args.device)
        
        # tokenize
        # 모델의 입력으로
        # label은 one-hot?
        # loss 주고
        # argmax를 golden

        token_label_type_ids = []
        for row in tokenized_examples['input_ids']:
            temp = []
            row = tokenizer.convert_ids_to_tokens(row)
            for token in row:
                for i, element in enumerate(args.vocab):
                    if token in element:
                        temp.append(i+1)
                        break
                else:
                    temp.append(0)
            #print(temp)
            token_label_type_ids.append(temp)

        token_label_type_ids = torch.tensor(token_label_type_ids).to(args.device)
        tokenized_examples['token_label_type_ids'] = token_label_type_ids

        preds = model(**tokenized_examples, labels = label)
        # print(preds)
        logits = preds['logits']
        # logits = logits[:,0,:]
        softmax_logits = nn.Softmax(dim=1)(logits)
        argmax_logits = torch.argmax(logits, dim=1)

        # one_hot_logits = one_hot(argmax_logits, num_classes=7).float()
        # print(one_hot(argmax_logits, num_classes=7).type(torch.FloatTensor))
        # loss = compute_loss(logits,
        #                     label, args)
        loss = preds['loss']
        # print(loss)

        update_params(loss, model, optimizer, step, len(train_loader), args)

        if step % args.log_steps == 0:
            print(f"Training steps: {step} Loss: {str(loss.item())}")

        if args.device == 'cuda':
            argmax_logits = argmax_logits.to('cpu').detach().numpy()
            label = label.to('cpu').detach().numpy()
            loss = loss.to('cpu').detach().numpy()
            token_label_type_ids = token_label_type_ids.to('cpu').detach().numpy()
        else:  # cpu
            argmax_logits = argmax_logits.detach().numpy()
            label = label.detach().numpy()
            loss = loss.detach().numpy()
            token_label_type_ids = token_label_type_ids.to('cpu').detach().numpy()

        total_preds.append(argmax_logits)
        total_targets.append(label)
        losses.append(loss)

    total_preds = np.concatenate(total_preds)
    total_targets = np.concatenate(total_targets)

    # Train AUC / ACC
    acc = accuracy_score(total_targets, total_preds)
    loss_avg = sum(losses) / len(losses)
    print(f'TRAIN ACC : {acc}, TRAIN LOSS : {loss_avg}')
    return acc, loss_avg


def validate(args, model, tokenizer, valid_loader):
    model.eval()

    total_preds = []
    total_targets = []
    losses = []
    for step, batch in tqdm(enumerate(valid_loader), desc='Training', total=len(valid_loader)):
        idx, text, label = batch
        label = label.to(args.device)
        tokenized_examples = tokenizer(
            text,
            max_length=args.max_seq_len,
            padding="max_length",
            return_tensors="pt"
        ).to(args.device)

        # tokenize
        # 모델의 입력으로
        # label은 one-hot?
        # loss 주고
        # argmax를 golden
        token_label_type_ids = []
        for row in tokenized_examples['input_ids']:
            temp = []
            row = tokenizer.convert_ids_to_tokens(row)
            for token in row:
                for i, element in enumerate(args.vocab):
                    if token in element:
                        temp.append(i+1)
                        break
                else:
                    temp.append(0)
            #print(temp)
            token_label_type_ids.append(temp)

        token_label_type_ids = torch.tensor(token_label_type_ids).to(args.device)
        tokenized_examples['token_label_type_ids'] = token_label_type_ids

        preds = model(**tokenized_examples, labels = label)
        logits = preds['logits']
        # logits = logits[:,0,:]
        softmax_logits = nn.Softmax(dim=1)(logits)
        argmax_logits = torch.argmax(logits, dim=1)

        # one_hot_logits = one_hot(argmax_logits, num_classes=7).float()
        # print(one_hot(argmax_logits, num_classes=7).type(torch.FloatTensor))
        # loss = compute_loss(logits,
        #                     label, args)
        loss = preds['loss']

        if step % args.log_steps == 0:
            print(f"Validation steps: {step} Loss: {str(loss.item())}")

        if args.device == 'cuda':
            argmax_logits = argmax_logits.to('cpu').detach().numpy()
            label = label.to('cpu').detach().numpy()
            loss = loss.to('cpu').detach().numpy()
            token_label_type_ids = token_label_type_ids.to('cpu').detach().numpy()
        else:  # cpu
            argmax_logits = argmax_logits.detach().numpy()
            label = label.detach().numpy()
            loss = loss.detach().numpy()
            token_label_type_ids = token_label_type_ids.detach().numpy()

        total_preds.append(argmax_logits)
        total_targets.append(label)
        losses.append(loss)

    total_preds = np.concatenate(total_preds)
    total_targets = np.concatenate(total_targets)

    # Train AUC / ACC
    target_names = ['IT과학', '경제', '사회', '생활문화', '세계', '스포츠', '정치']
    print(metrics.classification_report(total_targets, total_preds, target_names=target_names))
    matrix = metrics.confusion_matrix(total_targets, total_preds)
    print(matrix.diagonal()/matrix.sum(axis=1))

    acc = accuracy_score(total_targets, total_preds)
    loss_avg = sum(losses) / len(losses)
    print(f'VALID ACC : {acc}, VALID LOSS : {loss_avg}')
    return acc, loss_avg


## Train

In [7]:
import torch
from sklearn.model_selection import KFold, StratifiedKFold
from transformers import AutoConfig, AutoTokenizer, AutoModelForSequenceClassification
from datetime import datetime
from pytz import timezone


def main(args):
    if not args.run_name:
        args.run_name = datetime.now(timezone("Asia/Seoul")).strftime("%Y-%m-%d-%H:%M:%S")

    set_seeds(args.seed)

    device = "cuda" if torch.cuda.is_available() else "cpu"
    args.device = device

    tokenizer = AutoTokenizer.from_pretrained(
        args.tokenizer_name
        if args.tokenizer_name
        else args.model_name_or_path,
        use_fast=True,
    )

    preprocess = Preprocess(args)
    preprocess.load_train_data()
    train_data_origin = preprocess.train_data

    print(f"Size of train data : {len(train_data_origin)}")
    # print(f"size of test data : {len(test_data)}")

    if args.cv_strategy == 'random':
        kf = KFold(n_splits=args.fold_num, shuffle=True)
        splits = kf.split(X=train_data_origin)
    else:
        # default
        # 여기 각 label로 바꿔야됨
        train_labels = [sequence[-1] for sequence in train_data_origin]
        skf = StratifiedKFold(n_splits=args.fold_num, shuffle=True)
        splits = skf.split(X=train_data_origin, y=train_labels)

    acc_avg = 0
    for fold_num, (train_index, valid_index) in enumerate(splits):
        train_data = train_data_origin[train_index]
        valid_data = train_data_origin[valid_index]
        best_acc = run(args, tokenizer, train_data, valid_data, fold_num + 1)

        if not args.cv_strategy:
            break

        acc_avg += best_acc

    if args.cv_strategy:
        acc_avg /= args.fold_num

        print("*" * 50, 'auc_avg', "*" * 50)
        print(acc_avg)


## Run

In [8]:
import argparse
import easydict

def parse_args():
    args = easydict.EasyDict({'run_name' : 'temp',
                             'seed':42,
                             'device' :'cuda',
                             'data_dir': cur_dir + '/data/open/',
                             'model_dir' : '/content/drive/MyDrive/KLUE_TC/models/vocab20',
                             'transformers_dir' : '/content/drive/MyDrive/KLUE_TC/transformers/',
                             'model_name_or_path' : 'klue/roberta-large',
                             'config_name' : None,
                             'tokenizer_name' : None,
                             'output_dir' : '/content/drive/MyDrive/KLUE_TC/output/vocab50',
                             'vocab_dir' : '/content/drive/MyDrive/KLUE_TC/data/vocab/new50/',
                             
                             'accum_iter' : 8,
                             'gradient_accumulation' : True,

                             'cv_strategy' : 'stratified',
                             'fold_num' : 4,

                             'num_workers' : 1,

                             # 훈련
                             'n_epochs' : 10,
                             'batch_size' : 32,
                             'lr' : 5e-6,
                             'clip_grad' : 15,
                             'patience' : 3,
                             'max_seq_len' : 40,

                             # Optimizer
                             'optimizer' : 'adamP',

                             # Optimizer-parameters
                             'weight_decay' : 0.05,
                             'momentum' : 0.9,

                             # Scheduler
                             'scheduler' : 'step_lr',

                             # Scheduler-parameters
                             # plateau
                             'plateau_patience' : 10,
                             'plateau_factor' : 0.5,
                              
                             't_max' : 10,
                             'T_0' : 10,
                             'T_mult' : 2,
                             '--eta_min' : 0.01,

                             # linear_warmup
                             'warmup_ratio' : 0.3,

                             # Step LR
                             'step_size' : 50,
                             'gamma' : 0.1,

                             'criterion' : 'CE',

                             'log_steps' : 100})
    
    return args

In [9]:
if __name__ == '__main__':
    args = parse_args()
    args['vocab'] = make_vocab(args)
    main(args)

category 0 reading end, size : 972
category 1 reading end, size : 972
category 2 reading end, size : 1388
category 3 reading end, size : 1875
category 4 reading end, size : 1530
category 5 reading end, size : 1604
category 6 reading end, size : 1897


HBox(children=(FloatProgress(value=0.0, description='Downloading', max=337.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=547.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=248477.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=173.0, style=ProgressStyle(description_…


Size of train data : 45654


HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1346854671.0, style=ProgressStyle(descr…




Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.decoder.bias', 'lm_head.bias', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'rob

Start Training: Epoch 1


Training:   0%|          | 1/1070 [00:00<06:41,  2.66it/s]

Training steps: 0 Loss: 1.9473210573196411


Training:   9%|▉         | 101/1070 [00:26<04:03,  3.98it/s]

Training steps: 100 Loss: 1.900681972503662


Training:  19%|█▉        | 201/1070 [00:52<03:59,  3.63it/s]

Training steps: 200 Loss: 1.410595417022705


Training:  28%|██▊       | 301/1070 [01:19<03:17,  3.89it/s]

Training steps: 300 Loss: 0.9650253653526306


Training:  37%|███▋      | 401/1070 [01:45<03:04,  3.62it/s]

Training steps: 400 Loss: 0.5025293827056885


Training:  47%|████▋     | 501/1070 [02:11<02:23,  3.98it/s]

Training steps: 500 Loss: 0.7139464616775513


Training:  56%|█████▌    | 601/1070 [02:38<02:08,  3.65it/s]

Training steps: 600 Loss: 0.4915387034416199


Training:  66%|██████▌   | 701/1070 [03:03<01:32,  4.01it/s]

Training steps: 700 Loss: 0.36773645877838135


Training:  75%|███████▍  | 801/1070 [03:29<01:13,  3.66it/s]

Training steps: 800 Loss: 0.21659249067306519


Training:  84%|████████▍ | 901/1070 [03:55<00:42,  4.01it/s]

Training steps: 900 Loss: 0.2931899428367615


Training:  94%|█████████▎| 1001/1070 [04:21<00:19,  3.63it/s]

Training steps: 1000 Loss: 0.3515019118785858


Training: 100%|██████████| 1070/1070 [04:40<00:00,  3.82it/s]

TRAIN ACC : 0.7434871495327103, TRAIN LOSS : 0.7909292776381301



Training:   0%|          | 1/357 [00:00<00:46,  7.66it/s]

Validation steps: 0 Loss: 0.20183677971363068


Training:  29%|██▊       | 102/357 [00:11<00:28,  9.04it/s]

Validation steps: 100 Loss: 0.49893003702163696


Training:  57%|█████▋    | 202/357 [00:22<00:17,  8.95it/s]

Validation steps: 200 Loss: 0.5722138285636902


Training:  85%|████████▍ | 302/357 [00:33<00:05,  9.38it/s]

Validation steps: 300 Loss: 0.27608388662338257


Training: 100%|██████████| 357/357 [00:39<00:00,  9.10it/s]


              precision    recall  f1-score   support

        IT과학       0.79      0.91      0.85      1206
          경제       0.77      0.88      0.82      1555
          사회       0.88      0.65      0.75      1841
        생활문화       0.91      0.90      0.90      1483
          세계       0.92      0.94      0.93      1908
         스포츠       0.96      0.99      0.97      1734
          정치       0.92      0.91      0.91      1687

    accuracy                           0.88     11414
   macro avg       0.88      0.88      0.88     11414
weighted avg       0.88      0.88      0.88     11414

[0.91044776 0.87717042 0.65127648 0.90222522 0.93763103 0.98961938
 0.90812092]
VALID ACC : 0.8792710706150342, VALID LOSS : 0.37385286256170075
{'epoch': 0, 'train_loss': 0.7909292776381301, 'train_acc': 0.7434871495327103, 'valid_acc': 0.8792710706150342, 'val_loss': 0.37385286256170075, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 2


Training:   0%|          | 1/1070 [00:00<05:05,  3.50it/s]

Training steps: 0 Loss: 0.47448301315307617


Training:   9%|▉         | 101/1070 [00:27<04:14,  3.81it/s]

Training steps: 100 Loss: 0.3958607614040375


Training:  19%|█▉        | 201/1070 [00:54<04:10,  3.46it/s]

Training steps: 200 Loss: 0.31826069951057434


Training:  28%|██▊       | 301/1070 [01:22<03:22,  3.79it/s]

Training steps: 300 Loss: 0.5399267077445984


Training:  37%|███▋      | 401/1070 [01:49<03:12,  3.47it/s]

Training steps: 400 Loss: 0.13214421272277832


Training:  47%|████▋     | 501/1070 [02:16<02:28,  3.83it/s]

Training steps: 500 Loss: 0.09717386960983276


Training:  56%|█████▌    | 601/1070 [02:43<02:12,  3.55it/s]

Training steps: 600 Loss: 0.44362303614616394


Training:  66%|██████▌   | 701/1070 [03:10<01:37,  3.79it/s]

Training steps: 700 Loss: 0.42930299043655396


Training:  75%|███████▍  | 801/1070 [03:38<01:16,  3.53it/s]

Training steps: 800 Loss: 0.29946863651275635


Training:  84%|████████▍ | 901/1070 [04:04<00:44,  3.83it/s]

Training steps: 900 Loss: 0.3190148174762726


Training:  94%|█████████▎| 1001/1070 [04:32<00:19,  3.52it/s]

Training steps: 1000 Loss: 0.16290882229804993


Training: 100%|██████████| 1070/1070 [04:50<00:00,  3.68it/s]

TRAIN ACC : 0.8893983644859813, TRAIN LOSS : 0.341247796134971



Training:   0%|          | 1/357 [00:00<00:44,  7.99it/s]

Validation steps: 0 Loss: 0.18078528344631195


Training:  29%|██▊       | 102/357 [00:11<00:29,  8.67it/s]

Validation steps: 100 Loss: 0.5515313148498535


Training:  57%|█████▋    | 202/357 [00:23<00:17,  8.67it/s]

Validation steps: 200 Loss: 0.449971467256546


Training:  85%|████████▍ | 302/357 [00:34<00:06,  8.63it/s]

Validation steps: 300 Loss: 0.1717708259820938


Training: 100%|██████████| 357/357 [00:41<00:00,  8.66it/s]


              precision    recall  f1-score   support

        IT과학       0.77      0.94      0.85      1206
          경제       0.88      0.81      0.84      1555
          사회       0.87      0.71      0.78      1841
        생활문화       0.90      0.91      0.91      1483
          세계       0.93      0.94      0.94      1908
         스포츠       0.96      0.99      0.97      1734
          정치       0.88      0.95      0.91      1687

    accuracy                           0.89     11414
   macro avg       0.89      0.89      0.89     11414
weighted avg       0.89      0.89      0.89     11414

[0.94361526 0.81028939 0.70885388 0.908294   0.93710692 0.98788927
 0.94605809]
VALID ACC : 0.8889959698615735, VALID LOSS : 0.33274983385113444
{'epoch': 1, 'train_loss': 0.341247796134971, 'train_acc': 0.8893983644859813, 'valid_acc': 0.8889959698615735, 'val_loss': 0.33274983385113444, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 3


Training:   0%|          | 1/1070 [00:00<05:00,  3.56it/s]

Training steps: 0 Loss: 0.27763694524765015


Training:   9%|▉         | 101/1070 [00:26<04:01,  4.01it/s]

Training steps: 100 Loss: 0.23610764741897583


Training:  19%|█▉        | 201/1070 [00:52<03:59,  3.63it/s]

Training steps: 200 Loss: 0.11722071468830109


Training:  28%|██▊       | 301/1070 [01:18<03:14,  3.95it/s]

Training steps: 300 Loss: 0.3376014530658722


Training:  37%|███▋      | 401/1070 [01:44<03:00,  3.71it/s]

Training steps: 400 Loss: 0.22715406119823456


Training:  47%|████▋     | 501/1070 [02:10<02:21,  4.03it/s]

Training steps: 500 Loss: 0.36941590905189514


Training:  56%|█████▌    | 601/1070 [02:36<02:08,  3.65it/s]

Training steps: 600 Loss: 0.11814755946397781


Training:  66%|██████▌   | 701/1070 [03:01<01:32,  3.99it/s]

Training steps: 700 Loss: 0.24099597334861755


Training:  75%|███████▍  | 801/1070 [03:27<01:13,  3.66it/s]

Training steps: 800 Loss: 0.25925344228744507


Training:  84%|████████▍ | 901/1070 [03:53<00:41,  4.04it/s]

Training steps: 900 Loss: 0.2812104821205139


Training:  94%|█████████▎| 1001/1070 [04:19<00:19,  3.60it/s]

Training steps: 1000 Loss: 0.3482958674430847


Training: 100%|██████████| 1070/1070 [04:38<00:00,  3.84it/s]

TRAIN ACC : 0.9025408878504673, TRAIN LOSS : 0.293943685745803



Training:   0%|          | 1/357 [00:00<00:44,  8.05it/s]

Validation steps: 0 Loss: 0.19199734926223755


Training:  29%|██▊       | 102/357 [00:11<00:28,  8.81it/s]

Validation steps: 100 Loss: 0.5299175381660461


Training:  57%|█████▋    | 202/357 [00:22<00:17,  8.64it/s]

Validation steps: 200 Loss: 0.446382999420166


Training:  85%|████████▍ | 302/357 [00:34<00:06,  8.82it/s]

Validation steps: 300 Loss: 0.12769190967082977


Training: 100%|██████████| 357/357 [00:40<00:00,  8.79it/s]


              precision    recall  f1-score   support

        IT과학       0.80      0.92      0.86      1206
          경제       0.85      0.84      0.85      1555
          사회       0.86      0.73      0.79      1841
        생활문화       0.88      0.94      0.91      1483
          세계       0.93      0.94      0.94      1908
         스포츠       0.97      0.98      0.98      1734
          정치       0.92      0.92      0.92      1687

    accuracy                           0.89     11414
   macro avg       0.89      0.90      0.89     11414
weighted avg       0.89      0.89      0.89     11414

[0.91625207 0.84244373 0.73221076 0.93661497 0.94444444 0.98269896
 0.91760522]
VALID ACC : 0.8941650604520764, VALID LOSS : 0.3189002214161371
{'epoch': 2, 'train_loss': 0.293943685745803, 'train_acc': 0.9025408878504673, 'valid_acc': 0.8941650604520764, 'val_loss': 0.3189002214161371, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 4


Training:   0%|          | 1/1070 [00:00<05:06,  3.48it/s]

Training steps: 0 Loss: 0.25717368721961975


Training:   9%|▉         | 101/1070 [00:26<04:06,  3.93it/s]

Training steps: 100 Loss: 0.32110631465911865


Training:  19%|█▉        | 201/1070 [00:53<04:04,  3.56it/s]

Training steps: 200 Loss: 0.4689047932624817


Training:  28%|██▊       | 301/1070 [01:19<03:15,  3.93it/s]

Training steps: 300 Loss: 0.15749406814575195


Training:  37%|███▋      | 401/1070 [01:46<03:04,  3.62it/s]

Training steps: 400 Loss: 0.1487942934036255


Training:  47%|████▋     | 501/1070 [02:12<02:27,  3.86it/s]

Training steps: 500 Loss: 0.22376547753810883


Training:  56%|█████▌    | 601/1070 [02:39<02:09,  3.61it/s]

Training steps: 600 Loss: 0.16154687106609344


Training:  66%|██████▌   | 701/1070 [03:06<01:34,  3.90it/s]

Training steps: 700 Loss: 0.14175660908222198


Training:  75%|███████▍  | 801/1070 [03:32<01:14,  3.60it/s]

Training steps: 800 Loss: 0.14063964784145355


Training:  84%|████████▍ | 901/1070 [03:58<00:42,  3.93it/s]

Training steps: 900 Loss: 0.43351033329963684


Training:  94%|█████████▎| 1001/1070 [04:25<00:19,  3.61it/s]

Training steps: 1000 Loss: 0.11232393234968185


Training: 100%|██████████| 1070/1070 [04:43<00:00,  3.77it/s]

TRAIN ACC : 0.9110981308411215, TRAIN LOSS : 0.2605412893571726



Training:   0%|          | 1/357 [00:00<00:41,  8.50it/s]

Validation steps: 0 Loss: 0.18404337763786316


Training:  29%|██▊       | 102/357 [00:11<00:27,  9.25it/s]

Validation steps: 100 Loss: 0.5945241451263428


Training:  57%|█████▋    | 202/357 [00:21<00:16,  9.23it/s]

Validation steps: 200 Loss: 0.46872010827064514


Training:  85%|████████▍ | 302/357 [00:33<00:06,  8.89it/s]

Validation steps: 300 Loss: 0.10132414847612381


Training: 100%|██████████| 357/357 [00:39<00:00,  9.09it/s]

              precision    recall  f1-score   support

        IT과학       0.78      0.94      0.86      1206
          경제       0.87      0.81      0.84      1555
          사회       0.85      0.74      0.80      1841
        생활문화       0.90      0.93      0.92      1483
          세계       0.92      0.95      0.93      1908
         스포츠       0.97      0.98      0.98      1734
          정치       0.93      0.91      0.92      1687

    accuracy                           0.89     11414
   macro avg       0.89      0.90      0.89     11414
weighted avg       0.90      0.89      0.89     11414

[0.94444444 0.81350482 0.74307442 0.92919757 0.95073375 0.97981546
 0.91286307]
VALID ACC : 0.893902225337305, VALID LOSS : 0.3259373632590978
{'epoch': 3, 'train_loss': 0.2605412893571726, 'train_acc': 0.9110981308411215, 'valid_acc': 0.893902225337305, 'val_loss': 0.3259373632590978, 'learning_rate': 5e-06}
Start Training: Epoch 5



Training:   0%|          | 1/1070 [00:00<04:33,  3.91it/s]

Training steps: 0 Loss: 0.23320411145687103


Training:   9%|▉         | 101/1070 [00:26<04:07,  3.92it/s]

Training steps: 100 Loss: 0.16269534826278687


Training:  19%|█▉        | 201/1070 [00:53<03:59,  3.63it/s]

Training steps: 200 Loss: 0.2017330825328827


Training:  28%|██▊       | 301/1070 [01:19<03:13,  3.97it/s]

Training steps: 300 Loss: 0.13169462978839874


Training:  37%|███▋      | 401/1070 [01:45<03:06,  3.59it/s]

Training steps: 400 Loss: 0.06698216497898102


Training:  47%|████▋     | 501/1070 [02:11<02:23,  3.96it/s]

Training steps: 500 Loss: 0.14069312810897827


Training:  56%|█████▌    | 601/1070 [02:38<02:10,  3.59it/s]

Training steps: 600 Loss: 0.29138654470443726


Training:  66%|██████▌   | 701/1070 [03:04<01:33,  3.97it/s]

Training steps: 700 Loss: 0.19666212797164917


Training:  75%|███████▍  | 801/1070 [03:30<01:13,  3.64it/s]

Training steps: 800 Loss: 0.27744507789611816


Training:  84%|████████▍ | 901/1070 [03:57<00:42,  3.93it/s]

Training steps: 900 Loss: 0.3838242292404175


Training:  94%|█████████▎| 1001/1070 [04:23<00:18,  3.65it/s]

Training steps: 1000 Loss: 0.119209423661232


Training: 100%|██████████| 1070/1070 [04:41<00:00,  3.80it/s]

TRAIN ACC : 0.9200934579439253, TRAIN LOSS : 0.23578381179614324



Training:   0%|          | 1/357 [00:00<00:42,  8.46it/s]

Validation steps: 0 Loss: 0.17912523448467255


Training:  29%|██▊       | 102/357 [00:11<00:28,  8.96it/s]

Validation steps: 100 Loss: 0.5009822845458984


Training:  57%|█████▋    | 202/357 [00:22<00:17,  9.05it/s]

Validation steps: 200 Loss: 0.40664082765579224


Training:  85%|████████▍ | 302/357 [00:33<00:06,  8.99it/s]

Validation steps: 300 Loss: 0.1569666862487793


Training: 100%|██████████| 357/357 [00:39<00:00,  9.07it/s]

              precision    recall  f1-score   support

        IT과학       0.83      0.88      0.86      1206
          경제       0.84      0.86      0.85      1555
          사회       0.83      0.76      0.80      1841
        생활문화       0.92      0.90      0.91      1483
          세계       0.93      0.94      0.94      1908
         스포츠       0.98      0.98      0.98      1734
          정치       0.91      0.93      0.92      1687

    accuracy                           0.89     11414
   macro avg       0.89      0.89      0.89     11414
weighted avg       0.89      0.89      0.89     11414

[0.87976783 0.85723473 0.76480174 0.9008766  0.94496855 0.97520185
 0.93123889]
VALID ACC : 0.893902225337305, VALID LOSS : 0.31984020114800676
{'epoch': 4, 'train_loss': 0.23578381179614324, 'train_acc': 0.9200934579439253, 'valid_acc': 0.893902225337305, 'val_loss': 0.31984020114800676, 'learning_rate': 5e-06}
Start Training: Epoch 6



Training:   0%|          | 1/1070 [00:00<04:43,  3.77it/s]

Training steps: 0 Loss: 0.1475912630558014


Training:   9%|▉         | 101/1070 [00:26<04:04,  3.96it/s]

Training steps: 100 Loss: 0.14878278970718384


Training:  19%|█▉        | 201/1070 [00:53<04:00,  3.62it/s]

Training steps: 200 Loss: 0.3050922155380249


Training:  28%|██▊       | 301/1070 [01:19<03:17,  3.89it/s]

Training steps: 300 Loss: 0.33854812383651733


Training:  37%|███▋      | 401/1070 [01:45<03:05,  3.61it/s]

Training steps: 400 Loss: 0.13630764186382294


Training:  47%|████▋     | 501/1070 [02:12<02:25,  3.91it/s]

Training steps: 500 Loss: 0.15271969139575958


Training:  56%|█████▌    | 601/1070 [02:38<02:09,  3.62it/s]

Training steps: 600 Loss: 0.23745077848434448


Training:  66%|██████▌   | 701/1070 [03:04<01:33,  3.95it/s]

Training steps: 700 Loss: 0.09168701618909836


Training:  75%|███████▍  | 801/1070 [03:31<01:14,  3.60it/s]

Training steps: 800 Loss: 0.08709916472434998


Training:  84%|████████▍ | 901/1070 [03:57<00:42,  3.95it/s]

Training steps: 900 Loss: 0.1720392107963562


Training:  94%|█████████▎| 1001/1070 [04:23<00:19,  3.61it/s]

Training steps: 1000 Loss: 0.14287222921848297


Training: 100%|██████████| 1070/1070 [04:42<00:00,  3.79it/s]

TRAIN ACC : 0.92821261682243, TRAIN LOSS : 0.21290134108825542



Training:   0%|          | 1/357 [00:00<00:42,  8.44it/s]

Validation steps: 0 Loss: 0.360939621925354


Training:  29%|██▊       | 102/357 [00:10<00:27,  9.32it/s]

Validation steps: 100 Loss: 0.6150349378585815


Training:  57%|█████▋    | 202/357 [00:21<00:16,  9.43it/s]

Validation steps: 200 Loss: 0.5044514536857605


Training:  85%|████████▍ | 302/357 [00:32<00:05,  9.47it/s]

Validation steps: 300 Loss: 0.1918431967496872


Training: 100%|██████████| 357/357 [00:38<00:00,  9.33it/s]


              precision    recall  f1-score   support

        IT과학       0.79      0.92      0.85      1206
          경제       0.86      0.83      0.84      1555
          사회       0.88      0.69      0.78      1841
        생활문화       0.86      0.94      0.90      1483
          세계       0.94      0.92      0.93      1908
         스포츠       0.96      0.99      0.97      1734
          정치       0.88      0.95      0.92      1687

    accuracy                           0.89     11414
   macro avg       0.88      0.89      0.89     11414
weighted avg       0.89      0.89      0.89     11414

[0.92288557 0.82636656 0.69201521 0.94403237 0.92400419 0.98500577
 0.95139301]
VALID ACC : 0.8890835815664972, VALID LOSS : 0.3477158665589216
{'epoch': 5, 'train_loss': 0.21290134108825542, 'train_acc': 0.92821261682243, 'valid_acc': 0.8890835815664972, 'val_loss': 0.3477158665589216, 'learning_rate': 5e-06}
EarlyStopping counter: 3 out of 3


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.decoder.bias', 'lm_head.bias', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'rob

Start Training: Epoch 1


Training:   0%|          | 1/1070 [00:00<04:52,  3.66it/s]

Training steps: 0 Loss: 1.995666742324829


Training:   9%|▉         | 101/1070 [00:26<04:01,  4.02it/s]

Training steps: 100 Loss: 1.8609216213226318


Training:  19%|█▉        | 201/1070 [00:52<03:59,  3.62it/s]

Training steps: 200 Loss: 1.516831636428833


Training:  28%|██▊       | 301/1070 [01:18<03:12,  3.99it/s]

Training steps: 300 Loss: 0.9320467710494995


Training:  37%|███▋      | 401/1070 [01:44<03:04,  3.63it/s]

Training steps: 400 Loss: 0.7765706777572632


Training:  47%|████▋     | 501/1070 [02:10<02:21,  4.01it/s]

Training steps: 500 Loss: 0.5670201778411865


Training:  56%|█████▌    | 601/1070 [02:36<02:06,  3.70it/s]

Training steps: 600 Loss: 0.737296462059021


Training:  66%|██████▌   | 701/1070 [03:02<01:32,  3.99it/s]

Training steps: 700 Loss: 0.4075613021850586


Training:  75%|███████▍  | 801/1070 [03:28<01:13,  3.66it/s]

Training steps: 800 Loss: 0.42286837100982666


Training:  84%|████████▍ | 901/1070 [03:53<00:42,  3.96it/s]

Training steps: 900 Loss: 0.3389451801776886


Training:  94%|█████████▎| 1001/1070 [04:20<00:18,  3.66it/s]

Training steps: 1000 Loss: 0.44442763924598694


Training: 100%|██████████| 1070/1070 [04:38<00:00,  3.85it/s]

TRAIN ACC : 0.741588785046729, TRAIN LOSS : 0.8029669985801817



Training:   0%|          | 1/357 [00:00<00:39,  8.91it/s]

Validation steps: 0 Loss: 0.34997236728668213


Training:  29%|██▊       | 102/357 [00:10<00:26,  9.62it/s]

Validation steps: 100 Loss: 0.7197659015655518


Training:  57%|█████▋    | 202/357 [00:21<00:16,  9.44it/s]

Validation steps: 200 Loss: 0.578222393989563


Training:  85%|████████▍ | 302/357 [00:31<00:05,  9.54it/s]

Validation steps: 300 Loss: 0.4496941864490509


Training: 100%|██████████| 357/357 [00:37<00:00,  9.50it/s]


              precision    recall  f1-score   support

        IT과학       0.75      0.92      0.83      1206
          경제       0.84      0.82      0.83      1556
          사회       0.82      0.69      0.75      1841
        생활문화       0.86      0.93      0.89      1483
          세계       0.90      0.91      0.91      1907
         스포츠       0.95      0.99      0.97      1733
          정치       0.93      0.84      0.89      1688

    accuracy                           0.87     11414
   macro avg       0.87      0.87      0.87     11414
weighted avg       0.87      0.87      0.87     11414

[0.92039801 0.82390746 0.69418794 0.92582603 0.9124279  0.98903635
 0.84478673]
VALID ACC : 0.8693709479586472, VALID LOSS : 0.39716787697846484
{'epoch': 0, 'train_loss': 0.8029669985801817, 'train_acc': 0.741588785046729, 'valid_acc': 0.8693709479586472, 'val_loss': 0.39716787697846484, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 2


Training:   0%|          | 1/1070 [00:00<05:07,  3.48it/s]

Training steps: 0 Loss: 0.39523881673812866


Training:   9%|▉         | 101/1070 [00:27<04:18,  3.74it/s]

Training steps: 100 Loss: 0.29350098967552185


Training:  19%|█▉        | 201/1070 [00:55<04:08,  3.49it/s]

Training steps: 200 Loss: 0.2580457031726837


Training:  28%|██▊       | 301/1070 [01:22<03:25,  3.75it/s]

Training steps: 300 Loss: 0.4490847587585449


Training:  37%|███▋      | 401/1070 [01:50<03:10,  3.51it/s]

Training steps: 400 Loss: 0.30478227138519287


Training:  47%|████▋     | 501/1070 [02:17<02:30,  3.79it/s]

Training steps: 500 Loss: 0.31036385893821716


Training:  56%|█████▌    | 601/1070 [02:45<02:15,  3.45it/s]

Training steps: 600 Loss: 0.41448351740837097


Training:  66%|██████▌   | 701/1070 [03:12<01:36,  3.80it/s]

Training steps: 700 Loss: 0.08463513106107712


Training:  75%|███████▍  | 801/1070 [03:39<01:17,  3.47it/s]

Training steps: 800 Loss: 0.2531636357307434


Training:  84%|████████▍ | 901/1070 [04:07<00:44,  3.80it/s]

Training steps: 900 Loss: 0.1736891120672226


Training:  94%|█████████▎| 1001/1070 [04:34<00:20,  3.45it/s]

Training steps: 1000 Loss: 0.24354353547096252


Training: 100%|██████████| 1070/1070 [04:53<00:00,  3.65it/s]

TRAIN ACC : 0.8882009345794393, TRAIN LOSS : 0.3426237142775382



Training:   0%|          | 1/357 [00:00<00:39,  8.99it/s]

Validation steps: 0 Loss: 0.2923508286476135


Training:  29%|██▊       | 102/357 [00:10<00:26,  9.71it/s]

Validation steps: 100 Loss: 0.5808919668197632


Training:  57%|█████▋    | 202/357 [00:21<00:16,  9.48it/s]

Validation steps: 200 Loss: 0.3051626682281494


Training:  85%|████████▍ | 302/357 [00:31<00:05,  9.42it/s]

Validation steps: 300 Loss: 0.5186997652053833


Training: 100%|██████████| 357/357 [00:37<00:00,  9.52it/s]


              precision    recall  f1-score   support

        IT과학       0.81      0.90      0.85      1206
          경제       0.81      0.86      0.84      1556
          사회       0.86      0.71      0.78      1841
        생활문화       0.90      0.92      0.91      1483
          세계       0.93      0.91      0.92      1907
         스포츠       0.95      0.99      0.97      1733
          정치       0.90      0.92      0.91      1688

    accuracy                           0.88     11414
   macro avg       0.88      0.89      0.88     11414
weighted avg       0.89      0.88      0.88     11414

[0.90049751 0.86118252 0.71211298 0.92312879 0.90613529 0.98730525
 0.92120853]
VALID ACC : 0.8848782197301559, VALID LOSS : 0.3450079997192745
{'epoch': 1, 'train_loss': 0.3426237142775382, 'train_acc': 0.8882009345794393, 'valid_acc': 0.8848782197301559, 'val_loss': 0.3450079997192745, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 3


Training:   0%|          | 1/1070 [00:00<05:05,  3.50it/s]

Training steps: 0 Loss: 0.22300897538661957


Training:   9%|▉         | 101/1070 [00:27<04:15,  3.79it/s]

Training steps: 100 Loss: 0.25472646951675415


Training:  19%|█▉        | 201/1070 [00:55<04:11,  3.46it/s]

Training steps: 200 Loss: 0.3094918429851532


Training:  28%|██▊       | 301/1070 [01:22<03:23,  3.77it/s]

Training steps: 300 Loss: 0.32715362310409546


Training:  37%|███▋      | 401/1070 [01:49<03:12,  3.47it/s]

Training steps: 400 Loss: 0.21557269990444183


Training:  47%|████▋     | 501/1070 [02:16<02:29,  3.80it/s]

Training steps: 500 Loss: 0.42559337615966797


Training:  56%|█████▌    | 601/1070 [02:44<02:13,  3.51it/s]

Training steps: 600 Loss: 0.3629470467567444


Training:  66%|██████▌   | 701/1070 [03:11<01:38,  3.75it/s]

Training steps: 700 Loss: 0.45345598459243774


Training:  75%|███████▍  | 801/1070 [03:38<01:16,  3.51it/s]

Training steps: 800 Loss: 0.04381674528121948


Training:  84%|████████▍ | 901/1070 [04:06<00:44,  3.78it/s]

Training steps: 900 Loss: 0.17774470150470734


Training:  94%|█████████▎| 1001/1070 [04:33<00:19,  3.51it/s]

Training steps: 1000 Loss: 0.35451167821884155


Training: 100%|██████████| 1070/1070 [04:52<00:00,  3.66it/s]

TRAIN ACC : 0.8995619158878505, TRAIN LOSS : 0.2977887396047884



Training:   0%|          | 1/357 [00:00<00:45,  7.85it/s]

Validation steps: 0 Loss: 0.2544447183609009


Training:  29%|██▊       | 102/357 [00:12<00:30,  8.30it/s]

Validation steps: 100 Loss: 0.5748254656791687


Training:  57%|█████▋    | 202/357 [00:24<00:18,  8.30it/s]

Validation steps: 200 Loss: 0.3050384819507599


Training:  85%|████████▍ | 302/357 [00:35<00:06,  8.57it/s]

Validation steps: 300 Loss: 0.45886653661727905


Training: 100%|██████████| 357/357 [00:42<00:00,  8.41it/s]


              precision    recall  f1-score   support

        IT과학       0.82      0.90      0.86      1206
          경제       0.86      0.84      0.85      1556
          사회       0.86      0.72      0.78      1841
        생활문화       0.88      0.93      0.91      1483
          세계       0.90      0.94      0.92      1907
         스포츠       0.95      0.99      0.97      1733
          정치       0.91      0.91      0.91      1688

    accuracy                           0.89     11414
   macro avg       0.88      0.89      0.89     11414
weighted avg       0.89      0.89      0.89     11414

[0.90049751 0.84383033 0.71863118 0.93324343 0.93707394 0.98845932
 0.90580569]
VALID ACC : 0.8879446294024882, VALID LOSS : 0.34125945492222365
{'epoch': 2, 'train_loss': 0.2977887396047884, 'train_acc': 0.8995619158878505, 'valid_acc': 0.8879446294024882, 'val_loss': 0.34125945492222365, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 4


Training:   0%|          | 1/1070 [00:00<05:07,  3.48it/s]

Training steps: 0 Loss: 0.3839973211288452


Training:   9%|▉         | 101/1070 [00:27<04:13,  3.82it/s]

Training steps: 100 Loss: 0.26509976387023926


Training:  19%|█▉        | 201/1070 [00:54<04:06,  3.52it/s]

Training steps: 200 Loss: 0.39019227027893066


Training:  28%|██▊       | 301/1070 [01:22<03:23,  3.78it/s]

Training steps: 300 Loss: 0.06252516061067581


Training:  37%|███▋      | 401/1070 [01:49<03:10,  3.51it/s]

Training steps: 400 Loss: 0.2197473645210266


Training:  47%|████▋     | 501/1070 [02:16<02:31,  3.77it/s]

Training steps: 500 Loss: 0.09258393943309784


Training:  56%|█████▌    | 601/1070 [02:44<02:13,  3.52it/s]

Training steps: 600 Loss: 0.18138998746871948


Training:  66%|██████▌   | 701/1070 [03:11<01:37,  3.79it/s]

Training steps: 700 Loss: 0.43983858823776245


Training:  75%|███████▍  | 801/1070 [03:38<01:16,  3.53it/s]

Training steps: 800 Loss: 0.2932489514350891


Training:  84%|████████▍ | 901/1070 [04:05<00:44,  3.82it/s]

Training steps: 900 Loss: 0.1840178221464157


Training:  94%|█████████▎| 1001/1070 [04:32<00:19,  3.49it/s]

Training steps: 1000 Loss: 0.3416679799556732


Training: 100%|██████████| 1070/1070 [04:51<00:00,  3.67it/s]

TRAIN ACC : 0.9103095794392523, TRAIN LOSS : 0.26122877725234656



Training:   0%|          | 1/357 [00:00<00:47,  7.48it/s]

Validation steps: 0 Loss: 0.26105403900146484


Training:  29%|██▊       | 102/357 [00:12<00:30,  8.33it/s]

Validation steps: 100 Loss: 0.571510910987854


Training:  57%|█████▋    | 202/357 [00:23<00:18,  8.47it/s]

Validation steps: 200 Loss: 0.2399224489927292


Training:  85%|████████▍ | 302/357 [00:35<00:06,  8.64it/s]

Validation steps: 300 Loss: 0.46134042739868164


Training: 100%|██████████| 357/357 [00:42<00:00,  8.49it/s]


              precision    recall  f1-score   support

        IT과학       0.83      0.89      0.86      1206
          경제       0.87      0.82      0.84      1556
          사회       0.81      0.77      0.79      1841
        생활문화       0.91      0.92      0.91      1483
          세계       0.92      0.93      0.93      1907
         스포츠       0.96      0.98      0.97      1733
          정치       0.91      0.91      0.91      1688

    accuracy                           0.89     11414
   macro avg       0.89      0.89      0.89     11414
weighted avg       0.89      0.89      0.89     11414

[0.88888889 0.82069409 0.77403585 0.91908294 0.92973256 0.98268898
 0.91232227]
VALID ACC : 0.8895216400911162, VALID LOSS : 0.3268872746566431
{'epoch': 3, 'train_loss': 0.26122877725234656, 'train_acc': 0.9103095794392523, 'valid_acc': 0.8895216400911162, 'val_loss': 0.3268872746566431, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 5


Training:   0%|          | 1/1070 [00:00<05:06,  3.49it/s]

Training steps: 0 Loss: 0.20013853907585144


Training:   9%|▉         | 101/1070 [00:27<04:16,  3.78it/s]

Training steps: 100 Loss: 0.23081769049167633


Training:  19%|█▉        | 201/1070 [00:54<04:05,  3.54it/s]

Training steps: 200 Loss: 0.07677746564149857


Training:  28%|██▊       | 301/1070 [01:21<03:19,  3.85it/s]

Training steps: 300 Loss: 0.3880153000354767


Training:  37%|███▋      | 401/1070 [01:48<03:11,  3.49it/s]

Training steps: 400 Loss: 0.26795658469200134


Training:  47%|████▋     | 501/1070 [02:15<02:28,  3.84it/s]

Training steps: 500 Loss: 0.3022311329841614


Training:  56%|█████▌    | 601/1070 [02:43<02:14,  3.47it/s]

Training steps: 600 Loss: 0.21874606609344482


Training:  66%|██████▌   | 701/1070 [03:10<01:35,  3.85it/s]

Training steps: 700 Loss: 0.48655596375465393


Training:  75%|███████▍  | 801/1070 [03:37<01:16,  3.50it/s]

Training steps: 800 Loss: 0.40102896094322205


Training:  84%|████████▍ | 901/1070 [04:04<00:43,  3.84it/s]

Training steps: 900 Loss: 0.38018593192100525


Training:  94%|█████████▎| 1001/1070 [04:31<00:19,  3.56it/s]

Training steps: 1000 Loss: 0.43848294019699097


Training: 100%|██████████| 1070/1070 [04:50<00:00,  3.69it/s]

TRAIN ACC : 0.920268691588785, TRAIN LOSS : 0.2357242724588402



Training:   0%|          | 1/357 [00:00<00:46,  7.73it/s]

Validation steps: 0 Loss: 0.2688038945198059


Training:  29%|██▊       | 102/357 [00:12<00:29,  8.51it/s]

Validation steps: 100 Loss: 0.5649189352989197


Training:  57%|█████▋    | 202/357 [00:23<00:17,  8.79it/s]

Validation steps: 200 Loss: 0.21057026088237762


Training:  85%|████████▍ | 302/357 [00:35<00:06,  8.52it/s]

Validation steps: 300 Loss: 0.4975096881389618


Training: 100%|██████████| 357/357 [00:41<00:00,  8.52it/s]

              precision    recall  f1-score   support

        IT과학       0.84      0.87      0.85      1206
          경제       0.84      0.86      0.85      1556
          사회       0.82      0.75      0.79      1841
        생활문화       0.91      0.92      0.91      1483
          세계       0.94      0.92      0.93      1907
         스포츠       0.94      0.99      0.97      1733
          정치       0.91      0.91      0.91      1688

    accuracy                           0.89     11414
   macro avg       0.89      0.89      0.89     11414
weighted avg       0.89      0.89      0.89     11414

[0.8747927  0.85604113 0.75339489 0.91773432 0.91662297 0.99249856
 0.91291469]
VALID ACC : 0.8887331347468022, VALID LOSS : 0.33437672239683924
{'epoch': 4, 'train_loss': 0.2357242724588402, 'train_acc': 0.920268691588785, 'valid_acc': 0.8887331347468022, 'val_loss': 0.33437672239683924, 'learning_rate': 5e-06}
Start Training: Epoch 6



Training:   0%|          | 1/1070 [00:00<04:24,  4.04it/s]

Training steps: 0 Loss: 0.07821807265281677


Training:   9%|▉         | 101/1070 [00:26<04:01,  4.01it/s]

Training steps: 100 Loss: 0.12870796024799347


Training:  19%|█▉        | 201/1070 [00:51<03:57,  3.65it/s]

Training steps: 200 Loss: 0.2336970865726471


Training:  28%|██▊       | 301/1070 [01:17<03:11,  4.01it/s]

Training steps: 300 Loss: 0.17257925868034363


Training:  37%|███▋      | 401/1070 [01:43<03:01,  3.68it/s]

Training steps: 400 Loss: 0.31757134199142456


Training:  47%|████▋     | 501/1070 [02:09<02:23,  3.98it/s]

Training steps: 500 Loss: 0.1319105625152588


Training:  56%|█████▌    | 601/1070 [02:35<02:07,  3.68it/s]

Training steps: 600 Loss: 0.09844201803207397


Training:  66%|██████▌   | 701/1070 [03:01<01:32,  4.00it/s]

Training steps: 700 Loss: 0.1906103640794754


Training:  75%|███████▍  | 801/1070 [03:27<01:12,  3.69it/s]

Training steps: 800 Loss: 0.3411015272140503


Training:  84%|████████▍ | 901/1070 [03:53<00:41,  4.04it/s]

Training steps: 900 Loss: 0.27223262190818787


Training:  94%|█████████▎| 1001/1070 [04:19<00:18,  3.66it/s]

Training steps: 1000 Loss: 0.05931410938501358


Training: 100%|██████████| 1070/1070 [04:37<00:00,  3.86it/s]

TRAIN ACC : 0.9286799065420561, TRAIN LOSS : 0.21256912543708198



Training:   0%|          | 1/357 [00:00<00:45,  7.83it/s]

Validation steps: 0 Loss: 0.2495255470275879


Training:  29%|██▊       | 102/357 [00:12<00:30,  8.37it/s]

Validation steps: 100 Loss: 0.5842236876487732


Training:  57%|█████▋    | 202/357 [00:23<00:18,  8.44it/s]

Validation steps: 200 Loss: 0.21228793263435364


Training:  85%|████████▍ | 302/357 [00:35<00:06,  8.73it/s]

Validation steps: 300 Loss: 0.47876212000846863


Training: 100%|██████████| 357/357 [00:41<00:00,  8.54it/s]

              precision    recall  f1-score   support

        IT과학       0.86      0.80      0.83      1206
          경제       0.81      0.87      0.84      1556
          사회       0.83      0.75      0.79      1841
        생활문화       0.90      0.93      0.91      1483
          세계       0.92      0.93      0.93      1907
         스포츠       0.96      0.98      0.97      1733
          정치       0.91      0.91      0.91      1688

    accuracy                           0.88     11414
   macro avg       0.88      0.88      0.88     11414
weighted avg       0.88      0.88      0.88     11414

[0.79767828 0.87082262 0.7501358  0.92919757 0.93392764 0.98153491
 0.91409953]
VALID ACC : 0.8849658314350797, VALID LOSS : 0.3487992270560074
{'epoch': 5, 'train_loss': 0.21256912543708198, 'train_acc': 0.9286799065420561, 'valid_acc': 0.8849658314350797, 'val_loss': 0.3487992270560074, 'learning_rate': 5e-06}
Start Training: Epoch 7



Training:   0%|          | 1/1070 [00:00<04:42,  3.79it/s]

Training steps: 0 Loss: 0.17315013706684113


Training:   9%|▉         | 101/1070 [00:27<04:12,  3.83it/s]

Training steps: 100 Loss: 0.13584107160568237


Training:  19%|█▉        | 201/1070 [00:54<04:04,  3.56it/s]

Training steps: 200 Loss: 0.13291071355342865


Training:  28%|██▊       | 301/1070 [01:21<03:19,  3.86it/s]

Training steps: 300 Loss: 0.2208179086446762


Training:  37%|███▋      | 401/1070 [01:48<03:10,  3.51it/s]

Training steps: 400 Loss: 0.14392049610614777


Training:  47%|████▋     | 501/1070 [02:15<02:27,  3.86it/s]

Training steps: 500 Loss: 0.17343643307685852


Training:  56%|█████▌    | 601/1070 [02:42<02:14,  3.50it/s]

Training steps: 600 Loss: 0.14124780893325806


Training:  66%|██████▌   | 701/1070 [03:09<01:35,  3.85it/s]

Training steps: 700 Loss: 0.06527316570281982


Training:  75%|███████▍  | 801/1070 [03:36<01:15,  3.57it/s]

Training steps: 800 Loss: 0.07727815210819244


Training:  84%|████████▍ | 901/1070 [04:03<00:44,  3.79it/s]

Training steps: 900 Loss: 0.30106961727142334


Training:  94%|█████████▎| 1001/1070 [04:30<00:19,  3.55it/s]

Training steps: 1000 Loss: 0.3479098081588745


Training: 100%|██████████| 1070/1070 [04:49<00:00,  3.70it/s]

TRAIN ACC : 0.9356308411214953, TRAIN LOSS : 0.19220554011691118



Training:   0%|          | 1/357 [00:00<00:43,  8.11it/s]

Validation steps: 0 Loss: 0.254646360874176


Training:  29%|██▊       | 102/357 [00:11<00:29,  8.58it/s]

Validation steps: 100 Loss: 0.5746532678604126


Training:  57%|█████▋    | 202/357 [00:23<00:18,  8.59it/s]

Validation steps: 200 Loss: 0.2230914682149887


Training:  85%|████████▍ | 302/357 [00:35<00:06,  8.63it/s]

Validation steps: 300 Loss: 0.48914751410484314


Training: 100%|██████████| 357/357 [00:41<00:00,  8.58it/s]


              precision    recall  f1-score   support

        IT과학       0.82      0.89      0.85      1206
          경제       0.88      0.81      0.84      1556
          사회       0.81      0.77      0.79      1841
        생활문화       0.91      0.92      0.91      1483
          세계       0.93      0.93      0.93      1907
         스포츠       0.96      0.98      0.97      1733
          정치       0.90      0.92      0.91      1688

    accuracy                           0.89     11414
   macro avg       0.89      0.89      0.89     11414
weighted avg       0.89      0.89      0.89     11414

[0.89220564 0.80719794 0.77403585 0.91908294 0.92606188 0.97864974
 0.92239336]
VALID ACC : 0.8882950762221833, VALID LOSS : 0.34659200112390165
{'epoch': 6, 'train_loss': 0.19220554011691118, 'train_acc': 0.9356308411214953, 'valid_acc': 0.8882950762221833, 'val_loss': 0.34659200112390165, 'learning_rate': 5e-06}
EarlyStopping counter: 3 out of 3


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.decoder.bias', 'lm_head.bias', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'rob

Start Training: Epoch 1


Training:   0%|          | 1/1071 [00:00<04:45,  3.74it/s]

Training steps: 0 Loss: 2.011697292327881


Training:   9%|▉         | 101/1071 [00:26<04:02,  4.00it/s]

Training steps: 100 Loss: 1.86729097366333


Training:  19%|█▉        | 201/1071 [00:51<03:54,  3.70it/s]

Training steps: 200 Loss: 1.667733073234558


Training:  28%|██▊       | 301/1071 [01:17<03:12,  4.00it/s]

Training steps: 300 Loss: 1.0697885751724243


Training:  37%|███▋      | 401/1071 [01:43<03:01,  3.69it/s]

Training steps: 400 Loss: 0.5147751569747925


Training:  47%|████▋     | 501/1071 [02:09<02:23,  3.97it/s]

Training steps: 500 Loss: 0.3598838150501251


Training:  56%|█████▌    | 601/1071 [02:36<02:12,  3.56it/s]

Training steps: 600 Loss: 0.2786266505718231


Training:  65%|██████▌   | 701/1071 [03:02<01:35,  3.88it/s]

Training steps: 700 Loss: 0.6092551946640015


Training:  75%|███████▍  | 801/1071 [03:29<01:16,  3.52it/s]

Training steps: 800 Loss: 0.14196470379829407


Training:  84%|████████▍ | 901/1071 [03:56<00:43,  3.87it/s]

Training steps: 900 Loss: 0.3024614453315735


Training:  93%|█████████▎| 1001/1071 [04:23<00:19,  3.52it/s]

Training steps: 1000 Loss: 0.24816471338272095


Training: 100%|██████████| 1071/1071 [04:42<00:00,  3.79it/s]

TRAIN ACC : 0.7306445489325663, TRAIN LOSS : 0.8196467636666156



Training:   0%|          | 1/357 [00:00<00:43,  8.19it/s]

Validation steps: 0 Loss: 0.4394378364086151


Training:  29%|██▊       | 102/357 [00:11<00:29,  8.67it/s]

Validation steps: 100 Loss: 0.32838672399520874


Training:  57%|█████▋    | 202/357 [00:23<00:18,  8.58it/s]

Validation steps: 200 Loss: 0.22700577974319458


Training:  85%|████████▍ | 302/357 [00:34<00:06,  8.80it/s]

Validation steps: 300 Loss: 0.8544970750808716


Training: 100%|██████████| 357/357 [00:41<00:00,  8.67it/s]


              precision    recall  f1-score   support

        IT과학       0.81      0.89      0.85      1206
          경제       0.85      0.86      0.86      1556
          사회       0.84      0.72      0.78      1840
        생활문화       0.89      0.90      0.90      1483
          세계       0.93      0.93      0.93      1907
         스포츠       0.95      0.98      0.97      1733
          정치       0.91      0.92      0.92      1688

    accuracy                           0.89     11413
   macro avg       0.88      0.89      0.88     11413
weighted avg       0.89      0.89      0.89     11413

[0.89137645 0.86246787 0.72445652 0.90289953 0.93235448 0.98384305
 0.92061611]
VALID ACC : 0.8872338561289758, VALID LOSS : 0.34241701034875976
{'epoch': 0, 'train_loss': 0.8196467636666156, 'train_acc': 0.7306445489325663, 'valid_acc': 0.8872338561289758, 'val_loss': 0.34241701034875976, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 2


Training:   0%|          | 1/1071 [00:00<05:00,  3.56it/s]

Training steps: 0 Loss: 0.38881027698516846


Training:   9%|▉         | 101/1071 [00:26<04:11,  3.86it/s]

Training steps: 100 Loss: 0.26414620876312256


Training:  19%|█▉        | 201/1071 [00:53<04:02,  3.59it/s]

Training steps: 200 Loss: 0.18084309995174408


Training:  28%|██▊       | 301/1071 [01:20<03:17,  3.91it/s]

Training steps: 300 Loss: 0.340743750333786


Training:  37%|███▋      | 401/1071 [01:46<03:09,  3.54it/s]

Training steps: 400 Loss: 0.3121517598628998


Training:  47%|████▋     | 501/1071 [02:13<02:26,  3.89it/s]

Training steps: 500 Loss: 0.2928623855113983


Training:  56%|█████▌    | 601/1071 [02:40<02:13,  3.53it/s]

Training steps: 600 Loss: 0.20472727715969086


Training:  65%|██████▌   | 701/1071 [03:06<01:35,  3.89it/s]

Training steps: 700 Loss: 0.2882542908191681


Training:  75%|███████▍  | 801/1071 [03:33<01:15,  3.59it/s]

Training steps: 800 Loss: 0.14634399116039276


Training:  84%|████████▍ | 901/1071 [04:00<00:44,  3.85it/s]

Training steps: 900 Loss: 0.4649355709552765


Training:  93%|█████████▎| 1001/1071 [04:26<00:19,  3.56it/s]

Training steps: 1000 Loss: 0.33446964621543884


Training: 100%|██████████| 1071/1071 [04:45<00:00,  3.75it/s]

TRAIN ACC : 0.8877661283256915, TRAIN LOSS : 0.3368049801147285



Training:   0%|          | 1/357 [00:00<00:43,  8.25it/s]

Validation steps: 0 Loss: 0.39960891008377075


Training:  29%|██▊       | 102/357 [00:10<00:26,  9.58it/s]

Validation steps: 100 Loss: 0.27555304765701294


Training:  57%|█████▋    | 202/357 [00:21<00:16,  9.50it/s]

Validation steps: 200 Loss: 0.19534491002559662


Training:  85%|████████▍ | 302/357 [00:31<00:05,  9.75it/s]

Validation steps: 300 Loss: 0.8539961576461792


Training: 100%|██████████| 357/357 [00:37<00:00,  9.52it/s]


              precision    recall  f1-score   support

        IT과학       0.78      0.93      0.85      1206
          경제       0.85      0.88      0.86      1556
          사회       0.87      0.71      0.79      1840
        생활문화       0.91      0.90      0.91      1483
          세계       0.95      0.91      0.93      1907
         스포츠       0.96      0.98      0.97      1733
          정치       0.89      0.95      0.92      1688

    accuracy                           0.89     11413
   macro avg       0.89      0.89      0.89     11413
weighted avg       0.89      0.89      0.89     11413

[0.93283582 0.8785347  0.71467391 0.90289953 0.90928159 0.97980381
 0.94668246]
VALID ACC : 0.8916148251993341, VALID LOSS : 0.32714289643329564
{'epoch': 1, 'train_loss': 0.3368049801147285, 'train_acc': 0.8877661283256915, 'valid_acc': 0.8916148251993341, 'val_loss': 0.32714289643329564, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 3


Training:   0%|          | 1/1071 [00:00<05:12,  3.42it/s]

Training steps: 0 Loss: 0.20079943537712097


Training:   9%|▉         | 101/1071 [00:26<04:01,  4.02it/s]

Training steps: 100 Loss: 0.264199823141098


Training:  19%|█▉        | 201/1071 [00:51<03:55,  3.70it/s]

Training steps: 200 Loss: 0.20617496967315674


Training:  28%|██▊       | 301/1071 [01:17<03:13,  3.99it/s]

Training steps: 300 Loss: 0.3948046863079071


Training:  37%|███▋      | 401/1071 [01:43<03:00,  3.71it/s]

Training steps: 400 Loss: 0.1816052794456482


Training:  47%|████▋     | 501/1071 [02:09<02:22,  4.00it/s]

Training steps: 500 Loss: 0.43873924016952515


Training:  56%|█████▌    | 601/1071 [02:35<02:07,  3.70it/s]

Training steps: 600 Loss: 0.3981655240058899


Training:  65%|██████▌   | 701/1071 [03:01<01:31,  4.04it/s]

Training steps: 700 Loss: 0.2714623510837555


Training:  75%|███████▍  | 801/1071 [03:27<01:13,  3.65it/s]

Training steps: 800 Loss: 0.2696036398410797


Training:  84%|████████▍ | 901/1071 [03:53<00:42,  4.01it/s]

Training steps: 900 Loss: 0.11617325246334076


Training:  93%|█████████▎| 1001/1071 [04:18<00:19,  3.68it/s]

Training steps: 1000 Loss: 0.4885365068912506


Training: 100%|██████████| 1071/1071 [04:37<00:00,  3.87it/s]

TRAIN ACC : 0.9016967962384276, TRAIN LOSS : 0.29161693844822495



Training:   0%|          | 1/357 [00:00<00:40,  8.79it/s]

Validation steps: 0 Loss: 0.272814005613327


Training:  29%|██▊       | 102/357 [00:10<00:26,  9.77it/s]

Validation steps: 100 Loss: 0.1684400588274002


Training:  57%|█████▋    | 202/357 [00:20<00:16,  9.67it/s]

Validation steps: 200 Loss: 0.11708660423755646


Training:  85%|████████▍ | 302/357 [00:31<00:05,  9.69it/s]

Validation steps: 300 Loss: 0.8125895261764526


Training: 100%|██████████| 357/357 [00:37<00:00,  9.63it/s]


              precision    recall  f1-score   support

        IT과학       0.83      0.88      0.86      1206
          경제       0.88      0.86      0.87      1556
          사회       0.83      0.77      0.80      1840
        생활문화       0.87      0.93      0.90      1483
          세계       0.93      0.93      0.93      1907
         스포츠       0.97      0.97      0.97      1733
          정치       0.92      0.91      0.92      1688

    accuracy                           0.89     11413
   macro avg       0.89      0.89      0.89     11413
weighted avg       0.89      0.89      0.89     11413

[0.88308458 0.85732648 0.76847826 0.93324343 0.9318301  0.97230237
 0.90936019]
VALID ACC : 0.8931919740646631, VALID LOSS : 0.3153270325292142
{'epoch': 2, 'train_loss': 0.29161693844822495, 'train_acc': 0.9016967962384276, 'valid_acc': 0.8931919740646631, 'val_loss': 0.3153270325292142, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 4


Training:   0%|          | 1/1071 [00:00<04:49,  3.69it/s]

Training steps: 0 Loss: 0.23872295022010803


Training:   9%|▉         | 101/1071 [00:25<04:00,  4.04it/s]

Training steps: 100 Loss: 0.2081095576286316


Training:  19%|█▉        | 201/1071 [00:51<03:58,  3.65it/s]

Training steps: 200 Loss: 0.19956590235233307


Training:  28%|██▊       | 301/1071 [01:17<03:11,  4.01it/s]

Training steps: 300 Loss: 0.32256096601486206


Training:  37%|███▋      | 401/1071 [01:43<03:01,  3.69it/s]

Training steps: 400 Loss: 0.31400179862976074


Training:  47%|████▋     | 501/1071 [02:09<02:23,  3.97it/s]

Training steps: 500 Loss: 0.34991613030433655


Training:  56%|█████▌    | 601/1071 [02:35<02:07,  3.67it/s]

Training steps: 600 Loss: 0.20671376585960388


Training:  65%|██████▌   | 701/1071 [03:01<01:33,  3.96it/s]

Training steps: 700 Loss: 0.15964755415916443


Training:  75%|███████▍  | 801/1071 [03:27<01:13,  3.68it/s]

Training steps: 800 Loss: 0.42951077222824097


Training:  84%|████████▍ | 901/1071 [03:53<00:42,  4.03it/s]

Training steps: 900 Loss: 0.031138285994529724


Training:  93%|█████████▎| 1001/1071 [04:19<00:19,  3.66it/s]

Training steps: 1000 Loss: 0.2787253260612488


Training: 100%|██████████| 1071/1071 [04:37<00:00,  3.86it/s]

TRAIN ACC : 0.911918460325341, TRAIN LOSS : 0.25777963976907076



Training:   0%|          | 1/357 [00:00<00:40,  8.79it/s]

Validation steps: 0 Loss: 0.2929902970790863


Training:  29%|██▊       | 102/357 [00:10<00:26,  9.58it/s]

Validation steps: 100 Loss: 0.1997509002685547


Training:  57%|█████▋    | 202/357 [00:21<00:16,  9.67it/s]

Validation steps: 200 Loss: 0.11968564242124557


Training:  85%|████████▍ | 302/357 [00:31<00:05,  9.68it/s]

Validation steps: 300 Loss: 0.852005124092102


Training: 100%|██████████| 357/357 [00:37<00:00,  9.62it/s]


              precision    recall  f1-score   support

        IT과학       0.83      0.89      0.86      1206
          경제       0.90      0.84      0.87      1556
          사회       0.82      0.79      0.81      1840
        생활문화       0.91      0.90      0.90      1483
          세계       0.94      0.93      0.94      1907
         스포츠       0.97      0.98      0.97      1733
          정치       0.90      0.95      0.92      1688

    accuracy                           0.90     11413
   macro avg       0.89      0.90      0.90     11413
weighted avg       0.90      0.90      0.90     11413

[0.89303483 0.84254499 0.79184783 0.89683075 0.93078133 0.97691864
 0.94727488]
VALID ACC : 0.8973977043722071, VALID LOSS : 0.309980202461032
{'epoch': 3, 'train_loss': 0.25777963976907076, 'train_acc': 0.911918460325341, 'valid_acc': 0.8973977043722071, 'val_loss': 0.309980202461032, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 5


Training:   0%|          | 1/1071 [00:00<04:50,  3.69it/s]

Training steps: 0 Loss: 0.14769025146961212


Training:   9%|▉         | 101/1071 [00:26<04:02,  3.99it/s]

Training steps: 100 Loss: 0.32245275378227234


Training:  19%|█▉        | 201/1071 [00:52<03:56,  3.68it/s]

Training steps: 200 Loss: 0.2724378705024719


Training:  28%|██▊       | 301/1071 [01:17<03:10,  4.04it/s]

Training steps: 300 Loss: 0.41679584980010986


Training:  37%|███▋      | 401/1071 [01:43<03:03,  3.66it/s]

Training steps: 400 Loss: 0.15574781596660614


Training:  47%|████▋     | 501/1071 [02:09<02:23,  3.98it/s]

Training steps: 500 Loss: 0.2938837707042694


Training:  56%|█████▌    | 601/1071 [02:35<02:08,  3.65it/s]

Training steps: 600 Loss: 0.09974413365125656


Training:  65%|██████▌   | 701/1071 [03:01<01:32,  4.01it/s]

Training steps: 700 Loss: 0.318057656288147


Training:  75%|███████▍  | 801/1071 [03:27<01:13,  3.68it/s]

Training steps: 800 Loss: 0.19388903677463531


Training:  84%|████████▍ | 901/1071 [03:53<00:42,  3.99it/s]

Training steps: 900 Loss: 0.08511732518672943


Training:  93%|█████████▎| 1001/1071 [04:19<00:19,  3.65it/s]

Training steps: 1000 Loss: 0.1592005342245102


Training: 100%|██████████| 1071/1071 [04:37<00:00,  3.85it/s]

TRAIN ACC : 0.9207382961946204, TRAIN LOSS : 0.23088826343980656



Training:   0%|          | 1/357 [00:00<00:40,  8.74it/s]

Validation steps: 0 Loss: 0.29302242398262024


Training:  29%|██▊       | 102/357 [00:10<00:26,  9.51it/s]

Validation steps: 100 Loss: 0.23566125333309174


Training:  57%|█████▋    | 202/357 [00:21<00:16,  9.51it/s]

Validation steps: 200 Loss: 0.10341760516166687


Training:  85%|████████▍ | 302/357 [00:31<00:05,  9.64it/s]

Validation steps: 300 Loss: 0.8696642518043518


Training: 100%|██████████| 357/357 [00:37<00:00,  9.50it/s]

              precision    recall  f1-score   support

        IT과학       0.82      0.89      0.85      1206
          경제       0.87      0.86      0.86      1556
          사회       0.84      0.78      0.81      1840
        생활문화       0.91      0.90      0.91      1483
          세계       0.93      0.94      0.93      1907
         스포츠       0.97      0.98      0.97      1733
          정치       0.91      0.94      0.92      1688

    accuracy                           0.90     11413
   macro avg       0.89      0.90      0.89     11413
weighted avg       0.90      0.90      0.90     11413

[0.88971808 0.85539846 0.78097826 0.90020229 0.93759832 0.9763416
 0.93720379]
VALID ACC : 0.8970472268465784, VALID LOSS : 0.31336467074496405
{'epoch': 4, 'train_loss': 0.23088826343980656, 'train_acc': 0.9207382961946204, 'valid_acc': 0.8970472268465784, 'val_loss': 0.31336467074496405, 'learning_rate': 5e-06}
Start Training: Epoch 6



Training:   0%|          | 1/1071 [00:00<04:40,  3.81it/s]

Training steps: 0 Loss: 0.1989719122648239


Training:   9%|▉         | 101/1071 [00:26<04:01,  4.01it/s]

Training steps: 100 Loss: 0.04147595167160034


Training:  19%|█▉        | 201/1071 [00:52<03:57,  3.66it/s]

Training steps: 200 Loss: 0.08660531789064407


Training:  28%|██▊       | 301/1071 [01:18<03:13,  3.98it/s]

Training steps: 300 Loss: 0.09026269614696503


Training:  37%|███▋      | 401/1071 [01:44<03:01,  3.68it/s]

Training steps: 400 Loss: 0.1830822229385376


Training:  47%|████▋     | 501/1071 [02:10<02:22,  4.01it/s]

Training steps: 500 Loss: 0.31393176317214966


Training:  56%|█████▌    | 601/1071 [02:36<02:08,  3.66it/s]

Training steps: 600 Loss: 0.1488390415906906


Training:  65%|██████▌   | 701/1071 [03:02<01:32,  4.01it/s]

Training steps: 700 Loss: 0.19219085574150085


Training:  75%|███████▍  | 801/1071 [03:28<01:14,  3.61it/s]

Training steps: 800 Loss: 0.10926555842161179


Training:  84%|████████▍ | 901/1071 [03:54<00:42,  3.98it/s]

Training steps: 900 Loss: 0.08337664604187012


Training:  93%|█████████▎| 1001/1071 [04:20<00:19,  3.68it/s]

Training steps: 1000 Loss: 0.06361791491508484


Training: 100%|██████████| 1071/1071 [04:38<00:00,  3.85it/s]

TRAIN ACC : 0.9292368797640256, TRAIN LOSS : 0.20637796753860949



Training:   0%|          | 1/357 [00:00<00:43,  8.22it/s]

Validation steps: 0 Loss: 0.3853490948677063


Training:  29%|██▊       | 102/357 [00:11<00:28,  9.07it/s]

Validation steps: 100 Loss: 0.20078718662261963


Training:  57%|█████▋    | 202/357 [00:22<00:17,  8.95it/s]

Validation steps: 200 Loss: 0.0868975818157196


Training:  85%|████████▍ | 302/357 [00:33<00:06,  8.95it/s]

Validation steps: 300 Loss: 0.8593953251838684


Training: 100%|██████████| 357/357 [00:39<00:00,  9.00it/s]

              precision    recall  f1-score   support

        IT과학       0.80      0.91      0.85      1206
          경제       0.87      0.86      0.86      1556
          사회       0.85      0.76      0.80      1840
        생활문화       0.90      0.90      0.90      1483
          세계       0.94      0.93      0.93      1907
         스포츠       0.96      0.98      0.97      1733
          정치       0.91      0.94      0.92      1688

    accuracy                           0.90     11413
   macro avg       0.89      0.90      0.89     11413
weighted avg       0.90      0.90      0.89     11413

[0.90547264 0.85796915 0.76032609 0.90357384 0.92763503 0.98211194
 0.93720379]
VALID ACC : 0.8953824585998423, VALID LOSS : 0.3239997281074649
{'epoch': 5, 'train_loss': 0.20637796753860949, 'train_acc': 0.9292368797640256, 'valid_acc': 0.8953824585998423, 'val_loss': 0.3239997281074649, 'learning_rate': 5e-06}
Start Training: Epoch 7



Training:   0%|          | 1/1071 [00:00<04:34,  3.89it/s]

Training steps: 0 Loss: 0.10597226023674011


Training:   9%|▉         | 101/1071 [00:26<04:06,  3.94it/s]

Training steps: 100 Loss: 0.34959253668785095


Training:  19%|█▉        | 201/1071 [00:53<04:02,  3.58it/s]

Training steps: 200 Loss: 0.4616169333457947


Training:  28%|██▊       | 301/1071 [01:19<03:15,  3.94it/s]

Training steps: 300 Loss: 0.29821041226387024


Training:  37%|███▋      | 401/1071 [01:45<03:04,  3.63it/s]

Training steps: 400 Loss: 0.1092957928776741


Training:  47%|████▋     | 501/1071 [02:12<02:25,  3.91it/s]

Training steps: 500 Loss: 0.19568048417568207


Training:  56%|█████▌    | 601/1071 [02:38<02:09,  3.63it/s]

Training steps: 600 Loss: 0.15043991804122925


Training:  65%|██████▌   | 701/1071 [03:05<01:35,  3.88it/s]

Training steps: 700 Loss: 0.14038242399692535


Training:  75%|███████▍  | 801/1071 [03:31<01:14,  3.62it/s]

Training steps: 800 Loss: 0.21593181788921356


Training:  84%|████████▍ | 901/1071 [03:58<00:43,  3.93it/s]

Training steps: 900 Loss: 0.12268584221601486


Training:  93%|█████████▎| 1001/1071 [04:24<00:19,  3.59it/s]

Training steps: 1000 Loss: 0.28645384311676025


Training: 100%|██████████| 1071/1071 [04:43<00:00,  3.78it/s]

TRAIN ACC : 0.9367717064338075, TRAIN LOSS : 0.18300905829664219



Training:   0%|          | 1/357 [00:00<00:41,  8.56it/s]

Validation steps: 0 Loss: 0.24714875221252441


Training:  29%|██▊       | 102/357 [00:11<00:27,  9.16it/s]

Validation steps: 100 Loss: 0.1858624666929245


Training:  57%|█████▋    | 202/357 [00:22<00:16,  9.25it/s]

Validation steps: 200 Loss: 0.057170569896698


Training:  85%|████████▍ | 302/357 [00:33<00:06,  8.95it/s]

Validation steps: 300 Loss: 0.8044257164001465


Training: 100%|██████████| 357/357 [00:39<00:00,  9.10it/s]


              precision    recall  f1-score   support

        IT과학       0.84      0.85      0.84      1206
          경제       0.86      0.86      0.86      1556
          사회       0.85      0.75      0.80      1840
        생활문화       0.86      0.93      0.90      1483
          세계       0.94      0.93      0.93      1907
         스포츠       0.96      0.98      0.97      1733
          정치       0.90      0.94      0.92      1688

    accuracy                           0.89     11413
   macro avg       0.89      0.89      0.89     11413
weighted avg       0.89      0.89      0.89     11413

[0.85157546 0.8592545  0.75271739 0.93189481 0.92815941 0.9763416
 0.94075829]
VALID ACC : 0.8920529221063699, VALID LOSS : 0.33677611201062424
{'epoch': 6, 'train_loss': 0.18300905829664219, 'train_acc': 0.9367717064338075, 'valid_acc': 0.8920529221063699, 'val_loss': 0.33677611201062424, 'learning_rate': 5e-06}
EarlyStopping counter: 3 out of 3


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.decoder.bias', 'lm_head.bias', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'rob

Start Training: Epoch 1


Training:   0%|          | 1/1071 [00:00<04:50,  3.68it/s]

Training steps: 0 Loss: 2.025378942489624


Training:   9%|▉         | 101/1071 [00:26<04:08,  3.90it/s]

Training steps: 100 Loss: 1.9193154573440552


Training:  19%|█▉        | 201/1071 [00:53<03:58,  3.65it/s]

Training steps: 200 Loss: 1.491222620010376


Training:  28%|██▊       | 301/1071 [01:19<03:15,  3.95it/s]

Training steps: 300 Loss: 0.9973284006118774


Training:  37%|███▋      | 401/1071 [01:45<03:04,  3.63it/s]

Training steps: 400 Loss: 0.8198277354240417


Training:  47%|████▋     | 501/1071 [02:11<02:25,  3.92it/s]

Training steps: 500 Loss: 0.5063483119010925


Training:  56%|█████▌    | 601/1071 [02:38<02:10,  3.61it/s]

Training steps: 600 Loss: 0.4718568027019501


Training:  65%|██████▌   | 701/1071 [03:04<01:33,  3.95it/s]

Training steps: 700 Loss: 0.3906151354312897


Training:  75%|███████▍  | 801/1071 [03:30<01:14,  3.63it/s]

Training steps: 800 Loss: 0.2590884268283844


Training:  84%|████████▍ | 901/1071 [03:57<00:43,  3.92it/s]

Training steps: 900 Loss: 0.41168075799942017


Training:  93%|█████████▎| 1001/1071 [04:23<00:19,  3.62it/s]

Training steps: 1000 Loss: 0.49864816665649414


Training: 100%|██████████| 1071/1071 [04:41<00:00,  3.80it/s]

TRAIN ACC : 0.7397564323471861, TRAIN LOSS : 0.8024374575855908



Training:   0%|          | 1/357 [00:00<00:42,  8.45it/s]

Validation steps: 0 Loss: 0.06449031084775925


Training:  29%|██▊       | 102/357 [00:10<00:26,  9.50it/s]

Validation steps: 100 Loss: 0.15431812405586243


Training:  57%|█████▋    | 202/357 [00:21<00:16,  9.52it/s]

Validation steps: 200 Loss: 0.4515983462333679


Training:  85%|████████▍ | 302/357 [00:32<00:05,  9.39it/s]

Validation steps: 300 Loss: 0.4867757260799408


Training: 100%|██████████| 357/357 [00:37<00:00,  9.41it/s]


              precision    recall  f1-score   support

        IT과학       0.82      0.87      0.84      1206
          경제       0.88      0.80      0.84      1555
          사회       0.81      0.75      0.77      1840
        생활문화       0.86      0.92      0.89      1484
          세계       0.90      0.95      0.92      1907
         스포츠       0.97      0.96      0.97      1733
          정치       0.91      0.92      0.92      1688

    accuracy                           0.88     11413
   macro avg       0.88      0.88      0.88     11413
weighted avg       0.88      0.88      0.88     11413

[0.86650083 0.79935691 0.74673913 0.92318059 0.94651285 0.96133872
 0.9200237 ]
VALID ACC : 0.881100499430474, VALID LOSS : 0.3698283586160642
{'epoch': 0, 'train_loss': 0.8024374575855908, 'train_acc': 0.7397564323471861, 'valid_acc': 0.881100499430474, 'val_loss': 0.3698283586160642, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 2


Training:   0%|          | 1/1071 [00:00<05:05,  3.50it/s]

Training steps: 0 Loss: 0.7656599283218384


Training:   9%|▉         | 101/1071 [00:26<04:03,  3.98it/s]

Training steps: 100 Loss: 0.452780157327652


Training:  19%|█▉        | 201/1071 [00:52<03:58,  3.65it/s]

Training steps: 200 Loss: 0.7009800672531128


Training:  28%|██▊       | 301/1071 [01:19<03:14,  3.96it/s]

Training steps: 300 Loss: 0.3499804437160492


Training:  37%|███▋      | 401/1071 [01:45<03:03,  3.66it/s]

Training steps: 400 Loss: 0.45262411236763


Training:  47%|████▋     | 501/1071 [02:11<02:24,  3.94it/s]

Training steps: 500 Loss: 0.35173144936561584


Training:  56%|█████▌    | 601/1071 [02:37<02:09,  3.64it/s]

Training steps: 600 Loss: 0.45261484384536743


Training:  65%|██████▌   | 701/1071 [03:03<01:33,  3.96it/s]

Training steps: 700 Loss: 0.24917776882648468


Training:  75%|███████▍  | 801/1071 [03:30<01:14,  3.61it/s]

Training steps: 800 Loss: 0.21840696036815643


Training:  84%|████████▍ | 901/1071 [03:56<00:43,  3.93it/s]

Training steps: 900 Loss: 0.2171553671360016


Training:  93%|█████████▎| 1001/1071 [04:23<00:19,  3.60it/s]

Training steps: 1000 Loss: 0.21689262986183167


Training: 100%|██████████| 1071/1071 [04:41<00:00,  3.81it/s]

TRAIN ACC : 0.8882918139073042, TRAIN LOSS : 0.34197937472124285



Training:   0%|          | 1/357 [00:00<00:42,  8.46it/s]

Validation steps: 0 Loss: 0.05059010162949562


Training:  29%|██▊       | 102/357 [00:11<00:27,  9.27it/s]

Validation steps: 100 Loss: 0.19919824600219727


Training:  57%|█████▋    | 202/357 [00:22<00:17,  9.04it/s]

Validation steps: 200 Loss: 0.310247540473938


Training:  85%|████████▍ | 302/357 [00:32<00:05,  9.30it/s]

Validation steps: 300 Loss: 0.3719213902950287


Training: 100%|██████████| 357/357 [00:38<00:00,  9.18it/s]


              precision    recall  f1-score   support

        IT과학       0.82      0.89      0.85      1206
          경제       0.83      0.86      0.84      1555
          사회       0.84      0.73      0.78      1840
        생활문화       0.90      0.90      0.90      1484
          세계       0.94      0.93      0.93      1907
         스포츠       0.96      0.98      0.97      1733
          정치       0.89      0.95      0.92      1688

    accuracy                           0.89     11413
   macro avg       0.88      0.89      0.89     11413
weighted avg       0.89      0.89      0.89     11413

[0.88640133 0.85787781 0.72717391 0.9009434  0.92553749 0.97691864
 0.94609005]
VALID ACC : 0.8878471917988259, VALID LOSS : 0.33419121316626293
{'epoch': 1, 'train_loss': 0.34197937472124285, 'train_acc': 0.8882918139073042, 'valid_acc': 0.8878471917988259, 'val_loss': 0.33419121316626293, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 3


Training:   0%|          | 1/1071 [00:00<05:07,  3.48it/s]

Training steps: 0 Loss: 0.31693336367607117


Training:   9%|▉         | 101/1071 [00:26<04:06,  3.93it/s]

Training steps: 100 Loss: 0.1362704038619995


Training:  19%|█▉        | 201/1071 [00:53<04:00,  3.61it/s]

Training steps: 200 Loss: 0.14066681265830994


Training:  28%|██▊       | 301/1071 [01:19<03:14,  3.95it/s]

Training steps: 300 Loss: 0.4211514890193939


Training:  37%|███▋      | 401/1071 [01:45<03:05,  3.62it/s]

Training steps: 400 Loss: 0.18880979716777802


Training:  47%|████▋     | 501/1071 [02:11<02:23,  3.97it/s]

Training steps: 500 Loss: 0.31071752309799194


Training:  56%|█████▌    | 601/1071 [02:38<02:08,  3.66it/s]

Training steps: 600 Loss: 0.2407471388578415


Training:  65%|██████▌   | 701/1071 [03:04<01:33,  3.96it/s]

Training steps: 700 Loss: 0.3446093201637268


Training:  75%|███████▍  | 801/1071 [03:30<01:13,  3.66it/s]

Training steps: 800 Loss: 0.22504377365112305


Training:  84%|████████▍ | 901/1071 [03:56<00:43,  3.94it/s]

Training steps: 900 Loss: 0.36210137605667114


Training:  93%|█████████▎| 1001/1071 [04:22<00:19,  3.66it/s]

Training steps: 1000 Loss: 0.12724074721336365


Training: 100%|██████████| 1071/1071 [04:40<00:00,  3.81it/s]

TRAIN ACC : 0.9016383867293596, TRAIN LOSS : 0.2936445356778849



Training:   0%|          | 1/357 [00:00<00:41,  8.58it/s]

Validation steps: 0 Loss: 0.018291473388671875


Training:  29%|██▊       | 102/357 [00:10<00:27,  9.23it/s]

Validation steps: 100 Loss: 0.18381302058696747


Training:  57%|█████▋    | 202/357 [00:21<00:17,  9.04it/s]

Validation steps: 200 Loss: 0.42263686656951904


Training:  85%|████████▍ | 302/357 [00:32<00:05,  9.22it/s]

Validation steps: 300 Loss: 0.49149203300476074


Training: 100%|██████████| 357/357 [00:38<00:00,  9.23it/s]

              precision    recall  f1-score   support

        IT과학       0.76      0.95      0.84      1206
          경제       0.92      0.77      0.83      1555
          사회       0.84      0.73      0.78      1840
        생활문화       0.90      0.90      0.90      1484
          세계       0.90      0.95      0.92      1907
         스포츠       0.96      0.98      0.97      1733
          정치       0.91      0.93      0.92      1688

    accuracy                           0.89     11413
   macro avg       0.88      0.89      0.88     11413
weighted avg       0.89      0.89      0.88     11413

[0.95190713 0.76720257 0.72608696 0.90229111 0.95070792 0.98268898
 0.9306872 ]
VALID ACC : 0.8852186103566109, VALID LOSS : 0.3435448925023904
{'epoch': 2, 'train_loss': 0.2936445356778849, 'train_acc': 0.9016383867293596, 'valid_acc': 0.8852186103566109, 'val_loss': 0.3435448925023904, 'learning_rate': 5e-06}
Start Training: Epoch 4



Training:   0%|          | 1/1071 [00:00<04:35,  3.88it/s]

Training steps: 0 Loss: 0.2342461496591568


Training:   9%|▉         | 101/1071 [00:26<04:03,  3.98it/s]

Training steps: 100 Loss: 0.40047094225883484


Training:  19%|█▉        | 201/1071 [00:52<03:59,  3.63it/s]

Training steps: 200 Loss: 0.3262191712856293


Training:  28%|██▊       | 301/1071 [01:18<03:14,  3.95it/s]

Training steps: 300 Loss: 0.18826644122600555


Training:  37%|███▋      | 401/1071 [01:45<03:03,  3.65it/s]

Training steps: 400 Loss: 0.11617396026849747


Training:  47%|████▋     | 501/1071 [02:11<02:23,  3.98it/s]

Training steps: 500 Loss: 0.3786877691745758


Training:  56%|█████▌    | 601/1071 [02:37<02:10,  3.59it/s]

Training steps: 600 Loss: 0.3044520616531372


Training:  65%|██████▌   | 701/1071 [03:03<01:33,  3.96it/s]

Training steps: 700 Loss: 0.4839288592338562


Training:  75%|███████▍  | 801/1071 [03:29<01:15,  3.60it/s]

Training steps: 800 Loss: 0.2928539514541626


Training:  84%|████████▍ | 901/1071 [03:56<00:42,  3.96it/s]

Training steps: 900 Loss: 0.24013692140579224


Training:  93%|█████████▎| 1001/1071 [04:22<00:19,  3.66it/s]

Training steps: 1000 Loss: 0.18386630713939667


Training: 100%|██████████| 1071/1071 [04:40<00:00,  3.82it/s]

TRAIN ACC : 0.9116556175345346, TRAIN LOSS : 0.26267410819283943



Training:   0%|          | 1/357 [00:00<00:42,  8.37it/s]

Validation steps: 0 Loss: 0.02192993275821209


Training:  29%|██▊       | 102/357 [00:11<00:28,  9.10it/s]

Validation steps: 100 Loss: 0.17874906957149506


Training:  57%|█████▋    | 202/357 [00:21<00:16,  9.44it/s]

Validation steps: 200 Loss: 0.41030099987983704


Training:  85%|████████▍ | 302/357 [00:32<00:05,  9.50it/s]

Validation steps: 300 Loss: 0.3463136553764343


Training: 100%|██████████| 357/357 [00:38<00:00,  9.36it/s]


              precision    recall  f1-score   support

        IT과학       0.80      0.92      0.86      1206
          경제       0.87      0.82      0.85      1555
          사회       0.84      0.76      0.80      1840
        생활문화       0.90      0.90      0.90      1484
          세계       0.92      0.94      0.93      1907
         스포츠       0.97      0.98      0.97      1733
          정치       0.92      0.93      0.92      1688

    accuracy                           0.89     11413
   macro avg       0.89      0.89      0.89     11413
weighted avg       0.89      0.89      0.89     11413

[0.92205638 0.8244373  0.75543478 0.90498652 0.94336654 0.97576457
 0.92535545]
VALID ACC : 0.8918776833435555, VALID LOSS : 0.32521629958030057
{'epoch': 3, 'train_loss': 0.26267410819283943, 'train_acc': 0.9116556175345346, 'valid_acc': 0.8918776833435555, 'val_loss': 0.32521629958030057, 'learning_rate': 5e-06}
saving model ...
Start Training: Epoch 5


Training:   0%|          | 1/1071 [00:00<05:12,  3.42it/s]

Training steps: 0 Loss: 0.07042724639177322


Training:   9%|▉         | 101/1071 [00:26<04:05,  3.95it/s]

Training steps: 100 Loss: 0.36542925238609314


Training:  19%|█▉        | 201/1071 [00:52<04:00,  3.62it/s]

Training steps: 200 Loss: 0.21754741668701172


Training:  28%|██▊       | 301/1071 [01:18<03:12,  4.00it/s]

Training steps: 300 Loss: 0.19995367527008057


Training:  37%|███▋      | 401/1071 [01:44<03:02,  3.66it/s]

Training steps: 400 Loss: 0.16895711421966553


Training:  47%|████▋     | 501/1071 [02:10<02:23,  3.96it/s]

Training steps: 500 Loss: 0.24794648587703705


Training:  56%|█████▌    | 601/1071 [02:37<02:07,  3.67it/s]

Training steps: 600 Loss: 0.18233586847782135


Training:  65%|██████▌   | 701/1071 [03:03<01:33,  3.95it/s]

Training steps: 700 Loss: 0.3138408064842224


Training:  75%|███████▍  | 801/1071 [03:29<01:13,  3.67it/s]

Training steps: 800 Loss: 0.3402853012084961


Training:  84%|████████▍ | 901/1071 [03:55<00:42,  4.00it/s]

Training steps: 900 Loss: 0.23787088692188263


Training:  93%|█████████▎| 1001/1071 [04:21<00:19,  3.63it/s]

Training steps: 1000 Loss: 0.06535185873508453


Training: 100%|██████████| 1071/1071 [04:39<00:00,  3.83it/s]

TRAIN ACC : 0.918839987149908, TRAIN LOSS : 0.23817503455605438



Training:   0%|          | 1/357 [00:00<00:42,  8.41it/s]

Validation steps: 0 Loss: 0.03271256014704704


Training:  29%|██▊       | 102/357 [00:10<00:26,  9.48it/s]

Validation steps: 100 Loss: 0.17623934149742126


Training:  57%|█████▋    | 202/357 [00:21<00:16,  9.34it/s]

Validation steps: 200 Loss: 0.43842411041259766


Training:  85%|████████▍ | 302/357 [00:32<00:05,  9.27it/s]

Validation steps: 300 Loss: 0.5203626155853271


Training: 100%|██████████| 357/357 [00:38<00:00,  9.33it/s]

              precision    recall  f1-score   support

        IT과학       0.78      0.94      0.85      1206
          경제       0.89      0.79      0.84      1555
          사회       0.85      0.73      0.79      1840
        생활문화       0.89      0.92      0.90      1484
          세계       0.90      0.95      0.92      1907
         스포츠       0.97      0.97      0.97      1733
          정치       0.92      0.93      0.92      1688

    accuracy                           0.89     11413
   macro avg       0.89      0.89      0.89     11413
weighted avg       0.89      0.89      0.89     11413

[0.94444444 0.79421222 0.73315217 0.91509434 0.94965915 0.96999423
 0.92654028]
VALID ACC : 0.8881976693244545, VALID LOSS : 0.34292031455972866
{'epoch': 4, 'train_loss': 0.23817503455605438, 'train_acc': 0.918839987149908, 'valid_acc': 0.8881976693244545, 'val_loss': 0.34292031455972866, 'learning_rate': 5e-06}
Start Training: Epoch 6



Training:   0%|          | 1/1071 [00:00<04:34,  3.90it/s]

Training steps: 0 Loss: 0.10002041608095169


Training:   9%|▉         | 101/1071 [00:26<04:06,  3.94it/s]

Training steps: 100 Loss: 0.1736917495727539


Training:  19%|█▉        | 201/1071 [00:52<03:58,  3.65it/s]

Training steps: 200 Loss: 0.13739527761936188


Training:  28%|██▊       | 301/1071 [01:18<03:13,  3.98it/s]

Training steps: 300 Loss: 0.2265651524066925


Training:  37%|███▋      | 401/1071 [01:44<03:05,  3.61it/s]

Training steps: 400 Loss: 0.4125451147556305


Training:  47%|████▋     | 501/1071 [02:11<02:22,  3.99it/s]

Training steps: 500 Loss: 0.14777129888534546


Training:  56%|█████▌    | 601/1071 [02:37<02:10,  3.60it/s]

Training steps: 600 Loss: 0.07137890160083771


Training:  65%|██████▌   | 701/1071 [03:03<01:33,  3.96it/s]

Training steps: 700 Loss: 0.12298361957073212


Training:  75%|███████▍  | 801/1071 [03:29<01:13,  3.66it/s]

Training steps: 800 Loss: 0.4013804495334625


Training:  84%|████████▍ | 901/1071 [03:55<00:42,  3.96it/s]

Training steps: 900 Loss: 0.17768095433712006


Training:  93%|█████████▎| 1001/1071 [04:21<00:19,  3.65it/s]

Training steps: 1000 Loss: 0.20581647753715515


Training: 100%|██████████| 1071/1071 [04:40<00:00,  3.82it/s]

TRAIN ACC : 0.9279518705645279, TRAIN LOSS : 0.21307345527901647



Training:   0%|          | 1/357 [00:00<00:42,  8.34it/s]

Validation steps: 0 Loss: 0.01734730787575245


Training:  29%|██▊       | 102/357 [00:10<00:27,  9.16it/s]

Validation steps: 100 Loss: 0.18288657069206238


Training:  57%|█████▋    | 202/357 [00:21<00:16,  9.27it/s]

Validation steps: 200 Loss: 0.38393527269363403


Training:  85%|████████▍ | 302/357 [00:32<00:05,  9.38it/s]

Validation steps: 300 Loss: 0.40464794635772705


Training: 100%|██████████| 357/357 [00:38<00:00,  9.25it/s]

              precision    recall  f1-score   support

        IT과학       0.83      0.87      0.85      1206
          경제       0.81      0.87      0.84      1555
          사회       0.85      0.72      0.78      1840
        생활문화       0.92      0.89      0.91      1484
          세계       0.90      0.95      0.92      1907
         스포츠       0.97      0.98      0.97      1733
          정치       0.92      0.92      0.92      1688

    accuracy                           0.89     11413
   macro avg       0.88      0.89      0.88     11413
weighted avg       0.89      0.89      0.89     11413

[0.86981758 0.87459807 0.71956522 0.89487871 0.94861038 0.97749567
 0.92061611]
VALID ACC : 0.8865329010777184, VALID LOSS : 0.3507954023115817
{'epoch': 5, 'train_loss': 0.21307345527901647, 'train_acc': 0.9279518705645279, 'valid_acc': 0.8865329010777184, 'val_loss': 0.3507954023115817, 'learning_rate': 5e-06}
Start Training: Epoch 7



Training:   0%|          | 1/1071 [00:00<04:31,  3.93it/s]

Training steps: 0 Loss: 0.029858697205781937


Training:   9%|▉         | 101/1071 [00:26<04:06,  3.94it/s]

Training steps: 100 Loss: 0.12058883905410767


Training:  19%|█▉        | 201/1071 [00:52<04:01,  3.61it/s]

Training steps: 200 Loss: 0.4237842559814453


Training:  28%|██▊       | 301/1071 [01:18<03:14,  3.95it/s]

Training steps: 300 Loss: 0.13857698440551758


Training:  37%|███▋      | 401/1071 [01:45<03:04,  3.63it/s]

Training steps: 400 Loss: 0.2731251120567322


Training:  47%|████▋     | 501/1071 [02:11<02:23,  3.97it/s]

Training steps: 500 Loss: 0.07605326920747757


Training:  56%|█████▌    | 601/1071 [02:37<02:10,  3.59it/s]

Training steps: 600 Loss: 0.25701674818992615


Training:  65%|██████▌   | 701/1071 [03:04<01:34,  3.94it/s]

Training steps: 700 Loss: 0.28287389874458313


Training:  75%|███████▍  | 801/1071 [03:30<01:14,  3.60it/s]

Training steps: 800 Loss: 0.11168372631072998


Training:  84%|████████▍ | 901/1071 [03:56<00:42,  3.96it/s]

Training steps: 900 Loss: 0.25524625182151794


Training:  93%|█████████▎| 1001/1071 [04:23<00:19,  3.62it/s]

Training steps: 1000 Loss: 0.14013470709323883


Training: 100%|██████████| 1071/1071 [04:41<00:00,  3.80it/s]

TRAIN ACC : 0.9348149878800268, TRAIN LOSS : 0.1930846900263511



Training:   0%|          | 1/357 [00:00<00:44,  7.97it/s]

Validation steps: 0 Loss: 0.014652907848358154


Training:  29%|██▊       | 102/357 [00:11<00:27,  9.11it/s]

Validation steps: 100 Loss: 0.1423112154006958


Training:  57%|█████▋    | 202/357 [00:22<00:16,  9.17it/s]

Validation steps: 200 Loss: 0.4159090518951416


Training:  85%|████████▍ | 302/357 [00:32<00:05,  9.29it/s]

Validation steps: 300 Loss: 0.31723037362098694


Training: 100%|██████████| 357/357 [00:38<00:00,  9.20it/s]


              precision    recall  f1-score   support

        IT과학       0.84      0.86      0.85      1206
          경제       0.86      0.82      0.84      1555
          사회       0.80      0.78      0.79      1840
        생활문화       0.93      0.88      0.90      1484
          세계       0.90      0.95      0.92      1907
         스포츠       0.96      0.98      0.97      1733
          정치       0.91      0.93      0.92      1688

    accuracy                           0.89     11413
   macro avg       0.89      0.88      0.88     11413
weighted avg       0.89      0.89      0.89     11413

[0.85820896 0.82186495 0.78043478 0.87533693 0.94756162 0.98038084
 0.93009479]
VALID ACC : 0.8870586173661614, VALID LOSS : 0.3513739476350843
{'epoch': 6, 'train_loss': 0.1930846900263511, 'train_acc': 0.9348149878800268, 'valid_acc': 0.8870586173661614, 'val_loss': 0.3513739476350843, 'learning_rate': 5e-06}
EarlyStopping counter: 3 out of 3
************************************************** auc_a

In [10]:
torch.cuda.empty_cache()

## Inference

In [11]:
def inference_main():
    args = parse_args()
    args['vocab'] = make_vocab(args)
    args.model_name = "temp"
    preprocess = Preprocess(args)
    preprocess.load_test_data()
    test_data = preprocess.test_data

    print(f"size of test data : {len(test_data)}")
    torch.cuda.empty_cache()
    # del model
    inference(args, test_data)

inference_main()

category 0 reading end, size : 972
category 1 reading end, size : 972
category 2 reading end, size : 1388
category 3 reading end, size : 1875
category 4 reading end, size : 1530
category 5 reading end, size : 1604
category 6 reading end, size : 1897
size of test data : 9131
Loading Model from: /content/drive/MyDrive/KLUE_TC/models/vocab20/temp_1.pt


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.decoder.bias', 'lm_head.bias', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'rob

Loading Model from: /content/drive/MyDrive/KLUE_TC/models/vocab20/temp_1.pt ...Finished.


Inferencing: 100%|██████████| 286/286 [00:31<00:00,  9.14it/s]


writing prediction : /content/drive/MyDrive/KLUE_TC/output/vocab50/output_1.csv
Loading Model from: /content/drive/MyDrive/KLUE_TC/models/vocab20/temp_2.pt


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.decoder.bias', 'lm_head.bias', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'rob

Loading Model from: /content/drive/MyDrive/KLUE_TC/models/vocab20/temp_2.pt ...Finished.


Inferencing: 100%|██████████| 286/286 [00:31<00:00,  8.99it/s]


writing prediction : /content/drive/MyDrive/KLUE_TC/output/vocab50/output_2.csv
Loading Model from: /content/drive/MyDrive/KLUE_TC/models/vocab20/temp_3.pt


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.decoder.bias', 'lm_head.bias', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'rob

Loading Model from: /content/drive/MyDrive/KLUE_TC/models/vocab20/temp_3.pt ...Finished.


Inferencing: 100%|██████████| 286/286 [00:31<00:00,  9.20it/s]


writing prediction : /content/drive/MyDrive/KLUE_TC/output/vocab50/output_3.csv
Loading Model from: /content/drive/MyDrive/KLUE_TC/models/vocab20/temp_4.pt


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.decoder.bias', 'lm_head.bias', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'rob

Loading Model from: /content/drive/MyDrive/KLUE_TC/models/vocab20/temp_4.pt ...Finished.


Inferencing: 100%|██████████| 286/286 [00:30<00:00,  9.24it/s]


writing prediction : /content/drive/MyDrive/KLUE_TC/output/vocab50/output_4.csv
writing prediction : /content/drive/MyDrive/KLUE_TC/output/vocab50/output_softvote.csv
