In [1]:
!nvidia-smi

Sun Aug  8 05:46:28 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.42.01    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   41C    P0    26W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

## Directory 설정, 구글 드라이브 import

In [2]:
cur_dir = '/content/drive/MyDrive/KLUE_TC'

In [3]:
!pip install adamp
!pip install transformers

Collecting adamp
  Downloading adamp-0.3.0.tar.gz (5.1 kB)
Building wheels for collected packages: adamp
  Building wheel for adamp (setup.py) ... [?25l[?25hdone
  Created wheel for adamp: filename=adamp-0.3.0-py3-none-any.whl size=5998 sha256=ebc6d88f258f79830d54b4c9e97be0995f4cf4edc28a73f5de208af6ea1e85f8
  Stored in directory: /root/.cache/pip/wheels/bb/95/21/ced2d2cb9944e3a72e58fece7958973eed3fd8d0aeb6e2e450
Successfully built adamp
Installing collected packages: adamp
Successfully installed adamp-0.3.0
Collecting transformers
  Downloading transformers-4.9.1-py3-none-any.whl (2.6 MB)
[K     |████████████████████████████████| 2.6 MB 13.3 MB/s 
[?25hCollecting huggingface-hub==0.0.12
  Downloading huggingface_hub-0.0.12-py3-none-any.whl (37 kB)
Collecting tokenizers<0.11,>=0.10.1
  Downloading tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3 MB)
[K     |████████████████████████████████| 3.3 MB 43.5 MB/s 
Col

## CNN Classifier

In [4]:
import math
import torch
import torch.nn as nn
import torch.nn.functional as F

class TextClassifier(nn.ModuleList):
	def __init__(self, args):
		super(TextClassifier, self).__init__()

		# Parameters regarding text preprocessing
		self.seq_len = args.max_seq_len
		# self.num_words = params.num_words
		self.embedding_size = 1024
		
		# Dropout definition
		self.dropout = nn.Dropout(0.3)
		
		# CNN parameters definition
		# Kernel sizes
		self.kernel_1 = 2
		self.kernel_2 = 3
		self.kernel_3 = 4
		self.kernel_4 = 5
		
		# Output size for each convolution
		self.out_size = args.out_size
		# Number of strides for each convolution
		self.stride = args.stride
		self.activation = nn.ELU()

		# Embedding layer definition
		# self.embedding = nn.Embedding(self.num_words + 1, self.embedding_size, padding_idx=0)
		
		# Convolution layers definition
		self.conv_1 = nn.Conv1d(self.seq_len, self.out_size, self.kernel_1, self.stride)
		self.conv_2 = nn.Conv1d(self.seq_len, self.out_size, self.kernel_2, self.stride)
		self.conv_3 = nn.Conv1d(self.seq_len, self.out_size, self.kernel_3, self.stride)
		self.conv_4 = nn.Conv1d(self.seq_len, self.out_size, self.kernel_4, self.stride)
		
		# Max pooling layers definition
		self.pool_1 = nn.MaxPool1d(self.kernel_1, self.stride)
		self.pool_2 = nn.MaxPool1d(self.kernel_2, self.stride)
		self.pool_3 = nn.MaxPool1d(self.kernel_3, self.stride)
		self.pool_4 = nn.MaxPool1d(self.kernel_4, self.stride)
		
		# Fully connected layer definition
		self.fc = nn.Linear(self.in_features_fc(), 7)

		
	def in_features_fc(self):
		'''Calculates the number of output features after Convolution + Max pooling
			
		Convolved_Features = ((embedding_size + (2 * padding) - dilation * (kernel - 1) - 1) / stride) + 1
		Pooled_Features = ((embedding_size + (2 * padding) - dilation * (kernel - 1) - 1) / stride) + 1
		
		source: https://pytorch.org/docs/stable/generated/torch.nn.Conv1d.html
		'''
		# Calcualte size of convolved/pooled features for convolution_1/max_pooling_1 features
		out_conv_1 = ((self.embedding_size - 1 * (self.kernel_1 - 1) - 1) / self.stride) + 1
		out_conv_1 = math.floor(out_conv_1)
		out_pool_1 = ((out_conv_1 - 1 * (self.kernel_1 - 1) - 1) / self.stride) + 1
		out_pool_1 = math.floor(out_pool_1)
		
		# Calcualte size of convolved/pooled features for convolution_2/max_pooling_2 features
		out_conv_2 = ((self.embedding_size - 1 * (self.kernel_2 - 1) - 1) / self.stride) + 1
		out_conv_2 = math.floor(out_conv_2)
		out_pool_2 = ((out_conv_2 - 1 * (self.kernel_2 - 1) - 1) / self.stride) + 1
		out_pool_2 = math.floor(out_pool_2)
		
		# Calcualte size of convolved/pooled features for convolution_3/max_pooling_3 features
		out_conv_3 = ((self.embedding_size - 1 * (self.kernel_3 - 1) - 1) / self.stride) + 1
		out_conv_3 = math.floor(out_conv_3)
		out_pool_3 = ((out_conv_3 - 1 * (self.kernel_3 - 1) - 1) / self.stride) + 1
		out_pool_3 = math.floor(out_pool_3)
		
		# Calcualte size of convolved/pooled features for convolution_4/max_pooling_4 features
		out_conv_4 = ((self.embedding_size - 1 * (self.kernel_4 - 1) - 1) / self.stride) + 1
		out_conv_4 = math.floor(out_conv_4)
		out_pool_4 = ((out_conv_4 - 1 * (self.kernel_4 - 1) - 1) / self.stride) + 1
		out_pool_4 = math.floor(out_pool_4)
		
		# Returns "flattened" vector (input for fully connected layer)
		return (out_pool_1 + out_pool_2 + out_pool_3 + out_pool_4) * self.out_size
		
		
		
	def forward(self, x):

		# Sequence of tokes is filterd through an embedding layer
		# x = self.embedding(x)
		
		# Convolution layer 1 is applied
		x1 = self.conv_1(x)
		x1 = torch.tanh(x1)
		# x1 = self.activation(x1)
		x1 = self.pool_1(x1)
		
		# Convolution layer 2 is applied
		x2 = self.conv_2(x)
		x2 = torch.tanh(x2)
		# x2 = self.activation(x2)
		x2 = self.pool_2(x2)
	
		# Convolution layer 3 is applied
		x3 = self.conv_3(x)
		x3 = torch.tanh(x3)
		# x3 = self.activation(x3)
		x3 = self.pool_3(x3)
		
		# Convolution layer 4 is applied
		x4 = self.conv_4(x)
		x4 = torch.tanh(x4)
		# x4 = self.activation(x4)
		x4 = self.pool_4(x4)
		
		# The output of each convolutional layer is concatenated into a unique vector
		union = torch.cat((x1, x2, x3, x4), 2)
		union = union.reshape(union.size(0), -1)

		# The "flattened" vector is passed through a fully connected layer
		out = self.fc(union)
		# Dropout is applied		
		out = self.dropout(out)
		# Activation function is applied
		out = torch.tanh(out)
		# out = self.activation(out)
		
		return out.squeeze()

## Utils

In [5]:
import os
import random
import torch
import numpy as np
from torch import nn

from torch.optim import Adam, AdamW, SGD
from adamp import AdamP
from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau, CosineAnnealingLR, ExponentialLR, \
    CosineAnnealingWarmRestarts
from transformers import get_linear_schedule_with_warmup
from transformers import AutoModel, AutoConfig, AutoTokenizer, AutoModelForSequenceClassification


def set_seeds(seed=42):
    # 랜덤 시드를 설정하여 매 코드를 실행할 때마다 동일한 결과를 얻게 합니다.
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.cuda.manual_seed_all(seed)  # if use multi-GPU
    torch.backends.cudnn.benchmark = False


def save_checkpoint(state, model_dir, model_filename):
    print('saving model ...')
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    # torch.save(state, os.path.join(model_dir, model_filename))
    torch.save(state, os.path.join(model_filename))


def get_optimizer(model, args):
    if args.optimizer == 'adam':
        optimizer = Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    elif args.optimizer == 'adamW':
        optimizer = AdamW(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    elif args.optimizer == 'adamP':
        optimizer = AdamP(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    elif args.optimizer == 'SGD':
        optimizer = SGD(model.parameters(), lr=args.lr, momentum=args.momentum)

    # 모든 parameter들의 grad값을 0으로 초기화
    optimizer.zero_grad()

    return optimizer


def get_scheduler(optimizer, args):
    if args.scheduler == 'plateau':
        scheduler = ReduceLROnPlateau(optimizer, patience=args.plateau_patience, factor=args.plateau_factor, mode='max',
                                      verbose=True)
    elif args.scheduler == 'linear_warmup':
        scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=args.warmup_steps,
                                                    num_training_steps=args.total_steps)
    elif args.scheduler == 'step_lr':
        scheduler = StepLR(optimizer, step_size=args.step_size, gamma=args.gamma)
    elif args.scheduler == 'exp_lr':
        scheduler = ExponentialLR(optimizer, gamma=args.gamma)
    elif args.scheduler == 'cosine_annealing':
        scheduler = CosineAnnealingLR(optimizer, T_max=args.t_max, eta_min=args.eta_min)
    elif args.scheduler == 'cosine_annealing_warmstart':
        scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=args.T_0, T_mult=args.T_mult, eta_min=args.eta_min,
                                                last_epoch=-1)

    return scheduler


def update_params(loss, model, optimizer, batch_idx, max_len, args):
    if args.gradient_accumulation:
        # normalize loss to account for batch accumulation
        loss = loss / args.accum_iter 

        # backward pass
        loss.backward()

        # weights update
        if ((batch_idx + 1) % args.accum_iter == 0) or (batch_idx + 1 == max_len):
            torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip_grad)
            optimizer.step()
            optimizer.zero_grad()
    else:
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip_grad)
        optimizer.step()
        optimizer.zero_grad()


def load_tokenizer(args):
    tokenizer = AutoTokenizer.from_pretrained(
        args.tokenizer_name
        if args.tokenizer_name
        else args.model_name_or_path,
        use_fast=True,
    )

    return tokenizer


def load_model(args, model_name=None):
    if not model_name:
        model_name = args.model_name
    model_path = os.path.join(args.model_dir, model_name)
    print("Loading Model from:", model_path)
    # load_state = torch.load(model_path)
    load_state = torch.load(model_name)

    # Load pretrained model and tokenizer
    config = AutoConfig.from_pretrained(
        args.config_name
        if args.config_name
        else args.model_name_or_path,
    )

    config.num_labels = 7

    model = AutoModelForSequenceClassification.from_pretrained(
        args.model_name_or_path,
        from_tf=bool(".ckpt" in args.model_name_or_path),
        config=config,
    )

    model.classifier = TextClassifier(args)

    model.load_state_dict(load_state['state_dict'], strict=True)

    model = model.to(args.device)

    print("Loading Model from:", model_path, "...Finished.")

    return model


def get_model(args):
    # Load pretrained model and tokenizer
    config = AutoConfig.from_pretrained(
        args.config_name
        if args.config_name
        else args.model_name_or_path,
    )

    config.num_labels = 7
    model = AutoModelForSequenceClassification.from_pretrained(
        args.model_name_or_path,
        from_tf=bool(".ckpt" in args.model_name_or_path),
        config=config,
    )

    model.classifier = TextClassifier(args)

    # print(model)
    #model.classifier.dropout = nn.Dropout(p=0.3, inplace = False)

    model = model.to(args.device)

    return model


def get_loaders(args, train, valid, is_inference=False):
    pin_memory = True
    train_loader, valid_loader = None, None

    if is_inference:
        test_dataset = YNAT_dataset(args, valid, is_inference)
        test_loader = torch.utils.data.DataLoader(test_dataset, num_workers=args.num_workers, shuffle=False,
                                                  batch_size=args.batch_size, pin_memory=pin_memory)
        return test_loader

    if train is not None:
        train_dataset = YNAT_dataset(args, train, is_inference)
        train_loader = torch.utils.data.DataLoader(train_dataset, num_workers=args.num_workers, shuffle=True,
                                                   batch_size=args.batch_size, pin_memory=pin_memory)
    if valid is not None:
        valid_dataset = YNAT_dataset(args, valid, is_inference)
        valid_loader = torch.utils.data.DataLoader(valid_dataset, num_workers=args.num_workers, shuffle=False,
                                                   batch_size=args.batch_size, pin_memory=pin_memory)

    return train_loader, valid_loader


# loss계산하고 parameter update!
def compute_loss(preds, targets, args):
    """
    Args :
        preds   : (batch_size, max_seq_len)
        targets : (batch_size, max_seq_len)
    """
    # print(preds, targets)
    loss = get_criterion(preds, targets, args)
    # 마지막 시퀀스에 대한 값만 loss 계산
    # loss = loss[:, -1]
    # loss = torch.mean(loss)
    return loss


def get_criterion(pred, target, args):
    if args.criterion == 'BCE':
        loss = nn.BCELoss(reduction="none")
    elif args.criterion == "BCELogit":
        loss = nn.BCEWithLogitsLoss(reduction="none")
    elif args.criterion == "MSE":
        loss = nn.MSELoss(reduction="none")
    elif args.criterion == "L1":
        loss = nn.L1Loss(reduction="none")
    elif args.criterion == "CE":
        weights = [1,1,2,1,1,1,1] #as class distribution
        class_weights = torch.FloatTensor(weights).cuda()
        loss = nn.CrossEntropyLoss(weight=class_weights)
        # loss = nn.CrossEntropyLoss()
    # NLL, CrossEntropy not available
    return loss(pred, target)


## Dataloader

In [6]:
import os
import torch
import pandas as pd


class Preprocess:
    def __init__(self, args):
        self.args = args
        self.train_data = None
        self.test_data = None

    def load_data(self, file_name):
        csv_file_name = os.path.join(self.args.data_dir, file_name)
        df = pd.read_csv(csv_file_name)
        #del df['Unnamed: 0']
        return df.values

    def load_train_data(self):
        self.train_data = self.load_data('train_data.csv')

    def load_test_data(self):
        self.test_data = self.load_data('test_data.csv')


class YNAT_dataset(torch.utils.data.Dataset):
    def __init__(self, args, data, is_inference):
        self.args = args
        self.data = data
        self.is_inference = is_inference

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        row = self.data[index]
        element = [row[i] for i in range(len(row))]
        #print(type(row))
        # np.array -> torch.tensor 형변환
        #for i, col in enumerate(row):
        #    if type(col) == str:
        #        pass
        #    else:
        #        row[i] = torch.tensor(col)

        return element



## Trainer

In [7]:
from sklearn.metrics import accuracy_score
from torch.nn.functional import one_hot
from tqdm import tqdm
from sklearn import metrics


def run(args, tokenizer, train_data, valid_data, cv_count):
    train_loader, valid_loader = get_loaders(args, train_data, valid_data)

    # only when using warmup scheduler
    # args.total_steps = int(len(train_loader.dataset) / args.batch_size) * args.n_epochs
    # args.warmup_steps = int(args.total_steps * args.warmup_ratio)

    model = get_model(args)
    optimizer = get_optimizer(model, args)
    scheduler = get_scheduler(optimizer, args)

    best_acc = -1
    early_stopping_counter = 0
    for epoch in range(args.n_epochs):

        print(f"Start Training: Epoch {epoch + 1}")

        if not args.cv_strategy:
            model_name = args.run_name
        else:
            model_name = f"{args.run_name.split('.pt')[0]}_{cv_count}.pt"

        # TRAIN
        train_acc, train_loss = train(args, model, tokenizer, train_loader, optimizer)

        # VALID
        acc, val_loss = validate(args, model, tokenizer, valid_loader)

        # TODO: model save or early stopping
        if args.scheduler == 'plateau':
            last_lr = optimizer.param_groups[0]['lr']
        else:
            last_lr = scheduler.get_last_lr()[0]

        print({"epoch": epoch+1, "train_loss": train_loss, "train_acc": train_acc,
                   "valid_acc": acc, "val_loss": val_loss, "learning_rate": last_lr})

        if acc > best_acc:
            best_acc = acc
            # torch.nn.DataParallel로 감싸진 경우 원래의 model을 가져옵니다.
            model_to_save = model.module if hasattr(model, 'module') else model
            save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': model_to_save.state_dict(),
            },
                args.model_dir, model_name,
            )
            early_stopping_counter = 0
        else:
            early_stopping_counter += 1
            if early_stopping_counter >= args.patience:
                print(f'EarlyStopping counter: {early_stopping_counter} out of {args.patience}')
                break

        # scheduler
        if args.scheduler == 'plateau':
            scheduler.step(best_acc)
        else:
            scheduler.step()

    return best_acc


def train(args, model, tokenizer, train_loader, optimizer):
    model.train()

    total_preds = []
    total_targets = []
    losses = []
    for step, batch in tqdm(enumerate(train_loader), desc='Training', total=len(train_loader)):
        idx, text, label = batch
        label = label.to(args.device)
        
        tokenized_examples = tokenizer(
            text,
            max_length=args.max_seq_len,
            padding="max_length",
            return_tensors="pt"
        ).to(args.device)
        
        # tokenize
        # 모델의 입력으로
        # label은 one-hot?
        # loss 주고
        # argmax를 golden

        logits = model(**tokenized_examples)['logits']
        #print(model_output)
        #print(model_output.shape)
        # logits = classifier.forward(model_output)
        # print(logits)
        if len(list(logits.shape)) == 1:
            logits = torch.unsqueeze(logits, 0)
        # print(preds)
        #logits = preds['logits']
        # logits = logits[:,0,:]
        #softmax_logits = nn.Softmax(dim=1)(logits)
        argmax_logits = torch.argmax(logits, dim=1)

        # one_hot_logits = one_hot(argmax_logits, num_classes=7).float()
        # print(one_hot(argmax_logits, num_classes=7).type(torch.FloatTensor))
        loss = compute_loss(logits,
                             label, args)
        #loss = preds['loss']
        #print(loss)

        update_params(loss, model, optimizer, step, len(train_loader), args)

        if step % args.log_steps == 0:
            print(f"Training steps: {step} Loss: {str(loss.item())}")

        if args.device == 'cuda':
            argmax_logits = argmax_logits.to('cpu').detach().numpy()
            label = label.to('cpu').detach().numpy()
            loss = loss.to('cpu').detach().numpy()
        else:  # cpu
            argmax_logits = argmax_logits.detach().numpy()
            label = label.detach().numpy()
            loss = loss.detach().numpy()

        total_preds.append(argmax_logits)
        total_targets.append(label)
        losses.append(loss)

    total_preds = np.concatenate(total_preds)
    total_targets = np.concatenate(total_targets)

    # Train AUC / ACC
    acc = accuracy_score(total_targets, total_preds)
    loss_avg = sum(losses) / len(losses)
    print(f'TRAIN ACC : {acc}, TRAIN LOSS : {loss_avg}')
    return acc, loss_avg


def validate(args, model, tokenizer, valid_loader):
    model.eval()

    total_preds = []
    total_targets = []
    losses = []
    for step, batch in tqdm(enumerate(valid_loader), desc='Training', total=len(valid_loader)):
        idx, text, label = batch
        label = label.to(args.device)
        tokenized_examples = tokenizer(
            text,
            max_length=args.max_seq_len,
            padding="max_length",
            return_tensors="pt"
        ).to(args.device)

        # tokenize
        # 모델의 입력으로
        # label은 one-hot?
        # loss 주고
        # argmax를 golden

        logits = model(**tokenized_examples)['logits']
        
        if len(list(logits.shape)) == 1:
            logits = torch.unsqueeze(logits, 0)
        #print(model_output)
        #print(model_output.shape)
        # logits = classifier.forward(model_output)
        #print(logits)
        #print(logits.shape)
        # print(preds)
        #logits = preds['logits']
        # logits = logits[:,0,:]
        #softmax_logits = nn.Softmax(dim=1)(logits)
        argmax_logits = torch.argmax(logits, dim=1)

        # one_hot_logits = one_hot(argmax_logits, num_classes=7).float()
        # print(one_hot(argmax_logits, num_classes=7).type(torch.FloatTensor))
        loss = compute_loss(logits,
                             label, args)
        #loss = preds['loss']
        #print(loss)


        # preds = model(**tokenized_examples, labels = label)
        # logits = preds['logits']
        # logits = logits[:,0,:]
        # softmax_logits = nn.Softmax(dim=1)(logits)
        # argmax_logits = torch.argmax(logits, dim=1)

        # one_hot_logits = one_hot(argmax_logits, num_classes=7).float()
        # print(one_hot(argmax_logits, num_classes=7).type(torch.FloatTensor))
        # loss = compute_loss(logits,
        #                     label, args)
        # loss = preds['loss']
        if step % args.log_steps == 0:
            print(f"Validation steps: {step} Loss: {str(loss.item())}")

        if args.device == 'cuda':
            argmax_logits = argmax_logits.to('cpu').detach().numpy()
            label = label.to('cpu').detach().numpy()
            loss = loss.to('cpu').detach().numpy()
        else:  # cpu
            argmax_logits = argmax_logits.detach().numpy()
            label = label.detach().numpy()
            loss = loss.detach().numpy()

        total_preds.append(argmax_logits)
        total_targets.append(label)
        losses.append(loss)

    total_preds = np.concatenate(total_preds)
    total_targets = np.concatenate(total_targets)

    # Train AUC / ACC
    target_names = ['IT과학', '경제', '사회', '생활문화', '세계', '스포츠', '정치']
    print(metrics.classification_report(total_targets, total_preds, target_names=target_names))
    matrix = metrics.confusion_matrix(total_targets, total_preds)
    print(matrix.diagonal()/matrix.sum(axis=1))

    acc = accuracy_score(total_targets, total_preds)
    loss_avg = sum(losses) / len(losses)
    print(f'VALID ACC : {acc}, VALID LOSS : {loss_avg}')
    return acc, loss_avg


def inference(args, test_data):
    # ckpt_file_names = []
    all_fold_preds = []
    all_fold_argmax_preds = []

    if not args.cv_strategy:
        ckpt_file_names = [args.model_name]
    else:
        ckpt_file_names = [f"{args.model_name.split('.pt')[0]}_{i + 1}.pt" for i in range(args.fold_num)]

    tokenizer = load_tokenizer(args)

    for fold_idx, ckpt in enumerate(ckpt_file_names):
        model = load_model(args, ckpt)
        model.eval()
        test_loader = get_loaders(args, None, test_data, True)

        total_preds = []
        total_argmax_preds = []
        total_ids = []

        for step, batch in tqdm(enumerate(test_loader), desc='Inferencing', total=len(test_loader)):
            idx, text = batch
            tokenized_examples = tokenizer(
                text,
                max_length=args.max_seq_len,
                padding="max_length",
                return_tensors="pt"
            ).to(args.device)

            # preds = model(**tokenized_examples)

            logits = model(**tokenized_examples)['logits']

            if len(list(logits.shape)) == 1:
                logits = torch.unsqueeze(logits, 0)
            # logits = preds['logits']
            # logits = logits[:,0,:]
            argmax_logits = torch.argmax(logits, dim=1)

            if args.device == 'cuda':
                argmax_preds = argmax_logits.to('cpu').detach().numpy()
                preds = logits.to('cpu').detach().numpy()
            else:  # cpu
                argmax_preds = argmax_logits.detach().numpy()
                preds = logits.detach().numpy()

            total_preds += list(preds)
            total_argmax_preds += list(argmax_preds)
            total_ids += list(idx)

        all_fold_preds.append(total_preds)
        all_fold_argmax_preds.append(total_argmax_preds)

        output_file_name = "output.csv" if not args.cv_strategy else f"output_{fold_idx + 1}.csv"
        write_path = os.path.join(args.output_dir, output_file_name)
        if not os.path.exists(args.output_dir):
            os.makedirs(args.output_dir)
        with open(write_path, 'w', encoding='utf8') as w:
            print("writing prediction : {}".format(write_path))
            w.write("index,topic_idx\n")
            for index, p in zip(total_ids, total_argmax_preds):
                w.write('{},{}\n'.format(index, p))

    if len(all_fold_preds) > 1:
        # Soft voting ensemble
        votes = np.sum(all_fold_preds, axis=0)
        votes = np.argmax(votes, axis=1)

        write_path = os.path.join(args.output_dir, "output_softvote.csv")
        #write_path = "output_softvote.csv"
        if not os.path.exists(args.output_dir):
            os.makedirs(args.output_dir)
        with open(write_path, 'w', encoding='utf8') as w:
            print("writing prediction : {}".format(write_path))
            w.write("index,topic_idx\n")
            for id, p in zip(total_ids, votes):
                w.write('{},{}\n'.format(id, p))

## Train

In [8]:
import torch
from sklearn.model_selection import KFold, StratifiedKFold
from transformers import AutoConfig, AutoTokenizer, AutoModelForSequenceClassification
from datetime import datetime
from pytz import timezone


def main(args):
    if not args.run_name:
        args.run_name = datetime.now(timezone("Asia/Seoul")).strftime("%Y-%m-%d-%H:%M:%S")

    set_seeds(args.seed)

    device = "cuda" if torch.cuda.is_available() else "cpu"
    args.device = device

    tokenizer = AutoTokenizer.from_pretrained(
        args.tokenizer_name
        if args.tokenizer_name
        else args.model_name_or_path,
        use_fast=True,
    )

    preprocess = Preprocess(args)
    preprocess.load_train_data()
    train_data_origin = preprocess.train_data

    print(f"Size of train data : {len(train_data_origin)}")
    # print(f"size of test data : {len(test_data)}")

    if args.cv_strategy == 'random':
        kf = KFold(n_splits=args.fold_num, shuffle=True)
        splits = kf.split(X=train_data_origin)
    else:
        # default
        # 여기 각 label로 바꿔야됨
        train_labels = [sequence[-1] for sequence in train_data_origin]
        skf = StratifiedKFold(n_splits=args.fold_num, shuffle=True)
        splits = skf.split(X=train_data_origin, y=train_labels)

    acc_avg = 0
    for fold_num, (train_index, valid_index) in enumerate(splits):
        train_data = train_data_origin[train_index]
        valid_data = train_data_origin[valid_index]
        best_acc = run(args, tokenizer, train_data, valid_data, fold_num + 1)

        if not args.cv_strategy:
            break

        acc_avg += best_acc

    if args.cv_strategy:
        acc_avg /= args.fold_num

        print("*" * 50, 'auc_acc', "*" * 50)
        print(acc_avg)


## Run

In [9]:
import argparse
import easydict

def parse_args():
    args = easydict.EasyDict({'run_name' : 'temp',
                             'seed':42,
                             'device' :'cuda',
                             'data_dir': cur_dir + '/data/open/',
                             'model_dir' : '/content/drive/MyDrive/KLUE_TC/models/',
                             'model_name_or_path' : 'klue/roberta-large',
                             'config_name' : None,
                             'tokenizer_name' : None,
                             'output_dir' : '/content/drive/MyDrive/KLUE_TC/output/cnn_activation',
                             
                             'accum_iter' : 16,
                             'gradient_accumulation' : True,

                             'cv_strategy' : 'stratified',
                             'fold_num' : 5,

                             'num_workers' : 1,

                             # 훈련
                             'n_epochs' : 20,
                             'batch_size' : 32,
                             'lr' : 1e-5,
                             'clip_grad' : 15,
                             'patience' : 5,
                             'max_seq_len' : 40,
                              
                             # CNN config
                             'out_size' : 32,
                             'stride' : 2,
                             # Optimizer
                             'optimizer' : 'adamP',

                             # Optimizer-parameters
                             'weight_decay' : 0.05,
                             'momentum' : 0.9,

                             # Scheduler
                             'scheduler' : 'step_lr',

                             # Scheduler-parameters
                             # plateau
                             'plateau_patience' : 10,
                             'plateau_factor' : 0.5,
                              
                             't_max' : 100,
                             'T_0' : 10,
                             'T_mult' : 2,
                             'eta_min' : 0,

                             # linear_warmup
                             'warmup_ratio' : 0.3,

                             # Step LR
                             'step_size' : 50,
                             'gamma' : 0.2,

                             'criterion' : 'CE',

                             'log_steps' : 100})
    
    return args

In [10]:
if __name__ == '__main__':
    args = parse_args()
    main(args)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=337.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=547.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=248477.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=173.0, style=ProgressStyle(description_…


Size of train data : 45654


HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1346854671.0, style=ProgressStyle(descr…




Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.decoder.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'cla

Start Training: Epoch 1


  return torch.max_pool1d(input, kernel_size, stride, padding, dilation, ceil_mode)
Training:   0%|          | 1/1142 [00:00<11:10,  1.70it/s]

Training steps: 0 Loss: 1.947268009185791


Training:   9%|▉         | 101/1142 [00:42<07:16,  2.39it/s]

Training steps: 100 Loss: 1.7503725290298462


Training:  18%|█▊        | 201/1142 [01:24<06:27,  2.43it/s]

Training steps: 200 Loss: 1.785266399383545


Training:  26%|██▋       | 301/1142 [02:07<05:44,  2.44it/s]

Training steps: 300 Loss: 1.6429709196090698


Training:  35%|███▌      | 401/1142 [02:49<05:34,  2.22it/s]

Training steps: 400 Loss: 1.4316831827163696


Training:  44%|████▍     | 501/1142 [03:31<04:28,  2.39it/s]

Training steps: 500 Loss: 1.0903270244598389


Training:  53%|█████▎    | 601/1142 [04:13<03:42,  2.43it/s]

Training steps: 600 Loss: 1.1945501565933228


Training:  61%|██████▏   | 701/1142 [04:55<03:00,  2.44it/s]

Training steps: 700 Loss: 1.2772032022476196


Training:  70%|███████   | 801/1142 [05:37<02:34,  2.21it/s]

Training steps: 800 Loss: 1.2071503400802612


Training:  79%|███████▉  | 901/1142 [06:20<01:41,  2.38it/s]

Training steps: 900 Loss: 1.206471562385559


Training:  88%|████████▊ | 1001/1142 [07:02<00:58,  2.43it/s]

Training steps: 1000 Loss: 1.4079869985580444


Training:  96%|█████████▋| 1101/1142 [07:44<00:16,  2.44it/s]

Training steps: 1100 Loss: 1.1697410345077515


Training: 100%|██████████| 1142/1142 [08:01<00:00,  2.37it/s]

TRAIN ACC : 0.565424527010377, TRAIN LOSS : 1.3803622774818942



Training:   1%|          | 2/286 [00:00<00:41,  6.87it/s]

Validation steps: 0 Loss: 0.779270589351654


Training:  36%|███▌      | 102/286 [00:14<00:25,  7.28it/s]

Validation steps: 100 Loss: 0.8678604960441589


Training:  71%|███████   | 202/286 [00:27<00:11,  7.28it/s]

Validation steps: 200 Loss: 0.7067420482635498


Training: 100%|██████████| 286/286 [00:39<00:00,  7.28it/s]


              precision    recall  f1-score   support

        IT과학       0.83      0.82      0.83       965
          경제       0.88      0.78      0.83      1244
          사회       0.70      0.86      0.77      1472
        생활문화       0.92      0.86      0.89      1187
          세계       0.95      0.90      0.92      1526
         스포츠       0.98      0.96      0.97      1387
          정치       0.92      0.90      0.91      1350

    accuracy                           0.87      9131
   macro avg       0.88      0.87      0.87      9131
weighted avg       0.88      0.87      0.88      9131

[0.82487047 0.77733119 0.85869565 0.8609941  0.90432503 0.96467195
 0.89851852]
VALID ACC : 0.8739458985872303, VALID LOSS : 0.8430824125563348
{'epoch': 1, 'train_loss': 1.3803622774818942, 'train_acc': 0.565424527010377, 'valid_acc': 0.8739458985872303, 'val_loss': 0.8430824125563348, 'learning_rate': 1e-05}
saving model ...
Start Training: Epoch 2


Training:   0%|          | 1/1142 [00:00<08:06,  2.34it/s]

Training steps: 0 Loss: 1.1766269207000732


Training:   9%|▉         | 101/1142 [00:42<07:16,  2.39it/s]

Training steps: 100 Loss: 1.190548062324524


Training:  18%|█▊        | 201/1142 [01:24<06:26,  2.43it/s]

Training steps: 200 Loss: 1.2778171300888062


Training:  26%|██▋       | 301/1142 [02:06<05:43,  2.45it/s]

Training steps: 300 Loss: 1.1234455108642578


Training:  35%|███▌      | 401/1142 [02:48<05:35,  2.21it/s]

Training steps: 400 Loss: 0.9985120296478271


Training:  44%|████▍     | 501/1142 [03:31<04:28,  2.38it/s]

Training steps: 500 Loss: 1.1525198221206665


Training:  53%|█████▎    | 601/1142 [04:13<03:42,  2.43it/s]

Training steps: 600 Loss: 0.8984723091125488


Training:  61%|██████▏   | 701/1142 [04:55<03:00,  2.44it/s]

Training steps: 700 Loss: 1.0266412496566772


Training:  70%|███████   | 801/1142 [05:37<02:33,  2.21it/s]

Training steps: 800 Loss: 1.1177709102630615


Training:  79%|███████▉  | 901/1142 [06:19<01:41,  2.39it/s]

Training steps: 900 Loss: 1.1151779890060425


Training:  88%|████████▊ | 1001/1142 [07:01<00:57,  2.43it/s]

Training steps: 1000 Loss: 1.1791460514068604


Training:  96%|█████████▋| 1101/1142 [07:43<00:16,  2.44it/s]

Training steps: 1100 Loss: 1.2747886180877686


Training: 100%|██████████| 1142/1142 [08:01<00:00,  2.37it/s]

TRAIN ACC : 0.724064288256715, TRAIN LOSS : 1.1372615266493449



Training:   1%|          | 2/286 [00:00<00:41,  6.92it/s]

Validation steps: 0 Loss: 0.7245973348617554


Training:  36%|███▌      | 102/286 [00:14<00:25,  7.22it/s]

Validation steps: 100 Loss: 0.7855159640312195


Training:  71%|███████   | 202/286 [00:27<00:11,  7.30it/s]

Validation steps: 200 Loss: 0.6468273997306824


Training: 100%|██████████| 286/286 [00:39<00:00,  7.27it/s]


              precision    recall  f1-score   support

        IT과학       0.82      0.87      0.85       965
          경제       0.87      0.82      0.84      1244
          사회       0.80      0.79      0.79      1472
        생활문화       0.90      0.90      0.90      1187
          세계       0.94      0.92      0.93      1526
         스포츠       0.97      0.97      0.97      1387
          정치       0.90      0.94      0.92      1350

    accuracy                           0.89      9131
   macro avg       0.89      0.89      0.89      9131
weighted avg       0.89      0.89      0.89      9131

[0.87150259 0.81672026 0.78532609 0.89637742 0.92398427 0.97332372
 0.94444444]
VALID ACC : 0.888402146533786, VALID LOSS : 0.8108708585475708
{'epoch': 2, 'train_loss': 1.1372615266493449, 'train_acc': 0.724064288256715, 'valid_acc': 0.888402146533786, 'val_loss': 0.8108708585475708, 'learning_rate': 1e-05}
saving model ...
Start Training: Epoch 3


Training:   0%|          | 1/1142 [00:00<08:16,  2.30it/s]

Training steps: 0 Loss: 0.9527279734611511


Training:   9%|▉         | 101/1142 [00:42<07:16,  2.39it/s]

Training steps: 100 Loss: 1.2228755950927734


Training:  18%|█▊        | 201/1142 [01:24<06:28,  2.42it/s]

Training steps: 200 Loss: 1.1614001989364624


Training:  26%|██▋       | 301/1142 [02:06<05:44,  2.44it/s]

Training steps: 300 Loss: 1.1459037065505981


Training:  35%|███▌      | 401/1142 [02:49<05:34,  2.21it/s]

Training steps: 400 Loss: 1.019123911857605


Training:  44%|████▍     | 501/1142 [03:31<04:28,  2.38it/s]

Training steps: 500 Loss: 1.0567371845245361


Training:  53%|█████▎    | 601/1142 [04:13<03:43,  2.42it/s]

Training steps: 600 Loss: 1.0742173194885254


Training:  61%|██████▏   | 701/1142 [04:55<03:00,  2.44it/s]

Training steps: 700 Loss: 1.3037996292114258


Training:  70%|███████   | 801/1142 [05:37<02:33,  2.21it/s]

Training steps: 800 Loss: 1.2178921699523926


Training:  79%|███████▉  | 901/1142 [06:19<01:41,  2.38it/s]

Training steps: 900 Loss: 1.146314024925232


Training:  88%|████████▊ | 1001/1142 [07:02<00:58,  2.43it/s]

Training steps: 1000 Loss: 1.1217091083526611


Training:  96%|█████████▋| 1101/1142 [07:44<00:16,  2.44it/s]

Training steps: 1100 Loss: 1.0749894380569458


Training: 100%|██████████| 1142/1142 [08:01<00:00,  2.37it/s]

TRAIN ACC : 0.7338389507981272, TRAIN LOSS : 1.116210252516324



Training:   1%|          | 2/286 [00:00<00:41,  6.91it/s]

Validation steps: 0 Loss: 0.7286977171897888


Training:  36%|███▌      | 102/286 [00:14<00:25,  7.23it/s]

Validation steps: 100 Loss: 0.7954023480415344


Training:  71%|███████   | 202/286 [00:27<00:11,  7.25it/s]

Validation steps: 200 Loss: 0.6333651542663574


Training: 100%|██████████| 286/286 [00:39<00:00,  7.27it/s]


              precision    recall  f1-score   support

        IT과학       0.82      0.89      0.85       965
          경제       0.87      0.81      0.84      1244
          사회       0.82      0.78      0.80      1472
        생활문화       0.90      0.91      0.90      1187
          세계       0.93      0.94      0.94      1526
         스포츠       0.97      0.99      0.98      1387
          정치       0.91      0.92      0.92      1350

    accuracy                           0.89      9131
   macro avg       0.89      0.89      0.89      9131
weighted avg       0.89      0.89      0.89      9131

[0.88911917 0.81430868 0.77581522 0.91322662 0.94036697 0.98558039
 0.9237037 ]
VALID ACC : 0.8921257255503231, VALID LOSS : 0.7975959252644252
{'epoch': 3, 'train_loss': 1.116210252516324, 'train_acc': 0.7338389507981272, 'valid_acc': 0.8921257255503231, 'val_loss': 0.7975959252644252, 'learning_rate': 1e-05}
saving model ...
Start Training: Epoch 4


Training:   0%|          | 1/1142 [00:00<08:23,  2.27it/s]

Training steps: 0 Loss: 1.0080962181091309


Training:   9%|▉         | 101/1142 [00:42<07:17,  2.38it/s]

Training steps: 100 Loss: 1.1132662296295166


Training:  18%|█▊        | 201/1142 [01:24<06:27,  2.43it/s]

Training steps: 200 Loss: 1.001815676689148


Training:  26%|██▋       | 301/1142 [02:06<05:43,  2.44it/s]

Training steps: 300 Loss: 1.2073644399642944


Training:  35%|███▌      | 401/1142 [02:49<05:35,  2.21it/s]

Training steps: 400 Loss: 1.0602999925613403


Training:  44%|████▍     | 501/1142 [03:31<04:29,  2.38it/s]

Training steps: 500 Loss: 1.3443211317062378


Training:  53%|█████▎    | 601/1142 [04:13<03:42,  2.43it/s]

Training steps: 600 Loss: 1.18630850315094


Training:  61%|██████▏   | 701/1142 [04:55<03:00,  2.44it/s]

Training steps: 700 Loss: 1.0845142602920532


Training:  70%|███████   | 801/1142 [05:38<02:34,  2.20it/s]

Training steps: 800 Loss: 1.1174789667129517


Training:  79%|███████▉  | 901/1142 [06:20<01:41,  2.38it/s]

Training steps: 900 Loss: 1.0583293437957764


Training:  88%|████████▊ | 1001/1142 [07:02<00:58,  2.43it/s]

Training steps: 1000 Loss: 1.1250876188278198


Training:  96%|█████████▋| 1101/1142 [07:44<00:16,  2.44it/s]

Training steps: 1100 Loss: 1.2202695608139038


Training: 100%|██████████| 1142/1142 [08:01<00:00,  2.37it/s]

TRAIN ACC : 0.742983873175807, TRAIN LOSS : 1.1019253846120918



Training:   1%|          | 2/286 [00:00<00:41,  6.81it/s]

Validation steps: 0 Loss: 0.7324300408363342


Training:  36%|███▌      | 102/286 [00:14<00:25,  7.26it/s]

Validation steps: 100 Loss: 0.7897526621818542


Training:  71%|███████   | 202/286 [00:27<00:11,  7.23it/s]

Validation steps: 200 Loss: 0.6283009648323059


Training: 100%|██████████| 286/286 [00:39<00:00,  7.24it/s]

              precision    recall  f1-score   support

        IT과학       0.82      0.89      0.86       965
          경제       0.89      0.79      0.84      1244
          사회       0.78      0.80      0.79      1472
        생활문화       0.90      0.91      0.91      1187
          세계       0.93      0.94      0.93      1526
         스포츠       0.97      0.98      0.98      1387
          정치       0.92      0.91      0.91      1350

    accuracy                           0.89      9131
   macro avg       0.89      0.89      0.89      9131
weighted avg       0.89      0.89      0.89      9131

[0.89015544 0.79421222 0.80434783 0.90817186 0.94102228 0.98125451
 0.90666667]
VALID ACC : 0.8903734530719527, VALID LOSS : 0.7912018922242251
{'epoch': 4, 'train_loss': 1.1019253846120918, 'train_acc': 0.742983873175807, 'valid_acc': 0.8903734530719527, 'val_loss': 0.7912018922242251, 'learning_rate': 1e-05}
Start Training: Epoch 5



Training:   0%|          | 1/1142 [00:00<08:01,  2.37it/s]

Training steps: 0 Loss: 1.010549545288086


Training:   9%|▉         | 101/1142 [00:42<07:18,  2.37it/s]

Training steps: 100 Loss: 1.1082916259765625


Training:  18%|█▊        | 201/1142 [01:24<06:28,  2.42it/s]

Training steps: 200 Loss: 1.1291275024414062


Training:  26%|██▋       | 301/1142 [02:07<05:45,  2.44it/s]

Training steps: 300 Loss: 1.1288652420043945


Training:  35%|███▌      | 401/1142 [02:49<05:35,  2.21it/s]

Training steps: 400 Loss: 1.293139934539795


Training:  44%|████▍     | 501/1142 [03:31<04:29,  2.38it/s]

Training steps: 500 Loss: 1.038652777671814


Training:  53%|█████▎    | 601/1142 [04:13<03:42,  2.43it/s]

Training steps: 600 Loss: 1.1501049995422363


Training:  61%|██████▏   | 701/1142 [04:55<03:00,  2.45it/s]

Training steps: 700 Loss: 1.0547785758972168


Training:  70%|███████   | 801/1142 [05:38<02:34,  2.20it/s]

Training steps: 800 Loss: 1.2169166803359985


Training:  79%|███████▉  | 901/1142 [06:20<01:40,  2.39it/s]

Training steps: 900 Loss: 1.1769542694091797


Training:  88%|████████▊ | 1001/1142 [07:02<00:57,  2.43it/s]

Training steps: 1000 Loss: 1.174588680267334


Training:  96%|█████████▋| 1101/1142 [07:44<00:16,  2.45it/s]

Training steps: 1100 Loss: 1.0993257761001587


Training: 100%|██████████| 1142/1142 [08:01<00:00,  2.37it/s]

TRAIN ACC : 0.7467896941653205, TRAIN LOSS : 1.0954809373606733



Training:   0%|          | 0/286 [00:00<?, ?it/s]

Validation steps: 0 Loss: 0.7389377355575562


Training:  36%|███▌      | 102/286 [00:13<00:25,  7.32it/s]

Validation steps: 100 Loss: 0.7725960612297058


Training:  71%|███████   | 202/286 [00:27<00:11,  7.38it/s]

Validation steps: 200 Loss: 0.6208676695823669


Training: 100%|██████████| 286/286 [00:38<00:00,  7.36it/s]

              precision    recall  f1-score   support

        IT과학       0.82      0.88      0.85       965
          경제       0.87      0.82      0.84      1244
          사회       0.78      0.80      0.79      1472
        생활문화       0.92      0.89      0.90      1187
          세계       0.94      0.92      0.93      1526
         스포츠       0.97      0.98      0.98      1387
          정치       0.91      0.93      0.92      1350

    accuracy                           0.89      9131
   macro avg       0.89      0.89      0.89      9131
weighted avg       0.89      0.89      0.89      9131

[0.87564767 0.82073955 0.80163043 0.88711036 0.92332896 0.97909156
 0.92814815]
VALID ACC : 0.889168765743073, VALID LOSS : 0.7885973903682683
{'epoch': 5, 'train_loss': 1.0954809373606733, 'train_acc': 0.7467896941653205, 'valid_acc': 0.889168765743073, 'val_loss': 0.7885973903682683, 'learning_rate': 1e-05}
Start Training: Epoch 6



Training:   0%|          | 1/1142 [00:00<07:56,  2.40it/s]

Training steps: 0 Loss: 1.1494874954223633


Training:   9%|▉         | 101/1142 [00:42<07:14,  2.40it/s]

Training steps: 100 Loss: 0.975829005241394


Training:  18%|█▊        | 201/1142 [01:24<06:26,  2.44it/s]

Training steps: 200 Loss: 0.9571357369422913


Training:  26%|██▋       | 301/1142 [02:06<05:44,  2.44it/s]

Training steps: 300 Loss: 1.0918629169464111


Training:  35%|███▌      | 401/1142 [02:48<05:34,  2.22it/s]

Training steps: 400 Loss: 1.2339234352111816


Training:  44%|████▍     | 501/1142 [03:30<04:27,  2.40it/s]

Training steps: 500 Loss: 1.222674012184143


Training:  53%|█████▎    | 601/1142 [04:12<03:41,  2.44it/s]

Training steps: 600 Loss: 1.0642653703689575


Training:  61%|██████▏   | 701/1142 [04:54<02:59,  2.45it/s]

Training steps: 700 Loss: 1.0220260620117188


Training:  70%|███████   | 801/1142 [05:36<02:34,  2.21it/s]

Training steps: 800 Loss: 1.1123336553573608


Training:  79%|███████▉  | 901/1142 [06:18<01:40,  2.40it/s]

Training steps: 900 Loss: 1.2403254508972168


Training:  88%|████████▊ | 1001/1142 [07:00<00:57,  2.44it/s]

Training steps: 1000 Loss: 1.0653713941574097


Training:  96%|█████████▋| 1101/1142 [07:42<00:16,  2.45it/s]

Training steps: 1100 Loss: 1.2114847898483276


Training: 100%|██████████| 1142/1142 [07:59<00:00,  2.38it/s]

TRAIN ACC : 0.751553815403992, TRAIN LOSS : 1.0863007968340572



Training:   1%|          | 2/286 [00:00<00:40,  7.00it/s]

Validation steps: 0 Loss: 0.7072815895080566


Training:  36%|███▌      | 102/286 [00:13<00:25,  7.36it/s]

Validation steps: 100 Loss: 0.772260308265686


Training:  71%|███████   | 202/286 [00:27<00:11,  7.33it/s]

Validation steps: 200 Loss: 0.6104211211204529


Training: 100%|██████████| 286/286 [00:38<00:00,  7.35it/s]

              precision    recall  f1-score   support

        IT과학       0.83      0.87      0.85       965
          경제       0.89      0.80      0.84      1244
          사회       0.79      0.80      0.79      1472
        생활문화       0.92      0.89      0.90      1187
          세계       0.94      0.93      0.93      1526
         스포츠       0.97      0.98      0.98      1387
          정치       0.89      0.95      0.92      1350

    accuracy                           0.89      9131
   macro avg       0.89      0.89      0.89      9131
weighted avg       0.89      0.89      0.89      9131

[0.86735751 0.80385852 0.80298913 0.88963774 0.9266055  0.98125451
 0.94592593]
VALID ACC : 0.8900449019822583, VALID LOSS : 0.7919680069793354
{'epoch': 6, 'train_loss': 1.0863007968340572, 'train_acc': 0.751553815403992, 'valid_acc': 0.8900449019822583, 'val_loss': 0.7919680069793354, 'learning_rate': 1e-05}
Start Training: Epoch 7



Training:   0%|          | 1/1142 [00:00<07:58,  2.38it/s]

Training steps: 0 Loss: 1.2014555931091309


Training:   9%|▉         | 101/1142 [00:42<07:15,  2.39it/s]

Training steps: 100 Loss: 1.0596299171447754


Training:  18%|█▊        | 201/1142 [01:24<06:26,  2.43it/s]

Training steps: 200 Loss: 1.0609149932861328


Training:  26%|██▋       | 301/1142 [02:06<05:43,  2.45it/s]

Training steps: 300 Loss: 1.0004781484603882


Training:  35%|███▌      | 401/1142 [02:48<05:34,  2.21it/s]

Training steps: 400 Loss: 1.0701463222503662


Training:  44%|████▍     | 501/1142 [03:30<04:28,  2.39it/s]

Training steps: 500 Loss: 0.8910226225852966


Training:  53%|█████▎    | 601/1142 [04:12<03:41,  2.44it/s]

Training steps: 600 Loss: 0.9838020205497742


Training:  61%|██████▏   | 701/1142 [04:54<03:00,  2.45it/s]

Training steps: 700 Loss: 1.0803560018539429


Training:  70%|███████   | 801/1142 [05:36<02:34,  2.21it/s]

Training steps: 800 Loss: 0.9991598129272461


Training:  79%|███████▉  | 901/1142 [06:18<01:40,  2.39it/s]

Training steps: 900 Loss: 0.9827499389648438


Training:  88%|████████▊ | 1001/1142 [07:00<00:57,  2.44it/s]

Training steps: 1000 Loss: 1.017526626586914


Training:  96%|█████████▋| 1101/1142 [07:42<00:16,  2.45it/s]

Training steps: 1100 Loss: 1.107848048210144


Training: 100%|██████████| 1142/1142 [07:59<00:00,  2.38it/s]

TRAIN ACC : 0.760123757632177, TRAIN LOSS : 1.079455091688136



Training:   1%|          | 2/286 [00:00<00:41,  6.92it/s]

Validation steps: 0 Loss: 0.7200267910957336


Training:  36%|███▌      | 102/286 [00:13<00:25,  7.35it/s]

Validation steps: 100 Loss: 0.8008188009262085


Training:  71%|███████   | 202/286 [00:27<00:11,  7.30it/s]

Validation steps: 200 Loss: 0.6166026592254639


Training: 100%|██████████| 286/286 [00:38<00:00,  7.35it/s]

              precision    recall  f1-score   support

        IT과학       0.82      0.88      0.85       965
          경제       0.89      0.81      0.85      1244
          사회       0.76      0.83      0.80      1472
        생활문화       0.93      0.87      0.90      1187
          세계       0.94      0.94      0.94      1526
         스포츠       0.96      0.99      0.97      1387
          정치       0.94      0.88      0.91      1350

    accuracy                           0.89      9131
   macro avg       0.89      0.89      0.89      9131
weighted avg       0.89      0.89      0.89      9131

[0.88290155 0.81189711 0.83491848 0.87278854 0.93905636 0.98846431
 0.88074074]
VALID ACC : 0.8892782827729712, VALID LOSS : 0.7851228101270182
{'epoch': 7, 'train_loss': 1.079455091688136, 'train_acc': 0.760123757632177, 'valid_acc': 0.8892782827729712, 'val_loss': 0.7851228101270182, 'learning_rate': 1e-05}
Start Training: Epoch 8



Training:   0%|          | 1/1142 [00:00<07:55,  2.40it/s]

Training steps: 0 Loss: 1.034605622291565


Training:   9%|▉         | 101/1142 [00:42<07:14,  2.40it/s]

Training steps: 100 Loss: 1.1786400079727173


Training:  18%|█▊        | 201/1142 [01:24<06:25,  2.44it/s]

Training steps: 200 Loss: 1.2142865657806396


Training:  26%|██▋       | 301/1142 [02:06<05:43,  2.45it/s]

Training steps: 300 Loss: 1.0280787944793701


Training:  35%|███▌      | 401/1142 [02:48<05:34,  2.22it/s]

Training steps: 400 Loss: 1.190422773361206


Training:  44%|████▍     | 501/1142 [03:30<04:27,  2.39it/s]

Training steps: 500 Loss: 1.1331126689910889


Training:  53%|█████▎    | 601/1142 [04:12<03:41,  2.44it/s]

Training steps: 600 Loss: 1.2374202013015747


Training:  61%|██████▏   | 701/1142 [04:54<03:00,  2.45it/s]

Training steps: 700 Loss: 1.0109919309616089


Training:  70%|███████   | 801/1142 [05:36<02:33,  2.22it/s]

Training steps: 800 Loss: 0.9984375834465027


Training:  79%|███████▉  | 901/1142 [06:18<01:40,  2.39it/s]

Training steps: 900 Loss: 1.0007280111312866


Training:  88%|████████▊ | 1001/1142 [07:00<00:57,  2.44it/s]

Training steps: 1000 Loss: 1.1112561225891113


Training:  96%|█████████▋| 1101/1142 [07:42<00:16,  2.45it/s]

Training steps: 1100 Loss: 1.1199686527252197


Training: 100%|██████████| 1142/1142 [07:59<00:00,  2.38it/s]

TRAIN ACC : 0.7694877200668072, TRAIN LOSS : 1.0661377199581332



Training:   1%|          | 2/286 [00:00<00:41,  6.88it/s]

Validation steps: 0 Loss: 0.7097046971321106


Training:  36%|███▌      | 102/286 [00:13<00:24,  7.36it/s]

Validation steps: 100 Loss: 0.7852455973625183


Training:  71%|███████   | 202/286 [00:27<00:11,  7.37it/s]

Validation steps: 200 Loss: 0.6139625310897827


Training: 100%|██████████| 286/286 [00:38<00:00,  7.35it/s]


              precision    recall  f1-score   support

        IT과학       0.83      0.88      0.85       965
          경제       0.88      0.80      0.84      1244
          사회       0.77      0.82      0.79      1472
        생활문화       0.93      0.87      0.90      1187
          세계       0.93      0.94      0.94      1526
         스포츠       0.97      0.98      0.97      1387
          정치       0.92      0.92      0.92      1350

    accuracy                           0.89      9131
   macro avg       0.89      0.89      0.89      9131
weighted avg       0.89      0.89      0.89      9131

[0.87772021 0.79823151 0.81861413 0.87194608 0.94102228 0.98341745
 0.91703704]
VALID ACC : 0.8890592487131749, VALID LOSS : 0.7898639603094622
{'epoch': 8, 'train_loss': 1.0661377199581332, 'train_acc': 0.7694877200668072, 'valid_acc': 0.8890592487131749, 'val_loss': 0.7898639603094622, 'learning_rate': 1e-05}
EarlyStopping counter: 5 out of 5


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.decoder.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'cla

Start Training: Epoch 1


Training:   0%|          | 1/1142 [00:00<08:02,  2.37it/s]

Training steps: 0 Loss: 1.9642407894134521


Training:   9%|▉         | 101/1142 [00:42<07:14,  2.40it/s]

Training steps: 100 Loss: 1.9054608345031738


Training:  18%|█▊        | 201/1142 [01:24<06:25,  2.44it/s]

Training steps: 200 Loss: 1.7550957202911377


Training:  26%|██▋       | 301/1142 [02:06<05:42,  2.45it/s]

Training steps: 300 Loss: 1.5237271785736084


Training:  35%|███▌      | 401/1142 [02:48<05:32,  2.23it/s]

Training steps: 400 Loss: 1.4706867933273315


Training:  44%|████▍     | 501/1142 [03:30<04:27,  2.40it/s]

Training steps: 500 Loss: 1.357208013534546


Training:  53%|█████▎    | 601/1142 [04:12<03:41,  2.44it/s]

Training steps: 600 Loss: 1.3390989303588867


Training:  61%|██████▏   | 701/1142 [04:54<02:59,  2.45it/s]

Training steps: 700 Loss: 1.158407211303711


Training:  70%|███████   | 801/1142 [05:36<02:33,  2.22it/s]

Training steps: 800 Loss: 1.1206669807434082


Training:  79%|███████▉  | 901/1142 [06:18<01:40,  2.40it/s]

Training steps: 900 Loss: 1.2705025672912598


Training:  88%|████████▊ | 1001/1142 [06:59<00:57,  2.45it/s]

Training steps: 1000 Loss: 1.186385154724121


Training:  96%|█████████▋| 1101/1142 [07:41<00:16,  2.45it/s]

Training steps: 1100 Loss: 1.1966795921325684


Training: 100%|██████████| 1142/1142 [07:59<00:00,  2.38it/s]

TRAIN ACC : 0.5757467896941654, TRAIN LOSS : 1.3745669500213997



Training:   1%|          | 2/286 [00:00<00:40,  6.98it/s]

Validation steps: 0 Loss: 0.8017823696136475


Training:  36%|███▌      | 102/286 [00:13<00:24,  7.38it/s]

Validation steps: 100 Loss: 0.8671383857727051


Training:  71%|███████   | 202/286 [00:27<00:11,  7.42it/s]

Validation steps: 200 Loss: 0.6860080361366272


Training: 100%|██████████| 286/286 [00:38<00:00,  7.38it/s]


              precision    recall  f1-score   support

        IT과학       0.82      0.84      0.83       965
          경제       0.83      0.84      0.83      1244
          사회       0.75      0.78      0.77      1472
        생활문화       0.93      0.88      0.90      1187
          세계       0.94      0.90      0.92      1526
         스포츠       0.95      0.99      0.97      1387
          정치       0.90      0.91      0.90      1350

    accuracy                           0.88      9131
   macro avg       0.88      0.88      0.88      9131
weighted avg       0.88      0.88      0.88      9131

[0.84145078 0.835209   0.78192935 0.87700084 0.89842726 0.98630137
 0.90518519]
VALID ACC : 0.8765743073047859, VALID LOSS : 0.8513236068762265
{'epoch': 1, 'train_loss': 1.3745669500213997, 'train_acc': 0.5757467896941654, 'valid_acc': 0.8765743073047859, 'val_loss': 0.8513236068762265, 'learning_rate': 1e-05}
saving model ...
Start Training: Epoch 2


Training:   0%|          | 1/1142 [00:00<08:13,  2.31it/s]

Training steps: 0 Loss: 0.9983851909637451


Training:   9%|▉         | 101/1142 [00:42<07:14,  2.39it/s]

Training steps: 100 Loss: 1.054282546043396


Training:  18%|█▊        | 201/1142 [01:24<06:25,  2.44it/s]

Training steps: 200 Loss: 1.0260775089263916


Training:  26%|██▋       | 301/1142 [02:06<05:42,  2.46it/s]

Training steps: 300 Loss: 1.1141738891601562


Training:  35%|███▌      | 401/1142 [02:48<05:32,  2.23it/s]

Training steps: 400 Loss: 1.0948976278305054


Training:  44%|████▍     | 501/1142 [03:30<04:27,  2.39it/s]

Training steps: 500 Loss: 1.286458134651184


Training:  53%|█████▎    | 601/1142 [04:12<03:41,  2.44it/s]

Training steps: 600 Loss: 1.0626122951507568


Training:  61%|██████▏   | 701/1142 [04:53<02:59,  2.45it/s]

Training steps: 700 Loss: 1.1856861114501953


Training:  70%|███████   | 801/1142 [05:36<02:34,  2.21it/s]

Training steps: 800 Loss: 0.970942497253418


Training:  79%|███████▉  | 901/1142 [06:17<01:40,  2.39it/s]

Training steps: 900 Loss: 1.19792640209198


Training:  88%|████████▊ | 1001/1142 [06:59<00:57,  2.44it/s]

Training steps: 1000 Loss: 1.03737473487854


Training:  96%|█████████▋| 1101/1142 [07:41<00:16,  2.45it/s]

Training steps: 1100 Loss: 1.1098332405090332


Training: 100%|██████████| 1142/1142 [07:59<00:00,  2.38it/s]

TRAIN ACC : 0.72806176929606, TRAIN LOSS : 1.1316511029224263



Training:   1%|          | 2/286 [00:00<00:41,  6.89it/s]

Validation steps: 0 Loss: 0.687444269657135


Training:  36%|███▌      | 102/286 [00:13<00:24,  7.38it/s]

Validation steps: 100 Loss: 0.807049036026001


Training:  71%|███████   | 202/286 [00:27<00:11,  7.38it/s]

Validation steps: 200 Loss: 0.6482462286949158


Training: 100%|██████████| 286/286 [00:38<00:00,  7.38it/s]

              precision    recall  f1-score   support

        IT과학       0.81      0.87      0.84       965
          경제       0.90      0.74      0.81      1244
          사회       0.68      0.85      0.76      1472
        생활문화       0.96      0.83      0.89      1187
          세계       0.92      0.93      0.92      1526
         스포츠       0.96      0.97      0.97      1387
          정치       0.93      0.87      0.90      1350

    accuracy                           0.87      9131
   macro avg       0.88      0.87      0.87      9131
weighted avg       0.88      0.87      0.87      9131

[0.87150259 0.73874598 0.85258152 0.82813816 0.93184797 0.9704398
 0.86888889]
VALID ACC : 0.8694557003614062, VALID LOSS : 0.8195259364751669
{'epoch': 2, 'train_loss': 1.1316511029224263, 'train_acc': 0.72806176929606, 'valid_acc': 0.8694557003614062, 'val_loss': 0.8195259364751669, 'learning_rate': 1e-05}
Start Training: Epoch 3



Training:   0%|          | 1/1142 [00:00<07:58,  2.38it/s]

Training steps: 0 Loss: 1.202979326248169


Training:   9%|▉         | 101/1142 [00:42<07:13,  2.40it/s]

Training steps: 100 Loss: 1.0218809843063354


Training:  18%|█▊        | 201/1142 [01:24<06:24,  2.45it/s]

Training steps: 200 Loss: 1.0580254793167114


Training:  26%|██▋       | 301/1142 [02:06<05:42,  2.46it/s]

Training steps: 300 Loss: 1.113777995109558


Training:  35%|███▌      | 401/1142 [02:48<05:33,  2.22it/s]

Training steps: 400 Loss: 1.1919556856155396


Training:  44%|████▍     | 501/1142 [03:30<04:27,  2.40it/s]

Training steps: 500 Loss: 1.171482801437378


Training:  53%|█████▎    | 601/1142 [04:12<03:41,  2.44it/s]

Training steps: 600 Loss: 1.0046265125274658


Training:  61%|██████▏   | 701/1142 [04:53<02:59,  2.45it/s]

Training steps: 700 Loss: 1.1318429708480835


Training:  70%|███████   | 801/1142 [05:36<02:33,  2.23it/s]

Training steps: 800 Loss: 1.04160475730896


Training:  79%|███████▉  | 901/1142 [06:17<01:40,  2.40it/s]

Training steps: 900 Loss: 1.1953767538070679


Training:  88%|████████▊ | 1001/1142 [06:59<00:57,  2.44it/s]

Training steps: 1000 Loss: 1.0262956619262695


Training:  96%|█████████▋| 1101/1142 [07:41<00:16,  2.46it/s]

Training steps: 1100 Loss: 1.0265716314315796


Training: 100%|██████████| 1142/1142 [07:58<00:00,  2.38it/s]

TRAIN ACC : 0.7395613722859568, TRAIN LOSS : 1.110153541360344



Training:   1%|          | 2/286 [00:00<00:41,  6.93it/s]

Validation steps: 0 Loss: 0.6959016919136047


Training:  36%|███▌      | 102/286 [00:13<00:25,  7.35it/s]

Validation steps: 100 Loss: 0.8322285413742065


Training:  71%|███████   | 202/286 [00:27<00:11,  7.37it/s]

Validation steps: 200 Loss: 0.6384927034378052


Training: 100%|██████████| 286/286 [00:38<00:00,  7.34it/s]


              precision    recall  f1-score   support

        IT과학       0.81      0.89      0.85       965
          경제       0.89      0.76      0.82      1244
          사회       0.75      0.82      0.79      1472
        생활문화       0.93      0.89      0.91      1187
          세계       0.93      0.93      0.93      1526
         스포츠       0.96      0.98      0.97      1387
          정치       0.92      0.91      0.91      1350

    accuracy                           0.88      9131
   macro avg       0.89      0.88      0.88      9131
weighted avg       0.89      0.88      0.88      9131

[0.89326425 0.7596463  0.82336957 0.89132266 0.92529489 0.98053353
 0.90592593]
VALID ACC : 0.8840214653378601, VALID LOSS : 0.801969164318138
{'epoch': 3, 'train_loss': 1.110153541360344, 'train_acc': 0.7395613722859568, 'valid_acc': 0.8840214653378601, 'val_loss': 0.801969164318138, 'learning_rate': 1e-05}
saving model ...
Start Training: Epoch 4


Training:   0%|          | 1/1142 [00:00<08:19,  2.29it/s]

Training steps: 0 Loss: 1.0734554529190063


Training:   9%|▉         | 101/1142 [00:42<07:14,  2.40it/s]

Training steps: 100 Loss: 0.9890831708908081


Training:  18%|█▊        | 201/1142 [01:24<06:26,  2.44it/s]

Training steps: 200 Loss: 1.1374119520187378


Training:  26%|██▋       | 301/1142 [02:06<05:42,  2.46it/s]

Training steps: 300 Loss: 1.0912259817123413


Training:  35%|███▌      | 401/1142 [02:48<05:33,  2.22it/s]

Training steps: 400 Loss: 1.2125357389450073


Training:  44%|████▍     | 501/1142 [03:30<04:28,  2.39it/s]

Training steps: 500 Loss: 1.0674868822097778


Training:  53%|█████▎    | 601/1142 [04:11<03:41,  2.45it/s]

Training steps: 600 Loss: 1.0332646369934082


Training:  61%|██████▏   | 701/1142 [04:53<02:59,  2.46it/s]

Training steps: 700 Loss: 1.2395989894866943


Training:  70%|███████   | 801/1142 [05:35<02:33,  2.23it/s]

Training steps: 800 Loss: 1.0197702646255493


Training:  79%|███████▉  | 901/1142 [06:17<01:40,  2.40it/s]

Training steps: 900 Loss: 1.018550992012024


Training:  88%|████████▊ | 1001/1142 [06:59<00:57,  2.44it/s]

Training steps: 1000 Loss: 1.0117695331573486


Training:  96%|█████████▋| 1101/1142 [07:41<00:16,  2.45it/s]

Training steps: 1100 Loss: 1.0996017456054688


Training: 100%|██████████| 1142/1142 [07:58<00:00,  2.39it/s]

TRAIN ACC : 0.7445171535744599, TRAIN LOSS : 1.0992416237365135



Training:   1%|          | 2/286 [00:00<00:41,  6.90it/s]

Validation steps: 0 Loss: 0.7441459894180298


Training:  36%|███▌      | 102/286 [00:13<00:24,  7.39it/s]

Validation steps: 100 Loss: 0.8501980304718018


Training:  71%|███████   | 202/286 [00:27<00:11,  7.36it/s]

Validation steps: 200 Loss: 0.6206143498420715


Training: 100%|██████████| 286/286 [00:38<00:00,  7.39it/s]

              precision    recall  f1-score   support

        IT과학       0.83      0.85      0.84       965
          경제       0.82      0.85      0.83      1244
          사회       0.80      0.78      0.79      1472
        생활문화       0.90      0.92      0.91      1187
          세계       0.93      0.92      0.92      1526
         스포츠       0.95      0.99      0.97      1387
          정치       0.94      0.87      0.90      1350

    accuracy                           0.88      9131
   macro avg       0.88      0.88      0.88      9131
weighted avg       0.88      0.88      0.88      9131

[0.85284974 0.84565916 0.77785326 0.91743892 0.91677588 0.98990627
 0.87407407]
VALID ACC : 0.8828167780089804, VALID LOSS : 0.8077766945312074
{'epoch': 4, 'train_loss': 1.0992416237365135, 'train_acc': 0.7445171535744599, 'valid_acc': 0.8828167780089804, 'val_loss': 0.8077766945312074, 'learning_rate': 1e-05}
Start Training: Epoch 5



Training:   0%|          | 1/1142 [00:00<07:58,  2.39it/s]

Training steps: 0 Loss: 1.2291418313980103


Training:   9%|▉         | 101/1142 [00:42<07:14,  2.39it/s]

Training steps: 100 Loss: 1.2200723886489868


Training:  18%|█▊        | 201/1142 [01:24<06:27,  2.43it/s]

Training steps: 200 Loss: 1.1077466011047363


Training:  26%|██▋       | 301/1142 [02:06<05:43,  2.45it/s]

Training steps: 300 Loss: 1.0937836170196533


Training:  35%|███▌      | 401/1142 [02:48<05:33,  2.22it/s]

Training steps: 400 Loss: 1.0540499687194824


Training:  44%|████▍     | 501/1142 [03:30<04:27,  2.39it/s]

Training steps: 500 Loss: 1.1197582483291626


Training:  53%|█████▎    | 601/1142 [04:12<03:41,  2.44it/s]

Training steps: 600 Loss: 1.0469311475753784


Training:  61%|██████▏   | 701/1142 [04:54<03:00,  2.44it/s]

Training steps: 700 Loss: 1.0446882247924805


Training:  70%|███████   | 801/1142 [05:36<02:33,  2.22it/s]

Training steps: 800 Loss: 1.0633577108383179


Training:  79%|███████▉  | 901/1142 [06:18<01:40,  2.39it/s]

Training steps: 900 Loss: 1.1415700912475586


Training:  88%|████████▊ | 1001/1142 [07:00<00:57,  2.43it/s]

Training steps: 1000 Loss: 0.9971631169319153


Training:  96%|█████████▋| 1101/1142 [07:42<00:16,  2.45it/s]

Training steps: 1100 Loss: 1.144512414932251


Training: 100%|██████████| 1142/1142 [08:00<00:00,  2.38it/s]

TRAIN ACC : 0.7585904772335241, TRAIN LOSS : 1.0869164694420301



Training:   1%|          | 2/286 [00:00<00:41,  6.88it/s]

Validation steps: 0 Loss: 0.6830787062644958


Training:  36%|███▌      | 102/286 [00:13<00:24,  7.36it/s]

Validation steps: 100 Loss: 0.81160569190979


Training:  71%|███████   | 202/286 [00:27<00:11,  7.37it/s]

Validation steps: 200 Loss: 0.6257688999176025


Training: 100%|██████████| 286/286 [00:38<00:00,  7.37it/s]

              precision    recall  f1-score   support

        IT과학       0.86      0.78      0.82       965
          경제       0.87      0.76      0.81      1244
          사회       0.71      0.83      0.77      1472
        생활문화       0.92      0.90      0.91      1187
          세계       0.91      0.94      0.92      1526
         스포츠       0.95      0.99      0.97      1387
          정치       0.94      0.88      0.91      1350

    accuracy                           0.87      9131
   macro avg       0.88      0.87      0.87      9131
weighted avg       0.88      0.87      0.87      9131

[0.78341969 0.76125402 0.83491848 0.89806234 0.93709043 0.98774333
 0.87925926]
VALID ACC : 0.8744934837367211, VALID LOSS : 0.7997109133463639
{'epoch': 5, 'train_loss': 1.0869164694420301, 'train_acc': 0.7585904772335241, 'valid_acc': 0.8744934837367211, 'val_loss': 0.7997109133463639, 'learning_rate': 1e-05}
Start Training: Epoch 6



Training:   0%|          | 1/1142 [00:00<07:59,  2.38it/s]

Training steps: 0 Loss: 1.1939574480056763


Training:   9%|▉         | 101/1142 [00:42<07:16,  2.39it/s]

Training steps: 100 Loss: 1.2497166395187378


Training:  18%|█▊        | 201/1142 [01:24<06:25,  2.44it/s]

Training steps: 200 Loss: 0.9555972218513489


Training:  26%|██▋       | 301/1142 [02:06<05:43,  2.45it/s]

Training steps: 300 Loss: 0.9993033409118652


Training:  35%|███▌      | 401/1142 [02:48<05:33,  2.22it/s]

Training steps: 400 Loss: 1.1190208196640015


Training:  44%|████▍     | 501/1142 [03:30<04:27,  2.39it/s]

Training steps: 500 Loss: 1.023155689239502


Training:  53%|█████▎    | 601/1142 [04:12<03:41,  2.44it/s]

Training steps: 600 Loss: 1.0377753973007202


Training:  61%|██████▏   | 701/1142 [04:54<02:59,  2.45it/s]

Training steps: 700 Loss: 1.1620732545852661


Training:  70%|███████   | 801/1142 [05:36<02:33,  2.22it/s]

Training steps: 800 Loss: 1.1100821495056152


Training:  79%|███████▉  | 901/1142 [06:18<01:40,  2.39it/s]

Training steps: 900 Loss: 1.1011747121810913


Training:  88%|████████▊ | 1001/1142 [07:00<00:57,  2.44it/s]

Training steps: 1000 Loss: 1.2539339065551758


Training:  96%|█████████▋| 1101/1142 [07:42<00:16,  2.44it/s]

Training steps: 1100 Loss: 1.0277971029281616


Training: 100%|██████████| 1142/1142 [08:00<00:00,  2.38it/s]

TRAIN ACC : 0.7627248583084631, TRAIN LOSS : 1.0787547434274036



Training:   1%|          | 2/286 [00:00<00:40,  6.95it/s]

Validation steps: 0 Loss: 0.7617617845535278


Training:  36%|███▌      | 102/286 [00:13<00:25,  7.36it/s]

Validation steps: 100 Loss: 0.8543241024017334


Training:  71%|███████   | 202/286 [00:27<00:11,  7.36it/s]

Validation steps: 200 Loss: 0.620648980140686


Training: 100%|██████████| 286/286 [00:38<00:00,  7.36it/s]

              precision    recall  f1-score   support

        IT과학       0.78      0.93      0.85       965
          경제       0.88      0.77      0.82      1244
          사회       0.76      0.79      0.78      1472
        생활문화       0.89      0.93      0.91      1187
          세계       0.95      0.89      0.92      1526
         스포츠       0.95      0.98      0.97      1387
          정치       0.93      0.88      0.90      1350

    accuracy                           0.88      9131
   macro avg       0.88      0.88      0.88      9131
weighted avg       0.88      0.88      0.88      9131

[0.93160622 0.77250804 0.79211957 0.93091828 0.8938401  0.98485941
 0.87555556]
VALID ACC : 0.8808454714708137, VALID LOSS : 0.8108034715369031
{'epoch': 6, 'train_loss': 1.0787547434274036, 'train_acc': 0.7627248583084631, 'valid_acc': 0.8808454714708137, 'val_loss': 0.8108034715369031, 'learning_rate': 1e-05}
Start Training: Epoch 7



Training:   0%|          | 1/1142 [00:00<08:02,  2.37it/s]

Training steps: 0 Loss: 1.148687720298767


Training:   9%|▉         | 101/1142 [00:42<07:16,  2.39it/s]

Training steps: 100 Loss: 1.0358229875564575


Training:  18%|█▊        | 201/1142 [01:24<06:26,  2.43it/s]

Training steps: 200 Loss: 0.9879999160766602


Training:  26%|██▋       | 301/1142 [02:06<05:44,  2.44it/s]

Training steps: 300 Loss: 1.0683315992355347


Training:  35%|███▌      | 401/1142 [02:48<05:34,  2.22it/s]

Training steps: 400 Loss: 1.0925862789154053


Training:  44%|████▍     | 501/1142 [03:30<04:27,  2.39it/s]

Training steps: 500 Loss: 1.2056725025177002


Training:  53%|█████▎    | 601/1142 [04:12<03:41,  2.44it/s]

Training steps: 600 Loss: 1.080783724784851


Training:  61%|██████▏   | 701/1142 [04:54<02:59,  2.45it/s]

Training steps: 700 Loss: 1.0444458723068237


Training:  70%|███████   | 801/1142 [05:36<02:34,  2.21it/s]

Training steps: 800 Loss: 0.9657320976257324


Training:  79%|███████▉  | 901/1142 [06:18<01:40,  2.39it/s]

Training steps: 900 Loss: 1.1892296075820923


Training:  88%|████████▊ | 1001/1142 [07:00<00:57,  2.44it/s]

Training steps: 1000 Loss: 1.0968166589736938


Training:  96%|█████████▋| 1101/1142 [07:42<00:16,  2.45it/s]

Training steps: 1100 Loss: 0.9873191714286804


Training: 100%|██████████| 1142/1142 [08:00<00:00,  2.38it/s]

TRAIN ACC : 0.7643950387427101, TRAIN LOSS : 1.0721429611985196



Training:   1%|          | 2/286 [00:00<00:41,  6.93it/s]

Validation steps: 0 Loss: 0.6686868667602539


Training:  36%|███▌      | 102/286 [00:13<00:25,  7.31it/s]

Validation steps: 100 Loss: 0.8613244295120239


Training:  71%|███████   | 202/286 [00:27<00:11,  7.25it/s]

Validation steps: 200 Loss: 0.607044517993927


Training: 100%|██████████| 286/286 [00:39<00:00,  7.31it/s]


              precision    recall  f1-score   support

        IT과학       0.83      0.88      0.85       965
          경제       0.86      0.80      0.83      1244
          사회       0.79      0.78      0.79      1472
        생활문화       0.92      0.90      0.91      1187
          세계       0.91      0.94      0.92      1526
         스포츠       0.95      0.99      0.97      1387
          정치       0.92      0.89      0.91      1350

    accuracy                           0.88      9131
   macro avg       0.88      0.88      0.88      9131
weighted avg       0.88      0.88      0.88      9131

[0.88186528 0.80466238 0.78328804 0.89721988 0.93774574 0.98918529
 0.89333333]
VALID ACC : 0.8847880845471471, VALID LOSS : 0.8059143217710348
{'epoch': 7, 'train_loss': 1.0721429611985196, 'train_acc': 0.7643950387427101, 'valid_acc': 0.8847880845471471, 'val_loss': 0.8059143217710348, 'learning_rate': 1e-05}
saving model ...
Start Training: Epoch 8


Training:   0%|          | 1/1142 [00:00<08:23,  2.27it/s]

Training steps: 0 Loss: 1.161171793937683


Training:   9%|▉         | 101/1142 [00:42<07:14,  2.39it/s]

Training steps: 100 Loss: 1.1798619031906128


Training:  18%|█▊        | 201/1142 [01:24<06:26,  2.43it/s]

Training steps: 200 Loss: 0.8390673398971558


Training:  26%|██▋       | 301/1142 [02:06<05:43,  2.45it/s]

Training steps: 300 Loss: 1.1021442413330078


Training:  35%|███▌      | 401/1142 [02:48<05:33,  2.22it/s]

Training steps: 400 Loss: 1.141762375831604


Training:  44%|████▍     | 501/1142 [03:30<04:28,  2.39it/s]

Training steps: 500 Loss: 1.047084927558899


Training:  53%|█████▎    | 601/1142 [04:12<03:42,  2.43it/s]

Training steps: 600 Loss: 1.0838998556137085


Training:  61%|██████▏   | 701/1142 [04:54<03:00,  2.45it/s]

Training steps: 700 Loss: 1.0908221006393433


Training:  70%|███████   | 801/1142 [05:36<02:34,  2.21it/s]

Training steps: 800 Loss: 1.026660442352295


Training:  79%|███████▉  | 901/1142 [06:18<01:40,  2.39it/s]

Training steps: 900 Loss: 0.9921618103981018


Training:  88%|████████▊ | 1001/1142 [07:00<00:57,  2.43it/s]

Training steps: 1000 Loss: 1.093803882598877


Training:  96%|█████████▋| 1101/1142 [07:42<00:16,  2.45it/s]

Training steps: 1100 Loss: 1.116465449333191


Training: 100%|██████████| 1142/1142 [08:00<00:00,  2.38it/s]

TRAIN ACC : 0.7717602606576678, TRAIN LOSS : 1.0658986361243887



Training:   1%|          | 2/286 [00:00<00:41,  6.91it/s]

Validation steps: 0 Loss: 0.6743261814117432


Training:  36%|███▌      | 102/286 [00:13<00:25,  7.32it/s]

Validation steps: 100 Loss: 0.8344399333000183


Training:  71%|███████   | 202/286 [00:27<00:11,  7.37it/s]

Validation steps: 200 Loss: 0.6201891899108887


Training: 100%|██████████| 286/286 [00:38<00:00,  7.36it/s]

              precision    recall  f1-score   support

        IT과학       0.84      0.81      0.82       965
          경제       0.88      0.77      0.82      1244
          사회       0.73      0.82      0.77      1472
        생활문화       0.93      0.89      0.91      1187
          세계       0.91      0.93      0.92      1526
         스포츠       0.96      0.98      0.97      1387
          정치       0.92      0.90      0.91      1350

    accuracy                           0.88      9131
   macro avg       0.88      0.87      0.87      9131
weighted avg       0.88      0.88      0.88      9131

[0.80932642 0.77250804 0.81725543 0.89216512 0.9344692  0.98269647
 0.8962963 ]
VALID ACC : 0.8764647902748878, VALID LOSS : 0.8090493977069855
{'epoch': 8, 'train_loss': 1.0658986361243887, 'train_acc': 0.7717602606576678, 'valid_acc': 0.8764647902748878, 'val_loss': 0.8090493977069855, 'learning_rate': 1e-05}
Start Training: Epoch 9



Training:   0%|          | 1/1142 [00:00<07:57,  2.39it/s]

Training steps: 0 Loss: 1.1059287786483765


Training:   9%|▉         | 101/1142 [00:42<07:15,  2.39it/s]

Training steps: 100 Loss: 1.1203501224517822


Training:  18%|█▊        | 201/1142 [01:24<06:26,  2.44it/s]

Training steps: 200 Loss: 0.9824450016021729


Training:  26%|██▋       | 301/1142 [02:06<05:43,  2.45it/s]

Training steps: 300 Loss: 1.0944026708602905


Training:  35%|███▌      | 401/1142 [02:48<05:32,  2.23it/s]

Training steps: 400 Loss: 0.9687995910644531


Training:  44%|████▍     | 501/1142 [03:30<04:27,  2.40it/s]

Training steps: 500 Loss: 1.054253101348877


Training:  53%|█████▎    | 601/1142 [04:12<03:42,  2.43it/s]

Training steps: 600 Loss: 1.064218282699585


Training:  61%|██████▏   | 701/1142 [04:54<03:00,  2.44it/s]

Training steps: 700 Loss: 1.0943609476089478


Training:  70%|███████   | 801/1142 [05:36<02:33,  2.23it/s]

Training steps: 800 Loss: 0.9415847659111023


Training:  79%|███████▉  | 901/1142 [06:18<01:40,  2.39it/s]

Training steps: 900 Loss: 0.9357951283454895


Training:  88%|████████▊ | 1001/1142 [07:00<00:57,  2.44it/s]

Training steps: 1000 Loss: 1.0598253011703491


Training:  96%|█████████▋| 1101/1142 [07:42<00:16,  2.45it/s]

Training steps: 1100 Loss: 0.9982209205627441


Training: 100%|██████████| 1142/1142 [08:00<00:00,  2.38it/s]

TRAIN ACC : 0.771732880650549, TRAIN LOSS : 1.0649629857410692



Training:   1%|          | 2/286 [00:00<00:42,  6.75it/s]

Validation steps: 0 Loss: 0.6901550889015198


Training:  36%|███▌      | 102/286 [00:13<00:25,  7.34it/s]

Validation steps: 100 Loss: 0.8307098150253296


Training:  71%|███████   | 202/286 [00:27<00:11,  7.31it/s]

Validation steps: 200 Loss: 0.6039970517158508


Training: 100%|██████████| 286/286 [00:38<00:00,  7.36it/s]

              precision    recall  f1-score   support

        IT과학       0.83      0.83      0.83       965
          경제       0.82      0.83      0.83      1244
          사회       0.79      0.76      0.78      1472
        생활문화       0.91      0.91      0.91      1187
          세계       0.92      0.93      0.92      1526
         스포츠       0.96      0.98      0.97      1387
          정치       0.91      0.91      0.91      1350

    accuracy                           0.88      9131
   macro avg       0.88      0.88      0.88      9131
weighted avg       0.88      0.88      0.88      9131

[0.83316062 0.83118971 0.76494565 0.91069924 0.92529489 0.97981255
 0.91111111]
VALID ACC : 0.8811740225605081, VALID LOSS : 0.8170427315301828
{'epoch': 9, 'train_loss': 1.0649629857410692, 'train_acc': 0.771732880650549, 'valid_acc': 0.8811740225605081, 'val_loss': 0.8170427315301828, 'learning_rate': 1e-05}
Start Training: Epoch 10



Training:   0%|          | 1/1142 [00:00<08:00,  2.37it/s]

Training steps: 0 Loss: 1.1782355308532715


Training:   9%|▉         | 101/1142 [00:42<07:16,  2.39it/s]

Training steps: 100 Loss: 1.066666603088379


Training:  18%|█▊        | 201/1142 [01:24<06:26,  2.44it/s]

Training steps: 200 Loss: 1.0337536334991455


Training:  26%|██▋       | 301/1142 [02:06<05:43,  2.44it/s]

Training steps: 300 Loss: 1.0649504661560059


Training:  35%|███▌      | 401/1142 [02:48<05:33,  2.22it/s]

Training steps: 400 Loss: 0.9815161824226379


Training:  44%|████▍     | 501/1142 [03:30<04:27,  2.40it/s]

Training steps: 500 Loss: 1.0299159288406372


Training:  53%|█████▎    | 601/1142 [04:12<03:41,  2.44it/s]

Training steps: 600 Loss: 1.0287775993347168


Training:  61%|██████▏   | 701/1142 [04:54<03:00,  2.45it/s]

Training steps: 700 Loss: 1.0231066942214966


Training:  70%|███████   | 801/1142 [05:36<02:33,  2.23it/s]

Training steps: 800 Loss: 1.0815848112106323


Training:  79%|███████▉  | 901/1142 [06:18<01:40,  2.40it/s]

Training steps: 900 Loss: 1.1291391849517822


Training:  88%|████████▊ | 1001/1142 [07:00<00:57,  2.44it/s]

Training steps: 1000 Loss: 1.073215126991272


Training:  96%|█████████▋| 1101/1142 [07:42<00:16,  2.45it/s]

Training steps: 1100 Loss: 1.03058922290802


Training: 100%|██████████| 1142/1142 [07:59<00:00,  2.38it/s]

TRAIN ACC : 0.7822741833912877, TRAIN LOSS : 1.0523969793069174



Training:   1%|          | 2/286 [00:00<00:40,  6.99it/s]

Validation steps: 0 Loss: 0.7654987573623657


Training:  36%|███▌      | 102/286 [00:13<00:24,  7.37it/s]

Validation steps: 100 Loss: 0.8446341753005981


Training:  71%|███████   | 202/286 [00:27<00:11,  7.35it/s]

Validation steps: 200 Loss: 0.6084038019180298


Training: 100%|██████████| 286/286 [00:38<00:00,  7.38it/s]

              precision    recall  f1-score   support

        IT과학       0.81      0.89      0.85       965
          경제       0.83      0.82      0.82      1244
          사회       0.77      0.79      0.78      1472
        생활문화       0.90      0.92      0.91      1187
          세계       0.95      0.89      0.92      1526
         스포츠       0.95      0.99      0.97      1387
          정치       0.93      0.87      0.90      1350

    accuracy                           0.88      9131
   macro avg       0.88      0.88      0.88      9131
weighted avg       0.88      0.88      0.88      9131

[0.88704663 0.81511254 0.78532609 0.92080876 0.8938401  0.98558039
 0.86962963]
VALID ACC : 0.8787646479027489, VALID LOSS : 0.8178522632672236
{'epoch': 10, 'train_loss': 1.0523969793069174, 'train_acc': 0.7822741833912877, 'valid_acc': 0.8787646479027489, 'val_loss': 0.8178522632672236, 'learning_rate': 1e-05}
Start Training: Epoch 11



Training:   0%|          | 1/1142 [00:00<07:56,  2.40it/s]

Training steps: 0 Loss: 1.1789538860321045


Training:   9%|▉         | 101/1142 [00:42<07:14,  2.40it/s]

Training steps: 100 Loss: 0.9866645336151123


Training:  18%|█▊        | 201/1142 [01:24<06:25,  2.44it/s]

Training steps: 200 Loss: 1.0345321893692017


Training:  26%|██▋       | 301/1142 [02:06<05:43,  2.45it/s]

Training steps: 300 Loss: 1.1291053295135498


Training:  35%|███▌      | 401/1142 [02:48<05:33,  2.22it/s]

Training steps: 400 Loss: 1.045975685119629


Training:  44%|████▍     | 501/1142 [03:30<04:27,  2.39it/s]

Training steps: 500 Loss: 1.14761221408844


Training:  53%|█████▎    | 601/1142 [04:11<03:41,  2.44it/s]

Training steps: 600 Loss: 1.1289955377578735


Training:  61%|██████▏   | 701/1142 [04:53<02:59,  2.45it/s]

Training steps: 700 Loss: 1.116148829460144


Training:  70%|███████   | 801/1142 [05:35<02:33,  2.23it/s]

Training steps: 800 Loss: 1.2207266092300415


Training:  79%|███████▉  | 901/1142 [06:17<01:40,  2.40it/s]

Training steps: 900 Loss: 1.026496171951294


Training:  88%|████████▊ | 1001/1142 [06:59<00:57,  2.44it/s]

Training steps: 1000 Loss: 1.189889669418335


Training:  96%|█████████▋| 1101/1142 [07:41<00:16,  2.45it/s]

Training steps: 1100 Loss: 1.034438133239746


Training: 100%|██████████| 1142/1142 [07:58<00:00,  2.38it/s]

TRAIN ACC : 0.784026503846891, TRAIN LOSS : 1.0518253637696315



Training:   1%|          | 2/286 [00:00<00:41,  6.90it/s]

Validation steps: 0 Loss: 0.7114232778549194


Training:  36%|███▌      | 102/286 [00:13<00:25,  7.36it/s]

Validation steps: 100 Loss: 0.8536045551300049


Training:  71%|███████   | 202/286 [00:27<00:11,  7.33it/s]

Validation steps: 200 Loss: 0.5999807715415955


Training: 100%|██████████| 286/286 [00:38<00:00,  7.38it/s]

              precision    recall  f1-score   support

        IT과학       0.85      0.82      0.83       965
          경제       0.81      0.84      0.83      1244
          사회       0.78      0.77      0.78      1472
        생활문화       0.92      0.89      0.91      1187
          세계       0.92      0.92      0.92      1526
         스포츠       0.95      0.99      0.97      1387
          정치       0.91      0.90      0.91      1350

    accuracy                           0.88      9131
   macro avg       0.88      0.88      0.88      9131
weighted avg       0.88      0.88      0.88      9131

[0.81865285 0.84485531 0.77309783 0.89300758 0.92267366 0.98702235
 0.90296296]
VALID ACC : 0.8799693352316286, VALID LOSS : 0.8208200291736977
{'epoch': 11, 'train_loss': 1.0518253637696315, 'train_acc': 0.784026503846891, 'valid_acc': 0.8799693352316286, 'val_loss': 0.8208200291736977, 'learning_rate': 1e-05}
Start Training: Epoch 12



Training:   0%|          | 1/1142 [00:00<07:55,  2.40it/s]

Training steps: 0 Loss: 0.9756888151168823


Training:   9%|▉         | 101/1142 [00:42<07:14,  2.40it/s]

Training steps: 100 Loss: 1.0547581911087036


Training:  18%|█▊        | 201/1142 [01:24<06:25,  2.44it/s]

Training steps: 200 Loss: 1.1652449369430542


Training:  26%|██▋       | 301/1142 [02:06<05:43,  2.45it/s]

Training steps: 300 Loss: 0.9988837242126465


Training:  35%|███▌      | 401/1142 [02:48<05:32,  2.23it/s]

Training steps: 400 Loss: 1.0788971185684204


Training:  44%|████▍     | 501/1142 [03:30<04:27,  2.40it/s]

Training steps: 500 Loss: 1.0761820077896118


Training:  53%|█████▎    | 601/1142 [04:12<03:41,  2.45it/s]

Training steps: 600 Loss: 1.0268208980560303


Training:  61%|██████▏   | 701/1142 [04:53<02:59,  2.46it/s]

Training steps: 700 Loss: 1.0562961101531982


Training:  70%|███████   | 801/1142 [05:36<02:33,  2.22it/s]

Training steps: 800 Loss: 1.0785880088806152


Training:  79%|███████▉  | 901/1142 [06:18<01:40,  2.39it/s]

Training steps: 900 Loss: 1.2183154821395874


Training:  88%|████████▊ | 1001/1142 [06:59<00:57,  2.44it/s]

Training steps: 1000 Loss: 1.0314123630523682


Training:  96%|█████████▋| 1101/1142 [07:41<00:16,  2.46it/s]

Training steps: 1100 Loss: 0.9940925240516663


Training: 100%|██████████| 1142/1142 [07:59<00:00,  2.38it/s]

TRAIN ACC : 0.7822741833912877, TRAIN LOSS : 1.0510262013421168



Training:   1%|          | 2/286 [00:00<00:41,  6.93it/s]

Validation steps: 0 Loss: 0.6753327250480652


Training:  36%|███▌      | 102/286 [00:13<00:24,  7.38it/s]

Validation steps: 100 Loss: 0.83404541015625


Training:  71%|███████   | 202/286 [00:27<00:11,  7.40it/s]

Validation steps: 200 Loss: 0.601109504699707


Training: 100%|██████████| 286/286 [00:38<00:00,  7.38it/s]


              precision    recall  f1-score   support

        IT과학       0.83      0.85      0.84       965
          경제       0.84      0.81      0.83      1244
          사회       0.78      0.78      0.78      1472
        생활문화       0.92      0.91      0.91      1187
          세계       0.91      0.93      0.92      1526
         스포츠       0.95      0.99      0.97      1387
          정치       0.93      0.89      0.91      1350

    accuracy                           0.88      9131
   macro avg       0.88      0.88      0.88      9131
weighted avg       0.88      0.88      0.88      9131

[0.85388601 0.81109325 0.78125    0.9115417  0.93250328 0.98774333
 0.88740741]
VALID ACC : 0.8822691928594897, VALID LOSS : 0.8161517740129591
{'epoch': 12, 'train_loss': 1.0510262013421168, 'train_acc': 0.7822741833912877, 'valid_acc': 0.8822691928594897, 'val_loss': 0.8161517740129591, 'learning_rate': 1e-05}
EarlyStopping counter: 5 out of 5


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.decoder.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'cla

Start Training: Epoch 1


Training:   0%|          | 1/1142 [00:00<08:05,  2.35it/s]

Training steps: 0 Loss: 1.9324411153793335


Training:   9%|▉         | 101/1142 [00:42<07:16,  2.39it/s]

Training steps: 100 Loss: 1.9550044536590576


Training:  18%|█▊        | 201/1142 [01:24<06:26,  2.43it/s]

Training steps: 200 Loss: 1.697466492652893


Training:  26%|██▋       | 301/1142 [02:06<05:44,  2.44it/s]

Training steps: 300 Loss: 1.5274080038070679


Training:  35%|███▌      | 401/1142 [02:49<05:33,  2.22it/s]

Training steps: 400 Loss: 1.343903660774231


Training:  44%|████▍     | 501/1142 [03:31<04:28,  2.38it/s]

Training steps: 500 Loss: 1.3310291767120361


Training:  53%|█████▎    | 601/1142 [04:13<03:43,  2.42it/s]

Training steps: 600 Loss: 1.1378328800201416


Training:  61%|██████▏   | 701/1142 [04:55<03:01,  2.43it/s]

Training steps: 700 Loss: 1.1548824310302734


Training:  70%|███████   | 801/1142 [05:38<02:34,  2.20it/s]

Training steps: 800 Loss: 1.2494014501571655


Training:  79%|███████▉  | 901/1142 [06:20<01:41,  2.38it/s]

Training steps: 900 Loss: 1.1231486797332764


Training:  88%|████████▊ | 1001/1142 [07:02<00:58,  2.42it/s]

Training steps: 1000 Loss: 1.159562349319458


Training:  96%|█████████▋| 1101/1142 [07:44<00:16,  2.43it/s]

Training steps: 1100 Loss: 1.1684324741363525


Training: 100%|██████████| 1142/1142 [08:02<00:00,  2.37it/s]

TRAIN ACC : 0.5717219286477014, TRAIN LOSS : 1.37181448832076



Training:   1%|          | 2/286 [00:00<00:41,  6.76it/s]

Validation steps: 0 Loss: 0.881833553314209


Training:  36%|███▌      | 102/286 [00:14<00:25,  7.23it/s]

Validation steps: 100 Loss: 0.8554469347000122


Training:  71%|███████   | 202/286 [00:28<00:11,  7.24it/s]

Validation steps: 200 Loss: 0.7136609554290771


Training: 100%|██████████| 286/286 [00:39<00:00,  7.23it/s]


              precision    recall  f1-score   support

        IT과학       0.82      0.87      0.84       964
          경제       0.91      0.75      0.83      1245
          사회       0.71      0.82      0.76      1473
        생활문화       0.93      0.84      0.88      1186
          세계       0.94      0.88      0.91      1526
         스포츠       0.94      0.99      0.96      1387
          정치       0.88      0.93      0.90      1350

    accuracy                           0.87      9131
   macro avg       0.88      0.87      0.87      9131
weighted avg       0.88      0.87      0.87      9131

[0.86825726 0.75421687 0.81873727 0.83642496 0.88335518 0.98630137
 0.92740741]
VALID ACC : 0.8697842514511006, VALID LOSS : 0.8527725905805201
{'epoch': 1, 'train_loss': 1.37181448832076, 'train_acc': 0.5717219286477014, 'valid_acc': 0.8697842514511006, 'val_loss': 0.8527725905805201, 'learning_rate': 1e-05}
saving model ...
Start Training: Epoch 2


Training:   0%|          | 1/1142 [00:00<08:28,  2.24it/s]

Training steps: 0 Loss: 1.0595736503601074


Training:   9%|▉         | 101/1142 [00:42<07:14,  2.39it/s]

Training steps: 100 Loss: 1.192981481552124


Training:  18%|█▊        | 201/1142 [01:24<06:26,  2.43it/s]

Training steps: 200 Loss: 1.1652714014053345


Training:  26%|██▋       | 301/1142 [02:06<05:43,  2.45it/s]

Training steps: 300 Loss: 1.1451408863067627


Training:  35%|███▌      | 401/1142 [02:48<05:35,  2.21it/s]

Training steps: 400 Loss: 1.0829823017120361


Training:  44%|████▍     | 501/1142 [03:30<04:28,  2.39it/s]

Training steps: 500 Loss: 1.1016936302185059


Training:  53%|█████▎    | 601/1142 [04:12<03:41,  2.44it/s]

Training steps: 600 Loss: 1.2367393970489502


Training:  61%|██████▏   | 701/1142 [04:54<03:00,  2.44it/s]

Training steps: 700 Loss: 1.125901222229004


Training:  70%|███████   | 801/1142 [05:37<02:34,  2.21it/s]

Training steps: 800 Loss: 1.033016562461853


Training:  79%|███████▉  | 901/1142 [06:19<01:40,  2.39it/s]

Training steps: 900 Loss: 1.2067071199417114


Training:  88%|████████▊ | 1001/1142 [07:01<00:58,  2.42it/s]

Training steps: 1000 Loss: 1.1265205144882202


Training:  96%|█████████▋| 1101/1142 [07:43<00:16,  2.44it/s]

Training steps: 1100 Loss: 1.1374707221984863


Training: 100%|██████████| 1142/1142 [08:00<00:00,  2.37it/s]

TRAIN ACC : 0.7262546888262191, TRAIN LOSS : 1.136183891738985



Training:   1%|          | 2/286 [00:00<00:41,  6.86it/s]

Validation steps: 0 Loss: 0.8179359436035156


Training:  36%|███▌      | 102/286 [00:14<00:25,  7.18it/s]

Validation steps: 100 Loss: 0.8305918574333191


Training:  71%|███████   | 202/286 [00:28<00:11,  7.24it/s]

Validation steps: 200 Loss: 0.6764821410179138


Training: 100%|██████████| 286/286 [00:39<00:00,  7.24it/s]


              precision    recall  f1-score   support

        IT과학       0.83      0.85      0.84       964
          경제       0.90      0.79      0.84      1245
          사회       0.72      0.84      0.77      1473
        생활문화       0.92      0.85      0.89      1186
          세계       0.94      0.89      0.92      1526
         스포츠       0.95      0.98      0.96      1387
          정치       0.91      0.91      0.91      1350

    accuracy                           0.88      9131
   macro avg       0.88      0.87      0.88      9131
weighted avg       0.88      0.88      0.88      9131

[0.84751037 0.7935743  0.83910387 0.85160202 0.89121887 0.9776496
 0.91259259]
VALID ACC : 0.8760267221552952, VALID LOSS : 0.820665661896859
{'epoch': 2, 'train_loss': 1.136183891738985, 'train_acc': 0.7262546888262191, 'valid_acc': 0.8760267221552952, 'val_loss': 0.820665661896859, 'learning_rate': 1e-05}
saving model ...
Start Training: Epoch 3


Training:   0%|          | 1/1142 [00:00<08:24,  2.26it/s]

Training steps: 0 Loss: 1.0011963844299316


Training:   9%|▉         | 101/1142 [00:42<07:16,  2.38it/s]

Training steps: 100 Loss: 1.1217573881149292


Training:  18%|█▊        | 201/1142 [01:24<06:27,  2.43it/s]

Training steps: 200 Loss: 1.0776145458221436


Training:  26%|██▋       | 301/1142 [02:06<05:44,  2.44it/s]

Training steps: 300 Loss: 1.0921767950057983


Training:  35%|███▌      | 401/1142 [02:49<05:35,  2.21it/s]

Training steps: 400 Loss: 1.1521934270858765


Training:  44%|████▍     | 501/1142 [03:31<04:28,  2.39it/s]

Training steps: 500 Loss: 1.2358652353286743


Training:  53%|█████▎    | 601/1142 [04:13<03:42,  2.43it/s]

Training steps: 600 Loss: 1.1753515005111694


Training:  61%|██████▏   | 701/1142 [04:55<03:00,  2.44it/s]

Training steps: 700 Loss: 1.0575121641159058


Training:  70%|███████   | 801/1142 [05:37<02:34,  2.21it/s]

Training steps: 800 Loss: 1.246276617050171


Training:  79%|███████▉  | 901/1142 [06:19<01:41,  2.38it/s]

Training steps: 900 Loss: 1.0976982116699219


Training:  88%|████████▊ | 1001/1142 [07:01<00:58,  2.43it/s]

Training steps: 1000 Loss: 1.037825107574463


Training:  96%|█████████▋| 1101/1142 [07:43<00:16,  2.45it/s]

Training steps: 1100 Loss: 0.9954817891120911


Training: 100%|██████████| 1142/1142 [08:01<00:00,  2.37it/s]

TRAIN ACC : 0.7330723105988007, TRAIN LOSS : 1.1155168553471775



Training:   1%|          | 2/286 [00:00<00:42,  6.75it/s]

Validation steps: 0 Loss: 0.8027603626251221


Training:  36%|███▌      | 102/286 [00:13<00:25,  7.33it/s]

Validation steps: 100 Loss: 0.8128278255462646


Training:  71%|███████   | 202/286 [00:27<00:11,  7.33it/s]

Validation steps: 200 Loss: 0.6462736129760742


Training: 100%|██████████| 286/286 [00:39<00:00,  7.32it/s]


              precision    recall  f1-score   support

        IT과학       0.83      0.86      0.85       964
          경제       0.93      0.73      0.82      1245
          사회       0.71      0.86      0.78      1473
        생활문화       0.92      0.88      0.90      1186
          세계       0.93      0.92      0.92      1526
         스포츠       0.96      0.98      0.97      1387
          정치       0.93      0.89      0.91      1350

    accuracy                           0.88      9131
   macro avg       0.89      0.88      0.88      9131
weighted avg       0.89      0.88      0.88      9131

[0.86410788 0.73253012 0.8628649  0.88111298 0.91874181 0.97548666
 0.89259259]
VALID ACC : 0.8784360968130545, VALID LOSS : 0.8048730921912026
{'epoch': 3, 'train_loss': 1.1155168553471775, 'train_acc': 0.7330723105988007, 'valid_acc': 0.8784360968130545, 'val_loss': 0.8048730921912026, 'learning_rate': 1e-05}
saving model ...
Start Training: Epoch 4


Training:   0%|          | 1/1142 [00:00<08:24,  2.26it/s]

Training steps: 0 Loss: 1.1743820905685425


Training:   9%|▉         | 101/1142 [00:42<07:16,  2.38it/s]

Training steps: 100 Loss: 1.2357901334762573


Training:  18%|█▊        | 201/1142 [01:24<06:28,  2.42it/s]

Training steps: 200 Loss: 1.263724684715271


Training:  26%|██▋       | 301/1142 [02:06<05:44,  2.44it/s]

Training steps: 300 Loss: 1.2048155069351196


Training:  35%|███▌      | 401/1142 [02:49<05:35,  2.21it/s]

Training steps: 400 Loss: 1.073651909828186


Training:  44%|████▍     | 501/1142 [03:31<04:29,  2.38it/s]

Training steps: 500 Loss: 1.1151049137115479


Training:  53%|█████▎    | 601/1142 [04:13<03:42,  2.43it/s]

Training steps: 600 Loss: 1.1813868284225464


Training:  61%|██████▏   | 701/1142 [04:55<03:01,  2.43it/s]

Training steps: 700 Loss: 1.1740199327468872


Training:  70%|███████   | 801/1142 [05:37<02:33,  2.22it/s]

Training steps: 800 Loss: 1.0878238677978516


Training:  79%|███████▉  | 901/1142 [06:19<01:41,  2.39it/s]

Training steps: 900 Loss: 1.0186835527420044


Training:  88%|████████▊ | 1001/1142 [07:01<00:57,  2.43it/s]

Training steps: 1000 Loss: 1.0756498575210571


Training:  96%|█████████▋| 1101/1142 [07:44<00:16,  2.45it/s]

Training steps: 1100 Loss: 1.032510757446289


Training: 100%|██████████| 1142/1142 [08:01<00:00,  2.37it/s]

TRAIN ACC : 0.746296854037182, TRAIN LOSS : 1.1015532960929302



Training:   1%|          | 2/286 [00:00<00:41,  6.83it/s]

Validation steps: 0 Loss: 0.7962388396263123


Training:  36%|███▌      | 102/286 [00:13<00:25,  7.36it/s]

Validation steps: 100 Loss: 0.7888989448547363


Training:  71%|███████   | 202/286 [00:27<00:11,  7.33it/s]

Validation steps: 200 Loss: 0.6248399019241333


Training: 100%|██████████| 286/286 [00:38<00:00,  7.34it/s]


              precision    recall  f1-score   support

        IT과학       0.82      0.88      0.85       964
          경제       0.90      0.81      0.85      1245
          사회       0.79      0.78      0.78      1473
        생활문화       0.91      0.88      0.90      1186
          세계       0.92      0.92      0.92      1526
         스포츠       0.95      0.98      0.97      1387
          정치       0.90      0.93      0.92      1350

    accuracy                           0.89      9131
   macro avg       0.88      0.88      0.88      9131
weighted avg       0.89      0.89      0.89      9131

[0.88174274 0.81124498 0.78411405 0.8836425  0.92070773 0.98197549
 0.92962963]
VALID ACC : 0.8854451867265359, VALID LOSS : 0.8016892625735357
{'epoch': 4, 'train_loss': 1.1015532960929302, 'train_acc': 0.746296854037182, 'valid_acc': 0.8854451867265359, 'val_loss': 0.8016892625735357, 'learning_rate': 1e-05}
saving model ...
Start Training: Epoch 5


Training:   0%|          | 1/1142 [00:00<08:27,  2.25it/s]

Training steps: 0 Loss: 1.0463120937347412


Training:   9%|▉         | 101/1142 [00:42<07:16,  2.39it/s]

Training steps: 100 Loss: 1.0632212162017822


Training:  18%|█▊        | 201/1142 [01:24<06:27,  2.43it/s]

Training steps: 200 Loss: 1.1676751375198364


Training:  26%|██▋       | 301/1142 [02:06<05:44,  2.44it/s]

Training steps: 300 Loss: 0.9275772571563721


Training:  35%|███▌      | 401/1142 [02:49<05:35,  2.21it/s]

Training steps: 400 Loss: 1.093467354774475


Training:  44%|████▍     | 501/1142 [03:31<04:30,  2.37it/s]

Training steps: 500 Loss: 0.9241899251937866


Training:  53%|█████▎    | 601/1142 [04:13<03:44,  2.41it/s]

Training steps: 600 Loss: 1.1131376028060913


Training:  61%|██████▏   | 701/1142 [04:56<03:01,  2.43it/s]

Training steps: 700 Loss: 0.8893595337867737


Training:  70%|███████   | 801/1142 [05:38<02:34,  2.20it/s]

Training steps: 800 Loss: 1.0275583267211914


Training:  79%|███████▉  | 901/1142 [06:21<01:41,  2.37it/s]

Training steps: 900 Loss: 1.0968358516693115


Training:  88%|████████▊ | 1001/1142 [07:03<00:58,  2.42it/s]

Training steps: 1000 Loss: 1.0930650234222412


Training:  96%|█████████▋| 1101/1142 [07:45<00:16,  2.43it/s]

Training steps: 1100 Loss: 1.1815710067749023


Training: 100%|██████████| 1142/1142 [08:03<00:00,  2.36it/s]

TRAIN ACC : 0.7498836349697451, TRAIN LOSS : 1.090719650902389



Training:   1%|          | 2/286 [00:00<00:41,  6.82it/s]

Validation steps: 0 Loss: 0.8056249618530273


Training:  36%|███▌      | 102/286 [00:14<00:25,  7.20it/s]

Validation steps: 100 Loss: 0.7873870730400085


Training:  71%|███████   | 202/286 [00:28<00:11,  7.13it/s]

Validation steps: 200 Loss: 0.619070827960968


Training: 100%|██████████| 286/286 [00:39<00:00,  7.19it/s]


              precision    recall  f1-score   support

        IT과학       0.83      0.87      0.85       964
          경제       0.86      0.84      0.85      1245
          사회       0.79      0.81      0.80      1473
        생활문화       0.91      0.89      0.90      1186
          세계       0.93      0.91      0.92      1526
         스포츠       0.96      0.98      0.97      1387
          정치       0.93      0.91      0.92      1350

    accuracy                           0.89      9131
   macro avg       0.89      0.89      0.89      9131
weighted avg       0.89      0.89      0.89      9131

[0.86618257 0.84497992 0.8071962  0.89123103 0.9095675  0.97692862
 0.90666667]
VALID ACC : 0.8870879421750082, VALID LOSS : 0.800721858556454
{'epoch': 5, 'train_loss': 1.090719650902389, 'train_acc': 0.7498836349697451, 'valid_acc': 0.8870879421750082, 'val_loss': 0.800721858556454, 'learning_rate': 1e-05}
saving model ...
Start Training: Epoch 6


Training:   0%|          | 1/1142 [00:00<08:17,  2.29it/s]

Training steps: 0 Loss: 1.1588844060897827


Training:   9%|▉         | 101/1142 [00:42<07:18,  2.37it/s]

Training steps: 100 Loss: 1.035374641418457


Training:  18%|█▊        | 201/1142 [01:25<06:28,  2.42it/s]

Training steps: 200 Loss: 1.1557896137237549


Training:  26%|██▋       | 301/1142 [02:07<05:45,  2.44it/s]

Training steps: 300 Loss: 1.017505168914795


Training:  35%|███▌      | 401/1142 [02:49<05:37,  2.20it/s]

Training steps: 400 Loss: 1.086267352104187


Training:  44%|████▍     | 501/1142 [03:32<04:30,  2.37it/s]

Training steps: 500 Loss: 1.097821831703186


Training:  53%|█████▎    | 601/1142 [04:14<03:43,  2.42it/s]

Training steps: 600 Loss: 1.1179641485214233


Training:  61%|██████▏   | 701/1142 [04:56<03:01,  2.43it/s]

Training steps: 700 Loss: 1.1089836359024048


Training:  70%|███████   | 801/1142 [05:39<02:35,  2.20it/s]

Training steps: 800 Loss: 1.0456432104110718


Training:  79%|███████▉  | 901/1142 [06:21<01:41,  2.38it/s]

Training steps: 900 Loss: 1.1370984315872192


Training:  88%|████████▊ | 1001/1142 [07:04<00:58,  2.42it/s]

Training steps: 1000 Loss: 1.2088385820388794


Training:  96%|█████████▋| 1101/1142 [07:46<00:16,  2.43it/s]

Training steps: 1100 Loss: 1.149086356163025


Training: 100%|██████████| 1142/1142 [08:04<00:00,  2.36it/s]

TRAIN ACC : 0.7570298168277524, TRAIN LOSS : 1.0822543716368032



Training:   1%|          | 2/286 [00:00<00:41,  6.83it/s]

Validation steps: 0 Loss: 0.7666959762573242


Training:  36%|███▌      | 102/286 [00:14<00:25,  7.18it/s]

Validation steps: 100 Loss: 0.7758896350860596


Training:  71%|███████   | 202/286 [00:28<00:11,  7.16it/s]

Validation steps: 200 Loss: 0.6204308867454529


Training: 100%|██████████| 286/286 [00:39<00:00,  7.18it/s]

              precision    recall  f1-score   support

        IT과학       0.84      0.84      0.84       964
          경제       0.89      0.81      0.85      1245
          사회       0.74      0.84      0.78      1473
        생활문화       0.92      0.86      0.89      1186
          세계       0.92      0.92      0.92      1526
         스포츠       0.96      0.98      0.97      1387
          정치       0.93      0.90      0.91      1350

    accuracy                           0.88      9131
   macro avg       0.89      0.88      0.88      9131
weighted avg       0.89      0.88      0.88      9131

[0.84024896 0.81445783 0.8377461  0.85834739 0.92201835 0.97909156
 0.89925926]
VALID ACC : 0.8821596758295915, VALID LOSS : 0.7953488958048653
{'epoch': 6, 'train_loss': 1.0822543716368032, 'train_acc': 0.7570298168277524, 'valid_acc': 0.8821596758295915, 'val_loss': 0.7953488958048653, 'learning_rate': 1e-05}
Start Training: Epoch 7



Training:   0%|          | 1/1142 [00:00<08:01,  2.37it/s]

Training steps: 0 Loss: 1.0734553337097168


Training:   9%|▉         | 101/1142 [00:42<07:19,  2.37it/s]

Training steps: 100 Loss: 1.103520393371582


Training:  18%|█▊        | 201/1142 [01:25<06:29,  2.42it/s]

Training steps: 200 Loss: 1.0085625648498535


Training:  26%|██▋       | 301/1142 [02:07<05:46,  2.42it/s]

Training steps: 300 Loss: 0.9164629578590393


Training:  35%|███▌      | 401/1142 [02:50<05:38,  2.19it/s]

Training steps: 400 Loss: 1.0834860801696777


Training:  44%|████▍     | 501/1142 [03:32<04:30,  2.37it/s]

Training steps: 500 Loss: 1.1439541578292847


Training:  53%|█████▎    | 601/1142 [04:14<03:44,  2.40it/s]

Training steps: 600 Loss: 1.105981707572937


Training:  61%|██████▏   | 701/1142 [04:57<03:01,  2.43it/s]

Training steps: 700 Loss: 1.002479910850525


Training:  70%|███████   | 801/1142 [05:39<02:35,  2.20it/s]

Training steps: 800 Loss: 1.0872485637664795


Training:  79%|███████▉  | 901/1142 [06:22<01:41,  2.37it/s]

Training steps: 900 Loss: 1.0791549682617188


Training:  88%|████████▊ | 1001/1142 [07:04<00:58,  2.41it/s]

Training steps: 1000 Loss: 0.9709848165512085


Training:  96%|█████████▋| 1101/1142 [07:46<00:16,  2.42it/s]

Training steps: 1100 Loss: 0.9999435544013977


Training: 100%|██████████| 1142/1142 [08:04<00:00,  2.36it/s]

TRAIN ACC : 0.7636283985433836, TRAIN LOSS : 1.0734047904323571



Training:   1%|          | 2/286 [00:00<00:42,  6.75it/s]

Validation steps: 0 Loss: 0.7681818008422852


Training:  36%|███▌      | 102/286 [00:14<00:25,  7.18it/s]

Validation steps: 100 Loss: 0.8018758296966553


Training:  71%|███████   | 202/286 [00:28<00:11,  7.17it/s]

Validation steps: 200 Loss: 0.6096216440200806


Training: 100%|██████████| 286/286 [00:39<00:00,  7.17it/s]

              precision    recall  f1-score   support

        IT과학       0.82      0.87      0.85       964
          경제       0.89      0.81      0.85      1245
          사회       0.76      0.81      0.78      1473
        생활문화       0.91      0.88      0.89      1186
          세계       0.92      0.92      0.92      1526
         스포츠       0.97      0.97      0.97      1387
          정치       0.92      0.90      0.91      1350

    accuracy                           0.88      9131
   macro avg       0.88      0.88      0.88      9131
weighted avg       0.88      0.88      0.88      9131

[0.87033195 0.80803213 0.81398506 0.88026981 0.92398427 0.96899784
 0.90148148]
VALID ACC : 0.8825977439491841, VALID LOSS : 0.7990786918810198
{'epoch': 7, 'train_loss': 1.0734047904323571, 'train_acc': 0.7636283985433836, 'valid_acc': 0.8825977439491841, 'val_loss': 0.7990786918810198, 'learning_rate': 1e-05}
Start Training: Epoch 8



Training:   0%|          | 1/1142 [00:00<08:01,  2.37it/s]

Training steps: 0 Loss: 0.972973108291626


Training:   9%|▉         | 101/1142 [00:42<07:20,  2.36it/s]

Training steps: 100 Loss: 1.0224263668060303


Training:  18%|█▊        | 201/1142 [01:25<06:29,  2.42it/s]

Training steps: 200 Loss: 1.1639305353164673


Training:  26%|██▋       | 301/1142 [02:07<05:46,  2.43it/s]

Training steps: 300 Loss: 1.1446619033813477


Training:  35%|███▌      | 401/1142 [02:50<05:38,  2.19it/s]

Training steps: 400 Loss: 1.1006888151168823


Training:  44%|████▍     | 501/1142 [03:32<04:30,  2.37it/s]

Training steps: 500 Loss: 1.0540704727172852


Training:  53%|█████▎    | 601/1142 [04:14<03:44,  2.41it/s]

Training steps: 600 Loss: 1.101085901260376


Training:  61%|██████▏   | 701/1142 [04:57<03:01,  2.42it/s]

Training steps: 700 Loss: 1.1319208145141602


Training:  70%|███████   | 801/1142 [05:39<02:35,  2.19it/s]

Training steps: 800 Loss: 0.9222059845924377


Training:  79%|███████▉  | 901/1142 [06:22<01:41,  2.37it/s]

Training steps: 900 Loss: 1.1180853843688965


Training:  88%|████████▊ | 1001/1142 [07:04<00:58,  2.42it/s]

Training steps: 1000 Loss: 1.0729901790618896


Training:  96%|█████████▋| 1101/1142 [07:46<00:16,  2.43it/s]

Training steps: 1100 Loss: 1.0492016077041626


Training: 100%|██████████| 1142/1142 [08:04<00:00,  2.36it/s]

TRAIN ACC : 0.7641486186786408, TRAIN LOSS : 1.069763832355339



Training:   1%|          | 2/286 [00:00<00:43,  6.60it/s]

Validation steps: 0 Loss: 0.7771922945976257


Training:  36%|███▌      | 102/286 [00:14<00:25,  7.13it/s]

Validation steps: 100 Loss: 0.8033353090286255


Training:  71%|███████   | 202/286 [00:28<00:11,  7.21it/s]

Validation steps: 200 Loss: 0.6268274188041687


Training: 100%|██████████| 286/286 [00:39<00:00,  7.18it/s]

              precision    recall  f1-score   support

        IT과학       0.86      0.81      0.84       964
          경제       0.87      0.83      0.85      1245
          사회       0.74      0.83      0.79      1473
        생활문화       0.91      0.88      0.89      1186
          세계       0.93      0.91      0.92      1526
         스포츠       0.96      0.98      0.97      1387
          정치       0.92      0.91      0.91      1350

    accuracy                           0.88      9131
   macro avg       0.89      0.88      0.88      9131
weighted avg       0.89      0.88      0.88      9131

[0.80809129 0.83212851 0.83299389 0.8777403  0.9102228  0.98341745
 0.90666667]
VALID ACC : 0.8827072609790823, VALID LOSS : 0.8027889322150837
{'epoch': 8, 'train_loss': 1.069763832355339, 'train_acc': 0.7641486186786408, 'valid_acc': 0.8827072609790823, 'val_loss': 0.8027889322150837, 'learning_rate': 1e-05}
Start Training: Epoch 9



Training:   0%|          | 1/1142 [00:00<08:00,  2.38it/s]

Training steps: 0 Loss: 1.1330548524856567


Training:   9%|▉         | 101/1142 [00:42<07:19,  2.37it/s]

Training steps: 100 Loss: 1.0253126621246338


Training:  18%|█▊        | 201/1142 [01:25<06:31,  2.41it/s]

Training steps: 200 Loss: 0.983864426612854


Training:  26%|██▋       | 301/1142 [02:07<05:46,  2.43it/s]

Training steps: 300 Loss: 1.0987050533294678


Training:  35%|███▌      | 401/1142 [02:50<05:36,  2.20it/s]

Training steps: 400 Loss: 1.0046327114105225


Training:  44%|████▍     | 501/1142 [03:32<04:30,  2.37it/s]

Training steps: 500 Loss: 1.0978975296020508


Training:  53%|█████▎    | 601/1142 [04:14<03:43,  2.42it/s]

Training steps: 600 Loss: 1.0502091646194458


Training:  61%|██████▏   | 701/1142 [04:57<03:01,  2.42it/s]

Training steps: 700 Loss: 1.112976312637329


Training:  70%|███████   | 801/1142 [05:39<02:35,  2.20it/s]

Training steps: 800 Loss: 1.0081521272659302


Training:  79%|███████▉  | 901/1142 [06:22<01:41,  2.37it/s]

Training steps: 900 Loss: 0.9739266633987427


Training:  88%|████████▊ | 1001/1142 [07:04<00:58,  2.41it/s]

Training steps: 1000 Loss: 1.1951160430908203


Training:  96%|█████████▋| 1101/1142 [07:46<00:16,  2.43it/s]

Training steps: 1100 Loss: 1.102128267288208


Training: 100%|██████████| 1142/1142 [08:04<00:00,  2.36it/s]

TRAIN ACC : 0.772143580757331, TRAIN LOSS : 1.0644458396631031



Training:   1%|          | 2/286 [00:00<00:41,  6.80it/s]

Validation steps: 0 Loss: 0.7482934594154358


Training:  36%|███▌      | 102/286 [00:14<00:25,  7.17it/s]

Validation steps: 100 Loss: 0.759438157081604


Training:  71%|███████   | 202/286 [00:28<00:11,  7.17it/s]

Validation steps: 200 Loss: 0.6081452965736389


Training: 100%|██████████| 286/286 [00:39<00:00,  7.17it/s]


              precision    recall  f1-score   support

        IT과학       0.82      0.89      0.85       964
          경제       0.87      0.84      0.85      1245
          사회       0.81      0.77      0.79      1473
        생활문화       0.92      0.88      0.90      1186
          세계       0.92      0.93      0.92      1526
         스포츠       0.96      0.98      0.97      1387
          정치       0.91      0.93      0.92      1350

    accuracy                           0.89      9131
   macro avg       0.89      0.89      0.89      9131
weighted avg       0.89      0.89      0.89      9131

[0.88692946 0.83855422 0.766463   0.8836425  0.93119266 0.98053353
 0.92888889]
VALID ACC : 0.8882926295038879, VALID LOSS : 0.8087816036247707
{'epoch': 9, 'train_loss': 1.0644458396631031, 'train_acc': 0.772143580757331, 'valid_acc': 0.8882926295038879, 'val_loss': 0.8087816036247707, 'learning_rate': 1e-05}
saving model ...
Start Training: Epoch 10


Training:   0%|          | 1/1142 [00:00<08:33,  2.22it/s]

Training steps: 0 Loss: 1.030562162399292


Training:   9%|▉         | 101/1142 [00:42<07:18,  2.38it/s]

Training steps: 100 Loss: 1.063747763633728


Training:  18%|█▊        | 201/1142 [01:25<06:29,  2.42it/s]

Training steps: 200 Loss: 1.0243934392929077


Training:  26%|██▋       | 301/1142 [02:07<05:45,  2.43it/s]

Training steps: 300 Loss: 0.9812730550765991


Training:  35%|███▌      | 401/1142 [02:49<05:37,  2.20it/s]

Training steps: 400 Loss: 1.1013412475585938


Training:  44%|████▍     | 501/1142 [03:32<04:30,  2.37it/s]

Training steps: 500 Loss: 1.1619879007339478


Training:  53%|█████▎    | 601/1142 [04:14<03:43,  2.42it/s]

Training steps: 600 Loss: 1.1101698875427246


Training:  61%|██████▏   | 701/1142 [04:56<03:01,  2.43it/s]

Training steps: 700 Loss: 0.9863836169242859


Training:  70%|███████   | 801/1142 [05:39<02:35,  2.20it/s]

Training steps: 800 Loss: 1.1789098978042603


Training:  79%|███████▉  | 901/1142 [06:21<01:41,  2.37it/s]

Training steps: 900 Loss: 1.064386248588562


Training:  88%|████████▊ | 1001/1142 [07:03<00:58,  2.42it/s]

Training steps: 1000 Loss: 1.107987880706787


Training:  96%|█████████▋| 1101/1142 [07:46<00:16,  2.43it/s]

Training steps: 1100 Loss: 1.0039373636245728


Training: 100%|██████████| 1142/1142 [08:03<00:00,  2.36it/s]

TRAIN ACC : 0.7764970018892204, TRAIN LOSS : 1.05566188098044



Training:   1%|          | 2/286 [00:00<00:43,  6.49it/s]

Validation steps: 0 Loss: 0.7531682252883911


Training:  36%|███▌      | 102/286 [00:14<00:25,  7.24it/s]

Validation steps: 100 Loss: 0.8522611856460571


Training:  71%|███████   | 202/286 [00:28<00:11,  7.17it/s]

Validation steps: 200 Loss: 0.6026722192764282


Training: 100%|██████████| 286/286 [00:39<00:00,  7.19it/s]

              precision    recall  f1-score   support

        IT과학       0.80      0.90      0.85       964
          경제       0.90      0.79      0.84      1245
          사회       0.75      0.81      0.78      1473
        생활문화       0.93      0.85      0.89      1186
          세계       0.92      0.92      0.92      1526
         스포츠       0.96      0.98      0.97      1387
          정치       0.92      0.92      0.92      1350

    accuracy                           0.88      9131
   macro avg       0.88      0.88      0.88      9131
weighted avg       0.88      0.88      0.88      9131

[0.89522822 0.78955823 0.81126952 0.84907251 0.92070773 0.9776496
 0.92074074]
VALID ACC : 0.881831124739897, VALID LOSS : 0.8074466210978848
{'epoch': 10, 'train_loss': 1.05566188098044, 'train_acc': 0.7764970018892204, 'valid_acc': 0.881831124739897, 'val_loss': 0.8074466210978848, 'learning_rate': 1e-05}
Start Training: Epoch 11



Training:   0%|          | 1/1142 [00:00<08:01,  2.37it/s]

Training steps: 0 Loss: 1.033729076385498


Training:   9%|▉         | 101/1142 [00:42<07:19,  2.37it/s]

Training steps: 100 Loss: 1.0129663944244385


Training:  18%|█▊        | 201/1142 [01:25<06:29,  2.42it/s]

Training steps: 200 Loss: 1.1438807249069214


Training:  26%|██▋       | 301/1142 [02:07<05:46,  2.43it/s]

Training steps: 300 Loss: 1.149131417274475


Training:  35%|███▌      | 401/1142 [02:49<05:34,  2.21it/s]

Training steps: 400 Loss: 1.032927393913269


Training:  44%|████▍     | 501/1142 [03:32<04:29,  2.37it/s]

Training steps: 500 Loss: 0.9179176688194275


Training:  53%|█████▎    | 601/1142 [04:14<03:44,  2.41it/s]

Training steps: 600 Loss: 1.064989447593689


Training:  61%|██████▏   | 701/1142 [04:56<03:01,  2.43it/s]

Training steps: 700 Loss: 0.9332851767539978


Training:  70%|███████   | 801/1142 [05:39<02:34,  2.20it/s]

Training steps: 800 Loss: 1.0120782852172852


Training:  79%|███████▉  | 901/1142 [06:21<01:41,  2.37it/s]

Training steps: 900 Loss: 1.0697624683380127


Training:  88%|████████▊ | 1001/1142 [07:03<00:58,  2.42it/s]

Training steps: 1000 Loss: 1.0166188478469849


Training:  96%|█████████▋| 1101/1142 [07:46<00:16,  2.43it/s]

Training steps: 1100 Loss: 0.8754125237464905


Training: 100%|██████████| 1142/1142 [08:03<00:00,  2.36it/s]

TRAIN ACC : 0.7823837034197629, TRAIN LOSS : 1.0497855327488452



Training:   1%|          | 2/286 [00:00<00:42,  6.72it/s]

Validation steps: 0 Loss: 0.7383372783660889


Training:  36%|███▌      | 102/286 [00:14<00:25,  7.25it/s]

Validation steps: 100 Loss: 0.761714518070221


Training:  71%|███████   | 202/286 [00:28<00:11,  7.17it/s]

Validation steps: 200 Loss: 0.6043869256973267


Training: 100%|██████████| 286/286 [00:39<00:00,  7.19it/s]

              precision    recall  f1-score   support

        IT과학       0.82      0.88      0.85       964
          경제       0.88      0.81      0.84      1245
          사회       0.78      0.80      0.79      1473
        생활문화       0.92      0.87      0.89      1186
          세계       0.91      0.94      0.92      1526
         스포츠       0.96      0.98      0.97      1387
          정치       0.92      0.91      0.92      1350

    accuracy                           0.88      9131
   macro avg       0.88      0.88      0.88      9131
weighted avg       0.89      0.88      0.88      9131

[0.88278008 0.80562249 0.80108622 0.86677909 0.93577982 0.97837058
 0.90962963]
VALID ACC : 0.8843500164275545, VALID LOSS : 0.8103692848365623
{'epoch': 11, 'train_loss': 1.0497855327488452, 'train_acc': 0.7823837034197629, 'valid_acc': 0.8843500164275545, 'val_loss': 0.8103692848365623, 'learning_rate': 1e-05}
Start Training: Epoch 12



Training:   0%|          | 1/1142 [00:00<08:01,  2.37it/s]

Training steps: 0 Loss: 0.9807751178741455


Training:   9%|▉         | 101/1142 [00:42<07:19,  2.37it/s]

Training steps: 100 Loss: 1.0645408630371094


Training:  18%|█▊        | 201/1142 [01:25<06:28,  2.42it/s]

Training steps: 200 Loss: 1.0496909618377686


Training:  26%|██▋       | 301/1142 [02:07<05:47,  2.42it/s]

Training steps: 300 Loss: 1.0816601514816284


Training:  35%|███▌      | 401/1142 [02:49<05:36,  2.20it/s]

Training steps: 400 Loss: 1.2454525232315063


Training:  44%|████▍     | 501/1142 [03:32<04:30,  2.37it/s]

Training steps: 500 Loss: 1.2284554243087769


Training:  53%|█████▎    | 601/1142 [04:14<03:43,  2.42it/s]

Training steps: 600 Loss: 0.9829025268554688


Training:  61%|██████▏   | 701/1142 [04:56<03:01,  2.43it/s]

Training steps: 700 Loss: 1.0414425134658813


Training:  70%|███████   | 801/1142 [05:39<02:35,  2.20it/s]

Training steps: 800 Loss: 1.0402417182922363


Training:  79%|███████▉  | 901/1142 [06:21<01:41,  2.37it/s]

Training steps: 900 Loss: 1.0681551694869995


Training:  88%|████████▊ | 1001/1142 [07:03<00:58,  2.43it/s]

Training steps: 1000 Loss: 1.1660051345825195


Training:  96%|█████████▋| 1101/1142 [07:46<00:16,  2.43it/s]

Training steps: 1100 Loss: 1.0105353593826294


Training: 100%|██████████| 1142/1142 [08:03<00:00,  2.36it/s]

TRAIN ACC : 0.7788790625085562, TRAIN LOSS : 1.0520946296313598



Training:   1%|          | 2/286 [00:00<00:41,  6.77it/s]

Validation steps: 0 Loss: 0.8062033653259277


Training:  36%|███▌      | 102/286 [00:14<00:25,  7.15it/s]

Validation steps: 100 Loss: 0.7619812488555908


Training:  71%|███████   | 202/286 [00:28<00:11,  7.25it/s]

Validation steps: 200 Loss: 0.59935462474823


Training: 100%|██████████| 286/286 [00:39<00:00,  7.19it/s]

              precision    recall  f1-score   support

        IT과학       0.80      0.90      0.85       964
          경제       0.88      0.82      0.85      1245
          사회       0.78      0.81      0.79      1473
        생활문화       0.90      0.89      0.89      1186
          세계       0.93      0.91      0.92      1526
         스포츠       0.96      0.97      0.97      1387
          정치       0.93      0.91      0.92      1350

    accuracy                           0.89      9131
   macro avg       0.88      0.89      0.88      9131
weighted avg       0.89      0.89      0.89      9131

[0.89522822 0.82409639 0.80651731 0.88701518 0.9102228  0.97188176
 0.90518519]
VALID ACC : 0.8857737378162305, VALID LOSS : 0.8096491838251794
{'epoch': 12, 'train_loss': 1.0520946296313598, 'train_acc': 0.7788790625085562, 'valid_acc': 0.8857737378162305, 'val_loss': 0.8096491838251794, 'learning_rate': 1e-05}
Start Training: Epoch 13



Training:   0%|          | 1/1142 [00:00<08:02,  2.36it/s]

Training steps: 0 Loss: 0.9804088473320007


Training:   9%|▉         | 101/1142 [00:42<07:19,  2.37it/s]

Training steps: 100 Loss: 0.957246720790863


Training:  18%|█▊        | 201/1142 [01:25<06:28,  2.42it/s]

Training steps: 200 Loss: 1.1194376945495605


Training:  26%|██▋       | 301/1142 [02:07<05:45,  2.43it/s]

Training steps: 300 Loss: 1.2764034271240234


Training:  35%|███▌      | 401/1142 [02:49<05:36,  2.20it/s]

Training steps: 400 Loss: 0.873013436794281


Training:  44%|████▍     | 501/1142 [03:32<04:30,  2.37it/s]

Training steps: 500 Loss: 1.0343976020812988


Training:  53%|█████▎    | 601/1142 [04:14<03:43,  2.42it/s]

Training steps: 600 Loss: 0.9848511219024658


Training:  61%|██████▏   | 701/1142 [04:56<03:01,  2.43it/s]

Training steps: 700 Loss: 0.9204756021499634


Training:  70%|███████   | 801/1142 [05:39<02:34,  2.21it/s]

Training steps: 800 Loss: 1.0030795335769653


Training:  79%|███████▉  | 901/1142 [06:21<01:41,  2.38it/s]

Training steps: 900 Loss: 1.1211520433425903


Training:  88%|████████▊ | 1001/1142 [07:03<00:58,  2.42it/s]

Training steps: 1000 Loss: 1.0346838235855103


Training:  96%|█████████▋| 1101/1142 [07:46<00:16,  2.43it/s]

Training steps: 1100 Loss: 1.081790804862976


Training: 100%|██████████| 1142/1142 [08:03<00:00,  2.36it/s]

TRAIN ACC : 0.7846288640035046, TRAIN LOSS : 1.0458138272453315



Training:   1%|          | 2/286 [00:00<00:42,  6.72it/s]

Validation steps: 0 Loss: 0.7459473609924316


Training:  36%|███▌      | 102/286 [00:14<00:25,  7.18it/s]

Validation steps: 100 Loss: 0.8035522699356079


Training:  71%|███████   | 202/286 [00:28<00:11,  7.12it/s]

Validation steps: 200 Loss: 0.5986723303794861


Training: 100%|██████████| 286/286 [00:39<00:00,  7.20it/s]

              precision    recall  f1-score   support

        IT과학       0.83      0.88      0.85       964
          경제       0.90      0.78      0.84      1245
          사회       0.76      0.80      0.78      1473
        생활문화       0.89      0.89      0.89      1186
          세계       0.91      0.92      0.92      1526
         스포츠       0.96      0.98      0.97      1387
          정치       0.91      0.91      0.91      1350

    accuracy                           0.88      9131
   macro avg       0.88      0.88      0.88      9131
weighted avg       0.88      0.88      0.88      9131

[0.88070539 0.7815261  0.79837067 0.8920742  0.92398427 0.97548666
 0.90666667]
VALID ACC : 0.8808454714708137, VALID LOSS : 0.8131719352482082
{'epoch': 13, 'train_loss': 1.0458138272453315, 'train_acc': 0.7846288640035046, 'valid_acc': 0.8808454714708137, 'val_loss': 0.8131719352482082, 'learning_rate': 1e-05}
Start Training: Epoch 14



Training:   0%|          | 1/1142 [00:00<08:02,  2.36it/s]

Training steps: 0 Loss: 1.0637738704681396


Training:   9%|▉         | 101/1142 [00:42<07:20,  2.37it/s]

Training steps: 100 Loss: 1.0888335704803467


Training:  18%|█▊        | 201/1142 [01:25<06:29,  2.42it/s]

Training steps: 200 Loss: 1.124024748802185


Training:  26%|██▋       | 301/1142 [02:07<05:46,  2.43it/s]

Training steps: 300 Loss: 1.0076011419296265


Training:  35%|███▌      | 401/1142 [02:49<05:35,  2.21it/s]

Training steps: 400 Loss: 0.962125301361084


Training:  44%|████▍     | 501/1142 [03:32<04:30,  2.37it/s]

Training steps: 500 Loss: 0.9933835864067078


Training:  53%|█████▎    | 601/1142 [04:14<03:43,  2.42it/s]

Training steps: 600 Loss: 1.2597754001617432


Training:  61%|██████▏   | 701/1142 [04:56<03:01,  2.43it/s]

Training steps: 700 Loss: 1.0847454071044922


Training:  70%|███████   | 801/1142 [05:39<02:35,  2.20it/s]

Training steps: 800 Loss: 0.9866904020309448


Training:  79%|███████▉  | 901/1142 [06:21<01:41,  2.37it/s]

Training steps: 900 Loss: 1.1173228025436401


Training:  88%|████████▊ | 1001/1142 [07:04<00:58,  2.42it/s]

Training steps: 1000 Loss: 1.0596609115600586


Training:  96%|█████████▋| 1101/1142 [07:46<00:16,  2.43it/s]

Training steps: 1100 Loss: 0.9579680562019348


Training: 100%|██████████| 1142/1142 [08:03<00:00,  2.36it/s]

TRAIN ACC : 0.7905977055554034, TRAIN LOSS : 1.043323792895586



Training:   1%|          | 2/286 [00:00<00:42,  6.66it/s]

Validation steps: 0 Loss: 0.7821627259254456


Training:  36%|███▌      | 102/286 [00:14<00:25,  7.16it/s]

Validation steps: 100 Loss: 0.7230144143104553


Training:  71%|███████   | 202/286 [00:28<00:11,  7.16it/s]

Validation steps: 200 Loss: 0.5975745916366577


Training: 100%|██████████| 286/286 [00:39<00:00,  7.18it/s]


              precision    recall  f1-score   support

        IT과학       0.83      0.86      0.84       964
          경제       0.86      0.82      0.84      1245
          사회       0.79      0.77      0.78      1473
        생활문화       0.89      0.90      0.89      1186
          세계       0.92      0.92      0.92      1526
         스포츠       0.96      0.98      0.97      1387
          정치       0.91      0.92      0.92      1350

    accuracy                           0.88      9131
   macro avg       0.88      0.88      0.88      9131
weighted avg       0.88      0.88      0.88      9131

[0.85684647 0.81767068 0.77325187 0.90134907 0.91874181 0.9776496
 0.92148148]
VALID ACC : 0.8820501587996934, VALID LOSS : 0.8228206982562593
{'epoch': 14, 'train_loss': 1.043323792895586, 'train_acc': 0.7905977055554034, 'valid_acc': 0.8820501587996934, 'val_loss': 0.8228206982562593, 'learning_rate': 1e-05}
EarlyStopping counter: 5 out of 5


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.decoder.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'cla

Start Training: Epoch 1


Training:   0%|          | 1/1142 [00:00<08:08,  2.34it/s]

Training steps: 0 Loss: 2.04337215423584


Training:   9%|▉         | 101/1142 [00:42<07:16,  2.38it/s]

Training steps: 100 Loss: 1.891959309577942


Training:  18%|█▊        | 201/1142 [01:24<06:27,  2.43it/s]

Training steps: 200 Loss: 1.8014508485794067


Training:  26%|██▋       | 301/1142 [02:07<05:46,  2.43it/s]

Training steps: 300 Loss: 1.7435201406478882


Training:  35%|███▌      | 401/1142 [02:49<05:36,  2.20it/s]

Training steps: 400 Loss: 1.4236177206039429


Training:  44%|████▍     | 501/1142 [03:31<04:30,  2.37it/s]

Training steps: 500 Loss: 1.444077730178833


Training:  53%|█████▎    | 601/1142 [04:14<03:43,  2.42it/s]

Training steps: 600 Loss: 1.1822260618209839


Training:  61%|██████▏   | 701/1142 [04:56<03:01,  2.43it/s]

Training steps: 700 Loss: 1.2450858354568481


Training:  70%|███████   | 801/1142 [05:38<02:35,  2.20it/s]

Training steps: 800 Loss: 1.131872534751892


Training:  79%|███████▉  | 901/1142 [06:21<01:41,  2.37it/s]

Training steps: 900 Loss: 1.098453164100647


Training:  88%|████████▊ | 1001/1142 [07:03<00:58,  2.41it/s]

Training steps: 1000 Loss: 1.210182785987854


Training:  96%|█████████▋| 1101/1142 [07:45<00:16,  2.44it/s]

Training steps: 1100 Loss: 1.2497633695602417


Training: 100%|██████████| 1142/1142 [08:03<00:00,  2.36it/s]

TRAIN ACC : 0.5361005393861402, TRAIN LOSS : 1.414874580664309



Training:   1%|          | 2/286 [00:00<00:42,  6.71it/s]

Validation steps: 0 Loss: 0.7334927320480347


Training:  36%|███▌      | 102/286 [00:14<00:25,  7.26it/s]

Validation steps: 100 Loss: 0.845985472202301


Training:  71%|███████   | 202/286 [00:27<00:11,  7.24it/s]

Validation steps: 200 Loss: 0.670418918132782


Training: 100%|██████████| 286/286 [00:39<00:00,  7.26it/s]


              precision    recall  f1-score   support

        IT과학       0.83      0.82      0.83       965
          경제       0.89      0.80      0.84      1245
          사회       0.77      0.78      0.78      1473
        생활문화       0.88      0.92      0.90      1186
          세계       0.93      0.94      0.93      1526
         스포츠       0.97      0.98      0.97      1386
          정치       0.91      0.93      0.92      1350

    accuracy                           0.88      9131
   macro avg       0.88      0.88      0.88      9131
weighted avg       0.88      0.88      0.88      9131

[0.82487047 0.8        0.78411405 0.91568297 0.93643512 0.97907648
 0.92666667]
VALID ACC : 0.8838024312780638, VALID LOSS : 0.8452551314880797
{'epoch': 1, 'train_loss': 1.414874580664309, 'train_acc': 0.5361005393861402, 'valid_acc': 0.8838024312780638, 'val_loss': 0.8452551314880797, 'learning_rate': 1e-05}
saving model ...
Start Training: Epoch 2


Training:   0%|          | 1/1142 [00:00<08:16,  2.30it/s]

Training steps: 0 Loss: 1.0517091751098633


Training:   9%|▉         | 101/1142 [00:42<07:16,  2.39it/s]

Training steps: 100 Loss: 1.193483591079712


Training:  18%|█▊        | 201/1142 [01:24<06:26,  2.43it/s]

Training steps: 200 Loss: 1.1765488386154175


Training:  26%|██▋       | 301/1142 [02:06<05:45,  2.43it/s]

Training steps: 300 Loss: 1.0059618949890137


Training:  35%|███▌      | 401/1142 [02:49<05:34,  2.21it/s]

Training steps: 400 Loss: 1.1042412519454956


Training:  44%|████▍     | 501/1142 [03:31<04:28,  2.39it/s]

Training steps: 500 Loss: 1.1242138147354126


Training:  53%|█████▎    | 601/1142 [04:13<03:42,  2.43it/s]

Training steps: 600 Loss: 1.1222668886184692


Training:  61%|██████▏   | 701/1142 [04:55<03:00,  2.45it/s]

Training steps: 700 Loss: 0.8667338490486145


Training:  70%|███████   | 801/1142 [05:37<02:35,  2.20it/s]

Training steps: 800 Loss: 1.2005517482757568


Training:  79%|███████▉  | 901/1142 [06:19<01:41,  2.38it/s]

Training steps: 900 Loss: 1.2476402521133423


Training:  88%|████████▊ | 1001/1142 [07:01<00:58,  2.43it/s]

Training steps: 1000 Loss: 1.2422596216201782


Training:  96%|█████████▋| 1101/1142 [07:43<00:16,  2.45it/s]

Training steps: 1100 Loss: 1.216932773590088


Training: 100%|██████████| 1142/1142 [08:01<00:00,  2.37it/s]

TRAIN ACC : 0.725022588505873, TRAIN LOSS : 1.1379954033056616



Training:   1%|          | 2/286 [00:00<00:41,  6.89it/s]

Validation steps: 0 Loss: 0.6999086737632751


Training:  36%|███▌      | 102/286 [00:14<00:25,  7.23it/s]

Validation steps: 100 Loss: 0.7557165622711182


Training:  71%|███████   | 202/286 [00:27<00:11,  7.24it/s]

Validation steps: 200 Loss: 0.6425376534461975


Training: 100%|██████████| 286/286 [00:39<00:00,  7.28it/s]


              precision    recall  f1-score   support

        IT과학       0.81      0.89      0.85       965
          경제       0.94      0.76      0.84      1245
          사회       0.75      0.84      0.79      1473
        생활문화       0.90      0.91      0.90      1186
          세계       0.94      0.94      0.94      1526
         스포츠       0.97      0.98      0.97      1386
          정치       0.93      0.90      0.92      1350

    accuracy                           0.89      9131
   macro avg       0.89      0.89      0.89      9131
weighted avg       0.89      0.89      0.89      9131

[0.89222798 0.75582329 0.84114053 0.91146712 0.93577982 0.98051948
 0.89925926]
VALID ACC : 0.8896068338626656, VALID LOSS : 0.8032071625852918
{'epoch': 2, 'train_loss': 1.1379954033056616, 'train_acc': 0.725022588505873, 'valid_acc': 0.8896068338626656, 'val_loss': 0.8032071625852918, 'learning_rate': 1e-05}
saving model ...
Start Training: Epoch 3


Training:   0%|          | 1/1142 [00:00<08:17,  2.30it/s]

Training steps: 0 Loss: 1.128079891204834


Training:   9%|▉         | 101/1142 [00:42<07:16,  2.39it/s]

Training steps: 100 Loss: 1.221089243888855


Training:  18%|█▊        | 201/1142 [01:24<06:27,  2.43it/s]

Training steps: 200 Loss: 1.026115894317627


Training:  26%|██▋       | 301/1142 [02:06<05:44,  2.44it/s]

Training steps: 300 Loss: 1.1310118436813354


Training:  35%|███▌      | 401/1142 [02:49<05:35,  2.21it/s]

Training steps: 400 Loss: 1.2171764373779297


Training:  44%|████▍     | 501/1142 [03:31<04:28,  2.39it/s]

Training steps: 500 Loss: 1.0862007141113281


Training:  53%|█████▎    | 601/1142 [04:13<03:42,  2.43it/s]

Training steps: 600 Loss: 1.0159238576889038


Training:  61%|██████▏   | 701/1142 [04:55<03:00,  2.44it/s]

Training steps: 700 Loss: 1.1429836750030518


Training:  70%|███████   | 801/1142 [05:37<02:34,  2.21it/s]

Training steps: 800 Loss: 1.3034683465957642


Training:  79%|███████▉  | 901/1142 [06:19<01:40,  2.39it/s]

Training steps: 900 Loss: 0.996216356754303


Training:  88%|████████▊ | 1001/1142 [07:01<00:58,  2.43it/s]

Training steps: 1000 Loss: 1.1212284564971924


Training:  96%|█████████▋| 1101/1142 [07:44<00:16,  2.44it/s]

Training steps: 1100 Loss: 1.1077032089233398


Training: 100%|██████████| 1142/1142 [08:01<00:00,  2.37it/s]

TRAIN ACC : 0.7319771103140487, TRAIN LOSS : 1.1183975330063007



Training:   1%|          | 2/286 [00:00<00:41,  6.76it/s]

Validation steps: 0 Loss: 0.7143439054489136


Training:  36%|███▌      | 102/286 [00:13<00:25,  7.36it/s]

Validation steps: 100 Loss: 0.7832702398300171


Training:  71%|███████   | 202/286 [00:27<00:11,  7.24it/s]

Validation steps: 200 Loss: 0.6274941563606262


Training: 100%|██████████| 286/286 [00:39<00:00,  7.29it/s]


              precision    recall  f1-score   support

        IT과학       0.82      0.90      0.86       965
          경제       0.87      0.84      0.85      1245
          사회       0.80      0.79      0.80      1473
        생활문화       0.89      0.92      0.90      1186
          세계       0.95      0.93      0.94      1526
         스포츠       0.97      0.98      0.98      1386
          정치       0.93      0.89      0.91      1350

    accuracy                           0.89      9131
   macro avg       0.89      0.89      0.89      9131
weighted avg       0.89      0.89      0.89      9131

[0.9015544  0.84016064 0.79497624 0.91821248 0.9266055  0.97979798
 0.88592593]
VALID ACC : 0.8919066914905268, VALID LOSS : 0.7937947294928811
{'epoch': 3, 'train_loss': 1.1183975330063007, 'train_acc': 0.7319771103140487, 'valid_acc': 0.8919066914905268, 'val_loss': 0.7937947294928811, 'learning_rate': 1e-05}
saving model ...
Start Training: Epoch 4


Training:   0%|          | 1/1142 [00:00<08:27,  2.25it/s]

Training steps: 0 Loss: 1.0883963108062744


Training:   9%|▉         | 101/1142 [00:42<07:16,  2.39it/s]

Training steps: 100 Loss: 1.1139711141586304


Training:  18%|█▊        | 201/1142 [01:24<06:26,  2.44it/s]

Training steps: 200 Loss: 1.0237903594970703


Training:  26%|██▋       | 301/1142 [02:06<05:43,  2.45it/s]

Training steps: 300 Loss: 1.1841024160385132


Training:  35%|███▌      | 401/1142 [02:49<05:34,  2.21it/s]

Training steps: 400 Loss: 1.0317105054855347


Training:  44%|████▍     | 501/1142 [03:31<04:28,  2.39it/s]

Training steps: 500 Loss: 1.0612924098968506


Training:  53%|█████▎    | 601/1142 [04:13<03:42,  2.43it/s]

Training steps: 600 Loss: 1.0702276229858398


Training:  61%|██████▏   | 701/1142 [04:55<03:00,  2.44it/s]

Training steps: 700 Loss: 1.119521141052246


Training:  70%|███████   | 801/1142 [05:37<02:34,  2.21it/s]

Training steps: 800 Loss: 0.9644703269004822


Training:  79%|███████▉  | 901/1142 [06:19<01:40,  2.39it/s]

Training steps: 900 Loss: 1.1015689373016357


Training:  88%|████████▊ | 1001/1142 [07:01<00:57,  2.43it/s]

Training steps: 1000 Loss: 1.146349310874939


Training:  96%|█████████▋| 1101/1142 [07:43<00:16,  2.44it/s]

Training steps: 1100 Loss: 1.1671653985977173


Training: 100%|██████████| 1142/1142 [08:01<00:00,  2.37it/s]

TRAIN ACC : 0.740766092599184, TRAIN LOSS : 1.104298890315505



Training:   1%|          | 2/286 [00:00<00:41,  6.90it/s]

Validation steps: 0 Loss: 0.6768312454223633


Training:  36%|███▌      | 102/286 [00:14<00:25,  7.25it/s]

Validation steps: 100 Loss: 0.7690995335578918


Training:  71%|███████   | 202/286 [00:27<00:11,  7.24it/s]

Validation steps: 200 Loss: 0.6219508647918701


Training: 100%|██████████| 286/286 [00:39<00:00,  7.28it/s]

              precision    recall  f1-score   support

        IT과학       0.83      0.87      0.85       965
          경제       0.91      0.80      0.85      1245
          사회       0.76      0.84      0.80      1473
        생활문화       0.91      0.91      0.91      1186
          세계       0.94      0.94      0.94      1526
         스포츠       0.97      0.98      0.97      1386
          정치       0.94      0.88      0.91      1350

    accuracy                           0.89      9131
   macro avg       0.89      0.89      0.89      9131
weighted avg       0.89      0.89      0.89      9131

[0.86632124 0.80481928 0.84046164 0.90978078 0.93709043 0.98268398
 0.88148148]
VALID ACC : 0.8911400722812397, VALID LOSS : 0.7854465006531536
{'epoch': 4, 'train_loss': 1.104298890315505, 'train_acc': 0.740766092599184, 'valid_acc': 0.8911400722812397, 'val_loss': 0.7854465006531536, 'learning_rate': 1e-05}
Start Training: Epoch 5



Training:   0%|          | 1/1142 [00:00<08:02,  2.36it/s]

Training steps: 0 Loss: 1.0576187372207642


Training:   9%|▉         | 101/1142 [00:42<07:17,  2.38it/s]

Training steps: 100 Loss: 1.0581696033477783


Training:  18%|█▊        | 201/1142 [01:24<06:27,  2.43it/s]

Training steps: 200 Loss: 0.9207648038864136


Training:  26%|██▋       | 301/1142 [02:06<05:44,  2.44it/s]

Training steps: 300 Loss: 1.0668909549713135


Training:  35%|███▌      | 401/1142 [02:49<05:35,  2.21it/s]

Training steps: 400 Loss: 1.1567184925079346


Training:  44%|████▍     | 501/1142 [03:31<04:29,  2.38it/s]

Training steps: 500 Loss: 1.178227424621582


Training:  53%|█████▎    | 601/1142 [04:13<03:42,  2.43it/s]

Training steps: 600 Loss: 1.116892695426941


Training:  61%|██████▏   | 701/1142 [04:55<03:00,  2.45it/s]

Training steps: 700 Loss: 1.1355360746383667


Training:  70%|███████   | 801/1142 [05:37<02:33,  2.22it/s]

Training steps: 800 Loss: 1.1757261753082275


Training:  79%|███████▉  | 901/1142 [06:19<01:41,  2.38it/s]

Training steps: 900 Loss: 0.9956059455871582


Training:  88%|████████▊ | 1001/1142 [07:01<00:58,  2.42it/s]

Training steps: 1000 Loss: 1.0678203105926514


Training:  96%|█████████▋| 1101/1142 [07:43<00:16,  2.44it/s]

Training steps: 1100 Loss: 1.0654797554016113


Training: 100%|██████████| 1142/1142 [08:01<00:00,  2.37it/s]

TRAIN ACC : 0.745475453823618, TRAIN LOSS : 1.0947685867495796



Training:   1%|          | 2/286 [00:00<00:41,  6.81it/s]

Validation steps: 0 Loss: 0.6519342660903931


Training:  36%|███▌      | 102/286 [00:14<00:25,  7.28it/s]

Validation steps: 100 Loss: 0.769116997718811


Training:  71%|███████   | 202/286 [00:27<00:11,  7.26it/s]

Validation steps: 200 Loss: 0.6162588596343994


Training: 100%|██████████| 286/286 [00:39<00:00,  7.29it/s]


              precision    recall  f1-score   support

        IT과학       0.85      0.84      0.85       965
          경제       0.87      0.84      0.85      1245
          사회       0.79      0.82      0.81      1473
        생활문화       0.92      0.91      0.91      1186
          세계       0.95      0.93      0.94      1526
         스포츠       0.97      0.98      0.97      1386
          정치       0.91      0.93      0.92      1350

    accuracy                           0.90      9131
   macro avg       0.89      0.89      0.89      9131
weighted avg       0.90      0.90      0.90      9131

[0.84455959 0.83694779 0.81805838 0.91231029 0.92791612 0.98268398
 0.92666667]
VALID ACC : 0.8950826853575731, VALID LOSS : 0.7846457330913811
{'epoch': 5, 'train_loss': 1.0947685867495796, 'train_acc': 0.745475453823618, 'valid_acc': 0.8950826853575731, 'val_loss': 0.7846457330913811, 'learning_rate': 1e-05}
saving model ...
Start Training: Epoch 6


Training:   0%|          | 1/1142 [00:00<08:24,  2.26it/s]

Training steps: 0 Loss: 0.9801888465881348


Training:   9%|▉         | 101/1142 [00:42<07:16,  2.39it/s]

Training steps: 100 Loss: 0.9336454272270203


Training:  18%|█▊        | 201/1142 [01:24<06:26,  2.43it/s]

Training steps: 200 Loss: 1.047685146331787


Training:  26%|██▋       | 301/1142 [02:06<05:43,  2.45it/s]

Training steps: 300 Loss: 1.0516536235809326


Training:  35%|███▌      | 401/1142 [02:49<05:35,  2.21it/s]

Training steps: 400 Loss: 1.117444634437561


Training:  44%|████▍     | 501/1142 [03:31<04:28,  2.38it/s]

Training steps: 500 Loss: 1.1190794706344604


Training:  53%|█████▎    | 601/1142 [04:13<03:42,  2.43it/s]

Training steps: 600 Loss: 1.046830177307129


Training:  61%|██████▏   | 701/1142 [04:55<03:00,  2.44it/s]

Training steps: 700 Loss: 1.1655455827713013


Training:  70%|███████   | 801/1142 [05:38<02:34,  2.20it/s]

Training steps: 800 Loss: 1.1804760694503784


Training:  79%|███████▉  | 901/1142 [06:20<01:41,  2.38it/s]

Training steps: 900 Loss: 0.9990592002868652


Training:  88%|████████▊ | 1001/1142 [07:02<00:58,  2.43it/s]

Training steps: 1000 Loss: 1.2464450597763062


Training:  96%|█████████▋| 1101/1142 [07:44<00:16,  2.44it/s]

Training steps: 1100 Loss: 1.0518279075622559


Training: 100%|██████████| 1142/1142 [08:01<00:00,  2.37it/s]

TRAIN ACC : 0.751964515510774, TRAIN LOSS : 1.0885583355422612



Training:   1%|          | 2/286 [00:00<00:41,  6.83it/s]

Validation steps: 0 Loss: 0.6690903306007385


Training:  36%|███▌      | 102/286 [00:14<00:25,  7.27it/s]

Validation steps: 100 Loss: 0.7556217312812805


Training:  71%|███████   | 202/286 [00:27<00:11,  7.28it/s]

Validation steps: 200 Loss: 0.6047737002372742


Training: 100%|██████████| 286/286 [00:39<00:00,  7.27it/s]

              precision    recall  f1-score   support

        IT과학       0.81      0.91      0.86       965
          경제       0.89      0.82      0.85      1245
          사회       0.78      0.80      0.79      1473
        생활문화       0.89      0.93      0.91      1186
          세계       0.95      0.92      0.94      1526
         스포츠       0.97      0.98      0.97      1386
          정치       0.93      0.88      0.90      1350

    accuracy                           0.89      9131
   macro avg       0.89      0.89      0.89      9131
weighted avg       0.89      0.89      0.89      9131

[0.90673575 0.8184739  0.80176511 0.92748735 0.92332896 0.97907648
 0.87703704]
VALID ACC : 0.889825867922462, VALID LOSS : 0.7897447107971965
{'epoch': 6, 'train_loss': 1.0885583355422612, 'train_acc': 0.751964515510774, 'valid_acc': 0.889825867922462, 'val_loss': 0.7897447107971965, 'learning_rate': 1e-05}
Start Training: Epoch 7



Training:   0%|          | 1/1142 [00:00<08:02,  2.36it/s]

Training steps: 0 Loss: 1.1158663034439087


Training:   9%|▉         | 101/1142 [00:42<07:16,  2.39it/s]

Training steps: 100 Loss: 1.176162838935852


Training:  18%|█▊        | 201/1142 [01:24<06:27,  2.43it/s]

Training steps: 200 Loss: 1.0917131900787354


Training:  26%|██▋       | 301/1142 [02:06<05:44,  2.44it/s]

Training steps: 300 Loss: 0.9968960881233215


Training:  35%|███▌      | 401/1142 [02:49<05:36,  2.20it/s]

Training steps: 400 Loss: 0.9211796522140503


Training:  44%|████▍     | 501/1142 [03:31<04:28,  2.38it/s]

Training steps: 500 Loss: 1.2586795091629028


Training:  53%|█████▎    | 601/1142 [04:13<03:43,  2.42it/s]

Training steps: 600 Loss: 1.0125457048416138


Training:  61%|██████▏   | 701/1142 [04:55<03:00,  2.44it/s]

Training steps: 700 Loss: 1.0922186374664307


Training:  70%|███████   | 801/1142 [05:38<02:35,  2.20it/s]

Training steps: 800 Loss: 1.0299577713012695


Training:  79%|███████▉  | 901/1142 [06:20<01:41,  2.38it/s]

Training steps: 900 Loss: 1.1164458990097046


Training:  88%|████████▊ | 1001/1142 [07:02<00:58,  2.43it/s]

Training steps: 1000 Loss: 1.040457844734192


Training:  96%|█████████▋| 1101/1142 [07:44<00:16,  2.44it/s]

Training steps: 1100 Loss: 1.1776877641677856


Training: 100%|██████████| 1142/1142 [08:01<00:00,  2.37it/s]

TRAIN ACC : 0.760945157845741, TRAIN LOSS : 1.0777895181797015



Training:   1%|          | 2/286 [00:00<00:42,  6.70it/s]

Validation steps: 0 Loss: 0.6237455010414124


Training:  36%|███▌      | 102/286 [00:14<00:25,  7.29it/s]

Validation steps: 100 Loss: 0.7964078783988953


Training:  71%|███████   | 202/286 [00:27<00:11,  7.32it/s]

Validation steps: 200 Loss: 0.6061031818389893


Training: 100%|██████████| 286/286 [00:39<00:00,  7.27it/s]


              precision    recall  f1-score   support

        IT과학       0.83      0.90      0.86       965
          경제       0.90      0.82      0.86      1245
          사회       0.82      0.79      0.80      1473
        생활문화       0.92      0.89      0.91      1186
          세계       0.93      0.94      0.93      1526
         스포츠       0.96      0.98      0.97      1386
          정치       0.90      0.93      0.92      1350

    accuracy                           0.90      9131
   macro avg       0.89      0.90      0.89      9131
weighted avg       0.90      0.90      0.89      9131

[0.8984456  0.82409639 0.7868296  0.89460371 0.94429882 0.98340548
 0.93333333]
VALID ACC : 0.8955207534771658, VALID LOSS : 0.7945510176095095
{'epoch': 7, 'train_loss': 1.0777895181797015, 'train_acc': 0.760945157845741, 'valid_acc': 0.8955207534771658, 'val_loss': 0.7945510176095095, 'learning_rate': 1e-05}
saving model ...
Start Training: Epoch 8


Training:   0%|          | 1/1142 [00:00<08:26,  2.25it/s]

Training steps: 0 Loss: 1.0668492317199707


Training:   9%|▉         | 101/1142 [00:42<07:16,  2.38it/s]

Training steps: 100 Loss: 1.303006887435913


Training:  18%|█▊        | 201/1142 [01:24<06:27,  2.43it/s]

Training steps: 200 Loss: 1.0889619588851929


Training:  26%|██▋       | 301/1142 [02:06<05:44,  2.44it/s]

Training steps: 300 Loss: 1.0932743549346924


Training:  35%|███▌      | 401/1142 [02:49<05:35,  2.21it/s]

Training steps: 400 Loss: 1.0406519174575806


Training:  44%|████▍     | 501/1142 [03:31<04:28,  2.39it/s]

Training steps: 500 Loss: 1.103129267692566


Training:  53%|█████▎    | 601/1142 [04:13<03:42,  2.43it/s]

Training steps: 600 Loss: 1.1332368850708008


Training:  61%|██████▏   | 701/1142 [04:55<03:00,  2.44it/s]

Training steps: 700 Loss: 1.133719801902771


Training:  70%|███████   | 801/1142 [05:37<02:33,  2.22it/s]

Training steps: 800 Loss: 1.0701392889022827


Training:  79%|███████▉  | 901/1142 [06:20<01:41,  2.38it/s]

Training steps: 900 Loss: 1.0119084119796753


Training:  88%|████████▊ | 1001/1142 [07:02<00:58,  2.43it/s]

Training steps: 1000 Loss: 0.9079076051712036


Training:  96%|█████████▋| 1101/1142 [07:44<00:16,  2.43it/s]

Training steps: 1100 Loss: 1.0445327758789062


Training: 100%|██████████| 1142/1142 [08:01<00:00,  2.37it/s]

TRAIN ACC : 0.7644771787640665, TRAIN LOSS : 1.0719434710809939



Training:   1%|          | 2/286 [00:00<00:41,  6.82it/s]

Validation steps: 0 Loss: 0.6220599412918091


Training:  36%|███▌      | 102/286 [00:14<00:25,  7.29it/s]

Validation steps: 100 Loss: 0.8064616918563843


Training:  71%|███████   | 202/286 [00:27<00:11,  7.31it/s]

Validation steps: 200 Loss: 0.6304616332054138


Training: 100%|██████████| 286/286 [00:39<00:00,  7.28it/s]

              precision    recall  f1-score   support

        IT과학       0.83      0.86      0.84       965
          경제       0.89      0.83      0.86      1245
          사회       0.80      0.80      0.80      1473
        생활문화       0.93      0.89      0.91      1186
          세계       0.93      0.94      0.94      1526
         스포츠       0.97      0.98      0.97      1386
          정치       0.91      0.93      0.92      1350

    accuracy                           0.89      9131
   macro avg       0.89      0.89      0.89      9131
weighted avg       0.89      0.89      0.89      9131

[0.86217617 0.82730924 0.80448065 0.89291737 0.94364351 0.98412698
 0.92888889]
VALID ACC : 0.8940970320884898, VALID LOSS : 0.7924810996005586
{'epoch': 8, 'train_loss': 1.0719434710809939, 'train_acc': 0.7644771787640665, 'valid_acc': 0.8940970320884898, 'val_loss': 0.7924810996005586, 'learning_rate': 1e-05}
Start Training: Epoch 9



Training:   0%|          | 1/1142 [00:00<08:00,  2.37it/s]

Training steps: 0 Loss: 0.9477524757385254


Training:   9%|▉         | 101/1142 [00:42<07:16,  2.38it/s]

Training steps: 100 Loss: 1.215271234512329


Training:  18%|█▊        | 201/1142 [01:24<06:28,  2.42it/s]

Training steps: 200 Loss: 1.0160900354385376


Training:  26%|██▋       | 301/1142 [02:06<05:44,  2.44it/s]

Training steps: 300 Loss: 1.0933160781860352


Training:  35%|███▌      | 401/1142 [02:49<05:34,  2.22it/s]

Training steps: 400 Loss: 1.1477444171905518


Training:  44%|████▍     | 501/1142 [03:31<04:29,  2.38it/s]

Training steps: 500 Loss: 1.0950117111206055


Training:  53%|█████▎    | 601/1142 [04:13<03:42,  2.43it/s]

Training steps: 600 Loss: 1.277599573135376


Training:  61%|██████▏   | 701/1142 [04:55<03:00,  2.44it/s]

Training steps: 700 Loss: 1.0118383169174194


Training:  70%|███████   | 801/1142 [05:37<02:34,  2.20it/s]

Training steps: 800 Loss: 0.9937031865119934


Training:  79%|███████▉  | 901/1142 [06:20<01:41,  2.38it/s]

Training steps: 900 Loss: 0.9453375935554504


Training:  88%|████████▊ | 1001/1142 [07:02<00:58,  2.43it/s]

Training steps: 1000 Loss: 1.016385555267334


Training:  96%|█████████▋| 1101/1142 [07:44<00:16,  2.44it/s]

Training steps: 1100 Loss: 0.9873688817024231


Training: 100%|██████████| 1142/1142 [08:01<00:00,  2.37it/s]

TRAIN ACC : 0.7669413794047586, TRAIN LOSS : 1.0669445749861555



Training:   1%|          | 2/286 [00:00<00:41,  6.78it/s]

Validation steps: 0 Loss: 0.6360504627227783


Training:  36%|███▌      | 102/286 [00:14<00:25,  7.33it/s]

Validation steps: 100 Loss: 0.756263017654419


Training:  71%|███████   | 202/286 [00:27<00:11,  7.28it/s]

Validation steps: 200 Loss: 0.6163115501403809


Training: 100%|██████████| 286/286 [00:39<00:00,  7.29it/s]

              precision    recall  f1-score   support

        IT과학       0.82      0.89      0.85       965
          경제       0.92      0.79      0.85      1245
          사회       0.77      0.81      0.79      1473
        생활문화       0.91      0.90      0.91      1186
          세계       0.92      0.95      0.94      1526
         스포츠       0.97      0.97      0.97      1386
          정치       0.93      0.90      0.91      1350

    accuracy                           0.89      9131
   macro avg       0.89      0.89      0.89      9131
weighted avg       0.89      0.89      0.89      9131

[0.88704663 0.78634538 0.81330618 0.90472175 0.95412844 0.97474747
 0.89555556]
VALID ACC : 0.8894973168327674, VALID LOSS : 0.79511907884291
{'epoch': 9, 'train_loss': 1.0669445749861555, 'train_acc': 0.7669413794047586, 'valid_acc': 0.8894973168327674, 'val_loss': 0.79511907884291, 'learning_rate': 1e-05}
Start Training: Epoch 10



Training:   0%|          | 1/1142 [00:00<08:01,  2.37it/s]

Training steps: 0 Loss: 1.044025182723999


Training:   9%|▉         | 101/1142 [00:42<07:16,  2.39it/s]

Training steps: 100 Loss: 1.0616955757141113


Training:  18%|█▊        | 201/1142 [01:24<06:27,  2.43it/s]

Training steps: 200 Loss: 1.0403810739517212


Training:  26%|██▋       | 301/1142 [02:06<05:44,  2.44it/s]

Training steps: 300 Loss: 1.1262669563293457


Training:  35%|███▌      | 401/1142 [02:49<05:35,  2.21it/s]

Training steps: 400 Loss: 1.0598827600479126


Training:  44%|████▍     | 501/1142 [03:31<04:29,  2.38it/s]

Training steps: 500 Loss: 1.1631011962890625


Training:  53%|█████▎    | 601/1142 [04:13<03:42,  2.43it/s]

Training steps: 600 Loss: 0.9192894101142883


Training:  61%|██████▏   | 701/1142 [04:55<03:00,  2.44it/s]

Training steps: 700 Loss: 0.9672375917434692


Training:  70%|███████   | 801/1142 [05:37<02:34,  2.21it/s]

Training steps: 800 Loss: 1.0390727519989014


Training:  79%|███████▉  | 901/1142 [06:19<01:40,  2.39it/s]

Training steps: 900 Loss: 1.0344346761703491


Training:  88%|████████▊ | 1001/1142 [07:01<00:57,  2.43it/s]

Training steps: 1000 Loss: 1.0869526863098145


Training:  96%|█████████▋| 1101/1142 [07:44<00:16,  2.44it/s]

Training steps: 1100 Loss: 1.0430126190185547


Training: 100%|██████████| 1142/1142 [08:01<00:00,  2.37it/s]

TRAIN ACC : 0.7737863811844591, TRAIN LOSS : 1.0607257314717915



Training:   1%|          | 2/286 [00:00<00:41,  6.83it/s]

Validation steps: 0 Loss: 0.6271830797195435


Training:  36%|███▌      | 102/286 [00:14<00:25,  7.26it/s]

Validation steps: 100 Loss: 0.7698007822036743


Training:  71%|███████   | 202/286 [00:27<00:11,  7.35it/s]

Validation steps: 200 Loss: 0.6002227663993835


Training: 100%|██████████| 286/286 [00:39<00:00,  7.28it/s]

              precision    recall  f1-score   support

        IT과학       0.81      0.89      0.85       965
          경제       0.83      0.86      0.84      1245
          사회       0.82      0.77      0.80      1473
        생활문화       0.93      0.88      0.90      1186
          세계       0.94      0.93      0.94      1526
         스포츠       0.97      0.98      0.97      1386
          정치       0.92      0.91      0.91      1350

    accuracy                           0.89      9131
   macro avg       0.89      0.89      0.89      9131
weighted avg       0.89      0.89      0.89      9131

[0.89015544 0.86024096 0.77460964 0.88448567 0.93315858 0.98124098
 0.90814815]
VALID ACC : 0.8903734530719527, VALID LOSS : 0.8044831919503379
{'epoch': 10, 'train_loss': 1.0607257314717915, 'train_acc': 0.7737863811844591, 'valid_acc': 0.8903734530719527, 'val_loss': 0.8044831919503379, 'learning_rate': 1e-05}
Start Training: Epoch 11



Training:   0%|          | 1/1142 [00:00<08:01,  2.37it/s]

Training steps: 0 Loss: 1.1181390285491943


Training:   9%|▉         | 101/1142 [00:42<07:19,  2.37it/s]

Training steps: 100 Loss: 0.9820051193237305


Training:  18%|█▊        | 201/1142 [01:24<06:29,  2.42it/s]

Training steps: 200 Loss: 1.0392365455627441


Training:  26%|██▋       | 301/1142 [02:07<05:45,  2.43it/s]

Training steps: 300 Loss: 1.0930274724960327


Training:  35%|███▌      | 401/1142 [02:49<05:35,  2.21it/s]

Training steps: 400 Loss: 0.9925757646560669


Training:  44%|████▍     | 501/1142 [03:31<04:30,  2.37it/s]

Training steps: 500 Loss: 1.0665401220321655


Training:  53%|█████▎    | 601/1142 [04:14<03:43,  2.42it/s]

Training steps: 600 Loss: 1.089890956878662


Training:  61%|██████▏   | 701/1142 [04:56<03:01,  2.43it/s]

Training steps: 700 Loss: 0.9452670216560364


Training:  70%|███████   | 801/1142 [05:38<02:34,  2.21it/s]

Training steps: 800 Loss: 1.020297884941101


Training:  79%|███████▉  | 901/1142 [06:21<01:41,  2.38it/s]

Training steps: 900 Loss: 1.0482172966003418


Training:  88%|████████▊ | 1001/1142 [07:03<00:58,  2.42it/s]

Training steps: 1000 Loss: 1.1189135313034058


Training:  96%|█████████▋| 1101/1142 [07:45<00:16,  2.43it/s]

Training steps: 1100 Loss: 1.1715219020843506


Training: 100%|██████████| 1142/1142 [08:03<00:00,  2.36it/s]

TRAIN ACC : 0.7770446020315965, TRAIN LOSS : 1.055447015317643



Training:   1%|          | 2/286 [00:00<00:42,  6.74it/s]

Validation steps: 0 Loss: 0.6165094971656799


Training:  36%|███▌      | 102/286 [00:14<00:25,  7.18it/s]

Validation steps: 100 Loss: 0.8262167572975159


Training:  71%|███████   | 202/286 [00:28<00:11,  7.19it/s]

Validation steps: 200 Loss: 0.6061720252037048


Training: 100%|██████████| 286/286 [00:39<00:00,  7.21it/s]

              precision    recall  f1-score   support

        IT과학       0.83      0.86      0.85       965
          경제       0.83      0.87      0.85      1245
          사회       0.82      0.77      0.79      1473
        생활문화       0.93      0.88      0.90      1186
          세계       0.93      0.94      0.93      1526
         스포츠       0.97      0.98      0.97      1386
          정치       0.91      0.92      0.91      1350

    accuracy                           0.89      9131
   macro avg       0.89      0.89      0.89      9131
weighted avg       0.89      0.89      0.89      9131

[0.85906736 0.86827309 0.76985743 0.88195616 0.9423329  0.97619048
 0.92074074]
VALID ACC : 0.8897163508925638, VALID LOSS : 0.8038001114671881
{'epoch': 11, 'train_loss': 1.055447015317643, 'train_acc': 0.7770446020315965, 'valid_acc': 0.8897163508925638, 'val_loss': 0.8038001114671881, 'learning_rate': 1e-05}
Start Training: Epoch 12



Training:   0%|          | 1/1142 [00:00<08:03,  2.36it/s]

Training steps: 0 Loss: 1.0045592784881592


Training:   9%|▉         | 101/1142 [00:42<07:18,  2.37it/s]

Training steps: 100 Loss: 1.1177396774291992


Training:  18%|█▊        | 201/1142 [01:25<06:29,  2.42it/s]

Training steps: 200 Loss: 1.143175482749939


Training:  26%|██▋       | 301/1142 [02:07<05:45,  2.44it/s]

Training steps: 300 Loss: 1.059463620185852


Training:  35%|███▌      | 401/1142 [02:49<05:36,  2.20it/s]

Training steps: 400 Loss: 1.0248416662216187


Training:  44%|████▍     | 501/1142 [03:32<04:29,  2.38it/s]

Training steps: 500 Loss: 1.0685282945632935


Training:  53%|█████▎    | 601/1142 [04:14<03:43,  2.42it/s]

Training steps: 600 Loss: 0.9547114372253418


Training:  61%|██████▏   | 701/1142 [04:56<03:01,  2.43it/s]

Training steps: 700 Loss: 1.0034719705581665


Training:  70%|███████   | 801/1142 [05:39<02:34,  2.20it/s]

Training steps: 800 Loss: 1.0143688917160034


Training:  79%|███████▉  | 901/1142 [06:21<01:41,  2.37it/s]

Training steps: 900 Loss: 1.0951600074768066


Training:  88%|████████▊ | 1001/1142 [07:03<00:58,  2.43it/s]

Training steps: 1000 Loss: 1.1255203485488892


Training:  96%|█████████▋| 1101/1142 [07:45<00:16,  2.43it/s]

Training steps: 1100 Loss: 1.1630589962005615


Training: 100%|██████████| 1142/1142 [08:03<00:00,  2.36it/s]

TRAIN ACC : 0.7798373627577143, TRAIN LOSS : 1.0558905608286582



Training:   1%|          | 2/286 [00:00<00:42,  6.70it/s]

Validation steps: 0 Loss: 0.6456504464149475


Training:  36%|███▌      | 102/286 [00:14<00:25,  7.16it/s]

Validation steps: 100 Loss: 0.8181210160255432


Training:  71%|███████   | 202/286 [00:28<00:11,  7.19it/s]

Validation steps: 200 Loss: 0.5983701348304749


Training: 100%|██████████| 286/286 [00:39<00:00,  7.20it/s]


              precision    recall  f1-score   support

        IT과학       0.82      0.87      0.84       965
          경제       0.83      0.87      0.85      1245
          사회       0.82      0.76      0.79      1473
        생활문화       0.91      0.90      0.91      1186
          세계       0.94      0.92      0.93      1526
         스포츠       0.97      0.98      0.98      1386
          정치       0.91      0.91      0.91      1350

    accuracy                           0.89      9131
   macro avg       0.88      0.89      0.89      9131
weighted avg       0.89      0.89      0.89      9131

[0.87253886 0.86506024 0.75831636 0.90303541 0.92201835 0.98124098
 0.91333333]
VALID ACC : 0.8878545613842953, VALID LOSS : 0.8138431525313771
{'epoch': 12, 'train_loss': 1.0558905608286582, 'train_acc': 0.7798373627577143, 'valid_acc': 0.8878545613842953, 'val_loss': 0.8138431525313771, 'learning_rate': 1e-05}
EarlyStopping counter: 5 out of 5


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.decoder.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'cla

Start Training: Epoch 1


Training:   0%|          | 1/1142 [00:00<08:01,  2.37it/s]

Training steps: 0 Loss: 2.036762237548828


Training:   9%|▉         | 101/1142 [00:42<07:17,  2.38it/s]

Training steps: 100 Loss: 1.9457330703735352


Training:  18%|█▊        | 201/1142 [01:25<06:28,  2.42it/s]

Training steps: 200 Loss: 1.7096521854400635


Training:  26%|██▋       | 301/1142 [02:07<05:45,  2.43it/s]

Training steps: 300 Loss: 1.6569280624389648


Training:  35%|███▌      | 401/1142 [02:49<05:34,  2.22it/s]

Training steps: 400 Loss: 1.5020108222961426


Training:  44%|████▍     | 501/1142 [03:32<04:29,  2.38it/s]

Training steps: 500 Loss: 1.2964080572128296


Training:  53%|█████▎    | 601/1142 [04:14<03:43,  2.42it/s]

Training steps: 600 Loss: 1.1825002431869507


Training:  61%|██████▏   | 701/1142 [04:56<03:01,  2.43it/s]

Training steps: 700 Loss: 1.1653385162353516


Training:  70%|███████   | 801/1142 [05:39<02:34,  2.20it/s]

Training steps: 800 Loss: 1.0924896001815796


Training:  79%|███████▉  | 901/1142 [06:21<01:41,  2.38it/s]

Training steps: 900 Loss: 1.1945513486862183


Training:  88%|████████▊ | 1001/1142 [07:03<00:58,  2.42it/s]

Training steps: 1000 Loss: 1.0231136083602905


Training:  96%|█████████▋| 1101/1142 [07:45<00:16,  2.43it/s]

Training steps: 1100 Loss: 1.198014497756958


Training: 100%|██████████| 1142/1142 [08:03<00:00,  2.36it/s]

TRAIN ACC : 0.5463530829043917, TRAIN LOSS : 1.405000019459716



Training:   1%|          | 2/286 [00:00<00:41,  6.78it/s]

Validation steps: 0 Loss: 0.6614586114883423


Training:  36%|███▌      | 102/286 [00:14<00:25,  7.17it/s]

Validation steps: 100 Loss: 0.8284738659858704


Training:  71%|███████   | 202/286 [00:28<00:11,  7.26it/s]

Validation steps: 200 Loss: 0.7099802494049072


Training: 100%|██████████| 286/286 [00:39<00:00,  7.20it/s]


              precision    recall  f1-score   support

        IT과학       0.80      0.88      0.84       965
          경제       0.85      0.81      0.83      1244
          사회       0.77      0.77      0.77      1472
        생활문화       0.93      0.85      0.89      1187
          세계       0.91      0.94      0.92      1525
         스포츠       0.97      0.97      0.97      1386
          정치       0.91      0.92      0.91      1351

    accuracy                           0.88      9130
   macro avg       0.88      0.88      0.88      9130
weighted avg       0.88      0.88      0.88      9130

[0.88497409 0.80948553 0.77173913 0.85425442 0.93704918 0.96608947
 0.92376018]
VALID ACC : 0.8791894852135816, VALID LOSS : 0.8509072550943682
{'epoch': 1, 'train_loss': 1.405000019459716, 'train_acc': 0.5463530829043917, 'valid_acc': 0.8791894852135816, 'val_loss': 0.8509072550943682, 'learning_rate': 1e-05}
saving model ...
Start Training: Epoch 2


Training:   0%|          | 1/1142 [00:00<08:30,  2.24it/s]

Training steps: 0 Loss: 1.2550342082977295


Training:   9%|▉         | 101/1142 [00:42<07:17,  2.38it/s]

Training steps: 100 Loss: 1.1540485620498657


Training:  18%|█▊        | 201/1142 [01:24<06:27,  2.43it/s]

Training steps: 200 Loss: 1.1616092920303345


Training:  26%|██▋       | 301/1142 [02:07<05:45,  2.44it/s]

Training steps: 300 Loss: 1.2137645483016968


Training:  35%|███▌      | 401/1142 [02:49<05:36,  2.20it/s]

Training steps: 400 Loss: 1.1806282997131348


Training:  44%|████▍     | 501/1142 [03:31<04:29,  2.38it/s]

Training steps: 500 Loss: 1.0600295066833496


Training:  53%|█████▎    | 601/1142 [04:14<03:43,  2.42it/s]

Training steps: 600 Loss: 1.0543708801269531


Training:  61%|██████▏   | 701/1142 [04:56<03:01,  2.43it/s]

Training steps: 700 Loss: 1.2156716585159302


Training:  70%|███████   | 801/1142 [05:38<02:34,  2.21it/s]

Training steps: 800 Loss: 0.9999393820762634


Training:  79%|███████▉  | 901/1142 [06:21<01:41,  2.38it/s]

Training steps: 900 Loss: 1.0441747903823853


Training:  88%|████████▊ | 1001/1142 [07:03<00:58,  2.43it/s]

Training steps: 1000 Loss: 1.0406461954116821


Training:  96%|█████████▋| 1101/1142 [07:45<00:16,  2.43it/s]

Training steps: 1100 Loss: 0.9804166555404663


Training: 100%|██████████| 1142/1142 [08:03<00:00,  2.36it/s]

TRAIN ACC : 0.7262621837695762, TRAIN LOSS : 1.1338165532373505



Training:   1%|          | 2/286 [00:00<00:42,  6.72it/s]

Validation steps: 0 Loss: 0.6431469917297363


Training:  36%|███▌      | 102/286 [00:14<00:25,  7.21it/s]

Validation steps: 100 Loss: 0.7851114273071289


Training:  71%|███████   | 202/286 [00:28<00:11,  7.19it/s]

Validation steps: 200 Loss: 0.6814293265342712


Training: 100%|██████████| 286/286 [00:39<00:00,  7.21it/s]


              precision    recall  f1-score   support

        IT과학       0.83      0.89      0.86       965
          경제       0.90      0.77      0.83      1244
          사회       0.75      0.82      0.78      1472
        생활문화       0.90      0.90      0.90      1187
          세계       0.94      0.92      0.93      1525
         스포츠       0.97      0.98      0.97      1386
          정치       0.93      0.91      0.92      1351

    accuracy                           0.88      9130
   macro avg       0.89      0.88      0.88      9130
weighted avg       0.89      0.88      0.89      9130

[0.89119171 0.7733119  0.81725543 0.89553496 0.92       0.97979798
 0.90895633]
VALID ACC : 0.8846659364731654, VALID LOSS : 0.8074480034671463
{'epoch': 2, 'train_loss': 1.1338165532373505, 'train_acc': 0.7262621837695762, 'valid_acc': 0.8846659364731654, 'val_loss': 0.8074480034671463, 'learning_rate': 1e-05}
saving model ...
Start Training: Epoch 3


Training:   0%|          | 1/1142 [00:00<08:22,  2.27it/s]

Training steps: 0 Loss: 1.1484699249267578


Training:   9%|▉         | 101/1142 [00:42<07:17,  2.38it/s]

Training steps: 100 Loss: 1.3421971797943115


Training:  18%|█▊        | 201/1142 [01:24<06:29,  2.42it/s]

Training steps: 200 Loss: 1.1874465942382812


Training:  26%|██▋       | 301/1142 [02:07<05:45,  2.44it/s]

Training steps: 300 Loss: 1.0684690475463867


Training:  35%|███▌      | 401/1142 [02:49<05:36,  2.20it/s]

Training steps: 400 Loss: 1.017788290977478


Training:  44%|████▍     | 501/1142 [03:31<04:29,  2.38it/s]

Training steps: 500 Loss: 1.0921576023101807


Training:  53%|█████▎    | 601/1142 [04:14<03:43,  2.43it/s]

Training steps: 600 Loss: 1.0626251697540283


Training:  61%|██████▏   | 701/1142 [04:56<03:01,  2.43it/s]

Training steps: 700 Loss: 1.0709643363952637


Training:  70%|███████   | 801/1142 [05:38<02:34,  2.20it/s]

Training steps: 800 Loss: 1.159133791923523


Training:  79%|███████▉  | 901/1142 [06:21<01:41,  2.38it/s]

Training steps: 900 Loss: 1.0627952814102173


Training:  88%|████████▊ | 1001/1142 [07:03<00:58,  2.42it/s]

Training steps: 1000 Loss: 1.1364415884017944


Training:  96%|█████████▋| 1101/1142 [07:45<00:16,  2.44it/s]

Training steps: 1100 Loss: 1.1450526714324951


Training: 100%|██████████| 1142/1142 [08:03<00:00,  2.36it/s]

TRAIN ACC : 0.7331891359106341, TRAIN LOSS : 1.1140360587428204



Training:   1%|          | 2/286 [00:00<00:43,  6.60it/s]

Validation steps: 0 Loss: 0.6363469958305359


Training:  36%|███▌      | 102/286 [00:14<00:25,  7.18it/s]

Validation steps: 100 Loss: 0.7840896844863892


Training:  71%|███████   | 202/286 [00:28<00:11,  7.24it/s]

Validation steps: 200 Loss: 0.6621952652931213


Training: 100%|██████████| 286/286 [00:39<00:00,  7.21it/s]


              precision    recall  f1-score   support

        IT과학       0.83      0.88      0.85       965
          경제       0.90      0.78      0.83      1244
          사회       0.75      0.82      0.78      1472
        생활문화       0.91      0.90      0.90      1187
          세계       0.93      0.93      0.93      1525
         스포츠       0.97      0.97      0.97      1386
          정치       0.93      0.92      0.92      1351

    accuracy                           0.89      9130
   macro avg       0.89      0.88      0.88      9130
weighted avg       0.89      0.89      0.89      9130

[0.88186528 0.77733119 0.81725543 0.89553496 0.9304918  0.97258297
 0.91561806]
VALID ACC : 0.8858707557502739, VALID LOSS : 0.7961778196838353
{'epoch': 3, 'train_loss': 1.1140360587428204, 'train_acc': 0.7331891359106341, 'valid_acc': 0.8858707557502739, 'val_loss': 0.7961778196838353, 'learning_rate': 1e-05}
saving model ...
Start Training: Epoch 4


Training:   0%|          | 1/1142 [00:00<08:26,  2.25it/s]

Training steps: 0 Loss: 1.0795788764953613


Training:   9%|▉         | 101/1142 [00:42<07:19,  2.37it/s]

Training steps: 100 Loss: 1.1983381509780884


Training:  18%|█▊        | 201/1142 [01:25<06:29,  2.42it/s]

Training steps: 200 Loss: 1.198319673538208


Training:  26%|██▋       | 301/1142 [02:07<05:45,  2.43it/s]

Training steps: 300 Loss: 1.1736546754837036


Training:  35%|███▌      | 401/1142 [02:49<05:37,  2.20it/s]

Training steps: 400 Loss: 1.0689619779586792


Training:  44%|████▍     | 501/1142 [03:32<04:30,  2.37it/s]

Training steps: 500 Loss: 1.0348573923110962


Training:  53%|█████▎    | 601/1142 [04:14<03:43,  2.42it/s]

Training steps: 600 Loss: 1.1006577014923096


Training:  61%|██████▏   | 701/1142 [04:56<03:01,  2.43it/s]

Training steps: 700 Loss: 1.0171974897384644


Training:  70%|███████   | 801/1142 [05:39<02:34,  2.20it/s]

Training steps: 800 Loss: 1.1182208061218262


Training:  79%|███████▉  | 901/1142 [06:21<01:41,  2.37it/s]

Training steps: 900 Loss: 1.0625851154327393


Training:  88%|████████▊ | 1001/1142 [07:03<00:58,  2.43it/s]

Training steps: 1000 Loss: 1.208337426185608


Training:  96%|█████████▋| 1101/1142 [07:46<00:16,  2.43it/s]

Training steps: 1100 Loss: 1.1413744688034058


Training: 100%|██████████| 1142/1142 [08:03<00:00,  2.36it/s]

TRAIN ACC : 0.7415945679553171, TRAIN LOSS : 1.1031601922733354



Training:   1%|          | 2/286 [00:00<00:41,  6.86it/s]

Validation steps: 0 Loss: 0.6241868138313293


Training:  36%|███▌      | 102/286 [00:14<00:25,  7.21it/s]

Validation steps: 100 Loss: 0.8365457653999329


Training:  71%|███████   | 202/286 [00:28<00:11,  7.15it/s]

Validation steps: 200 Loss: 0.6588598489761353


Training: 100%|██████████| 286/286 [00:39<00:00,  7.20it/s]


              precision    recall  f1-score   support

        IT과학       0.82      0.88      0.85       965
          경제       0.87      0.81      0.84      1244
          사회       0.77      0.81      0.79      1472
        생활문화       0.93      0.87      0.90      1187
          세계       0.92      0.94      0.93      1525
         스포츠       0.97      0.98      0.98      1386
          정치       0.94      0.90      0.92      1351

    accuracy                           0.89      9130
   macro avg       0.89      0.89      0.89      9130
weighted avg       0.89      0.89      0.89      9130

[0.88186528 0.80707395 0.81453804 0.87194608 0.94295082 0.98051948
 0.9015544 ]
VALID ACC : 0.8876232201533406, VALID LOSS : 0.7958500351939168
{'epoch': 4, 'train_loss': 1.1031601922733354, 'train_acc': 0.7415945679553171, 'valid_acc': 0.8876232201533406, 'val_loss': 0.7958500351939168, 'learning_rate': 1e-05}
saving model ...
Start Training: Epoch 5


Training:   0%|          | 1/1142 [00:00<08:30,  2.24it/s]

Training steps: 0 Loss: 1.1296260356903076


Training:   9%|▉         | 101/1142 [00:42<07:18,  2.37it/s]

Training steps: 100 Loss: 1.0393363237380981


Training:  18%|█▊        | 201/1142 [01:25<06:28,  2.42it/s]

Training steps: 200 Loss: 1.1580301523208618


Training:  26%|██▋       | 301/1142 [02:07<05:46,  2.42it/s]

Training steps: 300 Loss: 1.0920255184173584


Training:  35%|███▌      | 401/1142 [02:49<05:37,  2.20it/s]

Training steps: 400 Loss: 1.1316159963607788


Training:  44%|████▍     | 501/1142 [03:32<04:30,  2.37it/s]

Training steps: 500 Loss: 0.9846195578575134


Training:  53%|█████▎    | 601/1142 [04:14<03:43,  2.42it/s]

Training steps: 600 Loss: 1.1296955347061157


Training:  61%|██████▏   | 701/1142 [04:56<03:01,  2.43it/s]

Training steps: 700 Loss: 1.120278000831604


Training:  70%|███████   | 801/1142 [05:39<02:35,  2.20it/s]

Training steps: 800 Loss: 1.1396056413650513


Training:  79%|███████▉  | 901/1142 [06:21<01:41,  2.37it/s]

Training steps: 900 Loss: 1.1366431713104248


Training:  88%|████████▊ | 1001/1142 [07:03<00:58,  2.42it/s]

Training steps: 1000 Loss: 1.1352527141571045


Training:  96%|█████████▋| 1101/1142 [07:46<00:16,  2.43it/s]

Training steps: 1100 Loss: 1.1556328535079956


Training: 100%|██████████| 1142/1142 [08:03<00:00,  2.36it/s]

TRAIN ACC : 0.7503833096046435, TRAIN LOSS : 1.090266004335233



Training:   1%|          | 2/286 [00:00<00:43,  6.52it/s]

Validation steps: 0 Loss: 0.6074579954147339


Training:  36%|███▌      | 102/286 [00:14<00:25,  7.20it/s]

Validation steps: 100 Loss: 0.7961451411247253


Training:  71%|███████   | 202/286 [00:28<00:11,  7.20it/s]

Validation steps: 200 Loss: 0.6625654101371765


Training: 100%|██████████| 286/286 [00:39<00:00,  7.20it/s]

              precision    recall  f1-score   support

        IT과학       0.81      0.89      0.85       965
          경제       0.89      0.77      0.83      1244
          사회       0.79      0.79      0.79      1472
        생활문화       0.93      0.87      0.90      1187
          세계       0.91      0.95      0.93      1525
         스포츠       0.97      0.98      0.97      1386
          정치       0.90      0.94      0.92      1351

    accuracy                           0.89      9130
   macro avg       0.89      0.88      0.88      9130
weighted avg       0.89      0.89      0.89      9130

[0.88601036 0.77411576 0.79347826 0.8652064  0.95081967 0.97979798
 0.94374537]
VALID ACC : 0.8867469879518072, VALID LOSS : 0.8012281962624797
{'epoch': 5, 'train_loss': 1.090266004335233, 'train_acc': 0.7503833096046435, 'valid_acc': 0.8867469879518072, 'val_loss': 0.8012281962624797, 'learning_rate': 1e-05}
Start Training: Epoch 6



Training:   0%|          | 1/1142 [00:00<08:06,  2.34it/s]

Training steps: 0 Loss: 0.9217819571495056


Training:   9%|▉         | 101/1142 [00:42<07:18,  2.38it/s]

Training steps: 100 Loss: 1.0554101467132568


Training:  18%|█▊        | 201/1142 [01:25<06:29,  2.42it/s]

Training steps: 200 Loss: 0.9893599152565002


Training:  26%|██▋       | 301/1142 [02:07<05:45,  2.43it/s]

Training steps: 300 Loss: 1.2244322299957275


Training:  35%|███▌      | 401/1142 [02:49<05:37,  2.19it/s]

Training steps: 400 Loss: 1.0713757276535034


Training:  44%|████▍     | 501/1142 [03:32<04:30,  2.37it/s]

Training steps: 500 Loss: 1.1099010705947876


Training:  53%|█████▎    | 601/1142 [04:14<03:43,  2.42it/s]

Training steps: 600 Loss: 1.113160490989685


Training:  61%|██████▏   | 701/1142 [04:56<03:01,  2.42it/s]

Training steps: 700 Loss: 1.0710182189941406


Training:  70%|███████   | 801/1142 [05:39<02:35,  2.20it/s]

Training steps: 800 Loss: 1.0606586933135986


Training:  79%|███████▉  | 901/1142 [06:21<01:41,  2.37it/s]

Training steps: 900 Loss: 1.0098809003829956


Training:  88%|████████▊ | 1001/1142 [07:04<00:58,  2.42it/s]

Training steps: 1000 Loss: 0.951029360294342


Training:  96%|█████████▋| 1101/1142 [07:46<00:16,  2.43it/s]

Training steps: 1100 Loss: 1.144487738609314


Training: 100%|██████████| 1142/1142 [08:03<00:00,  2.36it/s]

TRAIN ACC : 0.7570364691709561, TRAIN LOSS : 1.084105588091234



Training:   1%|          | 2/286 [00:00<00:42,  6.63it/s]

Validation steps: 0 Loss: 0.636824369430542


Training:  36%|███▌      | 102/286 [00:14<00:25,  7.17it/s]

Validation steps: 100 Loss: 0.8387595415115356


Training:  71%|███████   | 202/286 [00:28<00:11,  7.23it/s]

Validation steps: 200 Loss: 0.6537647843360901


Training: 100%|██████████| 286/286 [00:39<00:00,  7.19it/s]

              precision    recall  f1-score   support

        IT과학       0.85      0.80      0.82       965
          경제       0.85      0.83      0.84      1244
          사회       0.76      0.82      0.79      1472
        생활문화       0.91      0.88      0.90      1187
          세계       0.93      0.94      0.94      1525
         스포츠       0.96      0.98      0.97      1386
          정치       0.93      0.92      0.93      1351

    accuracy                           0.89      9130
   macro avg       0.89      0.88      0.88      9130
weighted avg       0.89      0.89      0.89      9130

[0.8        0.82636656 0.81589674 0.88374052 0.93836066 0.98196248
 0.9215396 ]
VALID ACC : 0.8857612267250822, VALID LOSS : 0.7943695729429071
{'epoch': 6, 'train_loss': 1.084105588091234, 'train_acc': 0.7570364691709561, 'valid_acc': 0.8857612267250822, 'val_loss': 0.7943695729429071, 'learning_rate': 1e-05}
Start Training: Epoch 7



Training:   0%|          | 1/1142 [00:00<08:05,  2.35it/s]

Training steps: 0 Loss: 1.121766209602356


Training:   9%|▉         | 101/1142 [00:42<07:19,  2.37it/s]

Training steps: 100 Loss: 0.9721468687057495


Training:  18%|█▊        | 201/1142 [01:25<06:27,  2.43it/s]

Training steps: 200 Loss: 1.0765835046768188


Training:  26%|██▋       | 301/1142 [02:07<05:46,  2.43it/s]

Training steps: 300 Loss: 1.0327636003494263


Training:  35%|███▌      | 401/1142 [02:49<05:36,  2.20it/s]

Training steps: 400 Loss: 1.0732265710830688


Training:  44%|████▍     | 501/1142 [03:32<04:30,  2.37it/s]

Training steps: 500 Loss: 0.959136962890625


Training:  53%|█████▎    | 601/1142 [04:14<03:43,  2.42it/s]

Training steps: 600 Loss: 1.039607048034668


Training:  61%|██████▏   | 701/1142 [04:56<03:01,  2.43it/s]

Training steps: 700 Loss: 1.1077215671539307


Training:  70%|███████   | 801/1142 [05:39<02:35,  2.20it/s]

Training steps: 800 Loss: 1.3361554145812988


Training:  79%|███████▉  | 901/1142 [06:21<01:41,  2.38it/s]

Training steps: 900 Loss: 1.0436285734176636


Training:  88%|████████▊ | 1001/1142 [07:03<00:58,  2.42it/s]

Training steps: 1000 Loss: 1.2939399480819702


Training:  96%|█████████▋| 1101/1142 [07:46<00:16,  2.43it/s]

Training steps: 1100 Loss: 0.9860920310020447


Training: 100%|██████████| 1142/1142 [08:03<00:00,  2.36it/s]

TRAIN ACC : 0.7611159785346622, TRAIN LOSS : 1.0741525188010128



Training:   1%|          | 2/286 [00:00<00:41,  6.78it/s]

Validation steps: 0 Loss: 0.614198625087738


Training:  36%|███▌      | 102/286 [00:14<00:25,  7.16it/s]

Validation steps: 100 Loss: 0.7984444499015808


Training:  71%|███████   | 202/286 [00:28<00:11,  7.14it/s]

Validation steps: 200 Loss: 0.6527944803237915


Training: 100%|██████████| 286/286 [00:39<00:00,  7.18it/s]

              precision    recall  f1-score   support

        IT과학       0.84      0.87      0.85       965
          경제       0.90      0.77      0.83      1244
          사회       0.75      0.82      0.78      1472
        생활문화       0.91      0.88      0.89      1187
          세계       0.91      0.94      0.93      1525
         스포츠       0.96      0.98      0.97      1386
          정치       0.94      0.91      0.92      1351

    accuracy                           0.89      9130
   macro avg       0.89      0.88      0.88      9130
weighted avg       0.89      0.89      0.89      9130

[0.87253886 0.77491961 0.81793478 0.88374052 0.94491803 0.98268398
 0.90599556]
VALID ACC : 0.8856516976998905, VALID LOSS : 0.7968914402531577
{'epoch': 7, 'train_loss': 1.0741525188010128, 'train_acc': 0.7611159785346622, 'valid_acc': 0.8856516976998905, 'val_loss': 0.7968914402531577, 'learning_rate': 1e-05}
Start Training: Epoch 8



Training:   0%|          | 1/1142 [00:00<08:04,  2.36it/s]

Training steps: 0 Loss: 1.2579227685928345


Training:   9%|▉         | 101/1142 [00:42<07:18,  2.37it/s]

Training steps: 100 Loss: 1.0491957664489746


Training:  18%|█▊        | 201/1142 [01:25<06:30,  2.41it/s]

Training steps: 200 Loss: 0.9862645864486694


Training:  26%|██▋       | 301/1142 [02:07<05:46,  2.42it/s]

Training steps: 300 Loss: 1.1149399280548096


Training:  35%|███▌      | 401/1142 [02:50<05:37,  2.20it/s]

Training steps: 400 Loss: 0.9907383322715759


Training:  44%|████▍     | 501/1142 [03:32<04:30,  2.37it/s]

Training steps: 500 Loss: 0.9261845350265503


Training:  53%|█████▎    | 601/1142 [04:14<03:43,  2.42it/s]

Training steps: 600 Loss: 1.1528338193893433


Training:  61%|██████▏   | 701/1142 [04:57<03:01,  2.42it/s]

Training steps: 700 Loss: 0.9330336451530457


Training:  70%|███████   | 801/1142 [05:39<02:35,  2.20it/s]

Training steps: 800 Loss: 0.9554386734962463


Training:  79%|███████▉  | 901/1142 [06:22<01:41,  2.37it/s]

Training steps: 900 Loss: 1.1216015815734863


Training:  88%|████████▊ | 1001/1142 [07:04<00:58,  2.42it/s]

Training steps: 1000 Loss: 1.007842779159546


Training:  96%|█████████▋| 1101/1142 [07:46<00:16,  2.43it/s]

Training steps: 1100 Loss: 1.0764188766479492


Training: 100%|██████████| 1142/1142 [08:04<00:00,  2.36it/s]

TRAIN ACC : 0.7635253531924214, TRAIN LOSS : 1.0727213576253367



Training:   1%|          | 2/286 [00:00<00:42,  6.69it/s]

Validation steps: 0 Loss: 0.6147387027740479


Training:  36%|███▌      | 102/286 [00:14<00:25,  7.14it/s]

Validation steps: 100 Loss: 0.9095252752304077


Training:  71%|███████   | 202/286 [00:28<00:11,  7.17it/s]

Validation steps: 200 Loss: 0.6258584856987


Training: 100%|██████████| 286/286 [00:39<00:00,  7.19it/s]

              precision    recall  f1-score   support

        IT과학       0.82      0.85      0.84       965
          경제       0.88      0.76      0.82      1244
          사회       0.73      0.82      0.78      1472
        생활문화       0.90      0.89      0.90      1187
          세계       0.94      0.93      0.93      1525
         스포츠       0.97      0.98      0.97      1386
          정치       0.94      0.90      0.92      1351

    accuracy                           0.88      9130
   macro avg       0.88      0.88      0.88      9130
weighted avg       0.88      0.88      0.88      9130

[0.84974093 0.76286174 0.82472826 0.89300758 0.92721311 0.98124098
 0.90081421]
VALID ACC : 0.8799561883899233, VALID LOSS : 0.7990949008014653
{'epoch': 8, 'train_loss': 1.0727213576253367, 'train_acc': 0.7635253531924214, 'valid_acc': 0.8799561883899233, 'val_loss': 0.7990949008014653, 'learning_rate': 1e-05}
Start Training: Epoch 9



Training:   0%|          | 1/1142 [00:00<08:07,  2.34it/s]

Training steps: 0 Loss: 1.0855506658554077


Training:   9%|▉         | 101/1142 [00:42<07:18,  2.37it/s]

Training steps: 100 Loss: 1.0330837965011597


Training:  18%|█▊        | 201/1142 [01:25<06:30,  2.41it/s]

Training steps: 200 Loss: 0.9557839632034302


Training:  26%|██▋       | 301/1142 [02:07<05:46,  2.43it/s]

Training steps: 300 Loss: 0.9830977320671082


Training:  35%|███▌      | 401/1142 [02:50<05:38,  2.19it/s]

Training steps: 400 Loss: 0.9685961604118347


Training:  44%|████▍     | 501/1142 [03:32<04:29,  2.37it/s]

Training steps: 500 Loss: 1.0710482597351074


Training:  53%|█████▎    | 601/1142 [04:14<03:44,  2.41it/s]

Training steps: 600 Loss: 1.0139857530593872


Training:  61%|██████▏   | 701/1142 [04:57<03:01,  2.43it/s]

Training steps: 700 Loss: 1.0176187753677368


Training:  70%|███████   | 801/1142 [05:39<02:34,  2.20it/s]

Training steps: 800 Loss: 0.9966492056846619


Training:  79%|███████▉  | 901/1142 [06:22<01:41,  2.37it/s]

Training steps: 900 Loss: 1.1424951553344727


Training:  88%|████████▊ | 1001/1142 [07:04<00:58,  2.42it/s]

Training steps: 1000 Loss: 1.1204049587249756


Training:  96%|█████████▋| 1101/1142 [07:46<00:16,  2.43it/s]

Training steps: 1100 Loss: 1.0647920370101929


Training: 100%|██████████| 1142/1142 [08:04<00:00,  2.36it/s]

TRAIN ACC : 0.7730259555360859, TRAIN LOSS : 1.06277934546646



Training:   1%|          | 2/286 [00:00<00:42,  6.66it/s]

Validation steps: 0 Loss: 0.6053617596626282


Training:  36%|███▌      | 102/286 [00:14<00:25,  7.14it/s]

Validation steps: 100 Loss: 0.9703829884529114


Training:  71%|███████   | 202/286 [00:28<00:11,  7.18it/s]

Validation steps: 200 Loss: 0.6310319900512695


Training: 100%|██████████| 286/286 [00:39<00:00,  7.18it/s]


              precision    recall  f1-score   support

        IT과학       0.81      0.89      0.85       965
          경제       0.86      0.81      0.84      1244
          사회       0.82      0.77      0.79      1472
        생활문화       0.91      0.89      0.90      1187
          세계       0.92      0.94      0.93      1525
         스포츠       0.96      0.98      0.97      1386
          정치       0.92      0.93      0.92      1351

    accuracy                           0.89      9130
   macro avg       0.89      0.89      0.89      9130
weighted avg       0.89      0.89      0.89      9130

[0.89430052 0.81430868 0.7669837  0.89132266 0.9442623  0.98051948
 0.92968172]
VALID ACC : 0.8891566265060241, VALID LOSS : 0.8077613490444797
{'epoch': 9, 'train_loss': 1.06277934546646, 'train_acc': 0.7730259555360859, 'valid_acc': 0.8891566265060241, 'val_loss': 0.8077613490444797, 'learning_rate': 1e-05}
saving model ...
Start Training: Epoch 10


Training:   0%|          | 1/1142 [00:00<08:24,  2.26it/s]

Training steps: 0 Loss: 0.9395243525505066


Training:   9%|▉         | 101/1142 [00:42<07:20,  2.36it/s]

Training steps: 100 Loss: 1.0917853116989136


Training:  18%|█▊        | 201/1142 [01:25<06:29,  2.42it/s]

Training steps: 200 Loss: 1.161475658416748


Training:  26%|██▋       | 301/1142 [02:07<05:45,  2.44it/s]

Training steps: 300 Loss: 0.9635429382324219


Training:  35%|███▌      | 401/1142 [02:50<05:36,  2.20it/s]

Training steps: 400 Loss: 1.2407811880111694


Training:  44%|████▍     | 501/1142 [03:32<04:29,  2.38it/s]

Training steps: 500 Loss: 1.0446439981460571


Training:  53%|█████▎    | 601/1142 [04:14<03:43,  2.42it/s]

Training steps: 600 Loss: 1.033254861831665


Training:  61%|██████▏   | 701/1142 [04:56<03:01,  2.43it/s]

Training steps: 700 Loss: 1.0001590251922607


Training:  70%|███████   | 801/1142 [05:39<02:35,  2.20it/s]

Training steps: 800 Loss: 1.0461140871047974


Training:  79%|███████▉  | 901/1142 [06:21<01:41,  2.38it/s]

Training steps: 900 Loss: 1.017965316772461


Training:  88%|████████▊ | 1001/1142 [07:04<00:58,  2.42it/s]

Training steps: 1000 Loss: 1.0618144273757935


Training:  96%|█████████▋| 1101/1142 [07:46<00:16,  2.43it/s]

Training steps: 1100 Loss: 1.1274855136871338


Training: 100%|██████████| 1142/1142 [08:03<00:00,  2.36it/s]

TRAIN ACC : 0.7773792574745373, TRAIN LOSS : 1.0585840678987486



Training:   1%|          | 2/286 [00:00<00:42,  6.68it/s]

Validation steps: 0 Loss: 0.6342999339103699


Training:  36%|███▌      | 102/286 [00:14<00:25,  7.17it/s]

Validation steps: 100 Loss: 0.8880009651184082


Training:  71%|███████   | 202/286 [00:28<00:11,  7.13it/s]

Validation steps: 200 Loss: 0.6567099690437317


Training: 100%|██████████| 286/286 [00:39<00:00,  7.18it/s]

              precision    recall  f1-score   support

        IT과학       0.84      0.85      0.84       965
          경제       0.87      0.80      0.84      1244
          사회       0.75      0.82      0.78      1472
        생활문화       0.91      0.89      0.90      1187
          세계       0.92      0.94      0.93      1525
         스포츠       0.97      0.97      0.97      1386
          정치       0.94      0.90      0.92      1351

    accuracy                           0.88      9130
   macro avg       0.89      0.88      0.88      9130
weighted avg       0.89      0.88      0.89      9130

[0.84766839 0.80466238 0.81793478 0.88542544 0.94229508 0.97474747
 0.9015544 ]
VALID ACC : 0.8849945235487404, VALID LOSS : 0.8016025207259438
{'epoch': 10, 'train_loss': 1.0585840678987486, 'train_acc': 0.7773792574745373, 'valid_acc': 0.8849945235487404, 'val_loss': 0.8016025207259438, 'learning_rate': 1e-05}
Start Training: Epoch 11



Training:   0%|          | 1/1142 [00:00<08:07,  2.34it/s]

Training steps: 0 Loss: 0.9735787510871887


Training:   9%|▉         | 101/1142 [00:42<07:19,  2.37it/s]

Training steps: 100 Loss: 1.0550789833068848


Training:  18%|█▊        | 201/1142 [01:25<06:29,  2.42it/s]

Training steps: 200 Loss: 1.1591458320617676


Training:  26%|██▋       | 301/1142 [02:07<05:46,  2.43it/s]

Training steps: 300 Loss: 0.9933837056159973


Training:  35%|███▌      | 401/1142 [02:49<05:35,  2.21it/s]

Training steps: 400 Loss: 0.9672327041625977


Training:  44%|████▍     | 501/1142 [03:32<04:29,  2.38it/s]

Training steps: 500 Loss: 1.0682772397994995


Training:  53%|█████▎    | 601/1142 [04:14<03:43,  2.42it/s]

Training steps: 600 Loss: 0.9222873449325562


Training:  61%|██████▏   | 701/1142 [04:56<03:01,  2.43it/s]

Training steps: 700 Loss: 1.0345361232757568


Training:  70%|███████   | 801/1142 [05:39<02:35,  2.20it/s]

Training steps: 800 Loss: 1.0843186378479004


Training:  79%|███████▉  | 901/1142 [06:21<01:41,  2.38it/s]

Training steps: 900 Loss: 1.1723682880401611


Training:  88%|████████▊ | 1001/1142 [07:03<00:58,  2.42it/s]

Training steps: 1000 Loss: 1.1584608554840088


Training:  96%|█████████▋| 1101/1142 [07:46<00:16,  2.42it/s]

Training steps: 1100 Loss: 1.0525633096694946


Training: 100%|██████████| 1142/1142 [08:03<00:00,  2.36it/s]

TRAIN ACC : 0.779405322527653, TRAIN LOSS : 1.0563630120349223



Training:   1%|          | 2/286 [00:00<00:42,  6.75it/s]

Validation steps: 0 Loss: 0.612227737903595


Training:  36%|███▌      | 102/286 [00:14<00:25,  7.12it/s]

Validation steps: 100 Loss: 0.9698807001113892


Training:  71%|███████   | 202/286 [00:28<00:11,  7.17it/s]

Validation steps: 200 Loss: 0.646316647529602


Training: 100%|██████████| 286/286 [00:39<00:00,  7.17it/s]

              precision    recall  f1-score   support

        IT과학       0.84      0.83      0.83       965
          경제       0.84      0.82      0.83      1244
          사회       0.79      0.79      0.79      1472
        생활문화       0.91      0.90      0.90      1187
          세계       0.92      0.94      0.93      1525
         스포츠       0.97      0.98      0.97      1386
          정치       0.91      0.93      0.92      1351

    accuracy                           0.89      9130
   macro avg       0.88      0.88      0.88      9130
weighted avg       0.89      0.89      0.89      9130

[0.82590674 0.81993569 0.78668478 0.89553496 0.94295082 0.97619048
 0.93042191]
VALID ACC : 0.8856516976998905, VALID LOSS : 0.8095339797176682
{'epoch': 11, 'train_loss': 1.0563630120349223, 'train_acc': 0.779405322527653, 'valid_acc': 0.8856516976998905, 'val_loss': 0.8095339797176682, 'learning_rate': 1e-05}
Start Training: Epoch 12



Training:   0%|          | 1/1142 [00:00<08:01,  2.37it/s]

Training steps: 0 Loss: 1.0743263959884644


Training:   9%|▉         | 101/1142 [00:42<07:18,  2.37it/s]

Training steps: 100 Loss: 1.0887305736541748


Training:  18%|█▊        | 201/1142 [01:25<06:30,  2.41it/s]

Training steps: 200 Loss: 1.0517115592956543


Training:  26%|██▋       | 301/1142 [02:07<05:47,  2.42it/s]

Training steps: 300 Loss: 1.031341552734375


Training:  35%|███▌      | 401/1142 [02:50<05:37,  2.20it/s]

Training steps: 400 Loss: 1.0202716588974


Training:  44%|████▍     | 501/1142 [03:32<04:30,  2.37it/s]

Training steps: 500 Loss: 1.2267471551895142


Training:  53%|█████▎    | 601/1142 [04:14<03:44,  2.40it/s]

Training steps: 600 Loss: 1.0020817518234253


Training:  61%|██████▏   | 701/1142 [04:57<03:01,  2.43it/s]

Training steps: 700 Loss: 0.8868597149848938


Training:  70%|███████   | 801/1142 [05:39<02:34,  2.20it/s]

Training steps: 800 Loss: 1.010035514831543


Training:  79%|███████▉  | 901/1142 [06:21<01:41,  2.37it/s]

Training steps: 900 Loss: 1.0644840002059937


Training:  88%|████████▊ | 1001/1142 [07:04<00:58,  2.42it/s]

Training steps: 1000 Loss: 1.109841227531433


Training:  96%|█████████▋| 1101/1142 [07:46<00:16,  2.43it/s]

Training steps: 1100 Loss: 0.9292000532150269


Training: 100%|██████████| 1142/1142 [08:04<00:00,  2.36it/s]

TRAIN ACC : 0.7830467637717665, TRAIN LOSS : 1.0506200006985205



Training:   1%|          | 2/286 [00:00<00:42,  6.69it/s]

Validation steps: 0 Loss: 0.6528319120407104


Training:  36%|███▌      | 102/286 [00:14<00:25,  7.24it/s]

Validation steps: 100 Loss: 0.8431469798088074


Training:  71%|███████   | 202/286 [00:28<00:11,  7.23it/s]

Validation steps: 200 Loss: 0.6555547118186951


Training: 100%|██████████| 286/286 [00:39<00:00,  7.21it/s]

              precision    recall  f1-score   support

        IT과학       0.84      0.83      0.83       965
          경제       0.85      0.81      0.83      1244
          사회       0.74      0.83      0.78      1472
        생활문화       0.93      0.87      0.90      1187
          세계       0.93      0.93      0.93      1525
         스포츠       0.97      0.98      0.97      1386
          정치       0.93      0.90      0.92      1351

    accuracy                           0.88      9130
   macro avg       0.88      0.88      0.88      9130
weighted avg       0.88      0.88      0.88      9130

[0.82590674 0.80948553 0.828125   0.873631   0.92918033 0.97546898
 0.89859363]
VALID ACC : 0.8809419496166484, VALID LOSS : 0.8047186882345827
{'epoch': 12, 'train_loss': 1.0506200006985205, 'train_acc': 0.7830467637717665, 'valid_acc': 0.8809419496166484, 'val_loss': 0.8047186882345827, 'learning_rate': 1e-05}
Start Training: Epoch 13



Training:   0%|          | 1/1142 [00:00<08:00,  2.38it/s]

Training steps: 0 Loss: 1.0480655431747437


Training:   9%|▉         | 101/1142 [00:42<07:18,  2.38it/s]

Training steps: 100 Loss: 1.0529614686965942


Training:  18%|█▊        | 201/1142 [01:25<06:29,  2.42it/s]

Training steps: 200 Loss: 0.952349841594696


Training:  26%|██▋       | 301/1142 [02:07<05:45,  2.43it/s]

Training steps: 300 Loss: 1.1328086853027344


Training:  35%|███▌      | 401/1142 [02:49<05:36,  2.20it/s]

Training steps: 400 Loss: 0.9423181414604187


Training:  44%|████▍     | 501/1142 [03:32<04:30,  2.37it/s]

Training steps: 500 Loss: 1.0040897130966187


Training:  53%|█████▎    | 601/1142 [04:14<03:43,  2.42it/s]

Training steps: 600 Loss: 1.1214295625686646


Training:  61%|██████▏   | 701/1142 [04:56<03:01,  2.43it/s]

Training steps: 700 Loss: 1.0683571100234985


Training:  70%|███████   | 801/1142 [05:39<02:34,  2.21it/s]

Training steps: 800 Loss: 1.0085363388061523


Training:  79%|███████▉  | 901/1142 [06:21<01:41,  2.37it/s]

Training steps: 900 Loss: 1.008617639541626


Training:  88%|████████▊ | 1001/1142 [07:03<00:58,  2.42it/s]

Training steps: 1000 Loss: 1.0619730949401855


Training:  96%|█████████▋| 1101/1142 [07:46<00:16,  2.43it/s]

Training steps: 1100 Loss: 1.0660834312438965


Training: 100%|██████████| 1142/1142 [08:03<00:00,  2.36it/s]

TRAIN ACC : 0.7851275873398313, TRAIN LOSS : 1.0463267532935703



Training:   1%|          | 2/286 [00:00<00:42,  6.66it/s]

Validation steps: 0 Loss: 0.6027292609214783


Training:  36%|███▌      | 102/286 [00:14<00:25,  7.19it/s]

Validation steps: 100 Loss: 0.9574429988861084


Training:  71%|███████   | 202/286 [00:28<00:11,  7.19it/s]

Validation steps: 200 Loss: 0.6336156129837036


Training: 100%|██████████| 286/286 [00:39<00:00,  7.20it/s]

              precision    recall  f1-score   support

        IT과학       0.81      0.88      0.84       965
          경제       0.84      0.81      0.83      1244
          사회       0.80      0.78      0.79      1472
        생활문화       0.91      0.89      0.90      1187
          세계       0.92      0.93      0.92      1525
         스포츠       0.96      0.98      0.97      1386
          정치       0.92      0.91      0.92      1351

    accuracy                           0.88      9130
   macro avg       0.88      0.88      0.88      9130
weighted avg       0.88      0.88      0.88      9130

[0.88290155 0.80948553 0.78192935 0.89216512 0.92721311 0.97979798
 0.90969652]
VALID ACC : 0.8838992332968236, VALID LOSS : 0.8133452153289235
{'epoch': 13, 'train_loss': 1.0463267532935703, 'train_acc': 0.7851275873398313, 'valid_acc': 0.8838992332968236, 'val_loss': 0.8133452153289235, 'learning_rate': 1e-05}
Start Training: Epoch 14



Training:   0%|          | 1/1142 [00:00<08:04,  2.36it/s]

Training steps: 0 Loss: 1.103049397468567


Training:   9%|▉         | 101/1142 [00:42<07:18,  2.37it/s]

Training steps: 100 Loss: 1.0888687372207642


Training:  18%|█▊        | 201/1142 [01:25<06:29,  2.42it/s]

Training steps: 200 Loss: 1.044801115989685


Training:  26%|██▋       | 301/1142 [02:07<05:45,  2.43it/s]

Training steps: 300 Loss: 1.001362919807434


Training:  35%|███▌      | 401/1142 [02:49<05:35,  2.21it/s]

Training steps: 400 Loss: 1.0763064622879028


Training:  44%|████▍     | 501/1142 [03:32<04:29,  2.38it/s]

Training steps: 500 Loss: 1.0651822090148926


Training:  53%|█████▎    | 601/1142 [04:14<03:42,  2.43it/s]

Training steps: 600 Loss: 1.0574277639389038


Training:  61%|██████▏   | 701/1142 [04:56<03:00,  2.44it/s]

Training steps: 700 Loss: 1.0016722679138184


Training:  70%|███████   | 801/1142 [05:38<02:34,  2.20it/s]

Training steps: 800 Loss: 1.011946439743042


Training:  79%|███████▉  | 901/1142 [06:20<01:41,  2.38it/s]

Training steps: 900 Loss: 1.0148452520370483


Training:  88%|████████▊ | 1001/1142 [07:03<00:58,  2.43it/s]

Training steps: 1000 Loss: 1.0333516597747803


Training:  96%|█████████▋| 1101/1142 [07:45<00:16,  2.43it/s]

Training steps: 1100 Loss: 1.1261786222457886


Training: 100%|██████████| 1142/1142 [08:02<00:00,  2.36it/s]

TRAIN ACC : 0.7867703427883036, TRAIN LOSS : 1.0434445638059526



Training:   1%|          | 2/286 [00:00<00:42,  6.65it/s]

Validation steps: 0 Loss: 0.6481903791427612


Training:  36%|███▌      | 102/286 [00:14<00:25,  7.26it/s]

Validation steps: 100 Loss: 0.9740315079689026


Training:  71%|███████   | 202/286 [00:27<00:11,  7.30it/s]

Validation steps: 200 Loss: 0.6577467322349548


Training: 100%|██████████| 286/286 [00:39<00:00,  7.29it/s]

              precision    recall  f1-score   support

        IT과학       0.81      0.87      0.84       965
          경제       0.82      0.84      0.83      1244
          사회       0.81      0.77      0.79      1472
        생활문화       0.92      0.89      0.90      1187
          세계       0.93      0.93      0.93      1525
         스포츠       0.97      0.97      0.97      1386
          정치       0.91      0.93      0.92      1351

    accuracy                           0.89      9130
   macro avg       0.88      0.88      0.88      9130
weighted avg       0.89      0.89      0.88      9130

[0.86632124 0.83601286 0.76970109 0.88542544 0.92590164 0.97402597
 0.93190229]
VALID ACC : 0.8851040525739321, VALID LOSS : 0.8223328354892198
{'epoch': 14, 'train_loss': 1.0434445638059526, 'train_acc': 0.7867703427883036, 'valid_acc': 0.8851040525739321, 'val_loss': 0.8223328354892198, 'learning_rate': 1e-05}
EarlyStopping counter: 5 out of 5
************************************************** auc_




In [11]:
torch.cuda.empty_cache()

## Inference

In [12]:
def inference_main():
    args = parse_args()
    args.model_name = "temp"
    preprocess = Preprocess(args)
    preprocess.load_test_data()
    test_data = preprocess.test_data

    print(f"size of test data : {len(test_data)}")
    torch.cuda.empty_cache()
    # del model
    inference(args, test_data)

inference_main()

size of test data : 9131
Loading Model from: /content/drive/MyDrive/KLUE_TC/models/temp_1.pt


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.decoder.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'cla

Loading Model from: /content/drive/MyDrive/KLUE_TC/models/temp_1.pt ...Finished.


Inferencing: 100%|██████████| 286/286 [00:38<00:00,  7.34it/s]


writing prediction : /content/drive/MyDrive/KLUE_TC/output/cnn_activation/output_1.csv
Loading Model from: /content/drive/MyDrive/KLUE_TC/models/temp_2.pt


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.decoder.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'cla

Loading Model from: /content/drive/MyDrive/KLUE_TC/models/temp_2.pt ...Finished.


Inferencing: 100%|██████████| 286/286 [00:39<00:00,  7.28it/s]


writing prediction : /content/drive/MyDrive/KLUE_TC/output/cnn_activation/output_2.csv
Loading Model from: /content/drive/MyDrive/KLUE_TC/models/temp_3.pt


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.decoder.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'cla

Loading Model from: /content/drive/MyDrive/KLUE_TC/models/temp_3.pt ...Finished.


Inferencing: 100%|██████████| 286/286 [00:39<00:00,  7.26it/s]


writing prediction : /content/drive/MyDrive/KLUE_TC/output/cnn_activation/output_3.csv
Loading Model from: /content/drive/MyDrive/KLUE_TC/models/temp_4.pt


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.decoder.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'cla

Loading Model from: /content/drive/MyDrive/KLUE_TC/models/temp_4.pt ...Finished.


Inferencing: 100%|██████████| 286/286 [00:39<00:00,  7.22it/s]


writing prediction : /content/drive/MyDrive/KLUE_TC/output/cnn_activation/output_4.csv
Loading Model from: /content/drive/MyDrive/KLUE_TC/models/temp_5.pt


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.decoder.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weight', 'cla

Loading Model from: /content/drive/MyDrive/KLUE_TC/models/temp_5.pt ...Finished.


Inferencing: 100%|██████████| 286/286 [00:39<00:00,  7.27it/s]


writing prediction : /content/drive/MyDrive/KLUE_TC/output/cnn_activation/output_5.csv
writing prediction : /content/drive/MyDrive/KLUE_TC/output/cnn_activation/output_softvote.csv
