In [1]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [2]:
# 释放缓存
#!apt install psmisc
#!sudo fuser /dev/nvidia*
#!kill -9 pid
!nvidia-smi 

Thu Jan 27 03:21:52 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 495.46       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   40C    P8     9W /  70W |      0MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [3]:
# set path
import os
os.chdir("/content/gdrive/My Drive/nlp_program")
print(os.path.abspath('.'))

/content/gdrive/My Drive/nlp_program


In [4]:
# install packages
!pip install folium
!pip3 install torch torchvision
!pip install transformers

Collecting transformers
  Downloading transformers-4.15.0-py3-none-any.whl (3.4 MB)
[K     |████████████████████████████████| 3.4 MB 1.8 MB/s 
[?25hCollecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 60.2 MB/s 
Collecting tokenizers<0.11,>=0.10.1
  Downloading tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3 MB)
[K     |████████████████████████████████| 3.3 MB 47.9 MB/s 
Collecting sacremoses
  Downloading sacremoses-0.0.47-py2.py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 66.6 MB/s 
Collecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.4.0-py3-none-any.whl (67 kB)
[K     |████████████████████████████████| 67 kB 6.7 MB/s 
Installing collected packages: pyyaml, tokenizers, sacremoses, huggingface-hub, transformers
  A

### Roberta

In [5]:
import numpy as np
import random
import pandas as pd
import json, time 
from tqdm import tqdm 
from sklearn.metrics import f1_score, classification_report
import torch 
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from transformers import RobertaTokenizer, RobertaModel, AdamW, get_linear_schedule_with_warmup
import warnings
warnings.filterwarnings('ignore')

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
#bert_path = 'bert_model/' #该文件夹下存放三个文件（'vocab.txt', 'pytorch_model.bin', 'config.json'）
# 初始化tokenizer
tokenizer = RobertaTokenizer.from_pretrained('roberta-base', do_lower_case = False)

Downloading:   0%|          | 0.00/878k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/446k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.29M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/481 [00:00<?, ?B/s]

In [6]:
def set_seed(seed):
    random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
set_seed(123)

小样本数据处理 data_helper

In [None]:
"""
data process
"""

import os
import json
import random
import copy
from collections import Counter
from itertools import chain
from typing import Dict, Tuple, Optional, List, Union
from transformers import RobertaTokenizer
import gensim
import numpy as np
import pandas as pd
import torch
from torch.utils.data import TensorDataset, DataLoader
# 初始化tokenizer
tokenizer = RobertaTokenizer.from_pretrained('roberta-base', do_lower_case = False)

class InductionData(object):
    def __init__(self, sequence_length = 256, num_classes = 5, num_support = 10,
                 num_queries = 50, num_tasks = 1000, num_eval_tasks = 100):
        """
        init method
        :param num_classes: number of support class
        :param num_support: number of support sample per class
        :param num_queries: number of query sample per class
        :param num_tasks: number of pre-sampling tasks, this will speeding up train
        :param num_eval_tasks: number of pre-sampling tasks in eval stage
        """
        self.__sequence_length = sequence_length
        self.__num_classes = num_classes
        self.__num_support = num_support
        self.__num_queries = num_queries
        self.__num_tasks = num_tasks
        self.__num_eval_tasks = num_eval_tasks


    def load_data(data_path):
        """
        read train_set/eval_set/test_set
        :param data_path:
        :return: {file_name: {label: dataframe, ...}, ...}
        """
        category_files = os.listdir(data_path)
        categories_data = {}

        for category_file in category_files:
            file_path = os.path.join(data_path, category_file)
            sentiment_data = {}
            with open(file_path, "r", encoding="utf8") as fr:
                for line in fr.readlines():
                    temp = list(line.strip().split("\t"))[-1]
                    cls = temp.split(',')[0]
                    sent = ' '.join([cls, temp[len(cls)+2:-1]])
                    sent = sent.strip()
                    label = int(temp[-1])
                    
                    # 随机拆分成两组标签
                    if label % 2 == 0:
                        label1 = label // 2
                    elif label == 1:
                        label1 = random.randint(0,1)
                    else:
                        label1 = random.randint(1,2)
                    label2 = label - label1

                    encoded_dict = tokenizer.encode_plus(
                                text = sent,          # 输入文本
                                add_special_tokens = True,   # 添加'[CLS]'和'[SEP]'
                                max_length = self.__sequence_length,       # 填充/截断长度
                                padding = 'max_length',
                                truncation = True
                            )
                    input_ids = encoded_dict['input_ids']
                    input_masks = encoded_dict['attention_mask']

                    content = {}
                    if sentiment_data.get(label, None):
                        sentiment_data[label]['sent'].append(sent)
                        sentiment_data[label]['ids'].append(input_ids)
                        sentiment_data[label]['masks'].append(input_masks)
                        sentiment_data[label]['label1'].append(label1)
                        sentiment_data[label]['label2'].append(label2)
                    else:
                        sentiment_data[label] = {}
                        sentiment_data[label]['sent'] = [sent]
                        sentiment_data[label]['ids'] = [input_ids]
                        sentiment_data[label]['masks'] = [input_masks]
                        sentiment_data[label]['label1'] = [label1]
                        sentiment_data[label]['label2'] = [label2]
            

            print("task name: ", category_file)
            for i in range(5):
                # 每类的样本整合
                sentiment_data[i] = pd.DataFrame(sentiment_data[i])
                print(i, "pos samples length: ", sentiment_data[i].shape[0])
            categories_data[category_file] = sentiment_data
        return categories_data

    def choice_support_query(self, task_data):
        """
        randomly selecting support set, query set form a task.
        :param task_data: all data for a task
        """
        support_ids = []  # [num_classes, num_support, ]
        support_masks = []
        support_label1 = []
        suuport_label2 = []

        query_ids = []  # [num_classes * num_queries, ]
        query_masks = []
        labels = []

        # 每类不放回抽样
        for i in range(5):
            samples_i = task_data[i]
            all_idx = list(range(len(samples_i)))
            s_idx = np.random.choice(np.array(all_idx), self.__num_support, replace=False)
            support_i = samples_i.loc[s_idx, :]

            
            [all_idx.remove(k) for k in s_idx.tolist()]
            query_idx = np.random.choice(np.array(all_idx), self.__num_queries, replace=False)
            query_i = samples_i.loc[query_idx, :]

            support_ids.append(support_i.ids.tolist())
            support_masks.append(support_i.masks.tolist())
            support_label1.append(support_i.label1.tolist())
            support_label2.append(support_i.label2.tolist())

            query_ids += query_i.ids.tolist()
            query_masks += query_i.masks.tolist()
            labels += [i*len(query_i)]
        support_set = TensorDataset(torch.LongTensor(support_ids),
                        torch.LongTensor(support_masks),
                        torch.LongTensor(support_label1),
                        torch.LongTensor(support_labels2))
        query_set = TensorDataset(torch.LongTensor(query_ids),
                        torch.LongTensor(query_masks))
        labels = TensorDataset(torch.LongTensor(labels))

        return support_set, query_set, labels

    def samples(self, data, f_name, is_training):
        """
        c_class sample from raw data
        """
        tasks = []
        if is_training:
            num_tasks = self.__num_tasks
        else:
            num_tasks = self.__num_eval_tasks
        for i in range(num_tasks):
            # use train_set to construct train sample
            # if eval: k=1, if test: k=2
            support_category = category_list[f_name]
            support_set, query_set, labels = self.choice_support_query(data[support_category])
            tasks.append(dict(support=support_set, queries=query_set, labels=labels))
        return tasks

    def next_batch(self, data, f_name, is_training):
        """
        train a task at every turn
        """

        tasks = self.samples(data, f_name, is_training)

        for task in tasks:
            yield task


In [None]:
data_obj = InductionData()
data = data_obj.load_data('./dontpatronizeme_pcl/')
#train:
for train_batch in data_obj.next_batch(data, f_name = 'train_set.pcl.csv', is_training = True):

#eval:
for eval_batch in data_obj.next_batch(data, f_name = 'val_set.pcl.csv', is_training = False):

#test:
for test_batch in data_obj.next_batch(data, f_name = 'test_set.pcl.csv', is_training = False):

In [9]:
# Model with extra layers on top of Roberta
class Roberta_Model(nn.Module):
  def __init__(self, bert_path, classes = 5):
    super(Roberta_Model, self).__init__()
    self.roberta = RobertaModel.from_pretrained(bert_path)
    self.c1 = nn.Linear(768, 768)
    self.c2 = nn.Linear(768, 768)
    self.l1 = nn.Linear(768, 3)
    self.l2 = nn.Linear(768, 3)
    self.fc = nn.Linear(3*2, classes)

  def forward(self, input_ids, attention_masks=None):
    outputs = self.roberta(input_ids, attention_masks)
    out_pool = outputs[1]
    # try: 拉开输入到分类器的embedding差距
    linear1 = self.c1(out_pool)
    linear2 = self.c2(out_pool)

    logit1 = self.l1(linear1)
    logit2 = self.l2(linear2)
    context_combine = torch.cat((logit1, logit2), dim=-1)
    # 五分类
    logits = self.fc(context_combine)
    return logit1, logit2, logits

In [10]:
# 从预训练模型实例化BertForSequenceClassification
def get_parameter_number(model):
    #  打印模型参数量级
    total_num = sum(p.numel() for p in model.roberta.parameters())
    trainable_num = sum(p.numel() for p in model.roberta.parameters() if p.requires_grad)
    return 'Total parameters: {}, Trainable parameters: {}'.format(total_num, trainable_num)

model = Roberta_Model('roberta-base').to(device)
print(get_parameter_number(model))

Downloading:   0%|          | 0.00/478M [00:00<?, ?B/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Total parameters: 124645632, Trainable parameters: 124645632


In [11]:
epochs = 100
# 定义AdamW优化器
optimizer = AdamW(model.parameters(), lr = 2e-5)
# 学习率调度器: 线性warmup一个epoch，再线性下降
scheduler = get_linear_schedule_with_warmup(optimizer,
                      num_warmup_steps=len(train_loader),
                      num_training_steps=epochs*len(train_loader))

# 定义评估指标F1(Macro F1 & Micro F1)
def F1_Score(preds, labels, average):
  """
  average = 'macro'/'micro'
  """
  pred_flat = np.argmax(preds, axis=1).flatten()
  labels_flat = labels.flatten()
  return f1_score(pred_flat, labels_flat, average=average)


几种损失函数

In [12]:
#=====================Focal Loss===============================#
from torch.autograd import Variable
class FocalLoss(nn.Module):
    """
        This criterion is a implemenation of Focal Loss, which is proposed in 
        Focal Loss for Dense Object Detection.

            Loss(x, class) = - \alpha (1-softmax(x)[class])^gamma \log(softmax(x)[class])

        The losses are averaged across observations for each minibatch.

        Args:
            alpha(1D Tensor, Variable) : the scalar factor for this criterion
            gamma(float, double) : gamma > 0; reduces the relative loss for well-classiﬁed examples (p > .5), 
                                   putting more focus on hard, misclassiﬁed examples
            size_average(bool): By default, the losses are averaged over observations for each minibatch.
                                However, if the field size_average is set to False, the losses are
                                instead summed for each minibatch.


    """
    def __init__(self, class_num, alpha=None, gamma=2, size_average=True):
        super(FocalLoss, self).__init__()
        if alpha is None:
            self.alpha = Variable(torch.ones(class_num, 1))
        else:
            if isinstance(alpha, Variable):
                self.alpha = alpha
            else:
                self.alpha = Variable(alpha)
        self.gamma = gamma
        self.class_num = class_num
        self.size_average = size_average

    def forward(self, inputs, targets):
        N = inputs.size(0)
        C = inputs.size(1)
        P = F.softmax(inputs)

        class_mask = inputs.data.new(N, C).fill_(0)
        class_mask = Variable(class_mask)
        ids = targets.view(-1, 1)
        class_mask.scatter_(1, ids.data, 1.)
        #print(class_mask)


        if inputs.is_cuda and not self.alpha.is_cuda:
            self.alpha = self.alpha.cuda()
        alpha = self.alpha[ids.data.view(-1)]

        probs = (P*class_mask).sum(1).view(-1,1)

        log_p = probs.log()
        #print('probs size= {}'.format(probs.size()))
        #print(probs)

        batch_loss = -alpha*(torch.pow((1-probs), self.gamma))*log_p 
        #print('-----bacth_loss------')
        #print(batch_loss)


        if self.size_average:
            loss = batch_loss.mean()
        else:
            loss = batch_loss.sum()
        return loss

In [13]:
#=====================Dice Loss===============================#
class BinaryDiceLoss(nn.Module):
  def __init__(self):
    super(BinaryDiceLoss, self).__init__()
	
  def forward(self, input, target):
    N = target.size(0)
    smooth = 1

    input_flat = input.view(N, -1)
    target_flat = target.view(N, -1)

    intersection = input_flat * target_flat

    dice_eff = (2*intersection.sum(1) + smooth) / (input_flat.sum(1) + target_flat.sum(1) + smooth)
    loss = 1 - dice_eff.sum() / N
    return loss

class DiceLoss(nn.Module):
  """Dice loss
  Args:
      predict: A tensor of shape [N, C, *]
      target: A tensor of same shape with predict
  Return:
      same as BinaryDiceLoss
  """
  def __init__(self):
    super(DiceLoss, self).__init__()

  def forward(self, predict, target):
    target = F.one_hot(target.long(), predict.shape[1])
    assert predict.shape == target.shape, 'predict & target shape do not match'
    dice = BinaryDiceLoss()
    total_loss = 0
    predict = F.softmax(predict, dim=1)

    for i in range(predict.shape[1]):
      dice_loss = dice(predict[:, i], target[:, i])
      total_loss += dice_loss

    return total_loss/predict.shape[1]

早停策略

In [14]:
import numpy as np
import torch

class EarlyStopping:
    """Early stops the training if validation loss doesn't improve after a given patience."""
    def __init__(self, patience=7, verbose=False, delta=0, path='checkpoint.pt', trace_func=print):
        """
        Args:
            patience (int): How long to wait after last time validation loss improved.
                            Default: 7
            verbose (bool): If True, prints a message for each validation loss improvement. 
                            Default: False
            delta (float): Minimum change in the monitored quantity to qualify as an improvement.
                            Default: 0
            path (str): Path for the checkpoint to be saved to.
                            Default: 'checkpoint.pt'
            trace_func (function): trace print function.
                            Default: print            
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.Inf
        self.delta = delta
        self.path = path
        self.trace_func = trace_func
    def __call__(self, val_loss, model):

        score = -val_loss

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            self.counter += 1
            self.trace_func(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        '''Saves model when validation loss decrease.'''
        if self.verbose:
            self.trace_func(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(), self.path)
        self.val_loss_min = val_loss

In [15]:
# 定义验证集评估函数
def evaluate(model, data_loader, device):
  # 设置模型为评估模式 (主要针对BN层 & DropOut层)
  model.eval()
  #criterion = nn.CrossEntropyLoss()
  criterion1 = FocalLoss(3)
  criterion2 = FocalLoss(5)
  criterion3 = DiceLoss()
  # Tracking variables
  total_f1_macro = 0
  total_f1_micro = 0
  total_loss = 0
  with torch.no_grad():
    for idx, (ids, masks, labels, labels_1, labels_2) in enumerate(data_loader):
      # batch inputs加载到gpu中
      ids = ids.to(device)
      masks = masks.to(device)
      labels = labels.to(device)
      labels_1 = labels_1.to(device)
      labels_2 = labels_2.to(device)

      logits_1, logits_2, logits = model(ids, masks)
      loss = 0.4*criterion2(logits, labels) + 0.6*criterion1(logits_1, labels_1) + 0.6*criterion1(logits_2, labels_2)
      # 累加loss
      total_loss += loss.item()
      # 将y_pred和标签加载到cpu中计算
      y_pred = logits.detach().cpu().numpy()
      y = labels.to('cpu').numpy()
      # 计算F1准确率
      total_f1_macro += F1_Score(y_pred, y, 'macro')
      total_f1_micro += F1_Score(y_pred, y, 'micro')

  avg_f1_macro = total_f1_macro / len(data_loader)
  avg_f1_micro = total_f1_micro / len(data_loader)
  avg_loss = total_loss / len(data_loader)
  return avg_f1_macro, avg_f1_micro, avg_loss

# 定义预测函数
def predict(model, data_loader, device):
  model.eval()
  y_preds = []
  with torch.no_grad():
    for idx, (ids, masks) in enumerate(data_loader):
      _, _, logits = model(ids.to(device), masks.to(device))
      y_pred = torch.argmax(logits, dim=1).detach().cpu().numpy().tolist()
      y_preds.extend(y_pred)
  return y_preds

In [16]:
# 计时器设置
import time
import datetime
def format_time(elapsed):
  """
  Takes a time in seconds and return a string hh:mm:ss
  """
  elapsed_rounded = int(round(elapsed))
  return str(datetime.timedelta(seconds=elapsed_rounded))

In [17]:
def train(model, 
      train_loader, 
      val_loader,
      optimizer,
      scheduler,
      device,
      epochs):
  """
  Train the ROBERTa model
  """
  #criterion = nn.CrossEntropyLoss()
  criterion1 = FocalLoss(3)
  criterion2 = FocalLoss(5)
  criterion3 = DiceLoss()
  best_f1_macro = 0.0
  # set early stopping
  early_stopping = EarlyStopping(patience=20, verbose=True)
  # 整体训练时长
  total_t0 = time.time()

  for i in range(epochs):
    ##########################################
    #        Training        #
    ##########################################
    print("******** Running training epoch {:}/{:} ********".format(i+1, epochs))
    
    t0 = time.time()  
    model.train()    
    train_loss_sum = 0.0
    
    for step, (ids, masks, labels, labels_1, labels_2) in enumerate(train_loader):
      ids = ids.to(device)
      masks = masks.to(device)
      labels = labels.to(device)
      labels_1 = labels_1.to(device)
      labels_2 = labels_2.to(device)
      
      optimizer.zero_grad()         # 梯度初始化为零
      logits_1, logits_2, logits = model(ids, masks) # forward
      loss = 0.4*criterion2(logits, labels) + 0.6*criterion1(logits_1, labels_1) + 0.6*criterion1(logits_2, labels_2)
      train_loss_sum += loss.item()

      loss.backward()                   # backward
      nn.utils.clip_grad_norm_(model.parameters(), 1.0)  # 梯度剪裁
      optimizer.step()                   # 更新参数
      scheduler.step()                   # 更新学习率

      # 经过 len(train_loader)//5 次迭代, 打印进度条
      if (step+1) % (len(train_loader)//5) == 0:
        elapsed = format_time(time.time() - t0)
        print("   Epoch {:} | Step {:}/{:} | Loss {:.4f} | Time {:}".\
              format(i+1, step+1, len(train_loader), train_loss_sum/(step+1), elapsed))
    
    train_loss_avg = train_loss_sum / len(train_loader)
    epoch_time = format_time(time.time() - t0)
    print("")
    print("   Average training loss: {:.4f}".format(train_loss_avg))
    print("   Training epoch time: {:}".format(epoch_time))

    ##########################################
    #               Validation               #
    ##########################################
    print("")
    print("Running Validation...")
    t0 = time.time()
    model.eval()
    val_f1_macro, val_f1_micro, val_loss = evaluate(model, val_loader, device)
    val_time = format_time(time.time()-t0)

    if val_f1_macro > best_f1_macro:
      best_f1_macro = val_f1_macro
      torch.save(model.state_dict(), "best_roberta_model.pth")

    print("   F1_macro: {:.2f}".format(val_f1_macro))
    print("   F1_micro: {:.2f}".format(val_f1_micro))
    print("   Validation Loss: {:.4f}".format(val_loss))
    print("   Validation time: {:}".format(val_time))

    # early_stopping needs the validation loss to check if it has decresed, 
    # and if it has, it will make a checkpoint of the current model
    early_stopping(val_loss, model)
    
    if early_stopping.early_stop:
        print("Early stopping")
        break
  
  print("")
  print("Training completed!")
  print("Total training time: {:} (h:mm:ss)".format(format_time(time.time()-total_t0)))

In [18]:
train(model, train_loader, val_loader, optimizer, scheduler, device, epochs)

******** Running training epoch 1/100 ********
   Epoch 1 | Step 69/349 | Loss 1.0290 | Time 0:01:12
   Epoch 1 | Step 138/349 | Loss 0.8408 | Time 0:02:25
   Epoch 1 | Step 207/349 | Loss 0.7395 | Time 0:03:39
   Epoch 1 | Step 276/349 | Loss 0.6658 | Time 0:04:53
   Epoch 1 | Step 345/349 | Loss 0.6151 | Time 0:06:07

   Average training loss: 0.6127
   Training epoch time: 0:06:11

Running Validation...
   F1_macro: 0.38
   F1_micro: 0.74
   Validation Loss: 0.3912
   Validation time: 0:00:16
Validation loss decreased (inf --> 0.391211).  Saving model ...
******** Running training epoch 2/100 ********
   Epoch 2 | Step 69/349 | Loss 0.4384 | Time 0:01:14
   Epoch 2 | Step 138/349 | Loss 0.4151 | Time 0:02:28
   Epoch 2 | Step 207/349 | Loss 0.3933 | Time 0:03:42
   Epoch 2 | Step 276/349 | Loss 0.3836 | Time 0:04:56
   Epoch 2 | Step 345/349 | Loss 0.3801 | Time 0:06:10

   Average training loss: 0.3804
   Training epoch time: 0:06:14

Running Validation...
   F1_macro: 0.35
   F1_m

In [19]:
# 加载最优模型进行测试
model.load_state_dict(torch.load("best_roberta_model.pth"))
test_pred = predict(model, test_loader, device)
test_labels = test_df.labels

test_f1_macro = f1_score(test_pred, test_labels, average='macro')
test_f1_micro = f1_score(test_pred, test_labels, average='micro')
print("test_F1_macro: {:.2f}".format(test_f1_macro))
print("test_F1_micro: {:.2f}".format(test_f1_micro))

test_F1_macro: 0.42
test_F1_micro: 0.79
