<a href="https://colab.research.google.com/github/AnhVietPham/Text-Mining/blob/main/VihealthBert_NER.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive; the default data/model paths configured
# later in this notebook live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
!pip install torchcrf

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
!pip install pytorch-crf==0.7.2

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
!pip install seqeval==0.0.12

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
!pip install sentencepiece

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
!pip install tqdm

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import torch.nn as nn
from torchcrf import CRF
from transformers.models.roberta.modeling_roberta import RobertaPreTrainedModel, RobertaModel
import copy
import json
import logging
import os
import torch
from torch.utils.data import TensorDataset
import numpy as np
import argparse
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
from torch.utils.tensorboard import SummaryWriter
from tqdm.auto import tqdm, trange
from transformers import AdamW, get_linear_schedule_with_warmup
import random
from seqeval.metrics import f1_score, precision_score, recall_score, classification_report

from transformers import (
    AutoTokenizer,
    RobertaConfig
)

In [None]:
class SlotClassifier(nn.Module):
    """Classification head: dropout followed by a single linear projection.

    Args:
        input_dim: Size of the last dimension of the incoming features.
        num_slot_labels: Number of logits produced for every position.
        dropout_rate: Probability passed to ``nn.Dropout`` (0.0 disables it).
    """

    def __init__(self, input_dim, num_slot_labels, dropout_rate=0.0):
        super().__init__()
        self.num_slot_labels = num_slot_labels
        self.dropout = nn.Dropout(p=dropout_rate)
        self.linear = nn.Linear(in_features=input_dim, out_features=num_slot_labels)

    def forward(self, x):
        """Apply dropout, then project the last dimension to ``num_slot_labels``."""
        regularised = self.dropout(x)
        logits = self.linear(regularised)
        return logits

In [None]:
class phoBERT(RobertaPreTrainedModel):
    """RoBERTa encoder with a per-token slot classifier and an optional CRF layer.

    Args:
        config: RoBERTa configuration used to build the encoder.
        args: Run options; this class reads ``dropout_rate``, ``use_crf`` and
            ``ignore_index`` from it.
        slot_label_lst: List of slot label names; its length fixes the number
            of output classes.
    """

    def __init__(self, config, args, slot_label_lst):
        super(phoBERT, self).__init__(config)
        self.args = args
        self.num_slot_labels = len(slot_label_lst)
        # The encoder is only constructed from `config` here.
        self.roberta = RobertaModel(config)

        self.slot_classifier = SlotClassifier(
            config.hidden_size,
            self.num_slot_labels,
            args.dropout_rate,
        )

        if args.use_crf:
            self.crf = CRF(num_tags=self.num_slot_labels, batch_first=True)

    def forward(self, input_ids, attention_mask, token_type_ids=None, slot_labels_ids=None):
        """Encode the batch and score every position against the slot labels.

        Args:
            input_ids: Token ids fed to the encoder.
            attention_mask: Mask forwarded to the encoder; also selects the
                positions that contribute to the loss.
            token_type_ids: Optional segment ids forwarded to the encoder.
            slot_labels_ids: Optional gold label ids. When omitted no loss is
                computed and the first element of the result is the int ``0``.

        Returns:
            Tuple ``(total_loss, slot_logits, *extra_encoder_outputs)`` where the
            extras are whatever the encoder returned after its first two outputs.
        """
        encoder_outputs = self.roberta(
            input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids
        )
        sequence_output = encoder_outputs[0]  # per-token hidden states

        slot_logits = self.slot_classifier(sequence_output)

        total_loss = 0

        if slot_labels_ids is not None:
            if self.args.use_crf:
                # The CRF layer returns a log-likelihood; negate it to get a loss.
                slot_loss = self.crf(slot_logits, slot_labels_ids, mask=attention_mask.byte(), reduction="mean")
                slot_loss = -1 * slot_loss
            else:
                slot_loss_fct = nn.CrossEntropyLoss(ignore_index=self.args.ignore_index)
                if attention_mask is not None:
                    # Only positions with attention_mask == 1 enter the loss.
                    active_loss = attention_mask.view(-1) == 1
                    active_logits = slot_logits.view(-1, self.num_slot_labels)[active_loss]
                    active_labels = slot_labels_ids.view(-1)[active_loss]
                    slot_loss = slot_loss_fct(active_logits, active_labels)
                else:
                    slot_loss = slot_loss_fct(slot_logits.view(-1, self.num_slot_labels), slot_labels_ids.view(-1))
            total_loss += slot_loss

        # Keep any additional encoder outputs (e.g. hidden states / attentions) after the logits.
        return (total_loss, slot_logits) + encoder_outputs[2:]

In [None]:
class ViHnBERT(RobertaPreTrainedModel):
    """RoBERTa encoder with a per-token slot classifier and an optional CRF layer.

    Args:
        config: RoBERTa configuration used to build the encoder.
        args: Run options; this class reads ``dropout_rate``, ``use_crf`` and
            ``ignore_index`` from it.
        slot_label_lst: List of slot label names; its length fixes the number
            of output classes.
    """

    def __init__(self, config, args, slot_label_lst):
        super(ViHnBERT, self).__init__(config)
        self.args = args
        self.num_slot_labels = len(slot_label_lst)
        # The encoder is only constructed from `config` here.
        self.roberta = RobertaModel(config)

        self.slot_classifier = SlotClassifier(
            config.hidden_size,
            self.num_slot_labels,
            args.dropout_rate,
        )

        if args.use_crf:
            self.crf = CRF(num_tags=self.num_slot_labels, batch_first=True)

    def forward(self, input_ids, attention_mask, token_type_ids=None, slot_labels_ids=None):
        """Encode the batch and score every position against the slot labels.

        Args:
            input_ids: Token ids fed to the encoder.
            attention_mask: Mask forwarded to the encoder; also selects the
                positions that contribute to the loss.
            token_type_ids: Optional segment ids forwarded to the encoder.
            slot_labels_ids: Optional gold label ids. When omitted no loss is
                computed and the first element of the result is the int ``0``.

        Returns:
            Tuple ``(total_loss, slot_logits, *extra_encoder_outputs)`` where the
            extras are whatever the encoder returned after its first two outputs.
        """
        encoder_outputs = self.roberta(
            input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids
        )
        sequence_output = encoder_outputs[0]  # per-token hidden states

        slot_logits = self.slot_classifier(sequence_output)

        total_loss = 0

        if slot_labels_ids is not None:
            if self.args.use_crf:
                # The CRF layer returns a log-likelihood; negate it to get a loss.
                slot_loss = self.crf(slot_logits, slot_labels_ids, mask=attention_mask.byte(), reduction="mean")
                slot_loss = -1 * slot_loss
            else:
                slot_loss_fct = nn.CrossEntropyLoss(ignore_index=self.args.ignore_index)
                if attention_mask is not None:
                    # Only positions with attention_mask == 1 enter the loss.
                    active_loss = attention_mask.view(-1) == 1
                    active_logits = slot_logits.view(-1, self.num_slot_labels)[active_loss]
                    active_labels = slot_labels_ids.view(-1)[active_loss]
                    slot_loss = slot_loss_fct(active_logits, active_labels)
                else:
                    slot_loss = slot_loss_fct(slot_logits.view(-1, self.num_slot_labels), slot_labels_ids.view(-1))
            total_loss += slot_loss

        # Keep any additional encoder outputs (e.g. hidden states / attentions) after the logits.
        return (total_loss, slot_logits) + encoder_outputs[2:]

In [None]:
# Logger used by the data-loading helpers defined in this cell.
logger = logging.getLogger(__name__)


class InputExample(object):
    """One raw sentence together with its per-word slot labels.

    Args:
        guid: Unique id for the example.
        words: list. The words of the sequence.
        slot_labels: (Optional) list. One slot label per word.
    """

    def __init__(self, guid, words, slot_labels=None):
        self.guid, self.words, self.slot_labels = guid, words, slot_labels

    def __repr__(self):
        return str(self.to_json_string())

    def to_dict(self):
        """Return a deep copy of the instance attributes as a dictionary."""
        return copy.deepcopy(vars(self))

    def to_json_string(self):
        """Return the attributes as pretty-printed JSON terminated by a newline."""
        payload = json.dumps(self.to_dict(), indent=2, sort_keys=True)
        return payload + "\n"


class InputFeatures(object):
    """Model-ready encoding of one example (ids, mask, segment ids, label ids)."""

    def __init__(self, input_ids, attention_mask, token_type_ids, slot_labels_ids):
        self.input_ids, self.attention_mask = input_ids, attention_mask
        self.token_type_ids, self.slot_labels_ids = token_type_ids, slot_labels_ids

    def __repr__(self):
        return str(self.to_json_string())

    def to_dict(self):
        """Return a deep copy of the instance attributes as a dictionary."""
        return copy.deepcopy(vars(self))

    def to_json_string(self):
        """Return the attributes as pretty-printed JSON terminated by a newline."""
        payload = json.dumps(self.to_dict(), indent=2, sort_keys=True)
        return payload + "\n"


class JointProcessor(object):
    """Reads parallel ``seq.in`` / ``seq.out`` files and builds ``InputExample`` objects.

    Args:
        args: Run options; ``data_dir`` and ``token_level`` locate the data and
            the slot label list is loaded through ``get_slot_labels(args)``.
    """

    def __init__(self, args):
        self.args = args
        self.slot_labels = get_slot_labels(args)

        self.input_text_file = "seq.in"
        self.slot_labels_file = "seq.out"

    @classmethod
    def _read_file(cls, input_file, quotechar=None):
        """Return every line of ``input_file`` with surrounding whitespace stripped.

        ``quotechar`` is accepted for backward compatibility and is ignored.
        """
        with open(input_file, "r", encoding="utf-8") as f:
            return [line.strip() for line in f]

    def _create_examples(self, texts, slots, set_type):
        """Pair each text line with its label line and build the examples.

        Args:
            texts: Whitespace-separated sentences, one per entry.
            slots: Whitespace-separated slot labels, aligned with ``texts``.
            set_type: Prefix used for the example ids (e.g. ``"train"``).

        Returns:
            list of ``InputExample`` with labels already mapped to integer ids.
            Labels missing from the label list fall back to the id of ``"O"``.
        """
        # First occurrence wins, matching the semantics of ``list.index``.
        label_to_id = {}
        for idx, label in enumerate(self.slot_labels):
            label_to_id.setdefault(label, idx)

        examples = []
        for i, (text, slot) in enumerate(zip(texts, slots)):
            guid = "%s-%s" % (set_type, i)
            # Split both lines on arbitrary whitespace so doubled spaces or an
            # empty line cannot desynchronise words and labels.
            words = text.split()
            tags = slot.split()

            slot_labels = []
            for tag in tags:
                label_id = label_to_id.get(tag)
                if label_id is None:
                    # Unknown label: fall back to "O" (ValueError if "O" is absent).
                    label_id = self.slot_labels.index("O")
                slot_labels.append(label_id)

            if len(words) != len(slot_labels):
                # Best effort: report the misaligned line but keep the example.
                # Downstream code pairs words and labels with zip(), so only the
                # common prefix is actually used.
                logger.warning(
                    "Example %s has %d words but %d slot labels: words=%s slot_labels=%s",
                    guid,
                    len(words),
                    len(slot_labels),
                    words,
                    slot_labels,
                )
            examples.append(InputExample(guid=guid, words=words, slot_labels=slot_labels))
        return examples

    def get_examples(self, args, mode):
        """Load the examples of one data split.

        Args:
            args: Unused; the options stored on the instance are read instead.
            mode: train, dev, test
        """
        data_path = os.path.join(self.args.data_dir, self.args.token_level, mode)
        logger.info("LOOKING AT {}".format(data_path))
        return self._create_examples(
            texts=self._read_file(os.path.join(data_path, self.input_text_file)),
            slots=self._read_file(os.path.join(data_path, self.slot_labels_file)),
            set_type=mode,
        )
        
# Alias through which `load_and_cache_examples` instantiates the data processor.
processors = JointProcessor


def convert_examples_to_features(
    examples,
    max_seq_len,
    tokenizer,
    pad_token_label_id=-100,
    cls_token_segment_id=0,
    pad_token_segment_id=0,
    sequence_a_segment_id=0,
    mask_padding_with_zero=True,
):
    """Tokenize examples word by word and pad everything to ``max_seq_len``.

    Args:
        examples: Sequence of objects exposing ``guid``, ``words`` and ``slot_labels``.
        max_seq_len: Final length of every returned sequence, special tokens included.
        tokenizer: Object providing ``cls_token``, ``sep_token``, ``unk_token``,
            ``pad_token_id``, ``tokenize`` and ``convert_tokens_to_ids``.
        pad_token_label_id: Label id written for special tokens, padding and
            every word piece after the first one of a word.
        cls_token_segment_id: Segment id of the leading special token.
        pad_token_segment_id: Segment id of padded positions.
        sequence_a_segment_id: Segment id of the sentence tokens and the separator.
        mask_padding_with_zero: If true the mask is 1 on real tokens and 0 on
            padding; otherwise the two values are swapped.

    Returns:
        list of ``InputFeatures``, one per example.
    """
    # Read the special symbols from the tokenizer in use.
    cls_token = tokenizer.cls_token
    sep_token = tokenizer.sep_token
    unk_token = tokenizer.unk_token
    pad_token_id = tokenizer.pad_token_id

    # Mask values for real and padded positions respectively.
    real_flag, pad_flag = (1, 0) if mask_padding_with_zero else (0, 1)
    # Number of word pieces that fit once the two special tokens are reserved.
    piece_budget = max_seq_len - 2

    def _joined(values):
        return " ".join([str(v) for v in values])

    features = []
    for ex_index, example in enumerate(examples):
        if ex_index % 5000 == 0:
            logger.info("Writing example %d of %d" % (ex_index, len(examples)))

        # Word-level tokenization so labels can be aligned with word pieces.
        tokens, slot_labels_ids = [], []
        for word, slot_label in zip(example.words, example.slot_labels):
            pieces = tokenizer.tokenize(word)
            if not pieces:
                pieces = [unk_token]  # word the tokenizer could not encode
            tokens += pieces
            # First piece carries the real label, the remaining ones the pad label.
            slot_labels_ids.append(int(slot_label))
            slot_labels_ids += [pad_token_label_id] * (len(pieces) - 1)

        if len(tokens) > piece_budget:
            tokens = tokens[:piece_budget]
            slot_labels_ids = slot_labels_ids[:piece_budget]

        # Surround the sentence with the classification and separator tokens.
        tokens = [cls_token] + tokens + [sep_token]
        slot_labels_ids = [pad_token_label_id] + slot_labels_ids + [pad_token_label_id]
        token_type_ids = [cls_token_segment_id] + [sequence_a_segment_id] * (len(tokens) - 1)

        input_ids = tokenizer.convert_tokens_to_ids(tokens)
        attention_mask = [real_flag] * len(input_ids)

        # Right-pad every sequence up to max_seq_len.
        padding_length = max_seq_len - len(input_ids)
        input_ids = input_ids + [pad_token_id] * padding_length
        attention_mask = attention_mask + [pad_flag] * padding_length
        token_type_ids = token_type_ids + [pad_token_segment_id] * padding_length
        slot_labels_ids = slot_labels_ids + [pad_token_label_id] * padding_length

        n_ids, n_mask = len(input_ids), len(attention_mask)
        n_types, n_labels = len(token_type_ids), len(slot_labels_ids)
        assert n_ids == max_seq_len, "Error with input length {} vs {}".format(n_ids, max_seq_len)
        assert n_mask == max_seq_len, "Error with attention mask length {} vs {}".format(n_mask, max_seq_len)
        assert n_types == max_seq_len, "Error with token type length {} vs {}".format(n_types, max_seq_len)
        assert n_labels == max_seq_len, "Error with slot labels length {} vs {}".format(n_labels, max_seq_len)

        if ex_index < 5:
            # Dump the first few examples for a visual sanity check.
            logger.info("*** Example ***")
            logger.info("guid: %s", example.guid)
            logger.info("tokens: %s", _joined(tokens))
            logger.info("input_ids: %s", _joined(input_ids))
            logger.info("attention_mask: %s", _joined(attention_mask))
            logger.info("token_type_ids: %s", _joined(token_type_ids))
            logger.info("slot_labels: %s", _joined(slot_labels_ids))

        features.append(
            InputFeatures(
                input_ids=input_ids,
                attention_mask=attention_mask,
                token_type_ids=token_type_ids,
                slot_labels_ids=slot_labels_ids,
            )
        )

    return features

def load_and_cache_examples(args, tokenizer, mode):
    """Return the ``TensorDataset`` for one split, using an on-disk feature cache.

    Args:
        args: Run options; reads ``data_dir``, ``token_level``,
            ``model_name_or_path``, ``max_seq_len`` and ``ignore_index``.
        tokenizer: Tokenizer handed to ``convert_examples_to_features``.
        mode: One of ``"train"``, ``"dev"`` or ``"test"``.

    Returns:
        ``TensorDataset`` of (input_ids, attention_mask, token_type_ids,
        slot_labels_ids), all ``torch.long``.

    Raises:
        ValueError: If ``mode`` is not one of the three supported splits.
    """
    # Reject a bad split name before any file is touched.
    if mode not in ("train", "dev", "test"):
        raise ValueError("For mode, Only train, dev, test is available")

    # The cache name encodes split, token level, model name and sequence length.
    model_name = list(filter(None, args.model_name_or_path.split("/"))).pop()
    cached_features_file = os.path.join(
        args.data_dir,
        "cached_{}_{}_{}_{}".format(mode, args.token_level, model_name, args.max_seq_len),
    )

    if os.path.exists(cached_features_file):
        logger.info("Loading features from cached file %s", cached_features_file)
        # NOTE(review): torch.load unpickles arbitrary Python objects; only use
        # cache files from a trusted data_dir.
        features = torch.load(cached_features_file)
    else:
        logger.info("Creating features from dataset file at %s", args.data_dir)
        # The processor (and its label file) is only needed on a cache miss.
        processor = processors(args)
        examples = processor.get_examples(args, mode)

        # Use cross entropy ignore index as padding label id so that only real label ids contribute to the loss later
        pad_token_label_id = args.ignore_index
        features = convert_examples_to_features(
            examples, args.max_seq_len, tokenizer, pad_token_label_id=pad_token_label_id
        )
        logger.info("Saving features into cached file %s", cached_features_file)
        torch.save(features, cached_features_file)

    # Convert to tensors and build the dataset.
    all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
    all_attention_mask = torch.tensor([f.attention_mask for f in features], dtype=torch.long)
    all_token_type_ids = torch.tensor([f.token_type_ids for f in features], dtype=torch.long)
    all_slot_labels_ids = torch.tensor([f.slot_labels_ids for f in features], dtype=torch.long)

    dataset = TensorDataset(
        all_input_ids, all_attention_mask, all_token_type_ids, all_slot_labels_ids
    )
    return dataset

In [None]:
class EarlyStopping:
    """Stops training when the monitored metric has not improved for ``patience`` checks.

    Every improvement (or tie) saves the model and the run arguments to
    ``args.model_dir``.
    """

    def __init__(self, patience=7, verbose=False):
        """
        Args:
            patience (int): How many consecutive non-improving checks to tolerate.
                            Default: 7
            verbose (bool): If True, prints a message for each improvement.
                            Default: False
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        # `np.Inf` was removed in NumPy 2.0; `np.inf` works on every version.
        self.val_loss_min = np.inf

    def __call__(self, val_loss, model, args):
        """Record one validation result and update the stopping state.

        Args:
            val_loss: Value of the monitored metric (a loss or a score).
            model: Object exposing ``save_pretrained(directory)``.
            args: Run options; reads ``tuning_metric`` and ``model_dir``.
        """
        # Negate losses so that "larger score is better" holds for every metric.
        if args.tuning_metric == "loss":
            score = -val_loss
        else:
            score = val_loss
        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model, args)
        elif score < self.best_score:
            self.counter += 1
            print(f"EarlyStopping counter: {self.counter} out of {self.patience}")
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model, args)
            self.counter = 0

    def save_checkpoint(self, val_loss, model, args):
        """Saves model when validation loss decreases or accuracy/f1 increases."""
        if self.verbose:
            if args.tuning_metric == "loss":
                print(f"Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...")
            else:
                print(
                    f"{args.tuning_metric} increased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ..."
                )
        model.save_pretrained(args.model_dir)
        # Store the run arguments next to the weights.
        torch.save(args, os.path.join(args.model_dir, "training_args.bin"))
        self.val_loss_min = val_loss

In [None]:
# Logger used by the Trainer below; `logging.getLogger` returns the same
# object for a given name, so this matches any earlier logger of that name.
logger = logging.getLogger(__name__)


class Trainer(object):
    """Fine-tunes and evaluates the slot-tagging model chosen via ``MODEL_CLASSES``.

    Args:
        args: Run options (paths, batch sizes, optimizer settings, device, ...).
        train_dataset: Dataset used by ``train``.
        dev_dataset: Dataset used by ``evaluate("dev")``.
        test_dataset: Dataset used by ``evaluate("test")``.

    Each dataset item is indexed as (input_ids, attention_mask,
    token_type_ids, slot_labels_ids).
    """

    def __init__(self, args, train_dataset=None, dev_dataset=None, test_dataset=None):
        self.args = args
        self.train_dataset = train_dataset
        self.dev_dataset = dev_dataset
        self.test_dataset = test_dataset

        self.slot_label_lst = get_slot_labels(args)
        # Use cross entropy ignore index as padding label id so that only real label ids contribute to the loss later
        self.pad_token_label_id = args.ignore_index
        self.config_class, self.model_class, _ = MODEL_CLASSES[args.model_type]

        # The two loading paths differ only in which checkpoint location is used.
        checkpoint = args.pretrained_path if args.pretrained else args.model_name_or_path
        self.config = self.config_class.from_pretrained(checkpoint, finetuning_task=args.token_level)
        self.model = self.model_class.from_pretrained(
            checkpoint,
            config=self.config,
            args=args,
            slot_label_lst=self.slot_label_lst,
        )

        # GPU or CPU: only touch the CUDA API when a CUDA device actually exists,
        # otherwise a CPU-only runtime would crash here.
        if torch.cuda.is_available():
            torch.cuda.set_device(self.args.gpu_id)
            print('GPU ID :',self.args.gpu_id)
            print('Cuda device:',torch.cuda.current_device())
        self.device = args.device

        self.model.to(self.device)
        model_parameters =  sum(p.numel() for p in self.model.parameters() if p.requires_grad)
        print('#params:',model_parameters)

    def train(self):
        """Run the training loop.

        Returns:
            Tuple ``(global_step, mean_loss)`` where ``mean_loss`` is the summed
            training loss divided by the number of optimizer steps, or ``0.0``
            when no step was taken.
        """
        train_sampler = RandomSampler(self.train_dataset)
        train_dataloader = DataLoader(self.train_dataset, sampler=train_sampler, batch_size=self.args.train_batch_size)
        writer = SummaryWriter(log_dir=self.args.model_dir)
        try:
            return self._run_training(train_dataloader, writer)
        finally:
            # Flush and release the event file even if training raises.
            writer.close()

    def _run_training(self, train_dataloader, writer):
        """Optimization loop behind ``train``; logs scalars to ``writer``."""
        if self.args.max_steps > 0:
            t_total = self.args.max_steps
            self.args.num_train_epochs = (
                self.args.max_steps // (len(train_dataloader) // self.args.gradient_accumulation_steps) + 1
            )
        else:
            t_total = len(train_dataloader) // self.args.gradient_accumulation_steps * self.args.num_train_epochs

        # Scores of the untouched model, printed as a baseline.
        results = self.evaluate("dev")
        print(results)
        results = self.evaluate("test")
        print(results)
        # Prepare optimizer and schedule (linear warmup and decay)
        no_decay = ["bias", "LayerNorm.weight"]
        optimizer_grouped_parameters = [
            {
                "params": [p for n, p in self.model.named_parameters() if not any(nd in n for nd in no_decay)],
                "weight_decay": self.args.weight_decay,
            },
            {
                "params": [p for n, p in self.model.named_parameters() if any(nd in n for nd in no_decay)],
                "weight_decay": 0.0,
            },
        ]
        optimizer = AdamW(optimizer_grouped_parameters, lr=self.args.learning_rate, eps=self.args.adam_epsilon)
        scheduler = get_linear_schedule_with_warmup(
            optimizer, num_warmup_steps=self.args.warmup_steps, num_training_steps=t_total
        )

        # Train!
        logger.info("***** Running training *****")
        logger.info("  Num examples = %d", len(self.train_dataset))
        logger.info("  Num Epochs = %d", self.args.num_train_epochs)
        logger.info("  Total train batch size = %d", self.args.train_batch_size)
        logger.info("  Gradient Accumulation steps = %d", self.args.gradient_accumulation_steps)
        logger.info("  Total optimization steps = %d", t_total)
        logger.info("  Logging steps = %d", self.args.logging_steps)
        logger.info("  Save steps = %d", self.args.save_steps)

        global_step = 0
        tr_loss = 0.0
        self.model.zero_grad()

        train_iterator = trange(int(self.args.num_train_epochs), desc="Epoch")
        early_stopping = EarlyStopping(patience=self.args.early_stopping, verbose=True)

        for epoch in train_iterator:
            epoch_iterator = tqdm(train_dataloader, desc="Iteration", position=0, leave=True)
            print("\nEpoch", epoch)

            for step, batch in enumerate(epoch_iterator):
                self.model.train()
                batch = tuple(t.to(self.device) for t in batch)  # GPU or CPU

                inputs = {
                    "input_ids": batch[0],
                    "attention_mask": batch[1],
                    "slot_labels_ids": batch[3],
                }
                if self.args.model_type != "distilbert":
                    inputs["token_type_ids"] = batch[2]
                outputs = self.model(**inputs)
                loss = outputs[0]

                if self.args.gradient_accumulation_steps > 1:
                    loss = loss / self.args.gradient_accumulation_steps

                loss.backward()

                tr_loss += loss.item()
                if (step + 1) % self.args.gradient_accumulation_steps == 0:
                    torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.args.max_grad_norm)

                    optimizer.step()
                    scheduler.step()  # Update learning rate schedule
                    self.model.zero_grad()
                    global_step += 1

                    if self.args.logging_steps > 0 and global_step % self.args.logging_steps == 0:
                        print("\nTuning metrics:", self.args.tuning_metric)
                        results = self.evaluate("dev")
                        # Index by optimizer step: several evaluations can fall in
                        # one epoch and would otherwise share the same x value.
                        writer.add_scalar("Loss/validation", results["loss"], global_step)
                        writer.add_scalar("Slot F1/validation", results["slot_f1"], global_step)
                        early_stopping(results[self.args.tuning_metric], self.model, self.args)
                        if early_stopping.early_stop:
                            print("Early stopping")
                            break

                if 0 < self.args.max_steps < global_step:
                    epoch_iterator.close()
                    break

            if 0 < self.args.max_steps < global_step or early_stopping.early_stop:
                train_iterator.close()
                break
            if global_step > 0:
                writer.add_scalar("Loss/train", tr_loss / global_step, epoch)

        # No optimizer step happens when an epoch has fewer batches than
        # gradient_accumulation_steps; avoid dividing by zero in that case.
        mean_loss = tr_loss / global_step if global_step > 0 else 0.0
        return global_step, mean_loss

    def write_evaluation_result(self, out_file, results):
        """Write ``results`` as sorted `` key = value`` lines to ``model_dir/out_file``."""
        os.makedirs(self.args.model_dir, exist_ok=True)
        out_path = os.path.join(self.args.model_dir, out_file)
        with open(out_path, "w", encoding="utf-8") as w:
            w.write("***** Eval results *****\n")
            for key in sorted(results.keys()):
                to_write = " {key} = {value}".format(key=key, value=str(results[key]))
                w.write(to_write)
                w.write("\n")

    def evaluate(self, mode):
        """Evaluate on the dev or test split and write the metrics to disk.

        Args:
            mode: ``"dev"`` or ``"test"``.

        Returns:
            dict with ``"loss"`` plus the entries produced by ``compute_metrics``.

        Raises:
            Exception: If ``mode`` is neither ``"dev"`` nor ``"test"``.
            ValueError: If the selected dataset yields no batch.
        """
        if mode == "test":
            dataset = self.test_dataset
        elif mode == "dev":
            dataset = self.dev_dataset
        else:
            raise Exception("Only dev and test dataset available")

        eval_sampler = SequentialSampler(dataset)
        eval_dataloader = DataLoader(dataset, sampler=eval_sampler, batch_size=self.args.eval_batch_size)

        # Eval!
        logger.info("***** Running evaluation on %s dataset *****", mode)
        logger.info("  Num examples = %d", len(dataset))
        logger.info("  Batch size = %d", self.args.eval_batch_size)
        eval_loss = 0.0
        nb_eval_steps = 0

        # Per-batch arrays, concatenated once after the loop.
        slot_pred_chunks = []
        slot_label_chunks = []

        self.model.eval()

        for batch in tqdm(eval_dataloader, desc="Evaluating"):
            batch = tuple(t.to(self.device) for t in batch)
            with torch.no_grad():
                inputs = {
                    "input_ids": batch[0],
                    "attention_mask": batch[1],
                    "slot_labels_ids": batch[3],
                }
                if self.args.model_type != "distilbert":
                    inputs["token_type_ids"] = batch[2]
                outputs = self.model(**inputs)
                tmp_eval_loss, slot_logits = outputs[:2]

                eval_loss += tmp_eval_loss.mean().item()
            nb_eval_steps += 1

            if self.args.use_crf:
                # decode() in `torchcrf` returns list with best index directly
                batch_preds = np.array(self.model.crf.decode(slot_logits))
            else:
                # Reduce to label ids per batch instead of keeping every logit.
                batch_preds = np.argmax(slot_logits.detach().cpu().numpy(), axis=2)
            slot_pred_chunks.append(batch_preds)
            slot_label_chunks.append(inputs["slot_labels_ids"].detach().cpu().numpy())

        if nb_eval_steps == 0:
            raise ValueError("Cannot evaluate: the %s dataset is empty" % mode)

        eval_loss = eval_loss / nb_eval_steps
        results = {"loss": eval_loss}

        slot_preds = np.concatenate(slot_pred_chunks, axis=0)
        out_slot_labels_ids = np.concatenate(slot_label_chunks, axis=0)

        # Map ids back to label strings, skipping positions that carry the pad label.
        slot_label_map = {i: label for i, label in enumerate(self.slot_label_lst)}
        out_slot_label_list = [[] for _ in range(out_slot_labels_ids.shape[0])]
        slot_preds_list = [[] for _ in range(out_slot_labels_ids.shape[0])]

        for i in range(out_slot_labels_ids.shape[0]):
            for j in range(out_slot_labels_ids.shape[1]):
                if out_slot_labels_ids[i, j] != self.pad_token_label_id:
                    out_slot_label_list[i].append(slot_label_map[out_slot_labels_ids[i][j]])
                    slot_preds_list[i].append(slot_label_map[slot_preds[i][j]])

        total_result = compute_metrics(slot_preds_list, out_slot_label_list)
        results.update(total_result)

        logger.info("***** Eval results *****")
        for key in sorted(results.keys()):
            logger.info("  %s = %s", key, str(results[key]))
        if mode == "test":
            self.write_evaluation_result("eval_test_results.txt", results)
        elif mode == "dev":
            self.write_evaluation_result("eval_dev_results.txt", results)
        return results

    def save_model(self):
        """Save the model weights and the run arguments to ``model_dir`` (overwrites)."""
        os.makedirs(self.args.model_dir, exist_ok=True)
        # Unwrap a parallel wrapper (which exposes the real model as `.module`).
        model_to_save = self.model.module if hasattr(self.model, "module") else self.model
        model_to_save.save_pretrained(self.args.model_dir)

        # Save training arguments together with the trained model
        torch.save(self.args, os.path.join(self.args.model_dir, "training_args.bin"))
        logger.info("Saving model checkpoint to %s", self.args.model_dir)

    def load_model(self):
        """Reload the model from ``model_dir`` and move it to the configured device.

        Raises:
            Exception: If ``model_dir`` does not exist or loading fails; in the
                latter case the original error is chained as the cause.
        """
        # Check whether model exists
        if not os.path.exists(self.args.model_dir):
            raise Exception("Model doesn't exists! Train first!")

        try:
            self.model = self.model_class.from_pretrained(
                self.args.model_dir,
                config=self.config,
                args=self.args,
                slot_label_lst=self.slot_label_lst,
            )
            self.model.to(self.device)
            logger.info("***** Model Loaded *****")
        except Exception as exc:
            raise Exception("Some model files might be missing...") from exc

In [None]:
# model_type -> (config class, model class, tokenizer class).
# `Trainer.__init__` uses the first two entries, `load_tokenizer` the third.
MODEL_CLASSES = {
    "vihnbert": (RobertaConfig, ViHnBERT, AutoTokenizer),
    "phobert": (RobertaConfig, phoBERT, AutoTokenizer)
}

# model_type -> pretrained checkpoint identifier.
# NOTE(review): not referenced in the visible part of this notebook; presumably
# used to fill `args.model_name_or_path` -- confirm against the caller.
MODEL_PATH_MAP = {
    "vihnbert": "demdecuong/vihealthbert-base-word",
    "phobert": "vinai/phobert-base"
}

def get_slot_labels(args):
    """Read the slot label vocabulary, one label per line.

    Args:
        args: Run options; ``slot_label_file`` is joined onto ``data_dir``
            (an absolute ``slot_label_file`` therefore wins).

    Returns:
        list of label strings in file order with surrounding whitespace removed.
    """
    label_path = os.path.join(args.data_dir, args.slot_label_file)
    # Context manager so the file handle is closed deterministically.
    with open(label_path, "r", encoding="utf-8") as label_file:
        return [label.strip() for label in label_file]

def load_tokenizer(args):
    """Instantiate the tokenizer registered for ``args.model_type``.

    The tokenizer class is the third entry of the ``MODEL_CLASSES`` tuple and is
    loaded from ``args.model_name_or_path``.
    """
    registry_entry = MODEL_CLASSES[args.model_type]
    tokenizer_class = registry_entry[2]
    return tokenizer_class.from_pretrained(args.model_name_or_path)


def init_logger():
    """Configure the root logger: INFO level with a timestamped line format."""
    settings = dict(
        format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO,
    )
    logging.basicConfig(**settings)

def set_seed(args):
    """Seed Python, NumPy and PyTorch with ``args.seed``.

    CUDA generators are seeded as well unless ``args.no_cuda`` is set or no
    CUDA device is available.
    """
    seed = args.seed
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    cuda_wanted = not args.no_cuda
    if cuda_wanted and torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

def compute_metrics(slot_preds, slot_labels):
    """Return a fresh dict holding the slot metrics for the given predictions.

    ``slot_preds`` and ``slot_labels`` must contain the same number of
    sequences; the scores come from ``get_slot_metrics``.
    """
    assert len(slot_preds) == len(slot_labels)
    slot_scores = get_slot_metrics(slot_preds, slot_labels)
    return dict(slot_scores)

def get_slot_metrics(preds, labels):
    """Print the seqeval classification report and return summary scores.

    ``preds`` and ``labels`` must have the same length. The returned dict has
    the keys ``slot_precision``, ``slot_recall`` and ``slot_f1``.
    """
    assert len(preds) == len(labels)

    report = classification_report(labels, preds, digits=4)
    print(report)

    scorers = (
        ("slot_precision", precision_score),
        ("slot_recall", recall_score),
        ("slot_f1", f1_score),
    )
    return {metric_name: scorer(labels, preds) for metric_name, scorer in scorers}

In [None]:
def main(args):
    """Run the training and/or evaluation pipeline described by ``args``.

    Sets up logging and seeds, loads the tokenizer and the train/dev/test
    datasets, builds a ``Trainer``, then trains when ``args.do_train`` is set
    and, when ``args.do_eval`` is set, reloads the saved model and evaluates it
    on the dev and test splits.
    """
    init_logger()
    set_seed(args)
    tokenizer = load_tokenizer(args)

    # Load the three splits in the order the Trainer expects them.
    datasets = [
        load_and_cache_examples(args, tokenizer, mode=split)
        for split in ("train", "dev", "test")
    ]
    trainer = Trainer(args, *datasets)

    if args.do_train:
        trainer.train()

    if not args.do_eval:
        return

    trainer.load_model()
    for split in ("dev", "test"):
        trainer.evaluate(split)

In [None]:
# Build the run configuration with argparse, add the derived settings, and
# hand the resulting namespace to main().
# NOTE(review): --do_train / --do_eval are store_true flags (default False) and
# parse_args() is called without an explicit argument list, so unless those
# flags are present in sys.argv this cell neither trains nor evaluates.
# Confirm how the flags are meant to be supplied when running in a notebook.
parser = argparse.ArgumentParser()

# --- Paths ---
parser.add_argument("--model_dir", default="/content/drive/MyDrive/Luận Văn Thạc Sĩ/data-vihealbert/ner/model-save", type=str, help="Path to save, load model")
parser.add_argument("--data_dir", default="/content/drive/MyDrive/Luận Văn Thạc Sĩ/data-vihealbert/ner", type=str, help="The input data dir")
# This default is absolute, so os.path.join(data_dir, slot_label_file) in
# get_slot_labels resolves to this path regardless of --data_dir.
parser.add_argument("--slot_label_file", default="/content/drive/MyDrive/Luận Văn Thạc Sĩ/data-vihealbert/ner/slot_labels.txt", type=str, help="Slot Label file")

# --- Model selection ---
parser.add_argument(
    "--model_type",
    default="vihnbert",
    type=str,
    # Reject unknown model types here with a clear argparse error instead of a
    # bare KeyError at the MODEL_PATH_MAP lookup below.
    choices=list(MODEL_CLASSES.keys()),
    help="Model type selected in the list: " + ", ".join(MODEL_CLASSES.keys()),
)

# --- Optimisation hyper-parameters ---
parser.add_argument("--tuning_metric", default="loss", type=str, help="Metrics to tune when training")
parser.add_argument("--seed", type=int, default=1, help="random seed for initialization")
parser.add_argument("--train_batch_size", default=16, type=int, help="Batch size for training.")
parser.add_argument("--eval_batch_size", default=16, type=int, help="Batch size for evaluation.")
parser.add_argument(
    "--max_seq_len", default=70, type=int, help="The maximum total input sequence length after tokenization."
)
parser.add_argument("--learning_rate", default=5e-5, type=float, help="The initial learning rate for Adam.")
parser.add_argument(
    "--num_train_epochs", default=10.0, type=float, help="Total number of training epochs to perform."
)
parser.add_argument("--weight_decay", default=0.0, type=float, help="Weight decay if we apply some.")
parser.add_argument(
    "--gradient_accumulation_steps",
    type=int,
    default=1,
    help="Number of updates steps to accumulate before performing a backward/update pass.",
)
parser.add_argument("--adam_epsilon", default=1e-8, type=float, help="Epsilon for Adam optimizer.")
parser.add_argument("--max_grad_norm", default=1.0, type=float, help="Max gradient norm.")
parser.add_argument(
    "--max_steps",
    default=-1,
    type=int,
    help="If > 0: set total number of training steps to perform. Override num_train_epochs.",
)
parser.add_argument("--warmup_steps", default=0, type=int, help="Linear warmup over warmup_steps.")
parser.add_argument("--dropout_rate", default=0.1, type=float, help="Dropout for fully-connected layers")

# --- Logging / checkpoint cadence ---
parser.add_argument("--logging_steps", type=int, default=200, help="Log every X updates steps.")
parser.add_argument("--save_steps", type=int, default=200, help="Save checkpoint every X updates steps.")

# --- What to run ---
parser.add_argument("--do_train", action="store_true", help="Whether to run training.")
# main() evaluates on both the dev and the test split when this flag is set.
parser.add_argument("--do_eval", action="store_true", help="Whether to run eval on the dev and test sets.")

parser.add_argument("--no_cuda", action="store_true", help="Avoid using CUDA when available")

parser.add_argument(
    "--ignore_index",
    default=0,
    type=int,
    help="Specifies a target value that is ignored and does not contribute to the input gradient",
)

parser.add_argument(
    "--token_level",
    type=str,
    default="",
    help="Tokens are at syllable level or word level (Vietnamese) [word-level, syllable-level]",
)
parser.add_argument(
    "--early_stopping",
    type=int,
    default=50,
    help="Number of unincreased validation step to wait for early stopping",
)
parser.add_argument("--gpu_id", type=int, default=0, help="Select gpu id")
# CRF option
parser.add_argument("--use_crf", action="store_true", help="Whether to use CRF")
# init pretrained
parser.add_argument("--pretrained", action="store_true", help="Whether to init model from pretrained base model")
parser.add_argument("--pretrained_path", default="./viatis_xlmr_crf", type=str, help="The pretrained model path")
# Presumably absorbs the `-f <connection file>` argument the notebook kernel is
# launched with, so parse_args() does not reject it - TODO confirm.
parser.add_argument('-f')

args = parser.parse_args()

# Derived settings that are not exposed as flags.
args.model_name_or_path = MODEL_PATH_MAP[args.model_type]
args.device = "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu"

main(args)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Some weights of ViHnBERT were not initialized from the model checkpoint at demdecuong/vihealthbert-base-word and are newly initialized: ['slot_classifier.linear.weight', 'slot_classifier.linear.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


GPU ID : 0
Cuda device: 0
#params: 135014421


Evaluating:   0%|          | 0/125 [00:00<?, ?it/s]

                     precision    recall  f1-score   support

               DATE     0.0035    0.0045    0.0040      1101
       ORGANIZATION     0.0005    0.0018    0.0007       548
     TRANSPORTATION     0.0019    0.0230    0.0035        87
           LOCATION     0.0151    0.0111    0.0128      2703
         PATIENT_ID     0.0198    0.0056    0.0087      1257
                JOB     0.0085    0.0640    0.0151       125
SYMPTOM_AND_DISEASE     0.0062    0.0052    0.0057       766
             GENDER     0.0420    0.4391    0.0767       271
                AGE     0.0000    0.0000    0.0000       349
               NAME     0.0000    0.0000    0.0000       186

          micro avg     0.0139    0.0238    0.0175      7393
          macro avg     0.0118    0.0238    0.0105      7393

{'loss': 3.033681131362915, 'slot_precision': 0.013870281346047758, 'slot_recall': 0.02380630325984039, 'slot_f1': 0.017528134647943436}


Evaluating:   0%|          | 0/188 [00:00<?, ?it/s]

                     precision    recall  f1-score   support

               DATE     0.0064    0.0085    0.0073      1641
SYMPTOM_AND_DISEASE     0.0080    0.0071    0.0075      1125
               NAME     0.0008    0.0032    0.0012       313
           LOCATION     0.0195    0.0137    0.0160      4395
         PATIENT_ID     0.0034    0.0010    0.0016      1955
     TRANSPORTATION     0.0012    0.0104    0.0022       193
             GENDER     0.0455    0.4574    0.0828       446
       ORGANIZATION     0.0000    0.0000    0.0000       770
                JOB     0.0052    0.0462    0.0093       173
                AGE     0.0013    0.0018    0.0015       564

          micro avg     0.0151    0.0259    0.0191     11575
          macro avg     0.0116    0.0259    0.0116     11575

{'loss': 3.035632256497728, 'slot_precision': 0.015066291683406991, 'slot_recall': 0.02591792656587473, 'slot_f1': 0.0190554832152952}


Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Iteration:   0%|          | 0/315 [00:00<?, ?it/s]


Epoch 0

Tuning metrics: loss


Evaluating:   0%|          | 0/125 [00:00<?, ?it/s]

                     precision    recall  f1-score   support

               DATE     0.9937    0.9955    0.9946      1101
       ORGANIZATION     0.8606    0.9015    0.8806       548
     TRANSPORTATION     0.6800    0.7816    0.7273        87
           LOCATION     0.9489    0.9334    0.9411      2703
         PATIENT_ID     0.9872    0.9817    0.9844      1257
                JOB     1.0000    0.0160    0.0315       125
SYMPTOM_AND_DISEASE     0.9022    0.8668    0.8842       766
             GENDER     0.9049    0.9483    0.9261       271
                AGE     0.9942    0.9885    0.9914       349
               NAME     0.9833    0.9516    0.9672       186

          micro avg     0.9482    0.9279    0.9379      7393
          macro avg     0.9498    0.9279    0.9306      7393

Validation loss decreased (inf --> 0.100392).  Saving model ...


Iteration:   0%|          | 0/315 [00:00<?, ?it/s]


Epoch 1

Tuning metrics: loss


Evaluating:   0%|          | 0/125 [00:00<?, ?it/s]

                     precision    recall  f1-score   support

               DATE     0.9946    0.9973    0.9959      1101
       ORGANIZATION     0.9229    0.9179    0.9204       548
     TRANSPORTATION     0.7647    0.8966    0.8254        87
           LOCATION     0.9541    0.9604    0.9572      2703
         PATIENT_ID     0.9896    0.9841    0.9868      1257
                JOB     0.6106    0.5520    0.5798       125
SYMPTOM_AND_DISEASE     0.9254    0.9073    0.9163       766
             GENDER     1.0000    0.9705    0.9850       271
                AGE     1.0000    0.9857    0.9928       349
               NAME     0.9677    0.9677    0.9677       186

          micro avg     0.9572    0.9554    0.9563      7393
          macro avg     0.9570    0.9554    0.9561      7393

Validation loss decreased (0.100392 --> 0.070731).  Saving model ...

Tuning metrics: loss


Evaluating:   0%|          | 0/125 [00:00<?, ?it/s]

                     precision    recall  f1-score   support

               DATE     0.9910    0.9955    0.9932      1101
       ORGANIZATION     0.9335    0.8960    0.9143       548
     TRANSPORTATION     0.9222    0.9540    0.9379        87
           LOCATION     0.9552    0.9619    0.9585      2703
         PATIENT_ID     0.9921    0.9928    0.9924      1257
                JOB     0.8269    0.6880    0.7511       125
SYMPTOM_AND_DISEASE     0.9052    0.9099    0.9076       766
             GENDER     1.0000    0.9631    0.9812       271
                AGE     1.0000    0.9914    0.9957       349
               NAME     0.9524    0.9677    0.9600       186

          micro avg     0.9615    0.9587    0.9601      7393
          macro avg     0.9611    0.9587    0.9598      7393

Validation loss decreased (0.070731 --> 0.063839).  Saving model ...


Iteration:   0%|          | 0/315 [00:00<?, ?it/s]


Epoch 2

Tuning metrics: loss


Evaluating:   0%|          | 0/125 [00:00<?, ?it/s]

                     precision    recall  f1-score   support

               DATE     0.9937    0.9964    0.9950      1101
       ORGANIZATION     0.9160    0.8157    0.8629       548
     TRANSPORTATION     0.9886    1.0000    0.9943        87
           LOCATION     0.9404    0.9578    0.9490      2703
         PATIENT_ID     0.9952    0.9912    0.9932      1257
                JOB     0.9700    0.7760    0.8622       125
SYMPTOM_AND_DISEASE     0.9188    0.8864    0.9023       766
             GENDER     1.0000    0.9742    0.9869       271
                AGE     1.0000    0.9857    0.9928       349
               NAME     0.9543    0.8978    0.9252       186

          micro avg     0.9603    0.9491    0.9547      7393
          macro avg     0.9600    0.9491    0.9541      7393

EarlyStopping counter: 1 out of 50


Iteration:   0%|          | 0/315 [00:00<?, ?it/s]


Epoch 3

Tuning metrics: loss


Evaluating:   0%|          | 0/125 [00:00<?, ?it/s]

                     precision    recall  f1-score   support

               DATE     0.9909    0.9891    0.9900      1101
       ORGANIZATION     0.9102    0.9252    0.9176       548
     TRANSPORTATION     0.9775    1.0000    0.9886        87
           LOCATION     0.9601    0.9427    0.9513      2703
         PATIENT_ID     0.9936    0.9905    0.9920      1257
                JOB     0.9820    0.8720    0.9237       125
SYMPTOM_AND_DISEASE     0.9262    0.9008    0.9133       766
             GENDER     1.0000    0.9852    0.9926       271
                AGE     1.0000    0.9885    0.9942       349
               NAME     0.9503    0.9247    0.9373       186

          micro avg     0.9669    0.9548    0.9608      7393
          macro avg     0.9668    0.9548    0.9607      7393

Validation loss decreased (0.063839 --> 0.060499).  Saving model ...

Tuning metrics: loss


Evaluating:   0%|          | 0/125 [00:00<?, ?it/s]

                     precision    recall  f1-score   support

               DATE     0.9946    0.9964    0.9955      1101
       ORGANIZATION     0.9312    0.8887    0.9094       548
     TRANSPORTATION     0.9886    1.0000    0.9943        87
           LOCATION     0.9558    0.9689    0.9623      2703
         PATIENT_ID     0.9952    0.9920    0.9936      1257
                JOB     0.9700    0.7760    0.8622       125
SYMPTOM_AND_DISEASE     0.9222    0.8825    0.9019       766
             GENDER     1.0000    0.9483    0.9735       271
                AGE     1.0000    0.9742    0.9869       349
               NAME     0.9781    0.9624    0.9702       186

          micro avg     0.9680    0.9585    0.9632      7393
          macro avg     0.9679    0.9585    0.9629      7393

EarlyStopping counter: 1 out of 50


Iteration:   0%|          | 0/315 [00:00<?, ?it/s]


Epoch 4

Tuning metrics: loss


Evaluating:   0%|          | 0/125 [00:00<?, ?it/s]

                     precision    recall  f1-score   support

               DATE     0.9946    0.9955    0.9950      1101
       ORGANIZATION     0.9141    0.9124    0.9132       548
     TRANSPORTATION     1.0000    1.0000    1.0000        87
           LOCATION     0.9664    0.9571    0.9617      2703
         PATIENT_ID     0.9920    0.9920    0.9920      1257
                JOB     0.9213    0.6560    0.7664       125
SYMPTOM_AND_DISEASE     0.9141    0.8890    0.9014       766
             GENDER     1.0000    0.9668    0.9831       271
                AGE     1.0000    0.9857    0.9928       349
               NAME     0.9714    0.9140    0.9418       186

          micro avg     0.9686    0.9544    0.9614      7393
          macro avg     0.9682    0.9544    0.9609      7393

EarlyStopping counter: 2 out of 50


Iteration:   0%|          | 0/315 [00:00<?, ?it/s]


Epoch 5

Tuning metrics: loss


Evaluating:   0%|          | 0/125 [00:00<?, ?it/s]

                     precision    recall  f1-score   support

               DATE     0.9945    0.9927    0.9936      1101
       ORGANIZATION     0.9307    0.9069    0.9187       548
     TRANSPORTATION     0.9560    1.0000    0.9775        87
           LOCATION     0.9653    0.9674    0.9664      2703
         PATIENT_ID     0.9928    0.9881    0.9904      1257
                JOB     0.9727    0.8560    0.9106       125
SYMPTOM_AND_DISEASE     0.9163    0.8721    0.8936       766
             GENDER     1.0000    0.9889    0.9944       271
                AGE     1.0000    0.9885    0.9942       349
               NAME     0.9774    0.9301    0.9532       186

          micro avg     0.9702    0.9597    0.9649      7393
          macro avg     0.9699    0.9597    0.9647      7393

EarlyStopping counter: 3 out of 50

Tuning metrics: loss


Evaluating:   0%|          | 0/125 [00:00<?, ?it/s]

                     precision    recall  f1-score   support

               DATE     0.9955    0.9946    0.9950      1101
       ORGANIZATION     0.9170    0.9069    0.9119       548
     TRANSPORTATION     0.9775    1.0000    0.9886        87
           LOCATION     0.9666    0.9634    0.9650      2703
         PATIENT_ID     0.9920    0.9920    0.9920      1257
                JOB     0.9804    0.8000    0.8811       125
SYMPTOM_AND_DISEASE     0.9260    0.8825    0.9037       766
             GENDER     1.0000    0.9815    0.9907       271
                AGE     1.0000    0.9857    0.9928       349
               NAME     0.9721    0.9355    0.9534       186

          micro avg     0.9708    0.9590    0.9649      7393
          macro avg     0.9706    0.9590    0.9646      7393

EarlyStopping counter: 4 out of 50


Iteration:   0%|          | 0/315 [00:00<?, ?it/s]


Epoch 6

Tuning metrics: loss


Evaluating:   0%|          | 0/125 [00:00<?, ?it/s]

                     precision    recall  f1-score   support

               DATE     0.9918    0.9927    0.9923      1101
       ORGANIZATION     0.9276    0.9124    0.9200       548
     TRANSPORTATION     0.9775    1.0000    0.9886        87
           LOCATION     0.9683    0.9597    0.9640      2703
         PATIENT_ID     0.9936    0.9920    0.9928      1257
                JOB     0.9811    0.8320    0.9004       125
SYMPTOM_AND_DISEASE     0.9169    0.8930    0.9048       766
             GENDER     1.0000    0.9815    0.9907       271
                AGE     1.0000    0.9799    0.9899       349
               NAME     0.9719    0.9301    0.9505       186

          micro avg     0.9710    0.9590    0.9650      7393
          macro avg     0.9708    0.9590    0.9648      7393

EarlyStopping counter: 5 out of 50

Tuning metrics: loss


Evaluating:   0%|          | 0/125 [00:00<?, ?it/s]

                     precision    recall  f1-score   support

               DATE     0.9945    0.9918    0.9932      1101
       ORGANIZATION     0.9311    0.9124    0.9217       548
     TRANSPORTATION     0.9775    1.0000    0.9886        87
           LOCATION     0.9681    0.9663    0.9672      2703
         PATIENT_ID     0.9936    0.9920    0.9928      1257
                JOB     0.9623    0.8160    0.8831       125
SYMPTOM_AND_DISEASE     0.9164    0.8734    0.8944       766
             GENDER     1.0000    0.9779    0.9888       271
                AGE     1.0000    0.9885    0.9942       349
               NAME     0.9728    0.9624    0.9676       186

          micro avg     0.9714    0.9601    0.9657      7393
          macro avg     0.9711    0.9601    0.9654      7393

EarlyStopping counter: 6 out of 50


Iteration:   0%|          | 0/315 [00:00<?, ?it/s]


Epoch 7

Tuning metrics: loss


Evaluating:   0%|          | 0/125 [00:00<?, ?it/s]

                     precision    recall  f1-score   support

               DATE     0.9954    0.9927    0.9941      1101
       ORGANIZATION     0.9194    0.9161    0.9177       548
     TRANSPORTATION     0.9775    1.0000    0.9886        87
           LOCATION     0.9672    0.9593    0.9632      2703
         PATIENT_ID     0.9920    0.9920    0.9920      1257
                JOB     0.9633    0.8400    0.8974       125
SYMPTOM_AND_DISEASE     0.9153    0.8890    0.9020       766
             GENDER     1.0000    0.9815    0.9907       271
                AGE     1.0000    0.9885    0.9942       349
               NAME     0.9727    0.9570    0.9648       186

          micro avg     0.9698    0.9600    0.9649      7393
          macro avg     0.9696    0.9600    0.9647      7393

EarlyStopping counter: 7 out of 50


Iteration:   0%|          | 0/315 [00:00<?, ?it/s]


Epoch 8

Tuning metrics: loss


Evaluating:   0%|          | 0/125 [00:00<?, ?it/s]

                     precision    recall  f1-score   support

               DATE     0.9945    0.9927    0.9936      1101
       ORGANIZATION     0.9261    0.9142    0.9201       548
     TRANSPORTATION     0.9775    1.0000    0.9886        87
           LOCATION     0.9674    0.9674    0.9674      2703
         PATIENT_ID     0.9936    0.9928    0.9932      1257
                JOB     0.9722    0.8400    0.9013       125
SYMPTOM_AND_DISEASE     0.9087    0.8838    0.8961       766
             GENDER     1.0000    0.9815    0.9907       271
                AGE     1.0000    0.9885    0.9942       349
               NAME     0.9727    0.9570    0.9648       186

          micro avg     0.9700    0.9624    0.9662      7393
          macro avg     0.9698    0.9624    0.9660      7393

EarlyStopping counter: 8 out of 50

Tuning metrics: loss


Evaluating:   0%|          | 0/125 [00:00<?, ?it/s]

                     precision    recall  f1-score   support

               DATE     0.9954    0.9927    0.9941      1101
       ORGANIZATION     0.9255    0.9288    0.9271       548
     TRANSPORTATION     0.9775    1.0000    0.9886        87
           LOCATION     0.9738    0.9634    0.9686      2703
         PATIENT_ID     0.9936    0.9897    0.9916      1257
                JOB     0.9630    0.8320    0.8927       125
SYMPTOM_AND_DISEASE     0.9147    0.8825    0.8983       766
             GENDER     1.0000    0.9815    0.9907       271
                AGE     1.0000    0.9857    0.9928       349
               NAME     0.9721    0.9355    0.9534       186

          micro avg     0.9729    0.9605    0.9666      7393
          macro avg     0.9727    0.9605    0.9665      7393

EarlyStopping counter: 9 out of 50


Iteration:   0%|          | 0/315 [00:00<?, ?it/s]


Epoch 9

Tuning metrics: loss


Evaluating:   0%|          | 0/125 [00:00<?, ?it/s]

                     precision    recall  f1-score   support

               DATE     0.9954    0.9927    0.9941      1101
       ORGANIZATION     0.9229    0.9179    0.9204       548
     TRANSPORTATION     0.9775    1.0000    0.9886        87
           LOCATION     0.9725    0.9667    0.9696      2703
         PATIENT_ID     0.9936    0.9897    0.9916      1257
                JOB     0.9633    0.8400    0.8974       125
SYMPTOM_AND_DISEASE     0.9185    0.8825    0.9001       766
             GENDER     1.0000    0.9815    0.9907       271
                AGE     1.0000    0.9857    0.9928       349
               NAME     0.9725    0.9516    0.9620       186

          micro avg     0.9726    0.9615    0.9670      7393
          macro avg     0.9724    0.9615    0.9668      7393

EarlyStopping counter: 10 out of 50


Evaluating:   0%|          | 0/125 [00:00<?, ?it/s]

                     precision    recall  f1-score   support

               DATE     0.9909    0.9891    0.9900      1101
       ORGANIZATION     0.9102    0.9252    0.9176       548
     TRANSPORTATION     0.9775    1.0000    0.9886        87
           LOCATION     0.9601    0.9427    0.9513      2703
         PATIENT_ID     0.9936    0.9905    0.9920      1257
                JOB     0.9820    0.8720    0.9237       125
SYMPTOM_AND_DISEASE     0.9262    0.9008    0.9133       766
             GENDER     1.0000    0.9852    0.9926       271
                AGE     1.0000    0.9885    0.9942       349
               NAME     0.9503    0.9247    0.9373       186

          micro avg     0.9669    0.9548    0.9608      7393
          macro avg     0.9668    0.9548    0.9607      7393



Evaluating:   0%|          | 0/188 [00:00<?, ?it/s]

                     precision    recall  f1-score   support

               DATE     0.9909    0.9921    0.9915      1641
SYMPTOM_AND_DISEASE     0.9322    0.9040    0.9179      1125
               NAME     0.9073    0.9073    0.9073       313
           LOCATION     0.9529    0.9381    0.9454      4395
         PATIENT_ID     0.9857    0.9841    0.9849      1955
     TRANSPORTATION     0.9442    0.9637    0.9538       193
             GENDER     1.0000    0.9776    0.9887       446
       ORGANIZATION     0.9064    0.9182    0.9123       770
                JOB     0.8659    0.8208    0.8427       173
                AGE     0.9803    0.9681    0.9741       564

          micro avg     0.9592    0.9497    0.9545     11575
          macro avg     0.9592    0.9497    0.9544     11575



In [None]:
# Build the run configuration with argparse, add the derived settings, and
# hand the resulting namespace to main().
# NOTE(review): this cell repeats the argument definitions of the previous run
# cell verbatim. --do_train / --do_eval are store_true flags (default False)
# and parse_args() is called without an explicit argument list, so unless
# those flags are present in sys.argv this cell neither trains nor evaluates.
# Confirm how the flags are meant to be supplied when running in a notebook.
parser = argparse.ArgumentParser()

# --- Paths ---
parser.add_argument("--model_dir", default="/content/drive/MyDrive/Luận Văn Thạc Sĩ/data-vihealbert/ner/model-save", type=str, help="Path to save, load model")
parser.add_argument("--data_dir", default="/content/drive/MyDrive/Luận Văn Thạc Sĩ/data-vihealbert/ner", type=str, help="The input data dir")
# This default is absolute, so os.path.join(data_dir, slot_label_file) in
# get_slot_labels resolves to this path regardless of --data_dir.
parser.add_argument("--slot_label_file", default="/content/drive/MyDrive/Luận Văn Thạc Sĩ/data-vihealbert/ner/slot_labels.txt", type=str, help="Slot Label file")

# --- Model selection ---
parser.add_argument(
    "--model_type",
    default="vihnbert",
    type=str,
    # Reject unknown model types here with a clear argparse error instead of a
    # bare KeyError at the MODEL_PATH_MAP lookup below.
    choices=list(MODEL_CLASSES.keys()),
    help="Model type selected in the list: " + ", ".join(MODEL_CLASSES.keys()),
)

# --- Optimisation hyper-parameters ---
parser.add_argument("--tuning_metric", default="loss", type=str, help="Metrics to tune when training")
parser.add_argument("--seed", type=int, default=1, help="random seed for initialization")
parser.add_argument("--train_batch_size", default=16, type=int, help="Batch size for training.")
parser.add_argument("--eval_batch_size", default=16, type=int, help="Batch size for evaluation.")
parser.add_argument(
    "--max_seq_len", default=70, type=int, help="The maximum total input sequence length after tokenization."
)
parser.add_argument("--learning_rate", default=5e-5, type=float, help="The initial learning rate for Adam.")
parser.add_argument(
    "--num_train_epochs", default=10.0, type=float, help="Total number of training epochs to perform."
)
parser.add_argument("--weight_decay", default=0.0, type=float, help="Weight decay if we apply some.")
parser.add_argument(
    "--gradient_accumulation_steps",
    type=int,
    default=1,
    help="Number of updates steps to accumulate before performing a backward/update pass.",
)
parser.add_argument("--adam_epsilon", default=1e-8, type=float, help="Epsilon for Adam optimizer.")
parser.add_argument("--max_grad_norm", default=1.0, type=float, help="Max gradient norm.")
parser.add_argument(
    "--max_steps",
    default=-1,
    type=int,
    help="If > 0: set total number of training steps to perform. Override num_train_epochs.",
)
parser.add_argument("--warmup_steps", default=0, type=int, help="Linear warmup over warmup_steps.")
parser.add_argument("--dropout_rate", default=0.1, type=float, help="Dropout for fully-connected layers")

# --- Logging / checkpoint cadence ---
parser.add_argument("--logging_steps", type=int, default=200, help="Log every X updates steps.")
parser.add_argument("--save_steps", type=int, default=200, help="Save checkpoint every X updates steps.")

# --- What to run ---
parser.add_argument("--do_train", action="store_true", help="Whether to run training.")
# main() evaluates on both the dev and the test split when this flag is set.
parser.add_argument("--do_eval", action="store_true", help="Whether to run eval on the dev and test sets.")

parser.add_argument("--no_cuda", action="store_true", help="Avoid using CUDA when available")

parser.add_argument(
    "--ignore_index",
    default=0,
    type=int,
    help="Specifies a target value that is ignored and does not contribute to the input gradient",
)

parser.add_argument(
    "--token_level",
    type=str,
    default="",
    help="Tokens are at syllable level or word level (Vietnamese) [word-level, syllable-level]",
)
parser.add_argument(
    "--early_stopping",
    type=int,
    default=50,
    help="Number of unincreased validation step to wait for early stopping",
)
parser.add_argument("--gpu_id", type=int, default=0, help="Select gpu id")
# CRF option
parser.add_argument("--use_crf", action="store_true", help="Whether to use CRF")
# init pretrained
parser.add_argument("--pretrained", action="store_true", help="Whether to init model from pretrained base model")
parser.add_argument("--pretrained_path", default="./viatis_xlmr_crf", type=str, help="The pretrained model path")
# Presumably absorbs the `-f <connection file>` argument the notebook kernel is
# launched with, so parse_args() does not reject it - TODO confirm.
parser.add_argument('-f')

args = parser.parse_args()

# Derived settings that are not exposed as flags.
args.model_name_or_path = MODEL_PATH_MAP[args.model_type]
args.device = "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu"

main(args)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Some weights of ViHnBERT were not initialized from the model checkpoint at demdecuong/vihealthbert-base-word and are newly initialized: ['slot_classifier.linear.weight', 'slot_classifier.linear.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


GPU ID : 0
Cuda device: 0
#params: 135014421


Evaluating:   0%|          | 0/125 [00:00<?, ?it/s]

                     precision    recall  f1-score   support

               DATE     0.9909    0.9891    0.9900      1101
       ORGANIZATION     0.9102    0.9252    0.9176       548
     TRANSPORTATION     0.9775    1.0000    0.9886        87
           LOCATION     0.9601    0.9427    0.9513      2703
         PATIENT_ID     0.9936    0.9905    0.9920      1257
                JOB     0.9820    0.8720    0.9237       125
SYMPTOM_AND_DISEASE     0.9262    0.9008    0.9133       766
             GENDER     1.0000    0.9852    0.9926       271
                AGE     1.0000    0.9885    0.9942       349
               NAME     0.9503    0.9247    0.9373       186

          micro avg     0.9669    0.9548    0.9608      7393
          macro avg     0.9668    0.9548    0.9607      7393



Evaluating:   0%|          | 0/188 [00:00<?, ?it/s]

                     precision    recall  f1-score   support

               DATE     0.9909    0.9921    0.9915      1641
SYMPTOM_AND_DISEASE     0.9322    0.9040    0.9179      1125
               NAME     0.9073    0.9073    0.9073       313
           LOCATION     0.9529    0.9381    0.9454      4395
         PATIENT_ID     0.9857    0.9841    0.9849      1955
     TRANSPORTATION     0.9442    0.9637    0.9538       193
             GENDER     1.0000    0.9776    0.9887       446
       ORGANIZATION     0.9064    0.9182    0.9123       770
                JOB     0.8659    0.8208    0.8427       173
                AGE     0.9803    0.9681    0.9741       564

          micro avg     0.9592    0.9497    0.9545     11575
          macro avg     0.9592    0.9497    0.9544     11575

