In [2]:
import logging
import os
import sys
from dataclasses import dataclass, field
from typing import Optional, Union
import random

import datasets
import numpy as np
import torch
from datasets import load_dataset
from model import BertForChID

import transformers
from transformers import (
    AutoConfig,
    AutoTokenizer,
    HfArgumentParser,
    Trainer,
    TrainingArguments,
    default_data_collator,
    set_seed,
)
from transformers.tokenization_utils_base import PreTrainedTokenizerBase
from transformers.trainer_utils import get_last_checkpoint
logger = logging.getLogger(__name__)

In [3]:
@dataclass
class ModelArguments:
    """
    Arguments pertaining to which model/config/tokenizer we are going to fine-tune from.

    Only `model_name_or_path` is required (it is the one field declared without a default);
    every other field is optional. The `help` strings in each field's metadata are the
    per-argument documentation.
    """

    # Required: declared without a default value.
    model_name_or_path: str = field(
        metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"}
    )
    # Later in this notebook, a falsy value makes the config load from `model_name_or_path` instead.
    config_name: Optional[str] = field(
        default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"}
    )
    # Later in this notebook, a falsy value makes the tokenizer load from `model_name_or_path` instead.
    tokenizer_name: Optional[str] = field(
        default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"}
    )
    # Passed as `cache_dir` to both the `from_pretrained` calls and `load_dataset` in this notebook.
    cache_dir: Optional[str] = field(
        default=None,
        metadata={"help": "Where do you want to store the pretrained models downloaded from huggingface.co"},
    )
    # Forwarded as `use_fast` when the tokenizer is created.
    use_fast_tokenizer: bool = field(
        default=True,
        metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."},
    )
    # Forwarded as `revision` to the config, tokenizer and model loaders.
    model_revision: str = field(
        default="main",
        metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
    )
    # The notebook converts this flag to `True`/`None` before handing it to the loaders.
    use_auth_token: bool = field(
        default=False,
        metadata={
            "help": (
                "Will use the token generated when running `huggingface-cli login` (necessary to use this script "
                "with private models)."
            )
        },
    )


In [4]:
@dataclass
class DataTrainingArguments:
    """
    Options describing the data fed to the model for training and evaluation.

    Covers the input files, preprocessing/caching controls, sequence length and
    padding policy, and optional caps on the number of examples used per split.
    `train_file` and `validation_file` are checked for a csv/json extension right
    after construction.
    """

    # --- input files ---
    train_file: Optional[str] = field(
        default=None,
        metadata={"help": "The input training data file (a text file)."},
    )
    validation_file: Optional[str] = field(
        metadata={"help": "An optional input evaluation data file (a text file)."},
        default=None,
    )
    test_file: Optional[str] = field(
        metadata={"help": "An optional input test data file (a text file)."},
        default=None,
    )

    # --- preprocessing / caching ---
    overwrite_cache: bool = field(
        default=False,
        metadata={"help": "Overwrite the cached training and evaluation sets"},
    )
    preprocessing_num_workers: Optional[int] = field(
        metadata={"help": "The number of processes to use for the preprocessing."},
        default=None,
    )

    # --- sequence length / padding ---
    max_seq_length: Optional[int] = field(
        metadata={
            "help": (
                "The maximum total input sequence length after tokenization. If passed, sequences longer "
                "than this will be truncated, sequences shorter will be padded."
            )
        },
        default=None,
    )
    pad_to_max_length: bool = field(
        metadata={
            "help": (
                "Whether to pad all samples to the maximum sentence length. "
                "If False, will pad the samples dynamically when batching to the maximum length in the batch. More "
                "efficient on GPU but very bad for TPU."
            )
        },
        default=False,
    )

    # --- debugging caps ---
    max_train_samples: Optional[int] = field(
        metadata={
            "help": (
                "For debugging purposes or quicker training, truncate the number of training examples to this "
                "value if set."
            )
        },
        default=None,
    )
    max_eval_samples: Optional[int] = field(
        metadata={
            "help": (
                "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
                "value if set."
            )
        },
        default=None,
    )

    def __post_init__(self):
        """Assert that the train/validation files, when given, end in `.csv` or `.json`."""
        checks = (
            (self.train_file, "`train_file` should be a csv or a json file."),
            (self.validation_file, "`validation_file` should be a csv or a json file."),
        )
        for path, complaint in checks:
            if path is None:
                continue
            # The text after the last dot is treated as the file extension.
            assert path.split(".")[-1] in ["csv", "json"], complaint


In [5]:
@dataclass
class DataCollatorForChID:
    """
    Data collator that will dynamically pad the inputs.
    Candidate masks will be computed to indicate which tokens are candidates.

    Calling the collator on a list of feature dicts returns the padded batch produced by
    `tokenizer.pad`, extended with:

    - `labels`: an int64 tensor built from each feature's `label`/`labels` entry;
    - `candidate_mask`: a boolean tensor that is True wherever `input_ids` equals the
      tokenizer's mask token id.

    The input feature dicts are left untouched.

    Args:
        tokenizer ([`PreTrainedTokenizer`] or [`PreTrainedTokenizerFast`]):
            The tokenizer used for encoding the data.
        padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*, defaults to `True`):
            Select a strategy to pad the returned sequences (according to the model's padding side and padding index)
            among:

            - `True` or `'longest'` (default): Pad to the longest sequence in the batch (or no padding if only a
              single sequence if provided).
            - `'max_length'`: Pad to a maximum length specified with the argument `max_length` or to the maximum
              acceptable input length for the model if that argument is not provided.
            - `False` or `'do_not_pad'`: No padding (i.e., can output a batch with sequences of different
              lengths).
        max_length (`int`, *optional*):
            Maximum length of the returned list and optionally padding length (see above).
        pad_to_multiple_of (`int`, *optional*):
            If set will pad the sequence to a multiple of the provided value.

            This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >=
            7.5 (Volta).
    """

    tokenizer: PreTrainedTokenizerBase
    padding: Union[bool, str] = True
    max_length: Optional[int] = None
    pad_to_multiple_of: Optional[int] = None

    def __call__(self, features):
        # The label key is decided from the first feature only; all features are
        # expected to use the same key.
        label_name = "label" if "label" in features[0].keys() else "labels"
        labels = [feature[label_name] for feature in features]

        # Hand the tokenizer label-free *copies* so the caller's feature dicts are not
        # modified (popping the label in place would make a second call on the same
        # features fail).
        unlabeled = [
            {key: value for key, value in feature.items() if key != label_name}
            for feature in features
        ]

        batch = self.tokenizer.pad(
            unlabeled,
            padding=self.padding,
            max_length=self.max_length,
            pad_to_multiple_of=self.pad_to_multiple_of,
            return_tensors="pt",
        )

        # Add back labels
        batch["labels"] = torch.tensor(labels, dtype=torch.int64)
        # Compute candidate masks
        batch["candidate_mask"] = batch["input_ids"] == self.tokenizer.mask_token_id
        return batch


In [6]:
# Debugging aid: makes CUDA kernel launches synchronous so that device-side errors
# surface at the call that caused them. This slows training; remove for real runs.
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
# See all possible arguments in src/transformers/training_args.py
# or by passing the --help flag to this script.
# We now keep distinct sets of args, for a cleaner separation of concerns.

# All three argument groups are read from `run.json` in the current working directory.
parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments))
model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath('run.json'))


# Setup logging
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
    datefmt="%m/%d/%Y %H:%M:%S",
    handlers=[logging.StreamHandler(sys.stdout)],
)
# Use the same verbosity for this notebook's logger and for the datasets/transformers loggers.
log_level = training_args.get_process_log_level()
logger.setLevel(log_level)
datasets.utils.logging.set_verbosity(log_level)
transformers.utils.logging.set_verbosity(log_level)
transformers.utils.logging.enable_default_handler()
transformers.utils.logging.enable_explicit_format()

# Log on each process the small summary:
# (the two fragments are joined with ", " so `n_gpu` and `distributed training` do not run together)
logger.warning(
    f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}, "
    f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
)
logger.info(f"Training/evaluation parameters {training_args}")

# Detecting last checkpoint.
# Only relevant when training into an existing output directory that must not be overwritten.
last_checkpoint = None
if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir:
    last_checkpoint = get_last_checkpoint(training_args.output_dir)
    if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0:
        # Non-empty directory without a checkpoint: refuse to clobber whatever is in it.
        raise ValueError(
            f"Output directory ({training_args.output_dir}) already exists and is not empty. "
            "Use --overwrite_output_dir to overcome."
        )
    elif last_checkpoint is not None and training_args.resume_from_checkpoint is None:
        logger.info(
            f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change "
            "the `--output_dir` or add `--overwrite_output_dir` to train from scratch."
        )

# Set seed before initializing model.
set_seed(training_args.seed)

11/03/2022 07:07:48 - INFO - __main__ - Training/evaluation parameters TrainingArguments(
_n_gpu=7,
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_pin_memory=True,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_tqdm=False,
do_eval=1,
do_predict=False,
do_train=1,
eval_accumulation_steps=None,
eval_delay=0,
eval_steps=None,
evaluation_strategy=epoch,
fp16=False,
fp16_backend=auto,
fp16_full_eval=False,
fp16_opt_level=O1,
fsdp=[],
fsdp_min_num_params=0,
fsdp_transformer_layer_cls_to_wrap=None,
full_determinism=False,
gradient_accumulation_steps=1,
gradient_checkpointing=False,
greater_is_better=True,
group_by_length=False,
half_precision_backend=auto,
hub_model_id=None,
hub_private_repo=False,
hub_strategy=every_save,
hub_token=<HUB_TOKEN>,
ignore_data_skip=False

In [7]:
# Load the raw splits, either from local csv/json files or from the hub.
if data_args.train_file is not None or data_args.validation_file is not None:
    data_files = {}
    if data_args.train_file is not None:
        data_files["train"] = data_args.train_file
    if data_args.validation_file is not None:
        data_files["validation"] = data_args.validation_file
    if data_args.test_file is not None:
        data_files["test"] = data_args.test_file
    # The loader type comes from the extension of the first provided file. This is the
    # train file whenever one is given, and it no longer crashes when only a validation
    # file is supplied. `data_files` cannot be empty inside this branch.
    extension = next(iter(data_files.values())).split(".")[-1]
    raw_datasets = load_dataset(
        extension,
        data_files=data_files,
        cache_dir=model_args.cache_dir,
        use_auth_token=True if model_args.use_auth_token else None,
    )
else:
    # No local files were given: download and load the ChID dataset from the hub instead.
    raw_datasets = load_dataset(
        "YuAnthony/chid",
        cache_dir=model_args.cache_dir,
        use_auth_token=True if model_args.use_auth_token else None,
    )

11/03/2022 07:07:59 - INFO - datasets.info - Loading Dataset Infos from /home/huyt/.conda/envs/py37/lib/python3.7/site-packages/datasets/packaged_modules/json
11/03/2022 07:07:59 - INFO - datasets.builder - Overwrite dataset info from restored data version.
11/03/2022 07:07:59 - INFO - datasets.info - Loading Dataset info from /home/huyt/.cache/huggingface/datasets/json/default-0761be92431bbb42/0.0.0/e6070c77f18f01a5ad4551a8b7edfba20b8438b7cad4d94e6ad9378022ce4aab
11/03/2022 07:07:59 - INFO - datasets.info - Loading Dataset info from /home/huyt/.cache/huggingface/datasets/json/default-0761be92431bbb42/0.0.0/e6070c77f18f01a5ad4551a8b7edfba20b8438b7cad4d94e6ad9378022ce4aab


  0%|          | 0/3 [00:00<?, ?it/s]

In [8]:
# Load config, tokenizer and model. The config/tokenizer fall back to
# `model_name_or_path` when no dedicated name is given.
config = AutoConfig.from_pretrained(
    model_args.config_name if model_args.config_name else model_args.model_name_or_path,
    cache_dir=model_args.cache_dir,
    revision=model_args.model_revision,
    use_auth_token=True if model_args.use_auth_token else None,
)
tokenizer = AutoTokenizer.from_pretrained(
    model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
    cache_dir=model_args.cache_dir,
    use_fast=model_args.use_fast_tokenizer,
    revision=model_args.model_revision,
    use_auth_token=True if model_args.use_auth_token else None,
)
model = BertForChID.from_pretrained(
    model_args.model_name_or_path,
    # A path containing ".ckpt" is treated as a TensorFlow checkpoint.
    from_tf=bool(".ckpt" in model_args.model_name_or_path),
    config=config,
    cache_dir=model_args.cache_dir,
    revision=model_args.model_revision,
    use_auth_token=True if model_args.use_auth_token else None,
)

# Name of the label column and the placeholder that marks an idiom blank in the raw text;
# both are read by the preprocessing functions defined later in the notebook.
label_column_name = "labels"
idiom_tag = '#idiom#'

# Resolve the effective maximum sequence length.
if data_args.max_seq_length is None:
    max_seq_length = tokenizer.model_max_length
    if max_seq_length > 1024:
        logger.warning(
            f"The tokenizer picked seems to have a very large `model_max_length` ({tokenizer.model_max_length}). "
            "Picking 1024 instead. You can change that default value by passing --max_seq_length xxx."
        )
        max_seq_length = 1024
else:
    if data_args.max_seq_length > tokenizer.model_max_length:
        # Note the trailing space after "for the": without it the message read "for themodel".
        logger.warning(
            f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the "
            f"model ({tokenizer.model_max_length}). Using max_seq_length={tokenizer.model_max_length}."
        )
    # Never exceed what the tokenizer reports as the model maximum.
    max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length)


[INFO|configuration_utils.py:648] 2022-11-03 07:08:00,445 >> loading configuration file config.json from cache at /home/huyt/.cache/huggingface/hub/models--hfl--chinese-roberta-wwm-ext/snapshots/5c58d0b8ec1d9014354d691c538661bf00bfdb44/config.json
[INFO|configuration_utils.py:700] 2022-11-03 07:08:00,450 >> Model config BertConfig {
  "_name_or_path": "hfl/chinese-roberta-wwm-ext",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "directionality": "bidi",
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_past": true,
  "pad_token_id": 0,
  "pooler_fc_size": 768,
  "pooler_num_attention_heads": 12,
  "pooler_num_fc_layers": 3,
  "pooler_size_per_

In [9]:
def preprocess_function_resize(examples, tag=None, mask_token=None):
    """Expand a batch of passages into one row per idiom blank.

    For passage `i`, the `j`-th occurrence of the idiom placeholder (for `j` in
    `range(realCount[i])`) yields one output row in which only that occurrence is
    replaced by four mask tokens; other placeholders are left in the text.

    Args:
        examples: batch mapping with the columns `content`, `realCount`, `candidates`,
            `groundTruth`, `synonyms`, `explaination` and `exp embedding`. It is not
            modified.
        tag: placeholder string marking a blank. Defaults to the notebook-level `idiom_tag`.
        mask_token: mask token text. Defaults to `tokenizer.mask_token` of the
            notebook-level tokenizer.

    Returns:
        dict of equally long lists with the keys `candidates`, `content`, `synonyms`
        (ground truth plus synonyms, shuffled with the global `random` state),
        `explaination`, `exp embedding`, `labels` (index of the ground truth among the
        candidates) and `labels_syn` (index of the ground truth in the shuffled synonyms).

    Raises:
        ValueError: if a passage has fewer placeholders than `realCount` says, or if a
            ground truth is missing from its candidate list. Either case would otherwise
            produce corrupted or misaligned rows.
    """
    # Globals are only consulted when the caller did not supply a value.
    tag = idiom_tag if tag is None else tag
    mask_token = tokenizer.mask_token if mask_token is None else mask_token
    # Each blank becomes four mask tokens (presumably one per idiom character — confirm).
    blank = mask_token * 4

    return_dic_keys = ['candidates', 'content', 'synonyms', 'explaination', 'exp embedding', 'labels', 'labels_syn']
    return_dic = {key: [] for key in return_dic_keys}

    for i, text in enumerate(examples['content']):
        idx = -1
        for j in range(examples['realCount'][i]):
            # Search after the previous hit; `text` itself is never edited, so offsets stay valid.
            idx = text.find(tag, idx + 1)
            if idx == -1:
                raise ValueError(
                    f"Example {i}: expected {examples['realCount'][i]} occurrence(s) of {tag!r} "
                    f"in the content but found only {j}."
                )

            truth = examples['groundTruth'][i][j]
            candidates = examples['candidates'][i][j]
            try:
                label = list(candidates).index(truth)
            except ValueError:
                raise ValueError(
                    f"Example {i}, blank {j}: ground truth {truth!r} is not among the candidates."
                ) from None

            # Build the shuffled option list on a fresh list instead of writing back into the batch.
            options = [truth] + list(examples['synonyms'][i][j])
            random.shuffle(options)

            return_dic['candidates'].append(candidates)
            return_dic['content'].append(text[:idx] + blank + text[idx + len(tag):])
            return_dic['synonyms'].append(options)
            return_dic['explaination'].append(examples['explaination'][i][j])
            return_dic['exp embedding'].append(examples['exp embedding'][i][j])
            return_dic['labels'].append(label)
            # `truth` was just inserted, so it is always present; first match wins.
            return_dic['labels_syn'].append(options.index(truth))
    return return_dic

def _truncate_around_mask(sentence, mask_span, max_chars):
    """Cut `sentence` to at most `max_chars` characters without losing `mask_span`.

    The tail is kept when the mask starts in the second half of the sentence and the
    head otherwise. If that window would not fully contain the mask, a window roughly
    centred on the mask is used instead.
    """
    if len(sentence) <= max_chars:
        return sentence

    mask_start = sentence.find(mask_span)
    if mask_start > len(sentence) // 2:
        window_start = len(sentence) - max_chars
    else:
        window_start = 0

    mask_end = mask_start + len(mask_span)
    mask_inside = window_start <= mask_start and mask_end <= window_start + max_chars
    if mask_start != -1 and not mask_inside:
        # Head/tail would slice the mask away: centre the window on it, clamped to the sentence.
        centred = mask_start - (max_chars - len(mask_span)) // 2
        window_start = max(0, min(centred, len(sentence) - max_chars))

    return sentence[window_start:window_start + max_chars]


# tokenize all instances
def preprocess_function_tokenize(examples):
    """Tokenize a batch produced by `preprocess_function_resize`.

    Reads the notebook-level `tokenizer`, `max_seq_length` and `data_args`.

    Args:
        examples: batch mapping with the columns `content`, `labels`, `labels_syn`,
            `candidates` and `synonyms`. It is not modified.

    Returns:
        The tokenizer output for the (character-truncated) contents, extended with
        `labels` and `labels_syn` copied through, `candidates` and `synonyms` converted
        character by character to token ids, and `position`, the index of the first
        mask token in each `input_ids` row.

    Raises:
        ValueError: if an encoded row contains no mask token.
    """
    mask_span = tokenizer.mask_token * 4
    max_chars = 500  # character budget applied before tokenization

    # Truncate over-long passages on a new list so the incoming batch stays untouched.
    first_sentences = [
        _truncate_around_mask(sentence, mask_span, max_chars) for sentence in examples['content']
    ]

    tokenized_examples = tokenizer(
        first_sentences,
        max_length=max_seq_length,
        padding="max_length" if data_args.pad_to_max_length else False,
        truncation=True,
    )

    tokenized_examples["labels"] = examples['labels']
    # Each candidate/synonym string is split into single characters before the id lookup.
    tokenized_examples["candidates"] = [
        [tokenizer.convert_tokens_to_ids(list(candidate)) for candidate in candidates]
        for candidates in examples['candidates']
    ]

    tokenized_examples["labels_syn"] = examples['labels_syn']
    tokenized_examples["synonyms"] = [
        [tokenizer.convert_tokens_to_ids(list(synonym)) for synonym in synonyms]
        for synonyms in examples['synonyms']
    ]

    positions = []
    for row, input_ids in enumerate(tokenized_examples["input_ids"]):
        try:
            positions.append(input_ids.index(tokenizer.mask_token_id))
        except ValueError:
            raise ValueError(
                f"Example {row} of the batch has no mask token after tokenization "
                f"(max_seq_length={max_seq_length}); the blank was probably truncated away."
            ) from None
    tokenized_examples["position"] = positions
    return tokenized_examples


In [10]:
def _preprocess_split(split, desc):
    """Run the resize and tokenize passes over one dataset split and return the result.

    Both passes run inside `training_args.main_process_first`, with `desc` passed through as
    that context manager's description.
    """
    with training_args.main_process_first(desc=desc):
        resized = split.map(
            preprocess_function_resize,
            batched=True,
            # The resize pass changes the number of rows; these two columns are not re-emitted by it.
            remove_columns=["groundTruth", "realCount"],
            num_proc=data_args.preprocessing_num_workers,
            load_from_cache_file=not data_args.overwrite_cache,
        )
        tokenized = resized.map(
            preprocess_function_tokenize,
            batched=True,
            num_proc=data_args.preprocessing_num_workers,
            load_from_cache_file=not data_args.overwrite_cache,
        )
    return tokenized


if training_args.do_train:
    if "train" not in raw_datasets:
        raise ValueError("--do_train requires a train dataset")
    train_dataset = raw_datasets["train"]
    if data_args.max_train_samples is not None:
        # The cap applies to raw passages, i.e. before they are expanded to one row per blank.
        max_train_samples = min(len(train_dataset), data_args.max_train_samples)
        train_dataset = train_dataset.select(range(max_train_samples))
    train_dataset = _preprocess_split(train_dataset, desc="train dataset map pre-processing")

if training_args.do_eval:
    if "validation" not in raw_datasets:
        raise ValueError("--do_eval requires a validation dataset")
    # The final evaluation cell scores the test split, so fail early with a clear message
    # instead of a bare KeyError after the validation split has already been processed.
    if "test" not in raw_datasets:
        raise ValueError("--do_eval requires a test dataset")
    eval_dataset = raw_datasets["validation"]
    if data_args.max_eval_samples is not None:
        max_eval_samples = min(len(eval_dataset), data_args.max_eval_samples)
        eval_dataset = eval_dataset.select(range(max_eval_samples))
    eval_dataset = _preprocess_split(eval_dataset, desc="validation dataset map pre-processing")
    # Note: `max_eval_samples` is not applied to the test split.
    test_dataset = _preprocess_split(raw_datasets["test"], desc="test dataset map pre-processing")

# Data collator
# NOTE(review): `default_data_collator` does not add the `candidate_mask` entry that
# `DataCollatorForChID` computes — confirm `BertForChID.forward` works without it before
# enabling `pad_to_max_length`.
data_collator = (
    default_data_collator
    if data_args.pad_to_max_length
    else DataCollatorForChID(tokenizer=tokenizer, pad_to_multiple_of=8 if training_args.fp16 else None)
)

  0%|          | 0/10 [00:00<?, ?ba/s]

11/03/2022 07:08:05 - INFO - datasets.arrow_dataset - Caching processed dataset at /home/huyt/.cache/huggingface/datasets/json/default-0761be92431bbb42/0.0.0/e6070c77f18f01a5ad4551a8b7edfba20b8438b7cad4d94e6ad9378022ce4aab/cache-9b0903d659c00ab0.arrow


  0%|          | 0/13 [00:00<?, ?ba/s]

11/03/2022 07:08:14 - INFO - datasets.arrow_dataset - Caching processed dataset at /home/huyt/.cache/huggingface/datasets/json/default-0761be92431bbb42/0.0.0/e6070c77f18f01a5ad4551a8b7edfba20b8438b7cad4d94e6ad9378022ce4aab/cache-b7fdcf2b73e00589.arrow


  0%|          | 0/20 [00:00<?, ?ba/s]

11/03/2022 07:08:26 - INFO - datasets.arrow_dataset - Caching processed dataset at /home/huyt/.cache/huggingface/datasets/json/default-0761be92431bbb42/0.0.0/e6070c77f18f01a5ad4551a8b7edfba20b8438b7cad4d94e6ad9378022ce4aab/cache-d4b79f39d41f73e0.arrow


  0%|          | 0/25 [00:00<?, ?ba/s]

11/03/2022 07:08:45 - INFO - datasets.arrow_dataset - Caching processed dataset at /home/huyt/.cache/huggingface/datasets/json/default-0761be92431bbb42/0.0.0/e6070c77f18f01a5ad4551a8b7edfba20b8438b7cad4d94e6ad9378022ce4aab/cache-e804ef7c5083736f.arrow


  0%|          | 0/20 [00:00<?, ?ba/s]

11/03/2022 07:09:09 - INFO - datasets.arrow_dataset - Caching processed dataset at /home/huyt/.cache/huggingface/datasets/json/default-0761be92431bbb42/0.0.0/e6070c77f18f01a5ad4551a8b7edfba20b8438b7cad4d94e6ad9378022ce4aab/cache-a6eff548050c5846.arrow


  0%|          | 0/25 [00:00<?, ?ba/s]

11/03/2022 07:09:28 - INFO - datasets.arrow_dataset - Caching processed dataset at /home/huyt/.cache/huggingface/datasets/json/default-0761be92431bbb42/0.0.0/e6070c77f18f01a5ad4551a8b7edfba20b8438b7cad4d94e6ad9378022ce4aab/cache-2969b78058e18dc9.arrow


In [12]:
def compute_metrics(eval_predictions):
    """Return `{"accuracy": ...}` for a `(predictions, label_ids)` pair.

    The predicted class of each row is the argmax of `predictions` along axis 1;
    accuracy is the float32 mean of the element-wise matches, returned as a Python float.
    """
    scores, gold = eval_predictions
    hits = np.argmax(scores, axis=1) == gold
    accuracy = hits.astype(np.float32).mean()
    return {"accuracy": accuracy.item()}
# Build the Trainer. A split is only handed over when the matching do_train/do_eval
# flag is set; the conditional expressions avoid touching a dataset name that an
# earlier cell never defined.
trainer = Trainer(
    args=training_args,
    model=model,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    train_dataset=(train_dataset if training_args.do_train else None),
    eval_dataset=(eval_dataset if training_args.do_eval else None),
)

In [13]:
# Train, then persist the model, metrics and trainer state.
# NOTE(review): the recorded run of this cell aborted inside the wandb login prompt.
# If wandb logging is not wanted, it can presumably be turned off through the
# `report_to` training argument in run.json — confirm against the installed
# transformers version.
if training_args.do_train:
    # An explicitly requested checkpoint wins over one auto-detected in the output directory.
    checkpoint = None
    if training_args.resume_from_checkpoint is not None:
        checkpoint = training_args.resume_from_checkpoint
    elif last_checkpoint is not None:
        checkpoint = last_checkpoint
    train_result = trainer.train(resume_from_checkpoint=checkpoint)
    trainer.save_model()  # Saves the tokenizer too for easy upload
    metrics = train_result.metrics

    # `train_dataset` holds one row per idiom blank, whereas `max_train_samples` caps raw
    # passages before that expansion. Taking min() with the cap therefore under-reported the
    # number of training examples, so report the size of the dataset that was trained on.
    metrics["train_samples"] = len(train_dataset)

    trainer.log_metrics("train", metrics)
    trainer.save_metrics("train", metrics)
    trainer.save_state()


[INFO|trainer.py:745] 2022-11-03 07:17:48,602 >> The following columns in the training set don't have a corresponding argument in `BertForChID.forward` and have been ignored: explaination, labels_syn, synonyms, position, content, exp embedding. If explaination, labels_syn, synonyms, position, content, exp embedding are not expected by `BertForChID.forward`,  you can safely ignore this message.
[INFO|trainer.py:1628] 2022-11-03 07:17:48,643 >> ***** Running training *****
[INFO|trainer.py:1629] 2022-11-03 07:17:48,644 >>   Num examples = 12462
[INFO|trainer.py:1630] 2022-11-03 07:17:48,644 >>   Num Epochs = 5
[INFO|trainer.py:1631] 2022-11-03 07:17:48,645 >>   Instantaneous batch size per device = 32
[INFO|trainer.py:1632] 2022-11-03 07:17:48,646 >>   Total train batch size (w. parallel, distributed & accumulation) = 224
[INFO|trainer.py:1633] 2022-11-03 07:17:48,646 >>   Gradient Accumulation steps = 1
[INFO|trainer.py:1634] 2022-11-03 07:17:48,647 >>   Total optimization steps = 280
[

11/03/2022 07:17:49 - ERROR - wandb.jupyter - Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
Traceback (most recent call last):
  File "/home/huyt/.conda/envs/py37/lib/python3.7/site-packages/wandb/sdk/wandb_init.py", line 759, in init
    wi.setup(kwargs)
  File "/home/huyt/.conda/envs/py37/lib/python3.7/site-packages/wandb/sdk/wandb_init.py", line 156, in setup
  File "/home/huyt/.conda/envs/py37/lib/python3.7/site-packages/wandb/sdk/wandb_login.py", line 238, in _login
    wlogin.prompt_api_key()
  File "/home/huyt/.conda/envs/py37/lib/python3.7/site-packages/wandb/sdk/wandb_login.py", line 166, in prompt_api_key
    no_create=self._settings.force,
  File "/home/huyt/.conda/envs/py37/lib/python3.7/site-packages/wandb/sdk/lib/apikey.py", line 121, in prompt_api_key
    write_key(settings, key, api=api)
  File "/home/huyt/.conda/envs/py37/lib/python3.7/site-packages/wandb/sdk/lib/apikey.py", line 206, in write_key
    raise ValueError("API key must be 40 characters long, yours was %

Exception: problem

In [None]:
# Final evaluation: score the preprocessed test split and store the results under the "test" name.
if training_args.do_eval:
    logger.info("*** Evaluate ***")

    metrics = trainer.evaluate(eval_dataset=test_dataset)
    # Record how many test rows the metrics were computed on.
    metrics.update(test_samples=len(test_dataset))

    # Log first, then save — same order as calling the two methods one after the other.
    for emit in (trainer.log_metrics, trainer.save_metrics):
        emit("test", metrics)