In [1]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = "0"

In [35]:
from datasets.filesystems import S3FileSystem
from datasets import DatasetDict, load_metric
import torch
import transformers as tr
from tqdm import tqdm
from pathlib import Path
import shutil

In [3]:
s3fs = S3FileSystem()

In [4]:
dataset = DatasetDict.load_from_disk("s3://traal-storage/datasets/conll2003", fs=s3fs)

In [5]:
id2label = ['O', 'B-LOC', 'B-MISC', 'B-ORG', 'B-PER', 'I-LOC', 'I-MISC', 'I-ORG', 'I-PER']
label2id = {l:i for i, l in enumerate(id2label)}

In [6]:
def model_init():
    return tr.AutoModelForTokenClassification.from_pretrained("distilbert-base-cased", num_labels=len(id2label))

In [7]:
def convert_label_to_ids(item):
    return {
        'ner': [label2id[x] for x in item['ner']]
    }

In [8]:
dataset = dataset.map(convert_label_to_ids, batched=False, num_proc=2)

    

#0:   0%|          | 0/7021 [00:00<?, ?ex/s]

#1:   0%|          | 0/7021 [00:00<?, ?ex/s]

    

#0:   0%|          | 0/1626 [00:00<?, ?ex/s]

#1:   0%|          | 0/1625 [00:00<?, ?ex/s]

    

#1:   0%|          | 0/1727 [00:00<?, ?ex/s]

#0:   0%|          | 0/1727 [00:00<?, ?ex/s]

In [9]:
tokenizer = tr.AutoTokenizer.from_pretrained("distilbert-base-cased")

In [10]:
def tokenize_and_align_labels(examples):
    tokenized_inputs = tokenizer(examples["tokens"], truncation=True, is_split_into_words=True)

    labels = []
    for i, label in enumerate(examples[f"ner"]):
        word_ids = tokenized_inputs.word_ids(batch_index=i)  # Map tokens to their respective word.
        previous_word_idx = None
        label_ids = []
        for word_idx in word_ids:  # Set the special tokens to -100.
            if word_idx is None:
                label_ids.append(-100)
            elif word_idx != previous_word_idx:  # Only label the first token of a given word.
                label_ids.append(label[word_idx])
            else:
                label_ids.append(-100)
            previous_word_idx = word_idx
        labels.append(label_ids)

    tokenized_inputs["labels"] = labels
    return tokenized_inputs

In [11]:
dataset = dataset.map(tokenize_and_align_labels, batched=True, batch_size=128, num_proc=2)

    

#0:   0%|          | 0/55 [00:00<?, ?ba/s]

#1:   0%|          | 0/55 [00:00<?, ?ba/s]

    

#0:   0%|          | 0/13 [00:00<?, ?ba/s]

#1:   0%|          | 0/13 [00:00<?, ?ba/s]

    

#0:   0%|          | 0/14 [00:00<?, ?ba/s]

#1:   0%|          | 0/14 [00:00<?, ?ba/s]

In [12]:
data_collator = tr.DataCollatorForTokenClassification(tokenizer=tokenizer, pad_to_multiple_of=8)

In [13]:
metric = load_metric("seqeval")
import numpy as np

def compute_metrics(p):
    predictions, labels = p
    predictions = np.argmax(predictions, axis=2)

    # Remove ignored index (special tokens)
    true_predictions = [
        [id2label[p] for (p, l) in zip(prediction, label) if l != -100]
        for prediction, label in zip(predictions, labels)
    ]
    true_labels = [
        [id2label[l] for (p, l) in zip(prediction, label) if l != -100]
        for prediction, label in zip(predictions, labels)
    ]

    results = metric.compute(predictions=true_predictions, references=true_labels)
    return {
        "precision": results["overall_precision"],
        "recall": results["overall_recall"],
        "f1": results["overall_f1"],
        "accuracy": results["overall_accuracy"],
    }

In [14]:
from uuid import uuid4

def create_experiment_id() -> str:
    base_uuid = str(uuid4()).split("-")[-1]
    return base_uuid

In [15]:
from datetime import datetime

def get_time_for_saving():
    current_datatime = datetime.now()
    return current_datatime.strftime("%m-%d-%a-%H-%M")

In [21]:
experiment_id = create_experiment_id()
experiment_time = get_time_for_saving()
experiment_type = "conll2003"
experiment_seed = 42
output_dir = Path(f"../experiments/{experiment_type}/{experiment_seed}/{experiment_time}-{experiment_id}")
hp_search_dir = output_dir / "hp_search"

training_args = tr.TrainingArguments(
    output_dir=hp_search_dir,
    evaluation_strategy="epoch",
    save_strategy='epoch',
    disable_tqdm=False,
    group_by_length=True,
    seed=experiment_seed,
    fp16=True, fp16_opt_level="O2",
    metric_for_best_model='eval_f1',
    load_best_model_at_end=True
)
trainer = tr.Trainer(
    args=training_args,
    tokenizer=tokenizer,
    train_dataset=dataset["train"],
    eval_dataset=dataset["validation"],
    data_collator=data_collator,
    model_init=model_init,
    compute_metrics=compute_metrics,
)

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file https://huggingface.co/distilbert-base-cased/resolve/main/config.json from cache at /home/rexhaif/.cache/huggingface/transformers/ebe1ea24d11aa664488b8de5b21e33989008ca78f207d4e30ec6350b693f073f.302bfd1b5e031cc1b17796e0b6e5b242ba2045d31d00f97589e12b458ebff27a
Model config DistilBertConfig {
  "_name_or_path": "distilbert-base-cased",
  "activation": "gelu",
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
    "5": "LABEL_5",
    "6": "LABEL_6",
    "7": "LABEL_7",
    "8": "LABEL_8"
  },
  "initializer_range": 0.02,
  "lab

In [22]:
import optuna

def custom_search_objective(trial: optuna.Trial):
    possible_batch_sizes = [4, 8, 16, 32, 64]
    possible_batch_sizes = [x for x in possible_batch_sizes if (len(dataset['train']) / x) >= 20.0]
    return {
        "learning_rate": trial.suggest_float("learning_rate", 1e-6, 1e-4, log=True),
        "num_train_epochs": trial.suggest_int("num_train_epochs", 1, 5),
        "warmup_ratio": trial.suggest_float("warmup_ratio", 0.0, 0.3, step=0.01),
        "weight_decay": trial.suggest_float("weight_decay", 0.0, 0.1, step=0.01),
        "lr_scheduler_type": trial.suggest_categorical("lr_scheduler_type", ['linear', 'constant', 'cosine']),
        "per_device_train_batch_size": trial.suggest_categorical("per_device_train_batch_size", possible_batch_sizes)
    }

In [23]:
def compute_objective(metrics):
    return metrics['eval_f1']

In [24]:
best_trial = trainer.hyperparameter_search(
    hp_space=custom_search_objective,
    compute_objective=compute_objective,
    direction="maximize",
    backend="optuna",
    n_trials=10 # number of trials
)

[32m[I 2022-05-15 18:10:26,120][0m A new study created in memory with name: no-name-5e007dc4-23d2-467f-9576-ddf5b36f17fc[0m
Trial:
loading configuration file https://huggingface.co/distilbert-base-cased/resolve/main/config.json from cache at /home/rexhaif/.cache/huggingface/transformers/ebe1ea24d11aa664488b8de5b21e33989008ca78f207d4e30ec6350b693f073f.302bfd1b5e031cc1b17796e0b6e5b242ba2045d31d00f97589e12b458ebff27a
Model config DistilBertConfig {
  "_name_or_path": "distilbert-base-cased",
  "activation": "gelu",
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
    "5": "LABEL_5",
    "6": "LABEL_6",
    "7": "LABEL_7",
    "8": "LABEL_8"
  },
  "initializer_range": 0.02,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4,
    "LABEL_5": 5,
    "LABEL_6": 6,
    "LABEL_7": 7,
    "LAB

Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,0.1661,0.103742,0.837657,0.842309,0.839977,0.973093
2,0.064,0.064461,0.903677,0.901548,0.902612,0.983276
3,0.0401,0.058844,0.906389,0.928812,0.917463,0.986254
4,0.0266,0.05496,0.92286,0.934197,0.928494,0.987948
5,0.0205,0.054974,0.926253,0.936385,0.931291,0.988299


The following columns in the evaluation set don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: tokens, ner. If tokens, ner are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 3251
  Batch size = 8
Saving model checkpoint to ../experiments/conll2003/42/05-15-Sun-18-10-7bfd6675c8d0/hp_search/run-0/checkpoint-1756
Configuration saved in ../experiments/conll2003/42/05-15-Sun-18-10-7bfd6675c8d0/hp_search/run-0/checkpoint-1756/config.json
Model weights saved in ../experiments/conll2003/42/05-15-Sun-18-10-7bfd6675c8d0/hp_search/run-0/checkpoint-1756/pytorch_model.bin
tokenizer config file saved in ../experiments/conll2003/42/05-15-Sun-18-10-7bfd6675c8d0/hp_search/run-0/checkpoint-1756/tokenizer_config.json
Special tokens file saved in ../experiments/conll2003/42/05-15-Sun-18-10-7bfd6675c8d0/hp_search/run-0/checkpoint-1756/special_tokens_map.jso

Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,0.2173,0.161353,0.740152,0.752609,0.746328,0.958802
2,0.1081,0.106741,0.819864,0.834904,0.827316,0.971847
3,0.0882,0.086669,0.849926,0.869236,0.859473,0.976656
4,0.0708,0.073793,0.877903,0.890609,0.884211,0.980569
5,0.0589,0.0686,0.89195,0.904409,0.898137,0.98238


The following columns in the evaluation set don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: tokens, ner. If tokens, ner are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 3251
  Batch size = 8
Saving model checkpoint to ../experiments/conll2003/42/05-15-Sun-18-10-7bfd6675c8d0/hp_search/run-1/checkpoint-1756
Configuration saved in ../experiments/conll2003/42/05-15-Sun-18-10-7bfd6675c8d0/hp_search/run-1/checkpoint-1756/config.json
Model weights saved in ../experiments/conll2003/42/05-15-Sun-18-10-7bfd6675c8d0/hp_search/run-1/checkpoint-1756/pytorch_model.bin
tokenizer config file saved in ../experiments/conll2003/42/05-15-Sun-18-10-7bfd6675c8d0/hp_search/run-1/checkpoint-1756/tokenizer_config.json
Special tokens file saved in ../experiments/conll2003/42/05-15-Sun-18-10-7bfd6675c8d0/hp_search/run-1/checkpoint-1756/special_tokens_map.jso

Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,0.2784,0.175528,0.711133,0.721306,0.716183,0.954499
2,0.1131,0.111091,0.819967,0.82935,0.824632,0.971049
3,0.0965,0.104278,0.826603,0.843992,0.835207,0.972801


The following columns in the evaluation set don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: tokens, ner. If tokens, ner are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 3251
  Batch size = 8
Saving model checkpoint to ../experiments/conll2003/42/05-15-Sun-18-10-7bfd6675c8d0/hp_search/run-2/checkpoint-1756
Configuration saved in ../experiments/conll2003/42/05-15-Sun-18-10-7bfd6675c8d0/hp_search/run-2/checkpoint-1756/config.json
Model weights saved in ../experiments/conll2003/42/05-15-Sun-18-10-7bfd6675c8d0/hp_search/run-2/checkpoint-1756/pytorch_model.bin
tokenizer config file saved in ../experiments/conll2003/42/05-15-Sun-18-10-7bfd6675c8d0/hp_search/run-2/checkpoint-1756/tokenizer_config.json
Special tokens file saved in ../experiments/conll2003/42/05-15-Sun-18-10-7bfd6675c8d0/hp_search/run-2/checkpoint-1756/special_tokens_map.jso

Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,0.3087,0.181854,0.714668,0.718277,0.716468,0.953292
2,0.1003,0.098956,0.83908,0.847694,0.843365,0.974125
3,0.0793,0.078841,0.863651,0.880512,0.872,0.978759
4,0.0654,0.072574,0.879682,0.893302,0.88644,0.980959
5,0.0595,0.071537,0.881953,0.893975,0.887923,0.981134


The following columns in the evaluation set don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: tokens, ner. If tokens, ner are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 3251
  Batch size = 8
Saving model checkpoint to ../experiments/conll2003/42/05-15-Sun-18-10-7bfd6675c8d0/hp_search/run-3/checkpoint-1756
Configuration saved in ../experiments/conll2003/42/05-15-Sun-18-10-7bfd6675c8d0/hp_search/run-3/checkpoint-1756/config.json
Model weights saved in ../experiments/conll2003/42/05-15-Sun-18-10-7bfd6675c8d0/hp_search/run-3/checkpoint-1756/pytorch_model.bin
tokenizer config file saved in ../experiments/conll2003/42/05-15-Sun-18-10-7bfd6675c8d0/hp_search/run-3/checkpoint-1756/tokenizer_config.json
Special tokens file saved in ../experiments/conll2003/42/05-15-Sun-18-10-7bfd6675c8d0/hp_search/run-3/checkpoint-1756/special_tokens_map.jso

Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,0.2238,0.192854,0.683606,0.710872,0.696972,0.95045


The following columns in the evaluation set don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: tokens, ner. If tokens, ner are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 3251
  Batch size = 8
Saving model checkpoint to ../experiments/conll2003/42/05-15-Sun-18-10-7bfd6675c8d0/hp_search/run-4/checkpoint-1756
Configuration saved in ../experiments/conll2003/42/05-15-Sun-18-10-7bfd6675c8d0/hp_search/run-4/checkpoint-1756/config.json
Model weights saved in ../experiments/conll2003/42/05-15-Sun-18-10-7bfd6675c8d0/hp_search/run-4/checkpoint-1756/pytorch_model.bin
tokenizer config file saved in ../experiments/conll2003/42/05-15-Sun-18-10-7bfd6675c8d0/hp_search/run-4/checkpoint-1756/tokenizer_config.json
Special tokens file saved in ../experiments/conll2003/42/05-15-Sun-18-10-7bfd6675c8d0/hp_search/run-4/checkpoint-1756/special_tokens_map.jso

Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,0.0719,0.070164,0.9278,0.910468,0.919052,0.984677
2,0.0229,0.053602,0.927142,0.93588,0.931491,0.988318


The following columns in the evaluation set don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: tokens, ner. If tokens, ner are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 3251
  Batch size = 8
Saving model checkpoint to ../experiments/conll2003/42/05-15-Sun-18-10-7bfd6675c8d0/hp_search/run-5/checkpoint-1756
Configuration saved in ../experiments/conll2003/42/05-15-Sun-18-10-7bfd6675c8d0/hp_search/run-5/checkpoint-1756/config.json
Model weights saved in ../experiments/conll2003/42/05-15-Sun-18-10-7bfd6675c8d0/hp_search/run-5/checkpoint-1756/pytorch_model.bin
tokenizer config file saved in ../experiments/conll2003/42/05-15-Sun-18-10-7bfd6675c8d0/hp_search/run-5/checkpoint-1756/tokenizer_config.json
Special tokens file saved in ../experiments/conll2003/42/05-15-Sun-18-10-7bfd6675c8d0/hp_search/run-5/checkpoint-1756/special_tokens_map.jso

Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,0.2739,0.205864,0.653482,0.672837,0.663018,0.945641


The following columns in the evaluation set don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: tokens, ner. If tokens, ner are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 3251
  Batch size = 8
[32m[I 2022-05-15 18:29:19,774][0m Trial 6 pruned. [0m
Trial:
loading configuration file https://huggingface.co/distilbert-base-cased/resolve/main/config.json from cache at /home/rexhaif/.cache/huggingface/transformers/ebe1ea24d11aa664488b8de5b21e33989008ca78f207d4e30ec6350b693f073f.302bfd1b5e031cc1b17796e0b6e5b242ba2045d31d00f97589e12b458ebff27a
Model config DistilBertConfig {
  "_name_or_path": "distilbert-base-cased",
  "activation": "gelu",
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
  

Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,0.0974,0.078398,0.906212,0.886234,0.896112,0.980472
2,0.0475,0.06145,0.911562,0.91417,0.912864,0.984619


The following columns in the evaluation set don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: tokens, ner. If tokens, ner are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 3251
  Batch size = 8
Saving model checkpoint to ../experiments/conll2003/42/05-15-Sun-18-10-7bfd6675c8d0/hp_search/run-7/checkpoint-1756
Configuration saved in ../experiments/conll2003/42/05-15-Sun-18-10-7bfd6675c8d0/hp_search/run-7/checkpoint-1756/config.json
Model weights saved in ../experiments/conll2003/42/05-15-Sun-18-10-7bfd6675c8d0/hp_search/run-7/checkpoint-1756/pytorch_model.bin
tokenizer config file saved in ../experiments/conll2003/42/05-15-Sun-18-10-7bfd6675c8d0/hp_search/run-7/checkpoint-1756/tokenizer_config.json
Special tokens file saved in ../experiments/conll2003/42/05-15-Sun-18-10-7bfd6675c8d0/hp_search/run-7/checkpoint-1756/special_tokens_map.jso

Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,0.1037,0.087486,0.894429,0.875463,0.884844,0.97909


The following columns in the evaluation set don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: tokens, ner. If tokens, ner are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 3251
  Batch size = 8
Saving model checkpoint to ../experiments/conll2003/42/05-15-Sun-18-10-7bfd6675c8d0/hp_search/run-8/checkpoint-1756
Configuration saved in ../experiments/conll2003/42/05-15-Sun-18-10-7bfd6675c8d0/hp_search/run-8/checkpoint-1756/config.json
Model weights saved in ../experiments/conll2003/42/05-15-Sun-18-10-7bfd6675c8d0/hp_search/run-8/checkpoint-1756/pytorch_model.bin
tokenizer config file saved in ../experiments/conll2003/42/05-15-Sun-18-10-7bfd6675c8d0/hp_search/run-8/checkpoint-1756/tokenizer_config.json
Special tokens file saved in ../experiments/conll2003/42/05-15-Sun-18-10-7bfd6675c8d0/hp_search/run-8/checkpoint-1756/special_tokens_map.jso

Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,0.0743,0.070074,0.934794,0.904746,0.919525,0.983743


The following columns in the evaluation set don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: tokens, ner. If tokens, ner are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 3251
  Batch size = 8
Saving model checkpoint to ../experiments/conll2003/42/05-15-Sun-18-10-7bfd6675c8d0/hp_search/run-9/checkpoint-1756
Configuration saved in ../experiments/conll2003/42/05-15-Sun-18-10-7bfd6675c8d0/hp_search/run-9/checkpoint-1756/config.json
Model weights saved in ../experiments/conll2003/42/05-15-Sun-18-10-7bfd6675c8d0/hp_search/run-9/checkpoint-1756/pytorch_model.bin
tokenizer config file saved in ../experiments/conll2003/42/05-15-Sun-18-10-7bfd6675c8d0/hp_search/run-9/checkpoint-1756/tokenizer_config.json
Special tokens file saved in ../experiments/conll2003/42/05-15-Sun-18-10-7bfd6675c8d0/hp_search/run-9/checkpoint-1756/special_tokens_map.jso

In [24]:
best_trial

BestRun(run_id='7', objective=0.938149513912169, hyperparameters={'learning_rate': 2.1586422382993534e-05, 'num_train_epochs': 3, 'warmup_ratio': 0.18, 'weight_decay': 0.0, 'per_device_train_batch_size': 64})

In [25]:
def get_checkpoint_dir(base_dir: str, best_trial: tr.trainer_utils.BestRun):
    best_run_dir = Path(base_dir, f"run-{best_trial.run_id}")
    max_step = -1
    max_step_path = None
    for dir in best_run_dir.iterdir():
        step_n = int(dir.parts[-1].split("-")[1])
        if step_n > max_step:
            max_step = step_n
            max_step_path = dir

    return max_step_path


In [26]:
model_dir = get_checkpoint_dir(hp_search_dir, best_trial)

In [27]:
save_dir = Path(output_dir) / "models" / "full"

In [28]:
save_dir.mkdir(parents=True, exist_ok=True)

In [29]:
model_dir.replace(save_dir)

PosixPath('../experiments/conll2003/42/05-15-Sun-18-10-7bfd6675c8d0/models/full')

In [36]:
shutil.rmtree(hp_search_dir)

In [32]:
!ls /home/rexhaif/traal/experiments/conll2003/42/05-15-Sun-18-10-7bfd6675c8d0/models/full

config.json	   rng_state.pth  special_tokens_map.json  trainer_state.json
optimizer.pt	   scaler.pt	  tokenizer_config.json    training_args.bin
pytorch_model.bin  scheduler.pt   tokenizer.json	   vocab.txt
