In [None]:
!pip install optuna datasets transformers[torch] seqeval

Collecting optuna
  Downloading optuna-3.6.1-py3-none-any.whl (380 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m380.1/380.1 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting datasets
  Downloading datasets-2.20.0-py3-none-any.whl (547 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m547.8/547.8 kB[0m [31m11.1 MB/s[0m eta [36m0:00:00[0m
Collecting seqeval
  Downloading seqeval-1.2.2.tar.gz (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.13.1-py3-none-any.whl (233 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.4/233.4 kB[0m [31m9.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting colorlog (from optuna)
  Downloading colorlog-6.8.2-py3-none-any.whl (11 kB)
Collecting pyarrow>=15.0.0 (from datasets)
  Downl

In [None]:
import optuna
from transformers import TrainingArguments, Trainer, AutoModelForTokenClassification, DataCollatorForTokenClassification, EarlyStoppingCallback, AutoTokenizer
import datasets
import numpy as np
from datasets import load_metric, load_from_disk

In [None]:
dataset = load_from_disk("drive/MyDrive/ner_dataset_conll_format")
label_list = dataset["train"].features["ner_tags"].feature.names

In [None]:
tokenizer = AutoTokenizer.from_pretrained("dbmdz/convbert-base-turkish-cased")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/83.0 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/565 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/251k [00:00<?, ?B/s]

In [None]:
def tokenize_and_align_labels(examples, label_all_tokens=True):
    tokenized_inputs = tokenizer(examples["tokens"], truncation=True, is_split_into_words=True)
    labels = []
    for i, label in enumerate(examples["ner_tags"]):
        word_ids = tokenized_inputs.word_ids(batch_index=i)
        previous_word_idx = None
        label_ids = []
        for word_idx in word_ids:
            if word_idx is None:
                label_ids.append(-100)
            elif word_idx != previous_word_idx:
                label_ids.append(label[word_idx])
            else:
                label_ids.append(label[word_idx] if label_all_tokens else -100)
            previous_word_idx = word_idx
        labels.append(label_ids)
    tokenized_inputs["labels"] = labels
    return tokenized_inputs

In [None]:
tokenized_datasets = dataset.map(tokenize_and_align_labels, batched=True)

In [None]:
metric = datasets.load_metric("seqeval")

The repository for seqeval contains custom code which must be executed to correctly load the dataset. You can inspect the repository content at https://hf.co/datasets/seqeval.
You can avoid this prompt in future by passing the argument `trust_remote_code=True`.

Do you wish to run the custom code? [y/N] y


In [None]:
def compute_metrics(eval_preds):
    pred_logits, labels = eval_preds
    pred_logits = np.argmax(pred_logits, axis=2)
    predictions = [
        [label_list[eval_preds] for (eval_preds, l) in zip(prediction, label) if l != -100]
        for prediction, label in zip(pred_logits, labels)
    ]
    true_labels = [
        [label_list[l] for (eval_preds, l) in zip(prediction, label) if l != -100]
        for prediction, label in zip(pred_logits, labels)
    ]
    results = metric.compute(predictions=predictions, references=true_labels)
    return {
        "precision": results["overall_precision"],
        "recall": results["overall_recall"],
        "f1": results["overall_f1"],
        "accuracy": results["overall_accuracy"],
    }

In [None]:
def objective(trial):
    model_name = "dbmdz/convbert-base-turkish-cased"
    model = AutoModelForTokenClassification.from_pretrained(model_name, num_labels=len(label_list), ignore_mismatched_sizes=True)

    # Define hyperparameters to tune
    args = TrainingArguments(
        output_dir="test-ner",
        evaluation_strategy="epoch",
        save_strategy="epoch",
        save_total_limit=3,
        learning_rate=trial.suggest_loguniform("learning_rate", 1e-6, 1e-4),
        per_device_train_batch_size=32,
        per_device_eval_batch_size=32,
        num_train_epochs=40,
        weight_decay=0.01,
        #weight_decay=trial.suggest_loguniform("weight_decay", 1e-5, 1e-3),
        #warmup_steps=trial.suggest_categorical("warmup_steps", [100, 200 ,300]),
        #gradient_accumulation_steps=trial.suggest_int("gradient_accumulation_steps", 1, 5),
        load_best_model_at_end=True,
    )

    data_collator = DataCollatorForTokenClassification(tokenizer)

    trainer = Trainer(
        model,
        args,
        train_dataset=tokenized_datasets["train"],
        eval_dataset=tokenized_datasets["validation"],
        data_collator=data_collator,
        tokenizer=tokenizer,
        compute_metrics=compute_metrics,
        callbacks=[EarlyStoppingCallback(early_stopping_patience=10)],
    )
    trainer.train()

    eval_result = trainer.evaluate(eval_dataset=tokenized_datasets["validation"])
    return eval_result["eval_f1"]

In [None]:
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=20)

[I 2024-06-18 12:38:15,195] A new study created in memory with name: no-name-a1ea80b6-f948-4d39-a3a0-e023b20f4ae8
Some weights of ConvBertForTokenClassification were not initialized from the model checkpoint at dbmdz/convbert-base-turkish-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  learning_rate=trial.suggest_loguniform("learning_rate", 1e-6, 1e-4),


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,1.215416,0.031898,0.027174,0.029347,0.595847
2,No log,0.73174,0.497019,0.453125,0.474058,0.764956
3,No log,0.571345,0.601513,0.648098,0.623937,0.808162
4,No log,0.505153,0.638923,0.709239,0.672247,0.823958
5,No log,0.481942,0.684597,0.76087,0.720721,0.835144
6,No log,0.479012,0.689106,0.764946,0.725048,0.839147
7,No log,0.46368,0.709168,0.793478,0.748958,0.841827
8,No log,0.448145,0.714024,0.795516,0.752571,0.844722
9,No log,0.422259,0.728373,0.817935,0.77056,0.853513
10,No log,0.442609,0.721183,0.811821,0.763822,0.850368


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


[I 2024-06-18 12:42:01,521] Trial 0 finished with value: 0.776463063639271 and parameters: {'learning_rate': 8.418101794039093e-06}. Best is trial 0 with value: 0.776463063639271.
Some weights of ConvBertForTokenClassification were not initialized from the model checkpoint at dbmdz/convbert-base-turkish-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  learning_rate=trial.suggest_loguniform("learning_rate", 1e-6, 1e-4),


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,1.396223,0.09315,0.19769,0.126632,0.572654
2,No log,0.977963,0.427995,0.432065,0.43002,0.673826
3,No log,0.702333,0.534357,0.496603,0.514789,0.772497
4,No log,0.578958,0.601673,0.63519,0.617978,0.807233


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
[W 2024-06-18 12:42:50,036] Trial 1 failed with parameters: {'learning_rate': 5.794204499954747e-06} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/optuna/study/_optimize.py", line 196, in _run_trial
    value_or_values = func(trial)
  File "<ipython-input-14-9a334b40c98a>", line 34, in objective
    trainer.train()
  File "/usr/local/lib/python3.10/dist-packages/transformers/trainer.py", line 1885, in train
    return inner_training_loop(
  File "/usr/local/lib/python3.10/dist-packages/transformers/trainer.py", line 2221, in _inner_training_loop
    and (torch.isnan(tr_loss_step) or torch.isinf(tr_loss_step))
KeyboardInterrupt
[W 2024-06-18 12:42:50,037] Trial 1 failed with value None.


KeyboardInterrupt: 

In [None]:
best_trial = study.best_trial
best_params = best_trial.params
print(f"Best F1 Score: {best_trial.value}")
print(f"Best Params: {best_params}")

Best F1 Score: 0.8077046548956661
Best Params: {'learning_rate': 4.6350348759130274e-05, 'per_device_train_batch_size': 16, 'per_device_eval_batch_size': 32, 'weight_decay': 1.1480139123018277e-05, 'warmup_steps': 300, 'gradient_accumulation_steps': 4}


In [None]:
best_model = best_trial.user_attrs["model"]
best_model.save_pretrained("best_ner_model")
tokenizer.save_pretrained("best_tokenizer")

KeyError: 'model'