In [1]:
from transformers import (DataCollatorForTokenClassification,
                          BertTokenizer, BertForTokenClassification)
from transformers import Trainer, TrainingArguments
from datasets import Dataset, DatasetDict
# Checkpoint directory of the fine-tuned model that this notebook evaluates.
pretrained_model = "./Model_01.01/04/model/"

# `load_from_disk` is a static method of DatasetDict, so it is called on the
# class itself; constructing an empty DatasetDict() first only created a
# throwaway object and suggested the instance was being filled in place.
dataset = DatasetDict.load_from_disk('./built_datasets/peykareh.01')

# num_labels=3 sizes the token-classification head for three classes; the
# evaluation cell further down scores the label ids 0, 1 and 2.
model = BertForTokenClassification.from_pretrained(pretrained_model, num_labels=3)

# Show the module tree and the available splits as a quick sanity check.
print(model)
print(dataset)

BertForTokenClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(105879, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, e

In [2]:

# Build a Trainer around the loaded model. Only `output_dir` is set; every
# other TrainingArguments option keeps its library default.
eval_args = TrainingArguments(output_dir='./Model/results')
test_split = dataset["test"]
trainer = Trainer(
    model=model,
    args=eval_args,
    train_dataset=dataset['train'],
    eval_dataset=test_split,
)

# Inspect the test split: its summary, its first record, and the Python
# types of both (printed in that order).
first_record = test_split[0]
for shown in (test_split, first_record, type(test_split), type(first_record)):
    print(shown)


Dataset({
    features: ['input_ids', 'labels', 'attention_mask'],
    num_rows: 7623
})
{'input_ids': [478, 459, 461, 451, 515, 477, 476, 451, 478, 498, 461, 472, 515, 466, 479, 452, 461, 505, 454, 451, 462, 458, 459, 451, 479, 477, 459, 470, 472, 479, 461, 476, 477, 451, 477, 479, 463, 475, 451, 476, 454, 515, 451, 477, 458, 451, 477, 479, 451, 459, 478, 476, 457, 454, 461, 476, 461, 451, 458, 479, 451, 478, 451, 477, 515, 476, 119, 102, 101, 475, 451, 515, 452, 477, 515, 454, 463, 476, 469, 454, 473, 459, 452, 479, 459, 505, 478, 478, 461, 507, 479, 477, 478, 469, 473, 475, 451, 477, 515, 454, 451, 462, 477, 515, 461, 479, 515, 454, 479, 451, 476, 451, 477, 472, 461, 459, 515, 479, 467, 452, 515, 469, 454, 463, 461, 499, 464, 476, 478, 476, 515, 507, 515, 461, 459, 448, 196, 476, 477, 183, 451, 477, 463, 451, 477, 515, 459, 461, 451, 515, 477, 451, 476, 461, 451, 478, 476, 515, 454, 458, 451, 465, 459, 451, 461, 459, 446, 452, 515, 451, 477, 505, 478, 505, 475, 515, 454, 459, 451, 4

In [4]:
# Ask the Trainer for predictions on the full test split.
# NOTE(review): the output saved under this cell is a ValueError, yet later
# cells consume `predicted_labels` -- re-run from a fresh kernel to confirm
# that this cell succeeds as written.
predictions = trainer.predict(dataset["test"])
print(predictions)

# argmax over the last axis turns the raw prediction array into one integer
# label per position (assumes the last axis indexes the classes -- TODO confirm).
raw_scores = predictions.predictions
predicted_labels = raw_scores.argmax(axis=-1)
print(predicted_labels)

ValueError: dictionary update sequence element #0 has length 512; 2 is required

In [10]:
# Re-display the test split summary (feature names and row count).
print(dataset['test'])

Dataset({
    features: ['input_ids', 'labels', 'attention_mask'],
    num_rows: 7623
})


In [5]:
from utils.label_evaluator import Evaluator
from corpus import Type
# Score the predictions with the project's Evaluator, tracking label ids 0, 1 and 2.
tracked_labels = (0, 1, 2)
evaluator = Evaluator(labels=tracked_labels)

# Gold labels come straight from the test split; Type.sents_raw selects the
# input format the evaluator expects (semantics defined in `corpus` -- not
# visible from this notebook).
gold_labels = dataset['test']['labels']
evaluator.evaluate(gold_labels, predicted_labels, Type.sents_raw)

In [6]:
# Render the metrics accumulated by the preceding evaluate() call as a table.
# NOTE(review): in the saved output the Accuracy column equals Recall on every
# row -- confirm that this is what Evaluator intends to report.
evaluator.show_metrics()

╒═════════╤═════════════╤══════════╤════════════╤════════════╕
│ Label   │   Precision │   Recall │   Accuracy │   F1 Score │
╞═════════╪═════════════╪══════════╪════════════╪════════════╡
│ 0       │    0.996294 │ 0.996555 │   0.996555 │   0.996425 │
├─────────┼─────────────┼──────────┼────────────┼────────────┤
│ 1       │    0.987987 │ 0.986389 │   0.986389 │   0.987187 │
├─────────┼─────────────┼──────────┼────────────┼────────────┤
│ 2       │    0.936259 │ 0.945634 │   0.945634 │   0.940923 │
├─────────┼─────────────┼──────────┼────────────┼────────────┤
│ Average │    0.973513 │ 0.976193 │   0.976193 │   0.974845 │
╘═════════╧═════════════╧══════════╧════════════╧════════════╛


In [8]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

# Unroll the per-sequence rows into two flat, position-aligned lists so the
# sklearn scorers below receive 1-D inputs.
# NOTE(review): every position of every sequence is scored (the saved report's
# total support equals 7623 rows x 512 positions); if any positions are padding,
# confirm that they should count towards the metrics.
predictions_flat = []
for predicted_row in predicted_labels:
    predictions_flat.extend(predicted_row)

true_labels_flat = []
for gold_row in dataset['test']['labels']:
    true_labels_flat.extend(gold_row)

# Overall accuracy, plus precision / recall / F1 averaged with equal weight
# per class ("macro").
macro_avg = {'average': 'macro'}
accuracy = accuracy_score(true_labels_flat, predictions_flat)
precision = precision_score(true_labels_flat, predictions_flat, **macro_avg)
recall = recall_score(true_labels_flat, predictions_flat, **macro_avg)
f1 = f1_score(true_labels_flat, predictions_flat, **macro_avg)

# Report each score rounded to two decimals.
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 score: {f1:.2f}")


Accuracy: 0.99
Precision: 0.97
Recall: 0.98
F1 score: 0.97


In [10]:
# Per-class precision / recall / F1 with support counts, formatted to five
# decimal places so small differences between classes stay visible.
report = classification_report(
    true_labels_flat,
    predictions_flat,
    digits=5,
)

# Emit the formatted text table.
print(report)

              precision    recall  f1-score   support

           0    0.99629   0.99656   0.99642   2923715
           1    0.98799   0.98639   0.98719    909052
           2    0.93626   0.94563   0.94092     70209

    accuracy                        0.99327   3902976
   macro avg    0.97351   0.97619   0.97485   3902976
weighted avg    0.99328   0.99327   0.99327   3902976
