In [2]:
import json
import os
import pickle

import numpy as np
from datasets import load_metric
from transformers import AutoModelForTokenClassification, TrainingArguments, Trainer, set_seed
from transformers import DataCollatorForTokenClassification
from transformers import AutoTokenizer

### Read label map and create a reverse label map

In [3]:
# Load the label-name -> class-id mapping from the JSON file next to this notebook.
# The encoding is given explicitly so the read does not depend on the platform default.
with open("label_maps.json", "r", encoding="utf-8") as f:
    label2id = json.load(f)
print(label2id)

{'O': 0, 'B-geo': 1, 'B-org': 2, 'B-tim': 3, 'I-per': 4, 'I-org': 5, 'B-per': 6, 'B-gpe': 7, 'I-geo': 8, 'I-tim': 9, 'B-art': 10, 'B-eve': 11, 'I-eve': 12, 'I-art': 13, 'I-gpe': 14, 'B-nat': 15, 'I-nat': 16}


In [4]:
# Invert label2id so a class id can be turned back into its label name.
id2label = {class_id: label_name for label_name, class_id in label2id.items()}
print(id2label)

{0: 'O', 1: 'B-geo', 2: 'B-org', 3: 'B-tim', 4: 'I-per', 5: 'I-org', 6: 'B-per', 7: 'B-gpe', 8: 'I-geo', 9: 'I-tim', 10: 'B-art', 11: 'B-eve', 12: 'I-eve', 13: 'I-art', 14: 'I-gpe', 15: 'B-nat', 16: 'I-nat'}


In [5]:
# compute_metrics looks labels up with label_list[class_id], so position i must hold
# the label whose id is i. Order by id explicitly instead of relying on the key order
# of the JSON file.
label_list = sorted(label2id, key=label2id.get)

# Fail early if the ids are not exactly 0..n-1; list indexing would otherwise map
# predictions to the wrong label names.
assert [label2id[name] for name in label_list] == list(range(len(label_list))), \
    "label ids must be contiguous and start at 0"
print(label_list)

['O', 'B-geo', 'B-org', 'B-tim', 'I-per', 'I-org', 'B-per', 'B-gpe', 'I-geo', 'I-tim', 'B-art', 'B-eve', 'I-eve', 'I-art', 'I-gpe', 'B-nat', 'I-nat']


### Read tokenized train and test data

In [6]:
# Load the pickled, already-tokenized training split.
# NOTE: pickle.load can run arbitrary code from the file, so only open pickles
# that come from a trusted source.
with open('../data/huggingface/train_tokenized.pkl', 'rb') as pkl_handle:
    train_tokenized = pickle.load(pkl_handle)

# Bare last expression so the notebook renders the dataset summary.
train_tokenized

Dataset({
    features: ['id', 'ner_tags', 'tokens', 'input_ids', 'attention_mask', 'labels'],
    num_rows: 16000
})

In [7]:
# Load the pickled, already-tokenized test split.
# NOTE: pickle.load can run arbitrary code from the file, so only open pickles
# that come from a trusted source.
with open('../data/huggingface/test_tokenized.pkl', 'rb') as pkl_handle:
    test_tokenized = pickle.load(pkl_handle)

# Bare last expression so the notebook renders the dataset summary.
test_tokenized

Dataset({
    features: ['id', 'ner_tags', 'tokens', 'input_ids', 'attention_mask', 'labels'],
    num_rows: 2000
})

### Create the tokenizer that we used earlier

In [8]:
# Tokenizer for the "distilbert-base-uncased" checkpoint; it is handed to the
# data collator and the Trainer further down.
tokenizer = AutoTokenizer.from_pretrained(
    pretrained_model_name_or_path="distilbert-base-uncased",
)

### Make evaluation function

In [9]:

def compute_metrics(p):
    """Score one evaluation pass with the seqeval metric.

    Args:
        p: A ``(predictions, labels)`` pair. ``predictions`` holds per-class
            scores with the class dimension on axis 2; ``labels`` holds the
            reference class ids, with ``-100`` at positions that must be
            skipped (presumably special tokens / padding -- confirm against
            the tokenization step).

    Returns:
        dict: ``precision``, ``recall``, ``f1`` and ``accuracy``, copied from
        the ``overall_*`` entries of the seqeval result.
    """
    ignore_index = -100

    scores, label_ids = p
    predicted_ids = np.argmax(scores, axis=2)

    true_predictions = []
    true_labels = []
    for predicted_row, label_row in zip(predicted_ids, label_ids):
        # Filter each sequence once, then split the surviving pairs into the
        # predicted and reference label names.
        kept = [
            (pred_id, gold_id)
            for pred_id, gold_id in zip(predicted_row, label_row)
            if gold_id != ignore_index
        ]
        true_predictions.append([label_list[pred_id] for pred_id, _ in kept])
        true_labels.append([label_list[gold_id] for _, gold_id in kept])

    # Load the metric on first use and reuse it afterwards; this function runs on
    # every evaluation pass and reloading each time only repeats the load work
    # and its warning.
    metric = getattr(compute_metrics, "_seqeval_metric", None)
    if metric is None:
        metric = load_metric("seqeval")
        compute_metrics._seqeval_metric = metric

    results = metric.compute(predictions=true_predictions, references=true_labels)
    return {
        "precision": results["overall_precision"],
        "recall": results["overall_recall"],
        "f1": results["overall_f1"],
        "accuracy": results["overall_accuracy"],
    }
    


In [10]:
# Collator passed to the Trainer below; it is built around the same tokenizer
# object created earlier in this notebook.
data_collator = DataCollatorForTokenClassification(
    tokenizer=tokenizer,
)

In [12]:
# Exploratory introspection: list the attributes exposed by the auto-model class.
# The result is only printed; it is not stored for use by other cells.
auto_model_attributes = dir(AutoModelForTokenClassification)
print(auto_model_attributes)

['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_model_mapping', 'from_config', 'from_pretrained', 'register']


### Model compiling and training

In [17]:
# Settings for this training run, gathered in one place.
MODEL_CHECKPOINT = "distilbert-base-uncased"
SEED = 42
# Checkpoints are written to the original project folder unless the
# CUSTOM_NER_MODEL_DIR environment variable points somewhere else, so the
# notebook can also run on machines that lack this drive layout.
OUTPUT_DIR = os.environ.get(
    "CUSTOM_NER_MODEL_DIR",
    r"E:\Work\Data_Science\Projects\Custom_NER\models\huggingface",
)

# The classification head is not part of the checkpoint and is initialized
# randomly, so seed the RNGs before the model is built to make runs repeatable.
set_seed(SEED)

model = AutoModelForTokenClassification.from_pretrained(
    MODEL_CHECKPOINT,
    num_labels=len(id2label),
    id2label=id2label,
    label2id=label2id,
)

training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=2,
    weight_decay=0.01,
    # Evaluate and checkpoint on the same schedule; load_best_model_at_end
    # needs a saved checkpoint for every evaluated step.
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    push_to_hub=False,
    seed=SEED,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_tokenized,
    # NOTE(review): the test split is also the evaluation set here, so the
    # scores reported per epoch are the same ones used to pick the checkpoint
    # that is reloaded at the end of training.
    eval_dataset=test_tokenized,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

trainer.train()



Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForTokenClassification: ['vocab_transform.bias', 'vocab_projector.bias', 'vocab_layer_norm.weight', 'vocab_projector.weight', 'vocab_layer_norm.bias', 'vocab_transform.weight']
- This IS expected if you are initializing DistilBertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN t

Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,0.1349,0.118816,0.790999,0.801098,0.796016,0.963754
2,0.1037,0.114329,0.798743,0.803603,0.801166,0.964491


  metric = load_metric("seqeval")
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


TrainOutput(global_step=4000, training_loss=0.1434553918838501, metrics={'train_runtime': 781.5516, 'train_samples_per_second': 40.944, 'train_steps_per_second': 5.118, 'total_flos': 600305536007904.0, 'train_loss': 0.1434553918838501, 'epoch': 2.0})

### Basic evaluation

In [11]:
# Run one evaluation pass on the Trainer's eval_dataset and show the metrics dict.
eval_metrics = trainer.evaluate()
eval_metrics

{'eval_loss': 0.1208786740899086,
 'eval_precision': 0.7905628197839681,
 'eval_recall': 0.8068933503539515,
 'eval_f1': 0.7986446129106364,
 'eval_accuracy': 0.964006240092595,
 'eval_runtime': 13.1964,
 'eval_samples_per_second': 151.556,
 'eval_steps_per_second': 18.945,
 'epoch': 2.0}

### Save the model

In [14]:
# Save the final model in a "final.model" folder inside the Trainer's output
# directory, so the location is defined once (in TrainingArguments) instead of
# being repeated here as a second absolute path.
final_model_dir = os.path.join(trainer.args.output_dir, "final.model")
trainer.save_model(final_model_dir)