Libraries 

In [1]:
import warnings
warnings.filterwarnings("ignore")

import torch
from torch.utils.data import DataLoader, TensorDataset
from transformers import BertTokenizer, BertForTokenClassification, AdamW
import numpy as np
import os
from transformers import BertTokenizerFast
from tqdm import tqdm
from sklearn.metrics import precision_recall_fscore_support, accuracy_score

Configuration

In [2]:
warnings.filterwarnings("ignore")

MAX_LEN = 128
BATCH_SIZE = 32
LEARNING_RATE = 2e-5
NUM_EPOCHS = 5
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
LANGUAGE_CODE = 'fr'

tokenizer = BertTokenizerFast.from_pretrained('bert-base-multilingual-cased')

Function Definitions

In [3]:
def load_data(file_path):
    tokens, labels = [], []
    with open(file_path, 'r', encoding='utf-8') as file:
        current_tokens, current_labels = [], []
        for line in file:
            if line.strip() == "":
                if current_tokens:
                    tokens.append(current_tokens)
                    labels.append(current_labels)
                    current_tokens, current_labels = [], []
            else:
                parts = line.strip().split()
                current_tokens.append(parts[0])
                current_labels.append(parts[-1])
        if current_tokens:
            tokens.append(current_tokens)
            labels.append(current_labels)
    return tokens, labels

def encode_tags(tags, encodings):
    labels = [[tag2id[tag] for tag in doc] for doc in tags]
    encoded_labels = []

    for doc_labels, doc_offset in zip(labels, encodings.offset_mapping):

        doc_enc_labels = np.ones(len(doc_offset), dtype=int) * -100

        label_position = 0
        for i, (start, end) in enumerate(doc_offset):
            if start != 0:
                continue

            if label_position >= len(doc_labels):
                continue
            doc_enc_labels[i] = doc_labels[label_position]
            
        encoded_labels.append(doc_enc_labels.tolist())

    return encoded_labels

def create_dataset(token_lists, tag_lists):
    encodings = tokenizer(token_lists, is_split_into_words=True, return_offsets_mapping=True, padding=True, truncation=True, max_length=MAX_LEN)
    tags = encode_tags(tag_lists, encodings)
    input_ids = torch.tensor(encodings['input_ids'])
    attention_masks = torch.tensor(encodings['attention_mask'])
    labels = torch.tensor(tags)
    return TensorDataset(input_ids, attention_masks, labels)

Loading Data and Model Preparation

In [4]:
warnings.filterwarnings("ignore")

train_path = f'E:\\MultiCoNER_2_train_dev\\train_dev\\{LANGUAGE_CODE}-train.conll'
dev_path = f'E:\\MultiCoNER_2_train_dev\\train_dev\\{LANGUAGE_CODE}-dev.conll'

train_tokens, train_tags = load_data(train_path)
dev_tokens, dev_tags = load_data(dev_path)

tag2id = {tag: idx for idx, tag in enumerate(np.unique([tag for sublist in train_tags for tag in sublist]))}

train_dataset = create_dataset(train_tokens, train_tags)
dev_dataset = create_dataset(dev_tokens, dev_tags)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
dev_loader = DataLoader(dev_dataset, batch_size=BATCH_SIZE, shuffle=False)

model = BertForTokenClassification.from_pretrained('bert-base-multilingual-cased', num_labels=len(tag2id))
model.to(DEVICE)

optimizer = AdamW(model.parameters(), lr=LEARNING_RATE)

Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Train and Evaluate

In [5]:
# train
model.train()
train_losses = []
train_predictions = []
train_true_labels = []

for epoch in range(NUM_EPOCHS):
    loop = tqdm(train_loader, leave=True)
    for batch in loop:
        batch = tuple(item.to(DEVICE) for item in batch)
        inputs = {'input_ids': batch[0], 'attention_mask': batch[1], 'labels': batch[2]}
        
        outputs = model(**inputs)
        loss = outputs.loss
        logits = outputs.logits
        
        train_losses.append(loss.item())
        predictions = torch.argmax(logits, dim=2)
        
        # Remove ignored index (special tokens)
        active_accuracy = inputs['attention_mask'].view(-1) == 1
        labels = inputs['labels'].view(-1)
        predictions = torch.masked_select(predictions.view(-1), active_accuracy)
        labels = torch.masked_select(labels, active_accuracy)

        train_predictions.extend(predictions.cpu().numpy())
        train_true_labels.extend(labels.cpu().numpy())

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        loop.set_description(f"Epoch {epoch + 1}")
        loop.set_postfix(loss=loss.item())

# evaluate
model.eval()
eval_losses = []
eval_predictions = []
eval_true_labels = []

with torch.no_grad():
    for batch in tqdm(dev_loader, desc="Evaluating"):
        batch = tuple(item.to(DEVICE) for item in batch)
        inputs = {'input_ids': batch[0], 'attention_mask': batch[1], 'labels': batch[2]}
        
        outputs = model(**inputs)
        logits = outputs.logits
        loss = outputs.loss
        
        eval_losses.append(loss.item())
        predictions = torch.argmax(logits, dim=2)

        # Remove ignored index (special tokens)
        active_accuracy = inputs['attention_mask'].view(-1) == 1
        labels = inputs['labels'].view(-1)
        predictions = torch.masked_select(predictions.view(-1), active_accuracy)
        labels = torch.masked_select(labels, active_accuracy)

        eval_predictions.extend(predictions.cpu().numpy())
        eval_true_labels.extend(labels.cpu().numpy())

print(f"Average training loss: {np.mean(train_losses)}")
print(f"Average evaluation loss: {np.mean(eval_losses)}")

Epoch 1: 100%|██████████| 518/518 [01:50<00:00,  4.68it/s, loss=0.00189]
Epoch 2: 100%|██████████| 518/518 [01:48<00:00,  4.77it/s, loss=0.000982]
Epoch 3: 100%|██████████| 518/518 [01:48<00:00,  4.76it/s, loss=0.000557]
Epoch 4: 100%|██████████| 518/518 [01:49<00:00,  4.75it/s, loss=0.000336]
Epoch 5: 100%|██████████| 518/518 [01:50<00:00,  4.70it/s, loss=0.000221]
Evaluating: 100%|██████████| 27/27 [00:01<00:00, 21.48it/s]

Average training loss: 0.009484322288596794
Average evaluation loss: 0.0001430926355102134





In [6]:
train_acc = accuracy_score(train_true_labels, train_predictions)
train_precision, train_recall, train_f1, _ = precision_recall_fscore_support(train_true_labels, train_predictions, average='weighted')

print(f"Training Accuracy: {train_acc}")
print(f"Training Precision: {train_precision}")
print(f"Training Recall: {train_recall}")
print(f"Training F1-Score: {train_f1}")

print("===============================================")

eval_acc = accuracy_score(eval_true_labels, eval_predictions)
eval_precision, eval_recall, eval_f1, _ = precision_recall_fscore_support(eval_true_labels, eval_predictions, average='weighted')

print(f"Evaluation Accuracy: {eval_acc}")
print(f"Evaluation Precision: {eval_precision}")
print(f"Evaluation Recall: {eval_recall}")
print(f"Evaluation F1-Score: {eval_f1}")

Training Accuracy: 0.7692028544731209
Training Precision: 0.5925232016461537
Training Recall: 0.7692028544731209
Training F1-Score: 0.669401214729863
Evaluation Accuracy: 0.771695132787047
Evaluation Precision: 0.5955133779672183
Evaluation Recall: 0.771695132787047
Evaluation F1-Score: 0.6722526544738184
