In [29]:
import json
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm

import torch
from transformers import AdamW
import torch.nn.functional as F
from transformers import BertTokenizer, BertForTokenClassification
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler

from sklearn.metrics import accuracy_score, f1_score

def load_data_to_dataframe(file_path):
    with open(file_path, 'r', encoding='utf-8') as file:
        data = [json.loads(line) for line in file]
    df = pd.DataFrame(data)
    return df


class CONFIG: 
    EPS = 1e-8 
    EPOCHS = 3 # 3~5
    BATCH_SIZE = 16 # 8, 32
    LEARNING_RATE = 2e-4 # 1e-5
    MAX_LENGTH = 512 # 256
    BERT_MODEL_NAME = 'bert-base-uncased' # large, RoBERTa, DeBERTa
    DEVICE_NAME = "mps" # Cuda or alternative

'''BERTBASE (L=12, H=768, A=12, Total Param- eters=110M) and 
   BERTLARGE (L=24, H=1024, A=16, Total Parameters=340M).'''

'BERTBASE (L=12, H=768, A=12, Total Param- eters=110M) and \n   BERTLARGE (L=24, H=1024, A=16, Total Parameters=340M).'

In [None]:
train_df = load_data_to_dataframe('semeval_train.txt')
val_df = load_data_to_dataframe('semeval_val.txt')
test_df = load_data_to_dataframe('semeval_test.txt')

In [13]:
tokenizer = BertTokenizer.from_pretrained(CONFIG.MODEL_NAME)

def tokenize_and_align_labels(df, tokenizer, label_map, max_length=CONFIG.MAX_LENGTH):
    input_ids = []
    attention_masks = []
    token_type_ids = []
    label_ids = []

    for _, row in df.iterrows():
        text = row['token']
        labels = ['O']*len(text)  

        h_start, h_end = row['h']['pos']
        t_start, t_end = row['t']['pos']
        labels[h_start:h_end] = ['H']*(h_end - h_start)
        labels[t_start:t_end] = ['T']*(t_end - t_start)

        encoded_dict = tokenizer.encode_plus(text,
                                             add_special_tokens=True,      
                                             max_length=max_length,       
                                             padding='max_length',
                                             return_attention_mask=True,  
                                             is_split_into_words=True,
                                             return_tensors='pt')

        numeric_labels = [label_map[label] for label in labels]
        numeric_labels = [label_map['O']] + numeric_labels[:max_length-2] + [label_map['O']]*(max_length - len(numeric_labels) - 1)

        input_ids.append(encoded_dict['input_ids'])
        attention_masks.append(encoded_dict['attention_mask'])
        token_type_ids.append(encoded_dict['token_type_ids'])
        label_ids.append(torch.tensor(numeric_labels))

    input_ids = torch.cat(input_ids, dim=0)
    attention_masks = torch.cat(attention_masks, dim=0)
    token_type_ids = torch.cat(token_type_ids, dim=0)
    label_ids = torch.stack(label_ids, dim=0)
    return input_ids, attention_masks, token_type_ids, label_ids

label_map = {'O': 0, 'H': 1, 'T': 2}  

train_inputs, train_masks, train_type_ids, train_labels = tokenize_and_align_labels(train_df, tokenizer, label_map)
val_inputs, val_masks, val_type_ids, val_labels = tokenize_and_align_labels(val_df, tokenizer, label_map)
test_inputs, test_masks, test_type_ids, test_labels = tokenize_and_align_labels(test_df, tokenizer, label_map)

In [14]:
model = BertForTokenClassification.from_pretrained(CONFIG.MODEL_NAME, num_labels=len(label_map))
device = torch.device(CONFIG.DEVICE_NAME)
model.to(device)

Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


BertForTokenClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, el

In [16]:
batch_size = CONFIG.BATCH_SIZE
train_dataset = TensorDataset(train_inputs, train_masks, train_labels)
val_dataset = TensorDataset(val_inputs, val_masks, val_labels)

train_dataloader = DataLoader(train_dataset, sampler=RandomSampler(train_dataset), batch_size=batch_size)
validation_dataloader = DataLoader(val_dataset, sampler=SequentialSampler(val_dataset), batch_size=batch_size)

In [17]:
torch.mps.empty_cache()

optimizer = AdamW(model.parameters(), 
                  lr=CONFIG.LEARNING_RATE, 
                  eps=CONFIG.EPS)

epochs = CONFIG.EPOCHS



In [24]:
for epoch in tqdm(range(epochs)):
    print(f"-- Epoch {epoch} Started --")
    model.train()
    total_loss = 0

    for step, batch in tqdm(enumerate(train_dataloader)):
        batch = tuple(t.to(device) for t in batch)
        b_input_ids, b_input_mask, b_labels = batch
        model.zero_grad()
        outputs = model(b_input_ids, attention_mask=b_input_mask, labels=b_labels)
        loss = outputs.loss
        total_loss += loss.item()

        loss.backward()
        optimizer.step()

    avg_train_loss = total_loss / len(train_dataloader)

    model.eval()
    eval_loss = 0
    for batch in validation_dataloader:
        batch = tuple(t.to(device) for t in batch)
        b_input_ids, b_input_mask, b_labels = batch

        with torch.no_grad():
            outputs = model(b_input_ids, attention_mask=b_input_mask, labels=b_labels)
            loss = outputs.loss
            eval_loss += loss.item()

    avg_val_loss = eval_loss / len(validation_dataloader)
    print(f'Epoch {epoch}: Training loss: {avg_train_loss}, Validation loss: {avg_val_loss}')

  0%|          | 0/3 [00:00<?, ?it/s]

-- Epoch 0 Started --


0it [00:00, ?it/s]

Epoch 0: Training loss: 0.010388185922857328, Validation loss: 0.006958442331450258
-- Epoch 1 Started --


0it [00:00, ?it/s]

Epoch 1: Training loss: 0.006082082999623049, Validation loss: 0.006232926230679484
-- Epoch 2 Started --


0it [00:00, ?it/s]

Epoch 2: Training loss: 0.004418197163885244, Validation loss: 0.008072486340484403


In [25]:
test_dataset = TensorDataset(test_inputs, test_masks, test_labels)
test_dataloader = DataLoader(test_dataset, sampler=SequentialSampler(test_dataset), batch_size=CONFIG.BATCH_SIZE)

In [30]:
def evaluate_model(test_dataloader, model, label_map):
    model.eval()
    predictions, true_labels = [], []

    for batch in tqdm(test_dataloader):
        batch = tuple(t.to(device) for t in batch)
        b_input_ids, b_input_mask, b_labels = batch

        with torch.no_grad():
            outputs = model(b_input_ids, attention_mask=b_input_mask)

        logits = outputs.logits
        logits = logits.detach().cpu().numpy()
        label_ids = b_labels.to('cpu').numpy()

        predictions.extend([list(p) for p in np.argmax(logits, axis=2)])
        true_labels.extend(label_ids)

    flat_predictions, flat_true_labels = [], []
    for prediction, true_label in zip(predictions, true_labels):
        for p, t in zip(prediction, true_label):
            if t != label_map['O']:  
                flat_predictions.append(p)
                flat_true_labels.append(t)

    accuracy = accuracy_score(flat_true_labels, flat_predictions)
    f1 = f1_score(flat_true_labels, flat_predictions, average='weighted') 

    return accuracy, f1

test_accuracy, test_f1 = evaluate_model(test_dataloader, model, label_map)
print(f"Test Accuracy: {test_accuracy}")
print(f"Test F1 Score: {test_f1}")

  0%|          | 0/170 [00:00<?, ?it/s]

Test Accuracy: 0.6643122020130673
Test F1 Score: 0.7817070605781964


In [36]:
import plotly.express as px
from plotly.offline import iplot, init_notebook_mode
init_notebook_mode(connected=True)

px.bar(x = ['accuracy', 'f1_score'],
       y = [test_accuracy, test_f1],
       text = [test_accuracy, test_f1],
       template = 'simple_white',
       color = ['accuracy', 'f1_score'])