<a href="https://colab.research.google.com/github/KayvanShah1/usc-csci-544-assignments-hw/blob/main/hw4/CSCI544_HW4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Dependencies

## Installation

In [3]:
!pip install transformers datasets accelerate
!pip install ipython-autotime

Collecting transformers
  Downloading transformers-4.35.0-py3-none-any.whl (7.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.9/7.9 MB[0m [31m57.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting datasets
  Downloading datasets-2.14.6-py3-none-any.whl (493 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m493.7/493.7 kB[0m [31m40.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting accelerate
  Downloading accelerate-0.24.1-py3-none-any.whl (261 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m261.4/261.4 kB[0m [31m28.4 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.16.4 (from transformers)
  Downloading huggingface_hub-0.19.0-py3-none-any.whl (311 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m311.2/311.2 kB[0m [31m33.0 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers<0.15,>=0.14 (from transformers)
  Downloading tokenizers-0.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86

## Imports

In [4]:
import os
import shutil
from typing import List, Tuple, Dict

import itertools
from collections import Counter

from tqdm import tqdm

import warnings
warnings.filterwarnings("ignore")

import csv
import numpy as np
import pandas as pd

from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

from datasets import load_dataset
import transformers

from dataclasses import dataclass

%load_ext autotime

time: 452 µs (started: 2023-11-09 07:23:51 +00:00)


# Config

In [5]:
# Set the current working directory
try:
    os.chdir("/content/drive/MyDrive/Colab Notebooks/CSCI544/HW4")
except:
    pass


class PathConfig:
    # Get the current dir
    CURRENT_DIR = os.getcwd()

    # Glove embedding path
    GLOVE_100d_File = os.path.join(CURRENT_DIR, "glove.6B.100d.txt")


class DatasetConfig:
    # General Info
    name = "conll2003"

    # Processing
    cols_to_drop =  ["id", "pos_tags", "chunk_tags"]
    rename_cols = {"ner_tags": "labels"}

    # Preprocessing
    THRESHOLD = 3
    PAD_TOKEN = "<pad>"
    UNKNOWN_TOKEN = "<unk>"
    embedding_size = 100

    # NER Tags list and converter dictionaries
    ner_tag2idx = {'O': 0, 'B-PER': 1, 'I-PER': 2, 'B-ORG': 3, 'I-ORG': 4, 'B-LOC': 5, 'I-LOC': 6, 'B-MISC': 7, 'I-MISC': 8}
    ner_idx2tag = {v: k for k, v in ner_tag2idx.items()}

    NUM_NER_TAGS = len(ner_tag2idx)
    SPECIAL_TOKEN_TAG = -100

time: 185 ms (started: 2023-11-09 07:23:51 +00:00)


# Helper Functions & Support Scripts

## Accelarator Configuration

In [6]:
def get_device():
    if torch.cuda.is_available():
        # Check if GPU is available
        return torch.device("cuda")
    else:
        # Use CPU if no GPU or TPU is available
        return torch.device("cpu")

device = get_device()
device

device(type='cpu')

time: 7.65 ms (started: 2023-11-09 07:23:52 +00:00)


## CoNLL evaluation functions

In [7]:
%%bash
if [ ! -f conlleval.py ]; then
    echo "Downloading conlleval.py ..."
    wget https://raw.githubusercontent.com/sighsmile/conlleval/master/conlleval.py
else
    echo "File conlleval.py already exists"
fi

File conlleval.py already exists
time: 262 ms (started: 2023-11-09 07:23:52 +00:00)


In [8]:
from conlleval import evaluate

time: 608 ms (started: 2023-11-09 07:23:52 +00:00)


## Helper functions

In [9]:
# Load glove embeddings to dictionary
def load_glove_embeddings(path):
    """
    pretrained_model = load_glove_embeddings(PathConfig.GLOVE_100d_File)
    """
    embeddings = pd.read_csv(
        PathConfig.GLOVE_100d_File, sep=" ", quoting=csv.QUOTE_NONE, header=None, index_col=0
    )
    embeddings = {key: val.values for key, val in embeddings.T.items()}

    # Add Special token vectors
    embeddings[DatasetConfig.UNKNOWN_TOKEN] = np.full(DatasetConfig.embedding_size, -1)
    embeddings[DatasetConfig.PAD_TOKEN] = np.zeros(DatasetConfig.embedding_size)

    return embeddings

time: 993 µs (started: 2023-11-09 07:23:53 +00:00)


# Download Glove Embeddings

In [10]:
%%bash
if [ ! -f glove.6B.zip ]; then
    echo "Downloading glove.6B.zip..."
    wget http://nlp.stanford.edu/data/glove.6B.zip -y
    unzip -o glove.6B.zip
else
    echo "File glove.6B.zip already exists"
fi

File glove.6B.zip already exists
time: 17 ms (started: 2023-11-09 07:23:53 +00:00)


In [11]:
pretrained_model = load_glove_embeddings(PathConfig.GLOVE_100d_File)

time: 29.2 s (started: 2023-11-09 07:23:53 +00:00)


# Dataset Preparation

## Process Data

In [12]:
dataset = load_dataset("conll2003")
dataset = dataset.remove_columns(DatasetConfig.cols_to_drop)
for old_name, new_name in DatasetConfig.rename_cols.items():
    dataset = dataset.rename_column(old_name, new_name)

Downloading builder script:   0%|          | 0.00/9.57k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/3.73k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/12.3k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/983k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/14041 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/3250 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/3453 [00:00<?, ? examples/s]

time: 10.3 s (started: 2023-11-09 07:24:22 +00:00)


In [13]:
dataset

DatasetDict({
    train: Dataset({
        features: ['tokens', 'labels'],
        num_rows: 14041
    })
    validation: Dataset({
        features: ['tokens', 'labels'],
        num_rows: 3250
    })
    test: Dataset({
        features: ['tokens', 'labels'],
        num_rows: 3453
    })
})

time: 8.5 ms (started: 2023-11-09 07:24:32 +00:00)


## EDA

In [14]:
df = pd.DataFrame(dataset["train"])
df.head()

Unnamed: 0,tokens,labels
0,"[EU, rejects, German, call, to, boycott, Briti...","[3, 0, 7, 0, 0, 0, 7, 0, 0]"
1,"[Peter, Blackburn]","[1, 2]"
2,"[BRUSSELS, 1996-08-22]","[5, 0]"
3,"[The, European, Commission, said, on, Thursday...","[0, 3, 4, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, ..."
4,"[Germany, 's, representative, to, the, Europea...","[5, 0, 0, 0, 0, 3, 4, 0, 0, 0, 1, 2, 0, 0, 0, ..."


time: 826 ms (started: 2023-11-09 07:24:32 +00:00)


## Word to index mapper

In [15]:
# Count occurences of the words using itertools and Counter
word_frequency = Counter(itertools.chain(*dataset['train']['tokens']))

# Discard words with frequency below THRESHOLD
word_frequency = {
    word: freq
    for word, freq in word_frequency.items()
    if freq >= DatasetConfig.THRESHOLD
}

# Generate indexes
word2idx = {
    word: index
    for index, word in enumerate(word_frequency.keys(), start=2)
}

# Add special tokens
word2idx[DatasetConfig.PAD_TOKEN] = 0
word2idx[DatasetConfig.UNKNOWN_TOKEN] = 1

time: 438 ms (started: 2023-11-09 07:24:33 +00:00)


# Create a Pytorch dataset

In [16]:
@dataclass
class DatasetItem:
    embeddings: torch.Tensor
    targets: torch.Tensor
    original_length: int


class NERDatasetCustom(Dataset):
    def __init__(self, dataset, split, tokenizer, embedding_type="custom"):
        self.name = DatasetConfig.name
        self.dataset = dataset[split]
        self.tokenizer = tokenizer

        # Options: [custom, glove, transformer]
        self.embedding_type = embedding_type

    def __len__(self):
        return self.dataset.num_rows

    def tokenize(self, tokens):
        """
        Code to convert all tokens to their respective indexes
        """
        return [
            self.tokenizer.get(token, self.tokenizer[DatasetConfig.UNKNOWN_TOKEN])
            for token in tokens
        ]

    def __getitem__(self, idx):
        if idx >= self.__len__():
            raise IndexError

        item = self.dataset[idx]

        item["input_ids"] = self.tokenize(item["tokens"])

        embeddings = item["input_ids"]
        targets = item["labels"]
        seq_len = len(targets)

        return DatasetItem(
            torch.tensor(embeddings, dtype=torch.long),
            torch.tensor(targets, dtype=torch.long),
            seq_len
        )

time: 2.88 ms (started: 2023-11-09 07:24:33 +00:00)


In [17]:
def collate_fn(data: DatasetItem):
    embeddings, targets, og_len = [], [], []

    for item in data:
        embeddings.append(item.embeddings)
        targets.append(item.targets)
        og_len.append(item.original_length)

    # Pad the embeddings sequence
    embeddings = nn.utils.rnn.pad_sequence(
        embeddings, batch_first=True, padding_value=word2idx[DatasetConfig.PAD_TOKEN]
    )
    targets = nn.utils.rnn.pad_sequence(
        targets, batch_first=True, padding_value=DatasetConfig.SPECIAL_TOKEN_TAG
    )

    return {"embeddings": embeddings, "targets": targets, "original_length": og_len}

time: 613 µs (started: 2023-11-09 07:24:33 +00:00)


# Training & Evaluation loop

In [30]:
def train_and_evaluate(
    model,
    train_data_loader, valid_data_loader,
    optimizer, loss_fn,
    device,
    num_epochs,
    checkpoint=False,
    path="model.pt",
    early_stopping_patience=5
):
    """
    Trains and evaluates the model.

    Args:
        model (nn.Module): The neural network model.
        train_data_loader (DataLoader): The DataLoader for training data.
        valid_data_loader (DataLoader): The DataLoader for validation data.
        optimizer (torch.optim): The optimizer for updating model weights.
        loss_fn: The loss function.
        device (torch.device): The device to perform computations.
        num_epochs (int): The number of epochs.
        checkpoint (bool, optional): Whether to save model checkpoints.
        path (str, optional): The path to save the model.
        early_stopping_patience (int, optional): Number of epochs to wait before early stopping.

    Returns:
        nn.Module: The best model.

    """
    # Create directory for saving checkpoint model states
    if checkpoint:
        dirname = path.split(".")[0]
        checkpoint_path = os.path.join(dirname)
        if os.path.exists(checkpoint_path):
            shutil.rmtree(checkpoint_path)
        os.makedirs(dirname)

    best_loss = float('inf')
    no_improvement_count = 0
    best_model = None

    for epoch in range(num_epochs):
        # Train Step
        model.train()
        train_loss = 0.0

        progress_bar = tqdm(train_data_loader, desc=f'Epoch {epoch+1}/{num_epochs}')

        for batch in progress_bar:
            embeddings = batch['embeddings'].to(device, dtype=torch.long, non_blocking=True)
            labels = batch['targets'].to(device, dtype=torch.long, non_blocking=True)
            seq_lengths = batch["original_length"]

            optimizer.zero_grad()

            outputs = model(embeddings, seq_lengths)

            outputs = outputs.view(-1, outputs.shape[-1])
            labels = labels.view(-1)
            loss = loss_fn(outputs, labels)

            loss.backward()
            optimizer.step()

            train_loss += loss.item() * embeddings.size(1)

        train_loss /= len(train_data_loader.dataset)

        # Validation Step
        model.eval()
        valid_loss = 0.0

        with torch.no_grad():
            for batch in valid_data_loader:
                embeddings = batch['embeddings'].to(device, dtype=torch.long, non_blocking=True)
                labels = batch['targets'].to(device, dtype=torch.long, non_blocking=True)
                seq_lengths = batch["original_length"]

                outputs = model(embeddings, seq_lengths)

                outputs = outputs.view(-1, outputs.shape[-1])
                labels = labels.view(-1)
                loss = loss_fn(outputs, labels)

                valid_loss += loss.item() * embeddings.size(1)

            valid_loss /= len(valid_data_loader.dataset)

        epoch_log = (
            f"Train Loss : {round(train_loss, 4)},"
            f" Validation Loss: {round(valid_loss, 4)}"
        )
        print(epoch_log)

        # Check for improvement in validation loss
        if valid_loss < best_loss:
            # Save checkpoint if needed
            if checkpoint:
                cp = os.path.join(checkpoint_path, f"{dirname}_epoch{epoch}_loss{valid_loss:.4f}.pt")
                torch.save(model.state_dict(), cp)
                print(f"Validation loss improved from {best_loss:.4f}--->{valid_loss:.4f}")
                print(f"Saved Checkpoint to '{cp}'")

            best_loss = valid_loss
            best_model = model
            no_improvement_count = 0
        else:
            no_improvement_count += 1

            # Early stopping condition
            if no_improvement_count >= early_stopping_patience:
                print(f"No improvement for {early_stopping_patience} epochs. Stopping early.")
                break

    if checkpoint:
        # Save the best model
        best_model_path = os.path.join(checkpoint_path, f"{dirname}-best.pt")
        torch.save(best_model.state_dict(), best_model_path)
        print(f"Saved best model to '{os.path.relpath(best_model_path)}'")

    # Save current model
    torch.save(model.state_dict(), path)

    return best_model

time: 34.6 ms (started: 2023-11-09 07:36:57 +00:00)


# Hyperparameters

In [19]:
TRAIN_BATCH_SIZE = 128
VALID_BATCH_SIZE = 64
TEST_BATCH_SIZE = 32
NUM_EPOCHS = 5

time: 565 µs (started: 2023-11-09 07:24:33 +00:00)


# Bidirectional LSTM model

In [20]:
class BiLSTM(nn.Module):
    def __init__(
        self, vocab_size, embedding_dim, num_tags,
        hidden_size, num_layers, lstm_output_size, dropout_val
    ):
        """
        Recurrent Neural Network (RNN) model for sequence data processing.

        Args:
            vocab_size (int): Size of vocabulary
            embedding_dim (int): Dimension of the input features.
            num_tags (int): Number of output classes.
            hidden_size (int): Number of units in the hidden layers.
            num_layers (int): Number of recurrent layers.
            lstm_output_size (int): Size of the output from the LSTM layer.
            dropout_val (float): Dropout probability.

        """
        super(BiLSTM, self).__init__()

        # Model Attributes
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # Model Layer Definition
        self.embedding = nn.Embedding(vocab_size, embedding_dim)

        self.lstm = nn.LSTM(
            embedding_dim, hidden_size, num_layers, batch_first=True, bidirectional=True
        )

        self.fc = nn.Linear(hidden_size * 2, lstm_output_size)
        self.dropout = nn.Dropout(dropout_val)
        self.elu = nn.ELU(alpha=0.01)
        self.classifier = nn.Linear(lstm_output_size, num_tags)

    def init_hidden(self, batch_size):
        hidden = (
            torch.zeros(self.num_layers * 2, batch_size, self.hidden_size).to(device),
            torch.zeros(self.num_layers * 2, batch_size, self.hidden_size).to(device)
        )
        return hidden

    def forward(self, x, seq_len):
        batch_size = x.size(0)
        hidden = self.init_hidden(batch_size)

        # Embedding Layer
        embeds = self.embedding(x).float()

        # LSTM layer
        packed_embeds = nn.utils.rnn.pack_padded_sequence(
            embeds, seq_len, batch_first=True, enforce_sorted=False
        )
        out, _ = self.lstm(packed_embeds, hidden)
        out, _ = nn.utils.rnn.pad_packed_sequence(out, batch_first=True)

        # Apply fully connected layer for final prediction
        out = self.dropout(out)
        out = self.fc(out)
        out = self.elu(out)
        out = self.classifier(out)

        return out

time: 1.85 ms (started: 2023-11-09 07:24:33 +00:00)


In [21]:
train_dataset = NERDatasetCustom(
    dataset = dataset,
    split='train',
    tokenizer = word2idx,
    embedding_type="default",
)

valid_dataset = NERDatasetCustom(
    dataset = dataset,
    split='validation',
    tokenizer = word2idx,
    embedding_type="default",
)

train_dataset = NERDatasetCustom(
    dataset = dataset,
    split='test',
    tokenizer = word2idx,
    embedding_type="default",
)

train_data_loader = DataLoader(
    train_dataset,
    batch_size=TRAIN_BATCH_SIZE,
    drop_last=True,
    shuffle=True,
    collate_fn=collate_fn
)

valid_data_loader = DataLoader(
    train_dataset,
    batch_size=VALID_BATCH_SIZE,
    drop_last=True,
    shuffle=True,
    collate_fn=collate_fn
)

test_data_loader = DataLoader(
    train_dataset,
    batch_size=TEST_BATCH_SIZE,
    drop_last=True,
    shuffle=False,
    collate_fn=collate_fn
)

time: 1.09 ms (started: 2023-11-09 07:24:33 +00:00)


In [22]:
vocab_size = len(word2idx)
embedding_dim = 100
hidden_size = 256
output_size = 128
num_layers = 1
dropout_val = 0.33
num_tags = DatasetConfig.NUM_NER_TAGS

time: 733 µs (started: 2023-11-09 07:24:33 +00:00)


In [84]:
net = BiLSTM(
    vocab_size, embedding_dim, num_tags,
    hidden_size, num_layers, output_size,
    dropout_val
).to(device)

criterion = nn.CrossEntropyLoss(ignore_index=DatasetConfig.SPECIAL_TOKEN_TAG).to(device)
optimizer = optim.Adam(net.parameters(), lr=0.01)

best_model = train_and_evaluate(
    model=net,
    train_data_loader=train_data_loader,
    valid_data_loader=valid_data_loader,
    optimizer=optimizer,
    loss_fn=criterion,
    device=device,
    num_epochs=15,
    checkpoint=True,
    path="bilstm_custom_embeddings.pt",
    early_stopping_patience=5
)

Epoch 1/15: 100%|██████████| 26/26 [00:20<00:00,  1.27it/s]


Train Loss : 0.3941, Validation Loss: 0.4391
Validation loss improved from inf--->0.4391
Saved Checkpoint to 'bilstm_custom_embeddings/bilstm_custom_embeddings_epoch0_loss0.4391.pt'


Epoch 2/15: 100%|██████████| 26/26 [00:20<00:00,  1.26it/s]


Train Loss : 0.2004, Validation Loss: 0.2305
Validation loss improved from 0.4391--->0.2305
Saved Checkpoint to 'bilstm_custom_embeddings/bilstm_custom_embeddings_epoch1_loss0.2305.pt'


Epoch 3/15: 100%|██████████| 26/26 [00:21<00:00,  1.23it/s]


Train Loss : 0.1145, Validation Loss: 0.1359
Validation loss improved from 0.2305--->0.1359
Saved Checkpoint to 'bilstm_custom_embeddings/bilstm_custom_embeddings_epoch2_loss0.1359.pt'


Epoch 4/15: 100%|██████████| 26/26 [00:21<00:00,  1.21it/s]


Train Loss : 0.0745, Validation Loss: 0.0874
Validation loss improved from 0.1359--->0.0874
Saved Checkpoint to 'bilstm_custom_embeddings/bilstm_custom_embeddings_epoch3_loss0.0874.pt'


Epoch 5/15: 100%|██████████| 26/26 [00:23<00:00,  1.13it/s]


Train Loss : 0.0552, Validation Loss: 0.0634
Validation loss improved from 0.0874--->0.0634
Saved Checkpoint to 'bilstm_custom_embeddings/bilstm_custom_embeddings_epoch4_loss0.0634.pt'


Epoch 6/15: 100%|██████████| 26/26 [00:22<00:00,  1.17it/s]


Train Loss : 0.0403, Validation Loss: 0.0467
Validation loss improved from 0.0634--->0.0467
Saved Checkpoint to 'bilstm_custom_embeddings/bilstm_custom_embeddings_epoch5_loss0.0467.pt'


Epoch 7/15: 100%|██████████| 26/26 [00:22<00:00,  1.15it/s]


Train Loss : 0.0301, Validation Loss: 0.034
Validation loss improved from 0.0467--->0.0340
Saved Checkpoint to 'bilstm_custom_embeddings/bilstm_custom_embeddings_epoch6_loss0.0340.pt'


Epoch 8/15: 100%|██████████| 26/26 [00:20<00:00,  1.24it/s]


Train Loss : 0.0252, Validation Loss: 0.0252
Validation loss improved from 0.0340--->0.0252
Saved Checkpoint to 'bilstm_custom_embeddings/bilstm_custom_embeddings_epoch7_loss0.0252.pt'


Epoch 9/15: 100%|██████████| 26/26 [00:21<00:00,  1.21it/s]


Train Loss : 0.02, Validation Loss: 0.0192
Validation loss improved from 0.0252--->0.0192
Saved Checkpoint to 'bilstm_custom_embeddings/bilstm_custom_embeddings_epoch8_loss0.0192.pt'


Epoch 10/15: 100%|██████████| 26/26 [00:21<00:00,  1.22it/s]


Train Loss : 0.0159, Validation Loss: 0.016
Validation loss improved from 0.0192--->0.0160
Saved Checkpoint to 'bilstm_custom_embeddings/bilstm_custom_embeddings_epoch9_loss0.0160.pt'


Epoch 11/15: 100%|██████████| 26/26 [00:21<00:00,  1.20it/s]


Train Loss : 0.0134, Validation Loss: 0.0128
Validation loss improved from 0.0160--->0.0128
Saved Checkpoint to 'bilstm_custom_embeddings/bilstm_custom_embeddings_epoch10_loss0.0128.pt'


Epoch 12/15: 100%|██████████| 26/26 [00:21<00:00,  1.22it/s]


Train Loss : 0.0131, Validation Loss: 0.0129


Epoch 13/15: 100%|██████████| 26/26 [00:20<00:00,  1.26it/s]


Train Loss : 0.0114, Validation Loss: 0.0138


Epoch 14/15: 100%|██████████| 26/26 [00:20<00:00,  1.29it/s]


Train Loss : 0.0107, Validation Loss: 0.0095
Validation loss improved from 0.0128--->0.0095
Saved Checkpoint to 'bilstm_custom_embeddings/bilstm_custom_embeddings_epoch13_loss0.0095.pt'


Epoch 15/15: 100%|██████████| 26/26 [00:21<00:00,  1.23it/s]


Train Loss : 0.0103, Validation Loss: 0.0097
Saved best model to 'bilstm_custom_embeddings/bilstm_custom_embeddings-best.pt'
time: 6min 23s (started: 2023-11-09 09:36:07 +00:00)


In [89]:
def evaluate_model(model, data_loader):
    all_preds = []
    all_labels = []

    model.eval()

    with torch.no_grad():
        for batch in data_loader:
            embeddings = batch['embeddings'].to(device, dtype=torch.long, non_blocking=True)
            labels = batch['targets'].to(device, dtype=torch.long, non_blocking=True)
            seq_lengths = batch["original_length"]

            outputs = model(embeddings, seq_lengths)

            preds = torch.argmax(outputs, dim=2)

            preds = preds.detach().cpu().numpy()
            labels = labels.detach().cpu().numpy()

            for pred, label, length in zip(preds, labels, seq_lengths):
                pred = [DatasetConfig.ner_idx2tag.get(p, 'O') for p in pred[:length]]
                label = [DatasetConfig.ner_idx2tag.get(l, 'O') for l in label[:length]]
                all_preds.append(pred)
                all_labels.append(label)

    # Evaluate using conlleval
    precision, recall, f1 = evaluate(
        itertools.chain(*all_labels), itertools.chain(*all_preds)
    )

    return precision, recall, f1

time: 1.94 ms (started: 2023-11-09 09:45:24 +00:00)


In [92]:
precision, recall, f1 = evaluate_model(best_model, test_data_loader)
print(f'Precision: {precision}, Recall: {recall}, F1 Score: {f1}')

processed 46014 tokens with 5602 phrases; found: 5546 phrases; correct: 5457.
accuracy:  98.04%; (non-O)
accuracy:  99.61%; precision:  98.40%; recall:  97.41%; FB1:  97.90
              LOC: precision:  98.19%; recall:  98.13%; FB1:  98.16  1657
             MISC: precision:  97.92%; recall:  94.96%; FB1:  96.42  673
              ORG: precision:  98.58%; recall:  96.95%; FB1:  97.76  1615
              PER: precision:  98.63%; recall:  98.20%; FB1:  98.41  1601
Precision: 98.39523981247747, Recall: 97.41163870046412, F1 Score: 97.90096878363833
time: 4.44 s (started: 2023-11-09 09:48:26 +00:00)


# References

1. https://huggingface.co/datasets/conll2003
2. https://huggingface.co/docs/datasets/installation
3. https://huggingface.co/docs/transformers/installation
4. https://stackoverflow.com/questions/37793118/load-pretrained-glove-vectors-in-python
5. https://stackoverflow.com/a/52070223/12639940
6. https://github.com/sighsmile/conlleval
7. https://nlp.stanford.edu/data/glove.6B.zip
8. https://stats.stackexchange.com/questions/248715/selection-of-values-for-padding-tokens-in-sentence-classification-with-word-embe
