## CS310 Natural Language Processing
## Assignment 4. Long Short Term Memory (LSTM) Network for Named Entity Recognition (NER)

**Total points**: 50 + (10 bonus)

In this assignment, you will implement a Long Short Term Memory (LSTM) network for Named Entity Recognition (NER). 

Re-use the code in Lab 5.

### 0. Import Necessary Libraries

In [42]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
import os
import re
from collections import Counter, defaultdict
import matplotlib.pyplot as plt
from tqdm import tqdm
import pickle

from utils import Indexer, read_ner_data_from_connl, get_batch

from metrics import MetricsHandler


In [43]:
torch.manual_seed(42)
np.random.seed(42)

os.environ["CUDA_VISIBLE_DEVICES"] = "3" 
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"使用设备: {device}")

DATA_DIR = '/home/stu_12310401/nlp/SUSTech-NLP25/Ass4/data'
GLOVE_PATH = '/home/stu_12310401/nlp/SUSTech-NLP25/Ass4/glove.6B.100d.txt'

BATCH_SIZE = 128
EMBEDDING_DIM = 100
HIDDEN_DIM = 256
NUM_LAYERS = 2
DROPOUT = 0.5
LEARNING_RATE = 0.001
NUM_EPOCHS = 10

使用设备: cuda


In [44]:
class NERDataset(Dataset):
    def __init__(self, sentences, tags, word_to_idx, tag_to_idx):
        self.sentences = sentences
        self.tags = tags
        self.word_to_idx = word_to_idx
        self.tag_to_idx = tag_to_idx
        
    def __len__(self):
        return len(self.sentences)
    
    def __getitem__(self, idx):
        words = self.sentences[idx]
        tags = self.tags[idx]
        
        word_idxs = [self.word_to_idx.get(word.lower(), self.word_to_idx['<UNK>']) for word in words]
        tag_idxs = [self.tag_to_idx[tag] for tag in tags]
        
        return torch.tensor(word_idxs), torch.tensor(tag_idxs)

In [45]:
def build_vocab(sentences, tags, min_freq=1):
    word_counts = Counter()
    for sentence in sentences:
        word_counts.update([word.lower() for word in sentence])
    
    word_to_idx = {'<PAD>': 0, '<UNK>': 1}
    for word, count in word_counts.items():
        if count >= min_freq:
            word_to_idx[word] = len(word_to_idx)
    
    tag_counts = Counter()
    for sentence_tags in tags:
        tag_counts.update(sentence_tags)
    
    tag_to_idx = {'<PAD>': 0}
    for tag in tag_counts:
        tag_to_idx[tag] = len(tag_to_idx)
    
    return word_to_idx, tag_to_idx

In [46]:
def load_glove_embeddings(glove_path, word_to_idx, embedding_dim=100):
    """加载预训练的GloVe词向量"""
    embeddings = np.random.uniform(-0.25, 0.25, (len(word_to_idx), embedding_dim))
    embeddings[0] = np.zeros(embedding_dim)
    
    word_count = 0
    with open(glove_path, 'r', encoding='utf-8') as f:
        for line in tqdm(f, desc="加载GloVe词向量"):
            values = line.split()
            word = values[0]
            if word.lower() in word_to_idx:
                vector = np.array(values[1:], dtype='float32')
                embeddings[word_to_idx[word.lower()]] = vector
                word_count += 1
    
    print(f"加载了 {word_count}/{len(word_to_idx)} 个词的预训练词向量")
    return torch.FloatTensor(embeddings)

In [47]:
def collate_fn(batch):
    batch.sort(key=lambda x: len(x[0]), reverse=True)
    sentences, tags = zip(*batch)
    
    lengths = [len(s) for s in sentences]
    max_len = max(lengths)
    
    padded_sentences = torch.zeros(len(sentences), max_len).long()
    padded_tags = torch.zeros(len(sentences), max_len).long()
    
    for i, (sentence, tag) in enumerate(zip(sentences, tags)):
        end = lengths[i]
        padded_sentences[i, :end] = sentence[:end]
        padded_tags[i, :end] = tag[:end]
    
    return padded_sentences, padded_tags, torch.tensor(lengths)

In [48]:
def load_data(file_path):

    sentences = []
    tags = []
    
    sentence = []
    sentence_tags = []
    
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if line == '' or line.startswith('-DOCSTART-'):
                if sentence:
                    sentences.append(sentence)
                    tags.append(sentence_tags)
                    sentence = []
                    sentence_tags = []
            else:
                parts = line.split()
                if len(parts) >= 4:  
                    word = parts[0]
                    tag = parts[3]
                    sentence.append(word)
                    sentence_tags.append(tag)
    
    if sentence:
        sentences.append(sentence)
        tags.append(sentence_tags)
    
    return sentences, tags

### 1. Build the Model

In [49]:
class BiLSTM_NER(nn.Module):
    def __init__(self, vocab_size, tag_size, embedding_dim, hidden_dim, num_layers, dropout, pretrained_embeddings=None):
        super(BiLSTM_NER, self).__init__()
        

        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        if pretrained_embeddings is not None:
            self.embedding.weight = nn.Parameter(pretrained_embeddings)
        

        self.lstm = nn.LSTM(embedding_dim, 
                           hidden_dim // 2,  
                           num_layers=num_layers, 
                           bidirectional=True,
                           batch_first=True,
                           dropout=dropout if num_layers > 1 else 0)
        

        self.fc = nn.Linear(hidden_dim, tag_size)
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, x, lengths):

        embedded = self.dropout(self.embedding(x))
        
        packed = nn.utils.rnn.pack_padded_sequence(embedded, lengths.cpu(), batch_first=True)
        
        outputs, _ = self.lstm(packed)
        
        outputs, _ = nn.utils.rnn.pad_packed_sequence(outputs, batch_first=True)
        
        outputs = self.dropout(outputs)
        
        logits = self.fc(outputs)
        
        return logits

In [50]:


class BiLSTM_CRF_NER(nn.Module):
    def __init__(self, vocab_size, tag_size, embedding_dim, hidden_dim, num_layers, dropout, pretrained_embeddings=None):
        super(BiLSTM_CRF_NER, self).__init__()
        
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        if pretrained_embeddings is not None:
            self.embedding.weight = nn.Parameter(pretrained_embeddings)
        
        self.lstm = nn.LSTM(embedding_dim, 
                           hidden_dim // 2,  
                           num_layers=num_layers, 
                           bidirectional=True,
                           batch_first=True,
                           dropout=dropout if num_layers > 1 else 0)
        
        self.fc = nn.Linear(hidden_dim, tag_size)
        self.dropout = nn.Dropout(dropout)
        
        # 添加CRF层
        self.transitions = nn.Parameter(torch.randn(tag_size, tag_size))
        self.start_transitions = nn.Parameter(torch.randn(tag_size))
        self.end_transitions = nn.Parameter(torch.randn(tag_size))
        
    def _forward_alg(self, emissions, mask):
        batch_size, seq_length, tag_size = emissions.size()
        
        score = self.start_transitions + emissions[:, 0]
        
        for i in range(1, seq_length):
            broadcast_score = score.unsqueeze(2)
            broadcast_emissions = emissions[:, i].unsqueeze(1)
            
            next_score = broadcast_score + self.transitions + broadcast_emissions
            
            next_score = torch.logsumexp(next_score, dim=1)
            
            score = torch.where(mask[:, i].unsqueeze(1), next_score, score)
        
        score = score + self.end_transitions
        
        return torch.logsumexp(score, dim=1)
    
    def _score_sentence(self, emissions, tags, mask):

        batch_size, seq_length, tag_size = emissions.size()

        score = self.start_transitions[tags[:, 0]]
        score += emissions[torch.arange(batch_size), 0, tags[:, 0]]
        
        for i in range(1, seq_length):
            valid_scores = emissions[torch.arange(batch_size), i, tags[:, i]]
            valid_scores += self.transitions[tags[:, i-1], tags[:, i]]
            score += valid_scores * mask[:, i]
        
        last_tag_indices = mask.sum(1).long() - 1
        last_tags = tags[torch.arange(batch_size), last_tag_indices]
        score += self.end_transitions[last_tags]
        
        return score
    
    def _viterbi_decode(self, emissions, mask):
        batch_size, seq_length, tag_size = emissions.size()
        
        score = self.start_transitions + emissions[:, 0]
        history = []
        
        for i in range(1, seq_length):
            broadcast_score = score.unsqueeze(2)
            broadcast_emission = emissions[:, i].unsqueeze(1)
            
            next_score = broadcast_score + self.transitions + broadcast_emission
            
            next_score, indices = next_score.max(dim=1)
            
            score = torch.where(mask[:, i].unsqueeze(1), next_score, score)
            history.append(indices)
        
        score += self.end_transitions
        
        best_score, best_tag = score.max(dim=1)
        
        best_path = torch.zeros(batch_size, seq_length, dtype=torch.long, device=emissions.device)
        best_path[:, -1] = best_tag
        
        for i in range(len(history) - 1, -1, -1):
            best_tag = history[i].gather(1, best_tag.unsqueeze(1)).squeeze(1)
            best_path[:, i] = best_tag
        
        return best_path, best_score
    
    def forward(self, x, lengths):
        batch_size, max_len = x.size()
        
        mask = torch.zeros(batch_size, max_len, dtype=torch.bool, device=x.device)
        for i, length in enumerate(lengths):
            mask[i, :length] = 1
        
        embedded = self.dropout(self.embedding(x))
        
        packed = nn.utils.rnn.pack_padded_sequence(embedded, lengths.cpu(), batch_first=True)
        outputs, _ = self.lstm(packed)
        outputs, _ = nn.utils.rnn.pad_packed_sequence(outputs, batch_first=True)
        
        outputs = self.dropout(outputs)
        emissions = self.fc(outputs)
        
        return emissions
    
    def neg_log_likelihood(self, x, lengths, tags):
        batch_size, max_len = x.size()
        mask = torch.zeros(batch_size, max_len, dtype=torch.bool, device=x.device)
        for i, length in enumerate(lengths):
            mask[i, :length] = 1
        
        emissions = self.forward(x, lengths)
        
        log_Z = self._forward_alg(emissions, mask)
        gold_score = self._score_sentence(emissions, tags, mask)
        
        return log_Z - gold_score
    
    def decode(self, x, lengths):
        batch_size, max_len = x.size()
        mask = torch.zeros(batch_size, max_len, dtype=torch.bool, device=x.device)
        for i, length in enumerate(lengths):
            mask[i, :length] = 1
        
        emissions = self.forward(x, lengths)
        
        return self._viterbi_decode(emissions, mask)

### 2. Train and Evaluate

In [51]:
def train(model, train_loader, optimizer, criterion, device):
    model.train()
    total_loss = 0
    
    for sentences, tags, lengths in tqdm(train_loader, desc="Training"):
        sentences = sentences.to(device)
        tags = tags.to(device)
        
        optimizer.zero_grad()
        logits = model(sentences, lengths)
        
        loss = 0
        for i in range(logits.size(0)):
            loss += criterion(logits[i, :lengths[i]], tags[i, :lengths[i]])
        loss /= logits.size(0)
        
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
    
    return total_loss / len(train_loader)

In [52]:
def train_crf(model, train_loader, optimizer, device):
    model.train()
    total_loss = 0
    
    for sentences, tags, lengths in tqdm(train_loader, desc="Train CRF"):
        sentences = sentences.to(device)
        tags = tags.to(device)
        lengths = lengths.to(device)
        
        optimizer.zero_grad()
        loss = model.neg_log_likelihood(sentences, lengths, tags)
        loss = loss.mean()
        
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
    
    return total_loss / len(train_loader)

In [53]:
def evaluate(model, data_loader, tag_to_idx, idx_to_tag, metrics_handler, device):
    model.eval()
    metrics_handler = MetricsHandler(classes=list(tag_to_idx.keys()))
    
    with torch.no_grad():
        for sentences, tags, lengths in tqdm(data_loader, desc="Evaluate "):
            sentences = sentences.to(device)
            tags = tags.to(device)

            logits = model(sentences, lengths)

            for i in range(logits.size(0)):
                length = lengths[i]
                logits_i = logits[i, :length]
                tags_i = tags[i, :length]
                
                # 贪心
                _, predicted = torch.max(logits_i, dim=1)
                
                pred_tags = [idx_to_tag[idx.item()] for idx in predicted]
                true_tags = [idx_to_tag[idx.item()] for idx in tags_i]
                
                metrics_handler.update(pred_tags, true_tags)
    
    metrics_handler.collect()
    metrics = metrics_handler.get_metrics()
    
    f1_scores = metrics["F1-score"]
    latest_f1 = f1_scores[-1] if f1_scores else 0.0
    
    return {"f1": latest_f1, "metrics": metrics}

In [54]:
def evaluate_crf(model, data_loader, tag_to_idx, idx_to_tag, metrics_handler, device):
    model.eval()
    metrics_handler = MetricsHandler(classes=list(tag_to_idx.keys()))
    
    with torch.no_grad():
        for sentences, tags, lengths in tqdm(data_loader, desc="Evaluate CRF"):
            sentences = sentences.to(device)
            tags = tags.to(device)
            lengths = lengths.to(device)
            
            # 使用Viterbi解码
            best_paths, _ = model.decode(sentences, lengths)
            
            for i in range(best_paths.size(0)):
                length = lengths[i].item()
                predicted = best_paths[i, :length]
                tags_i = tags[i, :length]
                
                pred_tags = [idx_to_tag[idx.item()] for idx in predicted]
                true_tags = [idx_to_tag[idx.item()] for idx in tags_i]
                
                metrics_handler.update(pred_tags, true_tags)
    
    metrics_handler.collect()
    metrics = metrics_handler.get_metrics()
    
    f1_scores = metrics["F1-score"]
    latest_f1 = f1_scores[-1] if f1_scores else 0.0
    
    return {"f1": latest_f1, "metrics": metrics}

In [55]:
from utils import get_tag_indices_from_scores
from metrics import MetricsHandler

labels_str = ['O', 'B-PER', 'I-PER', 'B-ORG', 'I-ORG', 'B-LOC', 'I-LOC']
labels_int = list(range(len(labels_str)))
train_metrics = MetricsHandler(labels_int)

train_sentences, train_tags = load_data(os.path.join(DATA_DIR, 'train.txt'))
dev_sentences, dev_tags = load_data(os.path.join(DATA_DIR, 'dev.txt'))
test_sentences, test_tags = load_data(os.path.join(DATA_DIR, 'test.txt'))

word_to_idx, tag_to_idx = build_vocab(train_sentences, train_tags)
idx_to_tag = {idx: tag for tag, idx in tag_to_idx.items()}

TAGSET_SIZE = len(tag_to_idx)

print(f"TARGET SIZE: {TAGSET_SIZE}")

pretrained_embeddings = load_glove_embeddings(GLOVE_PATH, word_to_idx, EMBEDDING_DIM)

train_dataset = NERDataset(train_sentences, train_tags, word_to_idx, tag_to_idx)
dev_dataset = NERDataset(dev_sentences, dev_tags, word_to_idx, tag_to_idx)
test_dataset = NERDataset(test_sentences, test_tags, word_to_idx, tag_to_idx)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate_fn)
dev_loader = DataLoader(dev_dataset, batch_size=BATCH_SIZE, shuffle=False, collate_fn=collate_fn)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, collate_fn=collate_fn)



TARGET SIZE: 10


加载GloVe词向量: 400000it [00:02, 162196.07it/s]

加载了 18415/21011 个词的预训练词向量





In [56]:
model = BiLSTM_NER(
    vocab_size=len(word_to_idx),
    tag_size=len(tag_to_idx),
    embedding_dim=EMBEDDING_DIM,
    hidden_dim=HIDDEN_DIM,
    num_layers=NUM_LAYERS,
    dropout=DROPOUT,
    pretrained_embeddings=pretrained_embeddings
).to(device)

criterion = nn.CrossEntropyLoss(ignore_index=tag_to_idx['<PAD>'])
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

In [57]:
metrics_handler = MetricsHandler(classes=list(range(TAGSET_SIZE)))
metrics_handler_crf = MetricsHandler(classes=list(range(TAGSET_SIZE)))

In [58]:
if os.path.exists('bilstm_ner_model.pt'):
    print("It has model checkpoint, loading...")
    checkpoint = torch.load('bilstm_ner_model.pt')
    model.load_state_dict(checkpoint['model_state_dict'])
    word_to_idx = checkpoint['word_to_idx']
    tag_to_idx = checkpoint['tag_to_idx']
    idx_to_tag = checkpoint['idx_to_tag']
else:
    print("Start_training...")
    train_losses = []
    dev_f1_scores = []

    for epoch in range(NUM_EPOCHS):
        train_loss = train(model, train_loader, optimizer, criterion, device)
        train_losses.append(train_loss)

        dev_metrics = evaluate(model, dev_loader, tag_to_idx, idx_to_tag, metrics_handler, device)
        dev_f1 = dev_metrics['f1']
        dev_f1_scores.append(dev_f1)

        print(f"Epoch {epoch+1}/{NUM_EPOCHS}, Loss: {train_loss:.4f}, Dev set F1: {dev_f1:.4f}")

    torch.save({
        'model_state_dict': model.state_dict(),
        'word_to_idx': word_to_idx,
        'tag_to_idx': tag_to_idx,
        'idx_to_tag': idx_to_tag,
        'hyperparams': {
            'embedding_dim': EMBEDDING_DIM,
            'hidden_dim': HIDDEN_DIM,
            'num_layers': NUM_LAYERS,
            'dropout': DROPOUT
        }
    }, 'bilstm_ner_model.pt')
    print("BiLSTM model trained and saved.")

It has model checkpoint, loading...


  checkpoint = torch.load('bilstm_ner_model.pt')


In [59]:

crf_model = BiLSTM_CRF_NER(
    vocab_size=len(word_to_idx),
    tag_size=len(tag_to_idx),
    embedding_dim=EMBEDDING_DIM,
    hidden_dim=HIDDEN_DIM,
    num_layers=NUM_LAYERS,
    dropout=DROPOUT,
    pretrained_embeddings=pretrained_embeddings
).to(device)

crf_optimizer = optim.Adam(crf_model.parameters(), lr=LEARNING_RATE)


In [60]:
if os.path.exists('bilstm_crf_ner_model.pt'):
    print("It has CRF model checkpoint, loading...")
    checkpoint = torch.load('bilstm_crf_ner_model.pt')
    crf_model.load_state_dict(checkpoint['model_state_dict'])
    word_to_idx = checkpoint['word_to_idx']
    tag_to_idx = checkpoint['tag_to_idx']
    idx_to_tag = checkpoint['idx_to_tag']
else:
    print("Start training CRF model...")
    crf_train_losses = []
    crf_dev_f1_scores = []

    for epoch in range(NUM_EPOCHS):
        train_loss = train_crf(crf_model, train_loader, crf_optimizer, device)
        crf_train_losses.append(train_loss)
        
        dev_metrics = evaluate_crf(crf_model, dev_loader, tag_to_idx, idx_to_tag, metrics_handler_crf, device)
        dev_f1 = dev_metrics['f1']
        crf_dev_f1_scores.append(dev_f1)
        
        print(f"Epoch {epoch+1}/{NUM_EPOCHS}, Loss: {train_loss:.4f}, Dev set F1: {dev_f1:.4f}")

    torch.save({
        'model_state_dict': crf_model.state_dict(),
        'word_to_idx': word_to_idx,
        'tag_to_idx': tag_to_idx,
        'idx_to_tag': idx_to_tag,
        'hyperparams': {
            'embedding_dim': EMBEDDING_DIM,
            'hidden_dim': HIDDEN_DIM,
            'num_layers': NUM_LAYERS,
            'dropout': DROPOUT
        }
    }, 'bilstm_crf_ner_model.pt')

Start training CRF model...


Train CRF: 100%|██████████| 110/110 [00:13<00:00,  8.42it/s]
Evaluate CRF: 100%|██████████| 26/26 [00:03<00:00,  8.40it/s]
  return x[1, 1]/(x[1, 1] + x[0, 1])
  return x[1, 1] / (x[1, 0] + x[1, 1])
  return ((1 + beta**2)*precision*recall)/(beta**2 * precision + recall)


Epoch 1/10, Loss: 11.5426, Dev set F1: 0.5564


Train CRF: 100%|██████████| 110/110 [00:12<00:00,  8.61it/s]
Evaluate CRF: 100%|██████████| 26/26 [00:03<00:00,  8.33it/s]


Epoch 2/10, Loss: 5.2235, Dev set F1: 0.6394


Train CRF: 100%|██████████| 110/110 [00:12<00:00,  8.62it/s]
Evaluate CRF: 100%|██████████| 26/26 [00:02<00:00, 10.67it/s]


Epoch 3/10, Loss: 3.4265, Dev set F1: 0.7305


Train CRF: 100%|██████████| 110/110 [00:11<00:00,  9.96it/s]
Evaluate CRF: 100%|██████████| 26/26 [00:02<00:00, 10.98it/s]


Epoch 4/10, Loss: 2.6634, Dev set F1: 0.7713


Train CRF: 100%|██████████| 110/110 [00:10<00:00, 10.01it/s]
Evaluate CRF: 100%|██████████| 26/26 [00:02<00:00, 11.03it/s]


Epoch 5/10, Loss: 2.2818, Dev set F1: 0.8028


Train CRF: 100%|██████████| 110/110 [00:11<00:00,  9.98it/s]
Evaluate CRF: 100%|██████████| 26/26 [00:02<00:00, 11.06it/s]


Epoch 6/10, Loss: 1.9735, Dev set F1: 0.8025


Train CRF: 100%|██████████| 110/110 [00:10<00:00, 10.07it/s]
Evaluate CRF: 100%|██████████| 26/26 [00:02<00:00, 11.04it/s]


Epoch 7/10, Loss: 1.7476, Dev set F1: 0.8202


Train CRF: 100%|██████████| 110/110 [00:11<00:00,  9.95it/s]
Evaluate CRF: 100%|██████████| 26/26 [00:02<00:00, 11.03it/s]


Epoch 8/10, Loss: 1.5591, Dev set F1: 0.8295


Train CRF: 100%|██████████| 110/110 [00:10<00:00, 10.05it/s]
Evaluate CRF: 100%|██████████| 26/26 [00:02<00:00, 11.05it/s]


Epoch 9/10, Loss: 1.4060, Dev set F1: 0.8397


Train CRF: 100%|██████████| 110/110 [00:11<00:00, 10.00it/s]
Evaluate CRF: 100%|██████████| 26/26 [00:02<00:00, 11.09it/s]


Epoch 10/10, Loss: 1.2792, Dev set F1: 0.8453


In [61]:


print("\nCompare BiLSTM with BILSTM-CRF：")
print("-" * 80)
print(f"{'Model + Decode method':<30} {'Dev set F1':<15} {'Test set F1':<15}")
print("-" * 80)

# BiLSTM + Greedy
greedy_dev_metrics = evaluate(model, dev_loader, tag_to_idx, idx_to_tag, metrics_handler, device)
greedy_test_metrics = evaluate(model, test_loader, tag_to_idx, idx_to_tag, metrics_handler, device)
print(f"{'BiLSTM + Greedy search':<30} {greedy_dev_metrics['f1']:<15.4f} {greedy_test_metrics['f1']:<15.4f}")

# BiLSTM-CRF + Viterbi
crf_dev_metrics = evaluate_crf(crf_model, dev_loader, tag_to_idx, idx_to_tag, metrics_handler, device)
crf_test_metrics = evaluate_crf(crf_model, test_loader, tag_to_idx, idx_to_tag, metrics_handler, device)
print(f"{'BiLSTM-CRF + Viterbi':<30} {crf_dev_metrics['f1']:<15.4f} {crf_test_metrics['f1']:<15.4f}")




Compare BiLSTM with BILSTM-CRF：
--------------------------------------------------------------------------------
Model + Decode method          Dev set F1      Test set F1    
--------------------------------------------------------------------------------


Evaluate : 100%|██████████| 26/26 [00:02<00:00, 12.69it/s]
Evaluate : 100%|██████████| 27/27 [00:01<00:00, 14.04it/s]


BiLSTM + Greedy search         0.8374          0.7860         


Evaluate CRF: 100%|██████████| 26/26 [00:02<00:00, 11.11it/s]
Evaluate CRF: 100%|██████████| 27/27 [00:02<00:00, 12.04it/s]

BiLSTM-CRF + Viterbi           0.8453          0.7927         





### 3. Other Experiments