## CS310 Natural Language Processing
## Assignment 4. Long Short Term Memory (LSTM) Network for Named Entity Recognition (NER)

**Total points**: 50 + (10 bonus)

In this assignment, you will implement a Long Short Term Memory (LSTM) network for Named Entity Recognition (NER). 

Re-use the code in Lab 5.

### 0. Import Necessary Libraries

In [None]:
from pprint import pprint
import torch.nn as nn
import torch.nn.functional as F
import torch
import numpy as np
from utils import Indexer, read_ner_data_from_connl, load_embedding_dict

In [21]:
# Read the three data splits; each call is unpacked into a (words, tags) pair.
(train_words, train_tags), (dev_words, dev_tags), (test_words, test_tags) = [
    read_ner_data_from_connl(path)
    for path in ('data/train.txt', 'data/dev.txt', 'data/test.txt')
]

# Lower-case every word of every split (tags are left untouched).
train_words, dev_words, test_words = (
    [w.lower() for w in split_words]
    for split_words in (train_words, dev_words, test_words)
)

In [51]:
class Indexer:
    """Map distinct elements to contiguous integer indices.

    Indices follow the sorted order of the distinct elements. Lookups of
    elements that were never seen return index 0.

    Without ``unk_token`` (the default, and the historical behaviour) index 0
    also belongs to the first sorted element, so unseen elements are
    indistinguishable from it. Pass ``unk_token`` to reserve index 0 for a
    dedicated unknown entry instead.

    Args:
        elements: iterable of hashable, mutually comparable elements.
        unk_token: optional placeholder element that is given index 0; all
            other elements are shifted to start at index 1.
    """

    def __init__(self, elements, unk_token=None):
        vocabulary = sorted(set(elements))
        if unk_token is not None:
            # Put the placeholder first (and only once) so that the fallback
            # index 0 used by element_to_index() never aliases a real element.
            vocabulary = [unk_token] + [e for e in vocabulary if e != unk_token]
        self.unk_token = unk_token
        self.element_to_idx = {e: i for i, e in enumerate(vocabulary)}

    def element_to_index(self, element):
        """Return the index of ``element``, or 0 when it is unknown."""
        return self.element_to_idx.get(element, 0)  # 0 for unknown

    def get_element_to_index_dict(self):
        """Return the underlying element -> index dictionary (not a copy)."""
        return self.element_to_idx

    def __len__(self):
        """Return the number of indexed entries (including ``unk_token`` if set)."""
        return len(self.element_to_idx)

In [22]:
# Both vocabularies are built from the training split only.
word_indexer, tag_indexer = Indexer(train_words), Indexer(train_tags)

In [50]:
class NERDataset:
    """Pair parallel word and tag sequences with the indexers used to encode them.

    Item ``i`` is the ``(word_index, tag_index)`` tuple for ``words[i]`` and
    ``tags[i]``; the length is the number of words.
    """

    def __init__(self, words, tags, word_indexer, tag_indexer):
        self.words, self.tags = words, tags
        self.word_indexer, self.tag_indexer = word_indexer, tag_indexer

    def __len__(self):
        return len(self.words)

    def __getitem__(self, idx):
        word, tag = self.words[idx], self.tags[idx]
        return (
            self.word_indexer.element_to_index(word),
            self.tag_indexer.element_to_index(tag),
        )

In [24]:
# One dataset per split; all three share the indexers built from the training data.
train_dataset, dev_dataset, test_dataset = (
    NERDataset(split_words, split_tags, word_indexer, tag_indexer)
    for split_words, split_tags in (
        (train_words, train_tags),
        (dev_words, dev_tags),
        (test_words, test_tags),
    )
)

In [None]:
def get_batch(dataset, batch_size):
    """Yield consecutive ``(word_ids, tag_ids)`` tensor pairs from ``dataset``.

    Each pair covers up to ``batch_size`` consecutive items of
    ``dataset.words`` / ``dataset.tags``, encoded with the dataset's indexers
    into 1-D ``torch.long`` tensors. The last pair may be shorter.
    """
    all_words, all_tags = dataset.words, dataset.tags
    for start in range(0, len(dataset), batch_size):
        stop = start + batch_size
        word_ids = list(map(dataset.word_indexer.element_to_index, all_words[start:stop]))
        tag_ids = list(map(dataset.tag_indexer.element_to_index, all_tags[start:stop]))
        word_tensor = torch.tensor(word_ids, dtype=torch.long)
        tag_tensor = torch.tensor(tag_ids, dtype=torch.long)
        yield word_tensor, tag_tensor

In [32]:
batch_size = 128
# Materialise every batch up front so each loader can be iterated once per epoch.
train_loader, dev_loader, test_loader = (
    list(get_batch(split_dataset, batch_size))
    for split_dataset in (train_dataset, dev_dataset, test_dataset)
)

In [30]:
embedding_dim = 100
glove_path = "data/glove.6B.100d.txt"
embeddings_index = load_embedding_dict(glove_path)

# One row per indexed word; rows for words without an entry in
# embeddings_index are left as all zeros.
embedding_matrix = np.zeros((len(word_indexer), embedding_dim))
for word, idx in word_indexer.get_element_to_index_dict().items():
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is None:
        continue
    embedding_matrix[idx] = embedding_vector

100%|██████████| 400000/400000 [00:02<00:00, 157053.49it/s]


### 1. Build the Model

In [None]:
class BiLSTMNER(nn.Module):
    """Tagger: pretrained (trainable) embeddings -> 2-layer BiLSTM -> linear layer.

    ``vocab_size`` is accepted for signature compatibility but not used; the
    embedding table takes its size from ``embedding_matrix``.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, tagset_size, embedding_matrix):
        super().__init__()
        # freeze=False keeps the pretrained vectors trainable.
        pretrained_weights = torch.FloatTensor(embedding_matrix)
        self.embedding = nn.Embedding.from_pretrained(pretrained_weights, freeze=False)

        self.lstm = nn.LSTM(
            input_size=embedding_dim,
            hidden_size=hidden_dim,
            num_layers=2,
            batch_first=True,
            bidirectional=True,
        )

        # Forward and backward hidden states are concatenated, hence 2 * hidden_dim.
        self.fc = nn.Linear(in_features=2 * hidden_dim, out_features=tagset_size)

    def forward(self, x):
        """Return per-position tag scores for the word indices in ``x``.

        Per the original notes, ``x`` is (batch_size, seq_len) and the result
        is (batch_size, seq_len, tagset_size).
        """
        embedded = self.embedding(x)  # (batch_size, seq_len, embedding_dim)
        hidden_states, _ = self.lstm(embedded)  # (batch_size, seq_len, hidden_dim * 2)
        return self.fc(hidden_states)  # (batch_size, seq_len, tagset_size)
    

hidden_dim = 256
# Model dimensions are taken from the two indexers.
vocab_size, tagset_size = len(word_indexer), len(tag_indexer)
model = BiLSTMNER(
    vocab_size=vocab_size,
    embedding_dim=embedding_dim,
    hidden_dim=hidden_dim,
    tagset_size=tagset_size,
    embedding_matrix=embedding_matrix,
)

### 2. Train and Evaluate

In [None]:
from utils import get_tag_indices_from_scores
from metrics import MetricsHandler
import torch.optim as optim

labels_str = ['O', 'B-PER', 'I-PER', 'B-ORG', 'I-ORG', 'B-LOC', 'I-LOC']
# NOTE(review): labels_int is simply 0..len(labels_str)-1, while the tag ids fed
# to the metrics come from tag_indexer -- confirm the two numberings line up.
labels_int = [position for position, _ in enumerate(labels_str)]

# One independent metrics handler per split.
train_metrics, dev_metrics, test_metrics = (
    MetricsHandler(labels_int) for _ in range(3)
)

loss_func = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# train_model: revised so that epoch_loss is computed as a per-item average.
def train_model(model, train_loader, dev_loader, optimizer, loss_func, train_metrics, dev_metrics, num_epochs=5, device=None, train_dataset=None, dev_dataset=None, **kwargs):
    """Train ``model`` and evaluate it on the dev data after every epoch.

    Args:
        model: module called as ``model(inputs)``; its last output dimension
            is treated as the per-tag score dimension.
        train_loader: iterable of ``(inputs, targets)`` tensor pairs.
        dev_loader: iterable passed to ``evaluate_model`` each epoch.
        optimizer: optimizer over ``model``'s parameters.
        loss_func: callable ``loss_func(scores, targets)`` returning a scalar tensor.
        train_metrics: ignored on entry; a fresh ``MetricsHandler`` is created
            every epoch (kept for signature compatibility).
        dev_metrics: forwarded to ``evaluate_model`` and replaced by the
            handler it returns.
        num_epochs: number of passes over ``train_loader``.
        device: target passed to ``Tensor.to`` for every batch.
        train_dataset: if given, ``len(train_dataset)`` is the divisor of the
            epoch loss; otherwise the number of items seen in the epoch is used.
        dev_dataset: forwarded to ``evaluate_model`` as ``dataset``.
        **kwargs: accepted and ignored.

    Returns:
        ``(model, train_metrics, dev_metrics, losses, dev_f1_scores)`` where
        the metrics are those of the last epoch, ``losses`` holds one average
        training loss per epoch and ``dev_f1_scores`` one dev F1 per epoch.
    """
    losses = []
    dev_f1_scores = []

    for epoch in range(num_epochs):
        # The per-epoch evaluation below switches the model to eval mode, so
        # training mode has to be re-enabled at the start of every epoch.
        model.train()
        running_loss = 0.0
        seen_items = 0
        train_metrics = MetricsHandler(labels_int)

        # Training loop
        for inputs, targets in train_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)

            optimizer.zero_grad()
            output = model(inputs)  # (batch_size, seq_len, tagset_size)

            # Flatten to (num_positions, num_tags) using the model's own output
            # width rather than a module-level constant.
            output = output.view(-1, output.size(-1))
            targets_flat = targets.view(-1)

            loss = loss_func(output, targets_flat)
            loss.backward()
            optimizer.step()

            predictions = get_tag_indices_from_scores(output.detach().cpu().numpy())
            train_metrics.update(predictions, targets_flat.cpu().numpy())

            # Weight the batch loss by its size so the epoch value is a
            # per-item average even when the last batch is shorter.
            batch_items = inputs.size(0)
            running_loss += loss.item() * batch_items
            seen_items += batch_items

        train_metrics.collect()
        divisor = len(train_dataset) if train_dataset is not None else seen_items
        epoch_loss = running_loss / divisor if divisor else 0.0
        losses.append(epoch_loss)
        train_f1 = train_metrics.get_metrics()["F1-score"][-1]

        dev_loss, dev_metrics = evaluate_model(model, dev_loader, loss_func, dev_metrics, device=device, dataset=dev_dataset)
        dev_metrics.collect()
        dev_f1 = dev_metrics.get_metrics()["F1-score"][-1]
        dev_f1_scores.append(dev_f1)

        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}, Train F1: {train_f1:.4f}, Dev F1: {dev_f1:.4f}")

    return model, train_metrics, dev_metrics, losses, dev_f1_scores


In [None]:
def evaluate_model(model, data_loader, loss_func, eval_metrics, device=None, dataset=None, **kwargs):
    """Compute the average loss and metrics of ``model`` over ``data_loader``.

    The model is switched to eval mode and left in it; gradients are disabled
    for the whole pass.

    Args:
        model: module called as ``model(inputs)``; its last output dimension
            is treated as the per-tag score dimension.
        data_loader: iterable of ``(inputs, targets)`` tensor pairs.
        loss_func: callable ``loss_func(scores, targets)`` returning a scalar tensor.
        eval_metrics: ignored; a fresh ``MetricsHandler`` is always created
            (kept for signature compatibility).
        device: target passed to ``Tensor.to`` for every batch.
        dataset: if given, ``len(dataset)`` is the divisor of the average
            loss; otherwise the number of items seen is used.
        **kwargs: accepted and ignored.

    Returns:
        ``(eval_loss, eval_metrics)``. ``collect()`` has not been called on
        the returned handler.
    """
    model.eval()
    eval_metrics = MetricsHandler(labels_int)
    total_loss = 0.0
    seen_items = 0
    with torch.no_grad():
        for inputs, targets in data_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)

            output = model(inputs)

            # Flatten to (num_positions, num_tags) using the model's own output
            # width rather than a module-level constant.
            output = output.view(-1, output.size(-1))
            targets_flat = targets.view(-1)

            loss = loss_func(output, targets_flat)
            batch_items = inputs.size(0)
            total_loss += loss.item() * batch_items
            seen_items += batch_items

            predictions = get_tag_indices_from_scores(output.cpu().numpy())
            eval_metrics.update(predictions, targets_flat.cpu().numpy())

    # Fall back to the observed item count when no dataset is supplied, so the
    # default ``dataset=None`` no longer fails on ``len(None)``.
    divisor = len(dataset) if dataset is not None else seen_items
    eval_loss = total_loss / divisor if divisor else 0.0
    return eval_loss, eval_metrics

In [None]:
# Prefer the GPU when one is available.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = model.to(device)

num_epochs = 10
print("开始训练...")

# Start from fresh metric handlers for this run.
train_metrics = MetricsHandler(labels_int)
dev_metrics = MetricsHandler(labels_int)

model, train_metrics, dev_metrics, losses, dev_f1_scores = train_model(
    model=model,
    train_loader=train_loader,
    dev_loader=dev_loader,
    optimizer=optimizer,
    loss_func=loss_func,
    train_metrics=train_metrics,
    dev_metrics=dev_metrics,
    num_epochs=num_epochs,
    device=device,
    train_dataset=train_dataset,
    dev_dataset=dev_dataset,
)

# Persist only the parameters (state_dict), not the whole module object.
model_path = "bilstm_ner_model.pth"
torch.save(model.state_dict(), model_path)
print(f"模型参数已保存到 {model_path}")

# Report the dev F1 of the first five epochs.
print("\n前5个周期的开发集F-1分数：")
for i, f1 in zip(range(1, 6), dev_f1_scores):
    print(f"Epoch {i}: Dev F1 Score: {f1:.4f}")
 

开始训练...
Epoch 1/10, Loss: 0.0018, Train F1: 0.9981, Dev F1: 0.8794
Epoch 2/10, Loss: 0.0019, Train F1: 0.9984, Dev F1: 0.8866
Epoch 3/10, Loss: 0.0019, Train F1: 0.9982, Dev F1: 0.8800
Epoch 4/10, Loss: 0.0012, Train F1: 0.9985, Dev F1: 0.8822
Epoch 5/10, Loss: 0.0014, Train F1: 0.9987, Dev F1: 0.8822
Epoch 6/10, Loss: 0.0016, Train F1: 0.9985, Dev F1: 0.8731
Epoch 7/10, Loss: 0.0012, Train F1: 0.9988, Dev F1: 0.8667
Epoch 8/10, Loss: 0.0014, Train F1: 0.9989, Dev F1: 0.8688
Epoch 9/10, Loss: 0.0009, Train F1: 0.9993, Dev F1: 0.8828
Epoch 10/10, Loss: 0.0005, Train F1: 0.9994, Dev F1: 0.8803
模型参数已保存到 bilstm_ner_model.pth

前5个周期的开发集F-1分数：
Epoch 1: Dev F1 Score: 0.8794
Epoch 2: Dev F1 Score: 0.8866
Epoch 3: Dev F1 Score: 0.8800
Epoch 4: Dev F1 Score: 0.8822
Epoch 5: Dev F1 Score: 0.8822


In [None]:
print("开始测试...")
# Final evaluation on the held-out test split.
test_loss, test_metrics = evaluate_model(
    model=model,
    data_loader=test_loader,
    loss_func=loss_func,
    eval_metrics=test_metrics,
    device=device,
    dataset=test_dataset,
)
# collect() is called before reading the metrics; the last "F1-score" entry is reported.
test_metrics.collect()
test_f1 = test_metrics.get_metrics()["F1-score"][-1]
print(f"Test F1 Score: {test_f1:.4f}")

开始测试...
Test F1 Score: 0.8245


### 3. Other Experiments