In [1]:
#  Imports
import os
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
import pickle

In [2]:
# Load processed data
DATA_DIR = '../../data/ner/'
Xw_train = np.load(os.path.join(DATA_DIR, 'Xw_train.npy'))
Xw_val = np.load(os.path.join(DATA_DIR, 'Xw_val.npy'))
Xp_train = np.load(os.path.join(DATA_DIR, 'Xp_train.npy'))
Xp_val = np.load(os.path.join(DATA_DIR, 'Xp_val.npy'))
Yt_train = np.load(os.path.join(DATA_DIR, 'Yt_train.npy'))
Yt_val = np.load(os.path.join(DATA_DIR, 'Yt_val.npy'))

with open(os.path.join(DATA_DIR, 'word2idx.pkl'), 'rb') as f:
    word2idx = pickle.load(f)
with open(os.path.join(DATA_DIR, 'pos2idx.pkl'), 'rb') as f:
    pos2idx = pickle.load(f)
with open(os.path.join(DATA_DIR, 'tag2idx.pkl'), 'rb') as f:
    tag2idx = pickle.load(f)
idx2tag = {i: t for t, i in tag2idx.items()}

In [3]:
# Dataset
class NERDataset(Dataset):
    def __init__(self, Xw, Xp, Yt):
        self.Xw = torch.LongTensor(Xw)
        self.Xp = torch.LongTensor(Xp)
        self.Yt = torch.LongTensor(Yt)
    def __len__(self):
        return len(self.Xw)
    def __getitem__(self, idx):
        return self.Xw[idx], self.Xp[idx], self.Yt[idx]

train_ds = NERDataset(Xw_train, Xp_train, Yt_train)
val_ds = NERDataset(Xw_val, Xp_val, Yt_val)

BATCH_SIZE = 64
train_dl = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
val_dl = DataLoader(val_ds, batch_size=BATCH_SIZE)

In [4]:
# Model Definition
class BiLSTM_NER(nn.Module):
    def __init__(self, vocab_size, pos_size, tag_size, emb_dim=100, pos_emb_dim=16, lstm_units=128):
        super().__init__()
        self.word_emb = nn.Embedding(vocab_size, emb_dim, padding_idx=0)
        self.pos_emb = nn.Embedding(pos_size, pos_emb_dim, padding_idx=0)
        self.lstm = nn.LSTM(emb_dim+pos_emb_dim, lstm_units, batch_first=True, bidirectional=True)
        self.fc = nn.Linear(lstm_units*2, tag_size)
    def forward(self, Xw, Xp):
        w = self.word_emb(Xw)
        p = self.pos_emb(Xp)
        feats = torch.cat([w, p], dim=-1)
        lstm_out, _ = self.lstm(feats)
        out = self.fc(lstm_out)
        return out  # [batch, seq, tag_size]

VOCAB_SIZE = len(word2idx)
POS_SIZE = len(pos2idx)
TAG_SIZE = len(tag2idx)
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = BiLSTM_NER(VOCAB_SIZE, POS_SIZE, TAG_SIZE).to(DEVICE)
loss_fn = nn.CrossEntropyLoss(ignore_index=0)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [5]:
# Train Loop
EPOCHS = 10
for epoch in range(EPOCHS):
    model.train()
    total_loss = 0
    progress = tqdm(train_dl, desc=f"Epoch {epoch+1}/{EPOCHS}")
    for Xw, Xp, Yt in progress:
        Xw, Xp, Yt = Xw.to(DEVICE), Xp.to(DEVICE), Yt.to(DEVICE)
        optimizer.zero_grad()
        logits = model(Xw, Xp)
        loss = loss_fn(logits.view(-1, TAG_SIZE), Yt.view(-1))
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        progress.set_postfix(loss=total_loss / (progress.n+1))
    # Validation
    model.eval()
    val_loss, val_steps = 0, 0
    with torch.no_grad():
        for Xw, Xp, Yt in val_dl:
            Xw, Xp, Yt = Xw.to(DEVICE), Xp.to(DEVICE), Yt.to(DEVICE)
            logits = model(Xw, Xp)
            loss = loss_fn(logits.view(-1, TAG_SIZE), Yt.view(-1))
            val_loss += loss.item()
            val_steps += 1
    print(f"Epoch {epoch+1} Valid Loss: {val_loss / val_steps:.4f}")


Epoch 1/10: 100%|████████████████████| 600/600 [00:06<00:00, 94.37it/s, loss=0.298]


Epoch 1 Valid Loss: 0.1427


Epoch 2/10: 100%|███████████████████| 600/600 [00:04<00:00, 122.38it/s, loss=0.123]


Epoch 2 Valid Loss: 0.1140


Epoch 3/10: 100%|██████████████████| 600/600 [00:03<00:00, 158.83it/s, loss=0.0994]


Epoch 3 Valid Loss: 0.1045


Epoch 4/10: 100%|███████████████████| 600/600 [00:05<00:00, 117.02it/s, loss=0.079]


Epoch 4 Valid Loss: 0.0994


Epoch 5/10: 100%|██████████████████| 600/600 [00:05<00:00, 107.31it/s, loss=0.0673]


Epoch 5 Valid Loss: 0.1019


Epoch 6/10: 100%|██████████████████| 600/600 [00:05<00:00, 109.96it/s, loss=0.0566]


Epoch 6 Valid Loss: 0.0987


Epoch 7/10: 100%|██████████████████| 600/600 [00:05<00:00, 115.75it/s, loss=0.0465]


Epoch 7 Valid Loss: 0.1035


Epoch 8/10: 100%|██████████████████| 600/600 [00:04<00:00, 126.92it/s, loss=0.0375]


Epoch 8 Valid Loss: 0.1091


Epoch 9/10: 100%|██████████████████| 600/600 [00:04<00:00, 134.59it/s, loss=0.0301]


Epoch 9 Valid Loss: 0.1163


Epoch 10/10: 100%|█████████████████| 600/600 [00:04<00:00, 127.35it/s, loss=0.0231]


Epoch 10 Valid Loss: 0.1238


In [8]:
# Save Model
SAVE_DIR = '../../models/ner/'
torch.save(model.state_dict(), os.path.join(SAVE_DIR, "best_ner_model.pt"))
print("Model saved as best_ner_model.pt")

Model saved as best_ner_model.pt
