## **BiLSTM (Bidirectional LSTM) + CRF**

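A BiLSTM reads each sequence in both directions, so every token's representation carries context from both its left and its right. A CRF layer on top scores the transitions between adjacent labels, so the model predicts the best label sequence as a whole instead of choosing each label independently.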

**Imports**

In [1]:
import torch
from torch import nn
from transformers import AutoTokenizer, AutoModel




**BiLSTM-CRF Model**

In [2]:
class BiLSTM_CRF(nn.Module):
    """BiLSTM encoder with a linear-chain CRF output layer for sequence tagging.

    The BiLSTM and the linear projection produce per-token *emission* scores.
    The CRF adds a learnable tag-to-tag *transition* matrix, so that a whole
    tag sequence is scored as the sum of its emission and transition scores.

    Args:
        input_dim: Size of each input feature vector.
        hidden_dim: Hidden size of the LSTM, per direction.
        output_dim: Number of tags.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, bidirectional=True, batch_first=True)
        # Forward and backward hidden states are concatenated, hence hidden_dim * 2.
        self.fc = nn.Linear(hidden_dim * 2, output_dim)
        # transitions[i, j] is the score of moving from tag i to tag j.
        self.transitions = nn.Parameter(torch.empty(output_dim, output_dim))
        nn.init.uniform_(self.transitions, -0.1, 0.1)

    def forward(self, x):
        """Return emission scores for every token.

        Args:
            x: Float tensor of shape (batch_size, seq_len, input_dim).

        Returns:
            Float tensor of shape (batch_size, seq_len, output_dim). These are
            unnormalised per-tag scores; use ``decode`` to get tag sequences
            and ``neg_log_likelihood`` to get a training loss.
        """
        lstm_out, _ = self.lstm(x)
        return self.fc(lstm_out)

    def neg_log_likelihood(self, x, tags):
        """Return the mean CRF negative log-likelihood of ``tags`` given ``x``.

        Args:
            x: Float tensor of shape (batch_size, seq_len, input_dim).
            tags: Long tensor of shape (batch_size, seq_len) with gold tag
                indices in ``[0, output_dim)``.

        Returns:
            Scalar tensor: ``log Z(x) - score(x, tags)`` averaged over the batch.
        """
        emissions = self(x)

        # Score of the gold path: emissions of the gold tags plus the
        # transitions between consecutive gold tags.
        emission_score = emissions.gather(2, tags.unsqueeze(2)).squeeze(2).sum(dim=1)
        transition_score = self.transitions[tags[:, :-1], tags[:, 1:]].sum(dim=1)
        gold_score = emission_score + transition_score

        # Forward algorithm: alpha[b, j] is the log-sum-exp of the scores of
        # all partial paths that end in tag j at the current time step.
        alpha = emissions[:, 0]
        for t in range(1, emissions.size(1)):
            # (batch, prev_tag, 1) + (prev_tag, next_tag) -> (batch, prev_tag, next_tag)
            alpha = torch.logsumexp(alpha.unsqueeze(2) + self.transitions, dim=1) + emissions[:, t]
        log_partition = torch.logsumexp(alpha, dim=1)

        return (log_partition - gold_score).mean()

    def decode(self, x):
        """Return the highest-scoring tag sequence for each input (Viterbi).

        Args:
            x: Float tensor of shape (batch_size, seq_len, input_dim).

        Returns:
            A list of ``batch_size`` lists, each holding ``seq_len`` integer
            tag indices.
        """
        with torch.no_grad():
            emissions = self(x)

            # best_score[b, j] is the score of the best partial path ending in tag j.
            best_score = emissions[:, 0]
            backpointers = []
            for t in range(1, emissions.size(1)):
                # candidate[b, i, j] = best path ending in i, then moving i -> j.
                candidate = best_score.unsqueeze(2) + self.transitions
                best_prev_score, best_prev_tag = candidate.max(dim=1)
                best_score = best_prev_score + emissions[:, t]
                backpointers.append(best_prev_tag)

            # Follow the backpointers from the best final tag to the first token.
            current_tag = best_score.argmax(dim=1)
            reversed_path = [current_tag]
            for best_prev_tag in reversed(backpointers):
                current_tag = best_prev_tag.gather(1, current_tag.unsqueeze(1)).squeeze(1)
                reversed_path.append(current_tag)
            reversed_path.reverse()

            return torch.stack(reversed_path, dim=1).tolist()

**Simplified Example**

In [3]:
# Seed the global RNG so the randomly initialised weights and the random input
# below, and therefore the printed tensor, are the same on every run.
torch.manual_seed(42)

model = BiLSTM_CRF(input_dim=768, hidden_dim=128, output_dim=10)
dummy_input = torch.rand(1, 5, 768)  # (batch_size, seq_len, input_dim)
outputs = model(dummy_input)

# One score per tag for every token: (batch_size, seq_len, output_dim).
assert outputs.shape == (1, 5, 10)
print(outputs)

tensor([[[-0.0333, -0.2842,  0.0870,  0.1059, -0.0011,  0.1217,  0.2354,
           0.0664, -0.0202,  0.1199],
         [-0.0543, -0.3156, -0.0060,  0.0705,  0.0257,  0.1533,  0.3117,
           0.1667,  0.0335,  0.0938],
         [ 0.0477, -0.3543,  0.0035, -0.0046, -0.0024,  0.1611,  0.2928,
           0.0502, -0.0333,  0.1238],
         [ 0.0564, -0.2324, -0.0074, -0.0454, -0.0717,  0.1942,  0.2631,
           0.0911, -0.0428,  0.1295],
         [ 0.0419, -0.1573, -0.0115, -0.1022, -0.1245,  0.1408,  0.2492,
           0.1591, -0.0429,  0.0188]]], grad_fn=<ViewBackward0>)
