# Named Entity Recognition

The Assignment has been divided into the following 3 parts:
  1. English
  2. Hindi
  3. Bangla

Each model goes through the following steps in both the coarse-grained and fine-grained settings:
1. Dataset Preparation
2. Feature Extraction
3. Model Definition
4. Training the Model
5. Model Evaluation

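Fine-grained setting: 36 entity types in the tag mapping, labelled with the BIO scheme (a `B-` and an `I-` tag per type, plus a single `O` tag)  
Coarse-grained setting: 6 entity types, labelled with the same BIO scheme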


In [None]:
%pip install pytorch-lightning -q -U
import torch
from sklearn.metrics import classification_report

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m716.4/716.4 KB[0m [31m10.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m519.2/519.2 KB[0m [31m32.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m264.6/264.6 KB[0m [31m9.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m114.2/114.2 KB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m158.8/158.8 KB[0m [31m6.9 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## English 
Download the folder from "https://drive.google.com/drive/folders/1BZ_usq8IOBKxszRn0snlp-BqVB2Skuo1?usp=sharing" 

In [None]:
# Locations of the English train/dev/test CoNLL files on the mounted Google Drive.
# Download the MultiCoNER folder from the link above first, and adjust these paths
# if it is stored somewhere else in your Drive.
train_path = '/content/drive/MyDrive/KGP_Courses/Deep_Learning/Assignment_2/Dataset/EN-English/en_train.conll'
dev_path = '/content/drive/MyDrive/KGP_Courses/Deep_Learning/Assignment_2/Dataset/EN-English/en_dev.conll'
test_path = '/content/drive/MyDrive/KGP_Courses/Deep_Learning/Assignment_2/Dataset/EN-English/en_test.conll'


#### Tag mapping logic

In [None]:
# Fine-grained entity types, grouped under the coarse-grained category they belong to.
_COARSE_TO_FINE = {
    'Medical': (
        'AnatomicalStructure', 'Disease', 'MedicalProcedure',
        'Medication/Vaccine', 'Symptom',
    ),
    'Product': (
        'Clothing', 'Drink', 'Food', 'OtherPROD', 'Vehicle',
    ),
    'Person': (
        'Artist', 'Athlete', 'Cleric', 'OtherPER', 'Politician',
        'Scientist', 'SportsManager',
    ),
    'Group': (
        'AerospaceManufacturer', 'CarManufacturer', 'MusicalGRP', 'ORG',
        'OtherCORP', 'PrivateCorp', 'PublicCorp', 'SportsGRP', 'TechCORP',
    ),
    'CreativeWorks': (
        'ArtWork', 'MusicalWork', 'OtherCW', 'Software', 'VisualWork',
        'WrittenWork',
    ),
    'Location': (
        'Facility', 'HumanSettlement', 'OtherLOC', 'Station',
    ),
}

# Tag mapping from a fine-grained label to its coarse-grained label.
tag_dict = {
    fine_label: coarse_label
    for coarse_label, fine_labels in _COARSE_TO_FINE.items()
    for fine_label in fine_labels
}


### Parsing function for both the fine and coarse grain settings

In [None]:
def dataset_parser(file, setting):
  """Parse a CoNLL-style file into sentences of word/tag pairs.

  Every token line is split on the ``' _ _ '`` column separator; the first
  field is taken as the word and the last field as the tag. A blank line ends
  the current sentence.

  Args:
    file: Path of the file to read (opened as UTF-8 text).
    setting: ``'C'`` rewrites each ``B-``/``I-``/``O-`` prefixed label to its
      coarse label using the module-level ``tag_dict``. Any other value keeps
      the tags exactly as they appear in the file.

  Returns:
    A list of sentences, each a list of ``{'word': ..., 'tag': ...}`` dicts.

  Raises:
    KeyError: In the coarse setting, when a prefixed label is not a key of
      ``tag_dict``.
  """
  separator = ' _ _ '
  pairs = []
  sentence = []
  with open(file, 'r', encoding='utf-8') as f:
    # Stream the file line by line; the whole file never has to sit in memory.
    for raw_line in f:
      line = raw_line.strip()
      if line == '':
        # Blank line: close the sentence collected so far (if any).
        if sentence:
          pairs.append(sentence)
          sentence = []
        continue
      # A '#' line is a comment only when it has no column separator. A token
      # that itself begins with '#' (e.g. "# _ _ O") must not be dropped.
      if line.startswith('#') and separator not in line:
        continue
      parts = line.split(separator)
      word, tag = parts[0], parts[-1]
      if setting == 'C':  # coarse setting
        prefix, dash, label = tag.partition('-')
        # Only "<B|I|O>-<label>" tags are remapped; a bare "O" has no dash.
        if dash and prefix in ('B', 'I', 'O'):
          tag = prefix + '-' + tag_dict[label]
      sentence.append({'word': word, 'tag': tag})
  # The file may end without a trailing blank line.
  if sentence:
    pairs.append(sentence)
  return pairs

In [None]:
# Parse all three English splits in the coarse setting.
train_dataset_coarse = dataset_parser(train_path, 'C')
test_dataset_coarse = dataset_parser(test_path, 'C')
dev_dataset_coarse = dataset_parser(dev_path, 'C')

# Number of sentences per split, reported in train / test / dev order.
for split in (train_dataset_coarse, test_dataset_coarse, dev_dataset_coarse):
    print(len(split))
# print(train_dataset_coarse[0])

# Number of tokens per split, in the same order.
for split in (train_dataset_coarse, test_dataset_coarse, dev_dataset_coarse):
    le = sum(len(sentence) for sentence in split)
    print(le)

16778
249980
871
253011
3773671
13323


In [None]:
# Fixed sentence length used by preprocessor(): shorter sentences are padded
# with '<PAD>' up to this many tokens and longer ones are truncated to it.
SEQ_LEN = 33

In [None]:
def preprocessor(data, word_dict, tag_dict, seq_len=None):
    """Turn parsed sentences into fixed-length index tensors.

    Words are lowercased, every sentence is truncated or right-padded with
    ``'<PAD>'`` to ``seq_len`` tokens, and any word or tag not yet present in
    the given dictionaries is appended with the next free index.

    Args:
        data: List of sentences, each a list of ``{'word': ..., 'tag': ...}``
            dicts. It is not modified.
        word_dict: Word -> index mapping. Mutated in place: unseen words are
            added.
        tag_dict: Tag -> index mapping. Mutated in place: unseen tags are
            added. Must already contain ``'<PAD>'`` if padding should map to
            a reserved index.
        seq_len: Target sentence length. Defaults to the module-level
            ``SEQ_LEN`` when omitted.

    Returns:
        ``(X, Y)``: two ``torch.long`` tensors holding word indices and tag
        indices, one row per sentence and ``seq_len`` columns.
    """
    if seq_len is None:
        seq_len = SEQ_LEN

    words = []
    tags = []
    for sentence in data:
        # Truncate first, then pad, so every row ends up exactly seq_len long.
        sent_words = [pair['word'].lower() for pair in sentence][:seq_len]
        sent_tags = [pair['tag'] for pair in sentence][:seq_len]
        padding = ['<PAD>'] * (seq_len - len(sent_words))
        words.append(sent_words + padding)
        tags.append(sent_tags + padding)

    # Grow the dictionaries from the padded/truncated rows only, so tokens cut
    # off by truncation never receive an index.
    for sent_tags in tags:
        for tag in sent_tags:
            tag_dict.setdefault(tag, len(tag_dict))

    for sent_words in words:
        for word in sent_words:
            word_dict.setdefault(word, len(word_dict))

    # Every word and tag was registered above, so direct indexing is safe.
    X = torch.tensor([[word_dict[word] for word in sent_words] for sent_words in words], dtype=torch.long)
    Y = torch.tensor([[tag_dict[tag] for tag in sent_tags] for sent_tags in tags], dtype=torch.long)

    return X, Y

In [None]:
# Index 0 is reserved for padding and index 1 for unknown words; preprocessor()
# appends every new word/tag it sees after these reserved entries.
word_dict_coarse = {"<PAD>": 0, "<UNK>": 1}
tag_dict_coarse = {"<PAD>": 0}

# The same two dictionaries are passed to all three calls, so they keep growing
# from train to dev to test.
# NOTE(review): this also adds dev/test words to the vocabulary, so no token is
# ever mapped to <UNK> here -- confirm this is intended.
train_X_coarse, train_Y_coarse = preprocessor(train_dataset_coarse, word_dict_coarse, tag_dict_coarse)
dev_X_coarse, dev_Y_coarse = preprocessor(dev_dataset_coarse, word_dict_coarse, tag_dict_coarse)
test_X_coarse, test_Y_coarse = preprocessor(test_dataset_coarse, word_dict_coarse, tag_dict_coarse)

# Print the sizes of the datasets
print(f"Number of training examples: {len(train_X_coarse)}")
print(f"Number of validation examples: {len(dev_X_coarse)}")
print(f"Number of testing examples: {len(test_X_coarse)}")
print(f"Number of words in the vocabulary: {len(word_dict_coarse)}")
print(f"Number of tags in the vocabulary: {len(tag_dict_coarse)}")
# First training sentence as word indices (trailing zeros are padding).
print(train_X_coarse[0])

Number of training examples: 16778
Number of validation examples: 871
Number of testing examples: 249980
Number of words in the vocabulary: 242180
Number of tags in the vocabulary: 14
tensor([ 2,  3,  4,  5,  6,  7,  8,  9, 10, 11,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0])


In [None]:
# Parse the same three files again, this time keeping the fine-grained tags.
# dataset_parser() only remaps tags for setting 'C'; 'F' leaves them as written.
train_dataset_fine = dataset_parser(train_path, 'F')
test_dataset_fine = dataset_parser(test_path, 'F')
dev_dataset_fine = dataset_parser(dev_path, 'F')


In [None]:
# Separate dictionaries for the fine-grained setting, with the same reserved
# entries: 0 for padding and 1 for unknown words.
word_dict_fine = {"<PAD>": 0, "<UNK>": 1}
tag_dict_fine = {"<PAD>": 0}

# The dictionaries are shared across the three calls and grow with each split.
# NOTE(review): dev/test words are added to the vocabulary too -- confirm this
# is intended.
train_X_fine, train_Y_fine = preprocessor(train_dataset_fine, word_dict_fine, tag_dict_fine)
dev_X_fine, dev_Y_fine = preprocessor(dev_dataset_fine, word_dict_fine, tag_dict_fine)
test_X_fine, test_Y_fine = preprocessor(test_dataset_fine, word_dict_fine, tag_dict_fine)

# Print the sizes of the datasets
print(f"Number of training examples: {len(train_X_fine)}")
print(f"Number of validation examples: {len(dev_X_fine)}")
print(f"Number of testing examples: {len(test_X_fine)}")
print(f"Number of words in the vocabulary: {len(word_dict_fine)}")
print(f"Number of tags in the vocabulary: {len(tag_dict_fine)}")
# First training sentence as word indices (trailing zeros are padding).
print(train_X_fine[0])

Number of training examples: 16778
Number of validation examples: 871
Number of testing examples: 249980
Number of words in the vocabulary: 242180
Number of tags in the vocabulary: 68
tensor([ 2,  3,  4,  5,  6,  7,  8,  9, 10, 11,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0])


#### Model Definition
The model is defined using PyTorch Lightning's `LightningModule` class, which lets us organize the training logic into separate methods, making the code easier to understand and maintain.

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import pytorch_lightning as pl

class NER(pl.LightningModule):
    """Sequence tagger: embedding -> (optionally bidirectional) LSTM -> linear layer.

    Args:
        vocab_size: Number of rows in the embedding table.
        tagset_size: Number of output classes per token.
        embedding_dim: Size of each word embedding.
        hidden_dim: Hidden size of the LSTM (per direction).
        num_layers: Number of stacked LSTM layers.
        bidirectional: Whether the LSTM reads the sequence in both directions.
    """

    def __init__(self, vocab_size, tagset_size, embedding_dim, hidden_dim, num_layers=1, bidirectional=False):
        super().__init__()
        # (B, seq_len) token ids -> (B, seq_len, embedding_dim)
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        # (B, seq_len, embedding_dim) -> (B, seq_len, directions * hidden_dim)
        self.lstm = nn.LSTM(
            embedding_dim,
            hidden_dim,
            batch_first=True,
            num_layers=num_layers,
            bidirectional=bidirectional,
        )
        # A bidirectional LSTM emits both directions side by side, so the
        # classifier input is twice as wide in that case.
        lstm_features = 2 * hidden_dim if bidirectional else hidden_dim
        self.fc = nn.Linear(lstm_features, tagset_size)
        self.loss_fn = nn.CrossEntropyLoss()

    def forward(self, x):
        """Return per-token log-probabilities over the tag set for a batch of token ids."""
        lstm_out, _ = self.lstm(self.embedding(x))
        return nn.functional.log_softmax(self.fc(lstm_out), dim=2)

    def _step_loss(self, batch, log_name):
        """Compute the loss for one ``(x, y)`` batch, log it under ``log_name`` and return it."""
        x, y = batch
        y_hat = self.forward(x)
        # Flatten batch and sequence dimensions so each token is one sample.
        # forward() already returns log-probabilities; they are passed to the
        # loss function as-is.
        num_tags = y_hat.shape[-1]
        loss = self.loss_fn(y_hat.view(-1, num_tags), y.view(-1))
        self.log(log_name, loss)
        return loss

    def training_step(self, batch, batch_idx):
        return self._step_loss(batch, 'train_loss')

    def validation_step(self, batch, batch_idx):
        return self._step_loss(batch, 'val_loss')

    def test_step(self, batch, batch_idx):
        return self._step_loss(batch, 'test_loss')

    def configure_optimizers(self):
        """Use Adam with its default settings over all model parameters."""
        return optim.Adam(self.parameters())

The class defines the constructor, the forward-propagation logic, the training, validation and test steps, and the optimizer configuration.

 `pl.LightningModule` is inherited to get access to PyTorch Lightning's training loop.

#### Training the Model
PyTorch Lightning's `Trainer` class is used to train the model; it takes care of setting up the training loop, optimizing the model, and handling GPU acceleration.

In [None]:
from torch.utils.data import DataLoader, TensorDataset
from pytorch_lightning.callbacks.early_stopping import EarlyStopping

# Hyper-parameters shared by the coarse and fine models.
EMBEDDING_DIM = 100  # size of each word embedding
HIDDEN_DIM    = 530  # LSTM hidden size (per direction)
NUM_EPOCHS    = 13  # upper bound on training epochs (passed as max_epochs)
BATCH_SIZE    = 10  # sentences per batch for every loader below

# NOTE: the *_dataset_coarse / *_dataset_fine names previously held the parsed
# sentence lists; from here on they are rebound to TensorDataset objects.
# Only the training loaders shuffle.
train_dataset_coarse = TensorDataset(train_X_coarse, train_Y_coarse)
train_loader_coarse = DataLoader(train_dataset_coarse, batch_size=BATCH_SIZE, shuffle=True)

val_dataset_coarse = TensorDataset(dev_X_coarse, dev_Y_coarse)
val_loader_coarse = DataLoader(val_dataset_coarse, batch_size=BATCH_SIZE)

test_dataset_coarse = TensorDataset(test_X_coarse, test_Y_coarse)
test_loader_coarse = DataLoader(test_dataset_coarse, batch_size=BATCH_SIZE)

# Same construction for the fine-grained tensors.
train_dataset_fine = TensorDataset(train_X_fine, train_Y_fine)
train_loader_fine = DataLoader(train_dataset_fine, batch_size=BATCH_SIZE, shuffle=True)

val_dataset_fine = TensorDataset(dev_X_fine, dev_Y_fine)
val_loader_fine = DataLoader(val_dataset_fine, batch_size=BATCH_SIZE)

test_dataset_fine = TensorDataset(test_X_fine, test_Y_fine)
test_loader_fine = DataLoader(test_dataset_fine, batch_size=BATCH_SIZE)

#### Run the cell below to train the coarse model

In [None]:
# Bidirectional LSTM tagger sized to the coarse vocabulary and tag set.
model = NER(vocab_size=len(word_dict_coarse), tagset_size=len(tag_dict_coarse), embedding_dim=EMBEDDING_DIM, hidden_dim=HIDDEN_DIM, bidirectional=True)
# Stop training early based on the logged 'val_loss' (lower is better, patience=4).
early_stopping = EarlyStopping(monitor="val_loss", patience=4, mode="min")
trainer = pl.Trainer(max_epochs=NUM_EPOCHS, callbacks=[early_stopping])
trainer.fit(model, train_dataloaders=train_loader_coarse, val_dataloaders=val_loader_coarse)
# No model is passed to test(); the log output below shows Lightning restoring
# the weights from its own checkpoint for this evaluation.
trainer.test(dataloaders=test_loader_coarse)

INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name      | Type             | Params
-----------------------------------------------
0 | embedding | Embedding        | 24.2 M
1 | lstm      | LSTM             | 2.7 M 
2 | fc        | Linear           | 14.9 K
3 | loss_fn   | CrossEntropyLoss | 0     
-----------------------------------------------
26.9 M    Trainable params
0         Non-trainable params
26.9 M    Total params
107.650   Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

  rank_zero_warn(
INFO:pytorch_lightning.utilities.rank_zero:Restoring states from the checkpoint path at /content/lightning_logs/version_0/checkpoints/epoch=6-step=11746.ckpt
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.utilities.rank_zero:Loaded model weights from the checkpoint at /content/lightning_logs/version_0/checkpoints/epoch=6-step=11746.ckpt


Testing: 0it [00:00, ?it/s]

[{'test_loss': 0.33054500818252563}]

In [None]:
# Save to the absolute Drive mount path (the same location the loading cell
# reads from) so the result does not depend on the current working directory.
torch.save(model.state_dict(), '/content/drive/MyDrive/KGP_Courses/Deep_Learning/Assignment_2/Model_Parameters/EN-English/en_coarse.pt')

In [None]:
# Make sure Drive is mounted before loading weights; if it already is, only an
# informational message is printed.
drive.mount('/content/drive')
# Location of the pretrained coarse-model weights (en_coarse.pt). Place the
# downloaded file at this path.
path = '/content/drive/MyDrive/KGP_Courses/Deep_Learning/Assignment_2/Model_Parameters/EN-English/en_coarse.pt'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Rebuild the network with the same sizes used for training, then load the
# saved weights from `path` into it.
en_coarse = NER(vocab_size=len(word_dict_coarse), tagset_size=len(tag_dict_coarse), embedding_dim=EMBEDDING_DIM, hidden_dim=HIDDEN_DIM, bidirectional=True)
en_coarse.load_state_dict(torch.load(path))

<All keys matched successfully>

In [None]:
en_coarse

NER(
  (embedding): Embedding(242180, 100)
  (lstm): LSTM(100, 530, batch_first=True, bidirectional=True)
  (fc): Linear(in_features=1060, out_features=14, bias=True)
  (loss_fn): CrossEntropyLoss()
)

In [None]:
def accuracy(tag_to_idx_coarse, test_dataset_coarse, model):
  """Print a per-tag classification report for ``model`` on a dataset.

  Every position of every sequence is counted, including positions whose true
  tag is the padding tag, because no masking is applied.

  Args:
    tag_to_idx_coarse: Mapping from tag string to integer index. It is
      inverted here to turn indices back into tag strings.
    test_dataset_coarse: Dataset yielding ``(x, y)`` pairs; it is batched with
      the module-level ``BATCH_SIZE``.
    model: Module called as ``model(x)``; its output is arg-maxed over the
      last dimension to obtain predicted tag indices.

  Returns:
    None. The report is printed.
  """
  idx_to_tag = {idx: tag for tag, idx in tag_to_idx_coarse.items()}

  # Fall back to the CPU when no GPU is present instead of failing on 'cuda'.
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

  test_loader = DataLoader(test_dataset_coarse, batch_size=BATCH_SIZE)

  # Move the model once, before the loop, rather than once per batch.
  model = model.to(device)
  model.eval()

  y_true = []
  y_pred = []

  with torch.no_grad():
    for x, y in test_loader:
      y_hat = model(x.to(device))

      # Flatten (batch, seq_len) indices into one long list of tag strings.
      y_pred.extend(idx_to_tag[i] for i in y_hat.argmax(-1).flatten().tolist())
      y_true.extend(idx_to_tag[i] for i in y.flatten().tolist())

  print(classification_report(y_true, y_pred))

In [None]:
accuracy(tag_dict_coarse,test_dataset_coarse,en_coarse)

                 precision    recall  f1-score   support

          <PAD>       1.00      1.00      1.00   4476021
B-CreativeWorks       0.52      0.39      0.44     62126
        B-Group       0.47      0.46      0.46     60026
     B-Location       0.63      0.59      0.61     67899
      B-Medical       0.39      0.25      0.30     22491
       B-Person       0.80      0.67      0.73    137674
      B-Product       0.24      0.23      0.23     27579
I-CreativeWorks       0.66      0.47      0.55    107467
        I-Group       0.62      0.51      0.56     74142
     I-Location       0.71      0.65      0.68     63019
      I-Medical       0.49      0.30      0.37     10614
       I-Person       0.83      0.69      0.75    153769
      I-Product       0.27      0.20      0.23     17506
              O       0.92      0.97      0.94   2969007

       accuracy                           0.94   8249340
      macro avg       0.61      0.53      0.56   8249340
   weighted avg       0.94   

#### Run the cell below to train the fine model

In [None]:
# Bidirectional LSTM tagger sized to the fine-grained vocabulary and tag set.
model = NER(vocab_size=len(word_dict_fine), tagset_size=len(tag_dict_fine), embedding_dim=EMBEDDING_DIM, hidden_dim=HIDDEN_DIM, bidirectional=True)
# Stop training early based on the logged 'val_loss' (lower is better, patience=4).
early_stopping = EarlyStopping(monitor="val_loss", patience=4, mode="min")
trainer = pl.Trainer(max_epochs=NUM_EPOCHS, callbacks=[early_stopping])
trainer.fit(model, train_dataloaders=train_loader_fine, val_dataloaders=val_loader_fine)

# No model is passed to test(); the log output below shows Lightning restoring
# the weights from its own checkpoint for this evaluation.
trainer.test(dataloaders=test_loader_fine)

INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name      | Type             | Params
-----------------------------------------------
0 | embedding | Embedding        | 24.2 M
1 | lstm      | LSTM             | 2.7 M 
2 | fc        | Linear           | 72.1 K
3 | loss_fn   | CrossEntropyLoss | 0     
-----------------------------------------------
27.0 M    Trainable params
0         Non-trainable params
27.0 M    Total params
107.879   Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

  rank_zero_warn(
INFO:pytorch_lightning.utilities.rank_zero:Restoring states from the checkpoint path at /content/lightning_logs/version_1/checkpoints/epoch=6-step=11746.ckpt
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.utilities.rank_zero:Loaded model weights from the checkpoint at /content/lightning_logs/version_1/checkpoints/epoch=6-step=11746.ckpt


Testing: 0it [00:00, ?it/s]

[{'test_loss': 0.42844489216804504}]

In [None]:
# Save to the absolute Drive mount path (the same location the loading cell
# reads from) so the result does not depend on the current working directory.
torch.save(model.state_dict(), '/content/drive/MyDrive/KGP_Courses/Deep_Learning/Assignment_2/Model_Parameters/EN-English/en_fine.pt')

In [None]:
# Location of the pretrained fine-model weights (en_fine.pt). Place the
# downloaded file at this path.
path = '/content/drive/MyDrive/KGP_Courses/Deep_Learning/Assignment_2/Model_Parameters/EN-English/en_fine.pt'

# Rebuild the network with the same sizes used for training, then load the
# saved weights from `path` into it.
en_fine = NER(vocab_size=len(word_dict_fine), tagset_size=len(tag_dict_fine), embedding_dim=EMBEDDING_DIM, hidden_dim=HIDDEN_DIM, bidirectional=True)
en_fine.load_state_dict(torch.load(path))

<All keys matched successfully>

In [None]:
en_fine

NER(
  (embedding): Embedding(242180, 100)
  (lstm): LSTM(100, 530, batch_first=True, bidirectional=True)
  (fc): Linear(in_features=1060, out_features=68, bias=True)
  (loss_fn): CrossEntropyLoss()
)

In [None]:
accuracy(tag_dict_fine,test_dataset_fine,en_fine)

                         precision    recall  f1-score   support

                  <PAD>       1.00      1.00      1.00   4476021
B-AerospaceManufacturer       0.27      0.43      0.33      1015
  B-AnatomicalStructure       0.34      0.26      0.29      5838
              B-ArtWork       0.17      0.22      0.19      1270
               B-Artist       0.53      0.58      0.55     57034
              B-Athlete       0.49      0.49      0.49     27629
      B-CarManufacturer       0.31      0.28      0.29      2984
               B-Cleric       0.32      0.23      0.27      4732
             B-Clothing       0.23      0.09      0.13      2244
              B-Disease       0.46      0.21      0.29      5623
                B-Drink       0.24      0.19      0.21      2246
             B-Facility       0.35      0.41      0.38     16184
                 B-Food       0.14      0.11      0.13      5317
      B-HumanSettlement       0.74      0.53      0.62     41102
     B-MedicalProcedure 

#### Use the pretrained model

In [None]:
def predict(tag_to_idx_coarse, word_to_idx_coarse, line, model, max_len=30):
  """Tag one whitespace-tokenized sentence with a trained model.

  The sentence is lowercased, truncated to ``max_len`` tokens and right-padded
  with index 0, so the result always has exactly ``max_len`` entries (padding
  positions included).

  Args:
    tag_to_idx_coarse: Mapping from tag string to integer index. It is
      inverted here to turn predicted indices back into tag strings.
    word_to_idx_coarse: Mapping from word to integer index. Words that are
      missing are replaced by index 1.
    line: Sentence to tag; tokens are separated by whitespace.
    model: Module called as ``model(x)`` with a ``(1, max_len)`` tensor of
      word indices; its output is arg-maxed over the last dimension.
    max_len: Number of token positions fed to the model. Defaults to 30.

  Returns:
    A list of ``max_len`` predicted tag strings, one per token position.
  """
  unk_idx = 1
  pad_idx = 0

  idx_to_tag = {idx: tag for tag, idx in tag_to_idx_coarse.items()}

  # Fall back to the CPU when no GPU is present instead of failing on 'cuda'.
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

  # preprocessor() lowercases every word before building the vocabulary, so
  # the lookup has to be lowercased as well or cased words would miss.
  # Slicing keeps at most max_len tokens; the previous counter-based loop let
  # one extra token through for long sentences.
  tokens = line.lower().split()[:max_len]
  x = [word_to_idx_coarse.get(token, unk_idx) for token in tokens]
  x += [pad_idx] * (max_len - len(x))

  model = model.to(device)
  model.eval()

  with torch.no_grad():
    y_hat = model(torch.tensor([x]).to(device))

  return [idx_to_tag[i] for i in y_hat.argmax(-1).flatten().tolist()]

In [None]:
y_pred = predict(tag_dict_coarse,word_dict_coarse, "robert gottschalk 1939 academy award winner and founder of pranavision ",en_coarse)
y_pred

['B-Person',
 'I-Person',
 'O',
 'B-CreativeWorks',
 'I-CreativeWorks',
 'O',
 'O',
 'O',
 'O',
 'B-Group',
 '<PAD>',
 '<PAD>',
 '<PAD>',
 '<PAD>',
 '<PAD>',
 '<PAD>',
 '<PAD>',
 '<PAD>',
 '<PAD>',
 '<PAD>',
 '<PAD>',
 '<PAD>',
 '<PAD>',
 '<PAD>',
 '<PAD>',
 '<PAD>',
 '<PAD>',
 '<PAD>',
 '<PAD>',
 '<PAD>']

In [None]:
y_pred = predict(tag_dict_fine,word_dict_fine, "robert gottschalk 1939 academy award winner and founder of pranavision ",en_fine)
y_pred

['B-OtherPER',
 'I-OtherPER',
 'O',
 'B-VisualWork',
 'I-VisualWork',
 'O',
 'O',
 'O',
 'O',
 'B-ORG',
 '<PAD>',
 '<PAD>',
 '<PAD>',
 '<PAD>',
 '<PAD>',
 '<PAD>',
 '<PAD>',
 '<PAD>',
 '<PAD>',
 '<PAD>',
 '<PAD>',
 '<PAD>',
 '<PAD>',
 '<PAD>',
 '<PAD>',
 '<PAD>',
 '<PAD>',
 '<PAD>',
 '<PAD>',
 '<PAD>']