## **1. Map NER, POS, and Chunk Tags to Their Integer IDs**

In [None]:
# NER label inventory; a tag's list position is its integer id.
Ner_Tag = ['O', 'B-PER', 'I-PER', 'B-ORG', 'I-ORG', 'B-LOC', 'I-LOC', 'B-MISC', 'I-MISC']
Ner_Number = list(range(len(Ner_Tag)))
# (tag, id) pairs, in id order
Ner = [(tag, number) for number, tag in enumerate(Ner_Tag)]
print(Ner)

[('O', 0), ('B-PER', 1), ('I-PER', 2), ('B-ORG', 3), ('I-ORG', 4), ('B-LOC', 5), ('I-LOC', 6), ('B-MISC', 7), ('I-MISC', 8)]


In [None]:
# POS label inventory; a tag's list position is its integer id.
POS_Tag = ['"', "''", '#', '$', '(', ')', ',', '.', ':', '``', 'CC', 'CD', 'DT', 'EX', 'FW', 'IN', 'JJ', 'JJR', 'JJS', 'LS', 'MD', 'NN', 'NNP', 'NNPS', 'NNS', 'NN|SYM', 'PDT', 'POS', 'PRP', 'PRP$', 'RB', 'RBR', 'RBS', 'RP', 'SYM', 'TO', 'UH', 'VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ', 'WDT', 'WP', 'WP$', 'WRB']
POS_Number = list(range(len(POS_Tag)))
# (tag, id) pairs, in id order
POS = [(tag, number) for number, tag in enumerate(POS_Tag)]
print(POS)

[('"', 0), ("''", 1), ('#', 2), ('$', 3), ('(', 4), (')', 5), (',', 6), ('.', 7), (':', 8), ('``', 9), ('CC', 10), ('CD', 11), ('DT', 12), ('EX', 13), ('FW', 14), ('IN', 15), ('JJ', 16), ('JJR', 17), ('JJS', 18), ('LS', 19), ('MD', 20), ('NN', 21), ('NNP', 22), ('NNPS', 23), ('NNS', 24), ('NN|SYM', 25), ('PDT', 26), ('POS', 27), ('PRP', 28), ('PRP$', 29), ('RB', 30), ('RBR', 31), ('RBS', 32), ('RP', 33), ('SYM', 34), ('TO', 35), ('UH', 36), ('VB', 37), ('VBD', 38), ('VBG', 39), ('VBN', 40), ('VBP', 41), ('VBZ', 42), ('WDT', 43), ('WP', 44), ('WP$', 45), ('WRB', 46)]


In [None]:
# Chunk label inventory; a tag's list position is its integer id.
Chunk_Tag = ['O', 'B-ADJP', 'I-ADJP', 'B-ADVP', 'I-ADVP', 'B-CONJP', 'I-CONJP', 'B-INTJ', 'I-INTJ', 'B-LST', 'I-LST', 'B-NP', 'I-NP', 'B-PP', 'I-PP', 'B-PRT', 'I-PRT', 'B-SBAR', 'I-SBAR', 'B-UCP', 'I-UCP', 'B-VP', 'I-VP']
Chunk_Number = list(range(len(Chunk_Tag)))
# (tag, id) pairs, in id order
Chunk = [(tag, number) for number, tag in enumerate(Chunk_Tag)]
print(Chunk)

[('O', 0), ('B-ADJP', 1), ('I-ADJP', 2), ('B-ADVP', 3), ('I-ADVP', 4), ('B-CONJP', 5), ('I-CONJP', 6), ('B-INTJ', 7), ('I-INTJ', 8), ('B-LST', 9), ('I-LST', 10), ('B-NP', 11), ('I-NP', 12), ('B-PP', 13), ('I-PP', 14), ('B-PRT', 15), ('I-PRT', 16), ('B-SBAR', 17), ('I-SBAR', 18), ('B-UCP', 19), ('I-UCP', 20), ('B-VP', 21), ('I-VP', 22)]


# **2. Data Preprocessing for BERT Model (Apply Hugging Face Data)**

### (1) Hugging Face Dataset Conll2003 Exploration

In [None]:
!pip install datasets



In [None]:
from datasets import load_dataset
dataset = load_dataset('conll2003')

Reusing dataset conll2003 (/root/.cache/huggingface/datasets/conll2003/conll2003/1.0.0/40e7cb6bcc374f7c349c83acd1e9352a4f09474eb691f64f364ee62eb65d0ca6)


  0%|          | 0/3 [00:00<?, ?it/s]

In [None]:
# List the available splits, then bind each one to its own name.
print(dataset.keys())
data_train, data_validation, data_test = (
    dataset[split] for split in ('train', 'validation', 'test')
)
# Number of examples per split, on one line
print(*(len(split) for split in (data_train, data_validation, data_test)))
data_train

dict_keys(['train', 'validation', 'test'])
14041 3250 3453


Dataset({
    features: ['id', 'tokens', 'pos_tags', 'chunk_tags', 'ner_tags'],
    num_rows: 14041
})

In [None]:
# Size and Python type of the NER tag column of the training split
ner_column = data_train['ner_tags']
print(len(ner_column))
print(type(ner_column))

14041
<class 'list'>


In [None]:
type(dataset)

datasets.dataset_dict.DatasetDict

In [None]:
dataset['train']['tokens'][0]

['EU', 'rejects', 'German', 'call', 'to', 'boycott', 'British', 'lamb', '.']

In [None]:
# Show the id, tokens and NER tag ids of the first training example.
first_example = data_train[0]
print(first_example['id'])
print(first_example['tokens'])
#print(first_example['pos_tags'])
#print(first_example['chunk_tags'])
print(first_example['ner_tags'])

0
['EU', 'rejects', 'German', 'call', 'to', 'boycott', 'British', 'lamb', '.']
[3, 0, 7, 0, 0, 0, 7, 0, 0]


### (2) Convert Data to BERT Input Style

In [None]:
!pip install transformers seqeval[gpu]

Collecting transformers
  Using cached transformers-4.13.0-py3-none-any.whl (3.3 MB)
Installing collected packages: transformers
Successfully installed transformers-4.13.0


In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizerFast, BertConfig, BertForTokenClassification

In [None]:
# Sequence length passed to the tokenizer as max_length
MAX_LEN = 128
# Batch sizes used by the DataLoaders
TRAIN_BATCH_SIZE, TEST_BATCH_SIZE = 4, 2
# Fast tokenizer (needed for return_offsets_mapping) for the uncased BERT base checkpoint
tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')

Downloading:   0%|          | 0.00/226k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/455k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [None]:
class Preprocess_Data(Dataset):
    """Torch dataset that turns one split of a token-classification corpus into BERT inputs.

    Each item is a dict of tensors: everything the tokenizer returns
    (including ``offset_mapping``) plus ``labels``, where the first sub-token
    of every word carries the word's NER tag id and every other position
    (special tokens, continuation sub-tokens, padding) is ``-100``.

    Args:
        dataset: mapping from split name to a split; each split must support
            ``len()`` and integer row indexing returning a dict with
            ``'tokens'`` and ``'ner_tags'``.
        tokenizer: callable accepting the keyword arguments used in
            ``__getitem__`` and returning a mapping with ``'offset_mapping'``.
        max_len: length every encoded example is padded / truncated to.
        usage: split name, e.g. ``'train'``, ``'validation'`` or ``'test'``.
    """

    def __init__(self, dataset, tokenizer, max_len, usage):  # usage -> train, validation, test
        self.data = dataset[usage]
        self.len = len(self.data)
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __getitem__(self, index):
        # Row access fetches a single example; indexing the column first
        # (data['tokens'][index]) materialises the whole column on every call.
        example = self.data[index]
        sentence = example['tokens']
        word_labels = example['ner_tags']

        # Pad to a fixed length so the default DataLoader collate function can
        # stack the items of a batch into one tensor.
        encoding = self.tokenizer(sentence,
                                  is_split_into_words=True,
                                  return_offsets_mapping=True,
                                  padding='max_length',
                                  truncation=True,
                                  max_length=self.max_len)

        # Start with every position ignored (-100), then label word starts.
        encoded_labels = np.full(len(encoding["offset_mapping"]), -100, dtype=int)

        word_idx = 0
        for token_idx, (start, end) in enumerate(encoding["offset_mapping"]):
            # An offset starting at 0 with non-zero end is treated as the first
            # sub-token of the next word; (0, 0) entries are skipped.
            if start == 0 and end != 0:
                encoded_labels[token_idx] = word_labels[word_idx]
                word_idx += 1

        item = {key: torch.as_tensor(val) for key, val in encoding.items()}
        item['labels'] = torch.as_tensor(encoded_labels)

        return item

    def __len__(self):
        return self.len

In [None]:
sentence = ["The", "European", "Commission", "said", "on", "Thursday", "it", "disagreed", "with", "German", "advice", "to", "consumers", "to", "shun", "British", "lamb", "until", "scientists", "determine", "whether", "mad", "cow", "disease", "can", "be", "transmitted", "to", "sheep", "."]
# Encode one pre-split example sentence to look at the raw tokenizer output.
sentence = (
    "The European Commission said on Thursday it disagreed with German advice "
    "to consumers to shun British lamb until scientists determine whether mad "
    "cow disease can be transmitted to sheep ."
).split()
encoding = tokenizer(
    sentence,
    max_length=MAX_LEN,
    truncation=True,
    padding='max_length',
    return_offsets_mapping=True,  #Set to True to return (char_start, char_end) for each token (default False)
    is_split_into_words=True,
)
encoding

{'input_ids': [101, 1996, 2647, 3222, 2056, 2006, 9432, 2009, 18335, 2007, 2446, 6040, 2000, 10390, 2000, 18454, 2078, 2329, 12559, 2127, 6529, 5646, 3251, 5506, 11190, 4295, 2064, 2022, 11860, 2000, 8351, 1012, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

In [None]:
print(encoding)

{'input_ids': [101, 1996, 2647, 3222, 2056, 2006, 9432, 2009, 18335, 2007, 2446, 6040, 2000, 10390, 2000, 18454, 2078, 2329, 12559, 2127, 6529, 5646, 3251, 5506, 11190, 4295, 2064, 2022, 11860, 2000, 8351, 1012, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

In [None]:
encoding.keys()

dict_keys(['input_ids', 'token_type_ids', 'attention_mask', 'offset_mapping'])

In [None]:
# One Preprocess_Data wrapper per split, all sharing tokenizer and max length
training_set, validation_set, testing_set = (
    Preprocess_Data(dataset, tokenizer, MAX_LEN, split)
    for split in ('train', 'validation', 'test')
)
print(len(training_set),len(validation_set),len(testing_set))

14041 3250 3453


In [None]:
training_set[0]

{'attention_mask': tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0]),
 'input_ids': tensor([  101,  7327, 19164,  2446,  2655,  2000, 17757,  2329, 12559,  1012,
           102,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     

In [None]:

# Print each token of the first training item next to its aligned label.
first_item = training_set[0]
first_tokens = tokenizer.convert_ids_to_tokens(first_item["input_ids"])
for token, label in zip(first_tokens, first_item["labels"]):
  print(f'{token:10}  {label}')

[CLS]       -100
eu          3
rejects     0
german      7
call        0
to          0
boycott     0
british     7
lamb        0
.           0
[SEP]       -100
[PAD]       -100
[PAD]       -100
[PAD]       -100
[PAD]       -100
[PAD]       -100
[PAD]       -100
[PAD]       -100
[PAD]       -100
[PAD]       -100
[PAD]       -100
[PAD]       -100
[PAD]       -100
[PAD]       -100
[PAD]       -100
[PAD]       -100
[PAD]       -100
[PAD]       -100
[PAD]       -100
[PAD]       -100
[PAD]       -100
[PAD]       -100
[PAD]       -100
[PAD]       -100
[PAD]       -100
[PAD]       -100
[PAD]       -100
[PAD]       -100
[PAD]       -100
[PAD]       -100
[PAD]       -100
[PAD]       -100
[PAD]       -100
[PAD]       -100
[PAD]       -100
[PAD]       -100
[PAD]       -100
[PAD]       -100
[PAD]       -100
[PAD]       -100
[PAD]       -100
[PAD]       -100
[PAD]       -100
[PAD]       -100
[PAD]       -100
[PAD]       -100
[PAD]       -100
[PAD]       -100
[PAD]       -100
[PAD]       -100
[PAD]  

In [None]:
# Define the Dataloader
# Only the training data is shuffled; validation and test batches are served in
# dataset order so evaluation runs are deterministic and comparable.
training_loader = DataLoader(training_set, batch_size = TRAIN_BATCH_SIZE, shuffle=True,num_workers=0)
validation_loader = DataLoader(validation_set,batch_size = TRAIN_BATCH_SIZE, shuffle=False,num_workers=0)
testing_loader = DataLoader(testing_set,batch_size = TEST_BATCH_SIZE, shuffle=False,num_workers=0)

In [None]:
print(len(training_loader),len(validation_loader),len(testing_loader))

3511 813 1727


# **3. Define the Model**

### 1) Train the Model

In [None]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

Mon Dec 13 06:10:39 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 495.44       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   33C    P0    23W / 300W |      0MiB / 16160MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
from torch import cuda

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

cuda


In [None]:
# EPOCHS = 2
# Optimizer step size and the max_norm handed to gradient clipping
LEARNING_RATE, MAX_GRAD_NORM = 1e-5, 10

In [None]:
# Define the model by just BertForTokenClassification
model = BertForTokenClassification.from_pretrained('bert-base-uncased', num_labels=len(Ner_Tag))
# train() moves every batch to `device`, so the model has to live there too;
# otherwise the forward pass fails with a device mismatch on a GPU runtime.
model = model.to(device)

Downloading:   0%|          | 0.00/420M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForTokenClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-u

In [None]:
# Adam over all model parameters with the configured learning rate
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=LEARNING_RATE,
)

NameError: ignored

In [None]:
def train(epoch):
    """Run one training pass over ``training_loader`` and one evaluation pass over ``validation_loader``.

    Uses the notebook-level ``model``, ``optimizer``, ``device``,
    ``training_loader``, ``validation_loader`` and ``MAX_GRAD_NORM``.
    Prints the running loss every 100 steps (with the wall-clock time of the
    last 100 training steps), then the mean loss and mean per-batch token
    accuracy for both passes. Accuracy only counts positions whose label is
    not ``-100``.

    Args:
        epoch: index of the current epoch; not used inside the function.

    Returns:
        None. All results are printed.
    """
    # Local import so the function does not depend on a later `import time` cell.
    import time

    tr_loss, tr_accuracy = 0, 0
    nb_tr_steps = 0
    # put model in training mode
    model.train()

    for idx, batch in enumerate(training_loader):

        ids = batch['input_ids'].to(device, dtype = torch.long)
        mask = batch['attention_mask'].to(device, dtype = torch.long)
        labels = batch['labels'].to(device, dtype = torch.long)

        outputs = model(input_ids=ids, attention_mask=mask, labels=labels)
        loss = outputs[0]
        tr_logits = outputs[1]
        tr_loss += loss.item()

        nb_tr_steps += 1

        if idx % 100==0:
            loss_step = tr_loss/nb_tr_steps
            print(f"Training loss per 100 training steps: {loss_step}")

            # start_time is first set at idx == 0, so it exists for idx != 0
            if idx!=0:
              time_spent = time.time() - start_time

              print("--- %s seconds ---" % (time_spent))
            start_time = time.time()

        # compute training accuracy
        flattened_targets = labels.view(-1) # shape (batch_size * seq_len,)
        active_logits = tr_logits.view(-1, model.num_labels) # shape (batch_size * seq_len, num_labels)
        flattened_predictions = torch.argmax(active_logits, axis=1) # shape (batch_size * seq_len,)

        # only compute accuracy at active labels
        active_accuracy = flattened_targets != -100 # shape (batch_size * seq_len,)

        labels = torch.masked_select(flattened_targets, active_accuracy)
        predictions = torch.masked_select(flattened_predictions, active_accuracy)

        tmp_tr_accuracy = accuracy_score(labels.cpu().numpy(), predictions.cpu().numpy())
        tr_accuracy += tmp_tr_accuracy

        # Clear old gradients, backpropagate, THEN clip: clipping has to see the
        # gradients of the current step to have any effect on the update.
        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(
            parameters=model.parameters(), max_norm=MAX_GRAD_NORM
        )
        optimizer.step()

    epoch_loss = tr_loss / nb_tr_steps
    tr_accuracy = tr_accuracy / nb_tr_steps
    print(f"Training loss epoch: {epoch_loss}")
    print(f"Training accuracy epoch: {tr_accuracy}")

    # ---- evaluation pass on the validation loader ----
    model.eval()

    eval_loss, eval_accuracy = 0, 0
    nb_eval_steps = 0

    with torch.no_grad():
        for idx, batch in enumerate(validation_loader):

            ids = batch['input_ids'].to(device, dtype = torch.long)
            mask = batch['attention_mask'].to(device, dtype = torch.long)
            labels = batch['labels'].to(device, dtype = torch.long)

            outputs= model(input_ids=ids, attention_mask=mask, labels=labels)
            loss = outputs[0]
            eval_logits = outputs[1]
            eval_loss += loss.item()

            nb_eval_steps += 1

            if idx % 100==0:
                loss_step = eval_loss/nb_eval_steps
                print(f"Validation loss per 100 evaluation steps: {loss_step}")

            # compute evaluation accuracy
            flattened_targets = labels.view(-1) # shape (batch_size * seq_len,)
            active_logits = eval_logits.view(-1, model.num_labels) # shape (batch_size * seq_len, num_labels)
            flattened_predictions = torch.argmax(active_logits, axis=1) # shape (batch_size * seq_len,)

            # only compute accuracy at active labels
            active_accuracy = flattened_targets != -100 # shape (batch_size * seq_len,)

            labels = torch.masked_select(flattened_targets, active_accuracy)
            predictions = torch.masked_select(flattened_predictions, active_accuracy)

            tmp_eval_accuracy = accuracy_score(labels.cpu().numpy(), predictions.cpu().numpy())
            eval_accuracy += tmp_eval_accuracy

    eval_loss = eval_loss / nb_eval_steps
    eval_accuracy = eval_accuracy / nb_eval_steps
    print(f"Validation Loss: {eval_loss}")
    print(f"Validation Accuracy: {eval_accuracy}")

In [None]:
len(training_loader),len(validation_loader),len(testing_loader)

(3511, 813, 1727)

In [None]:
import time

In [None]:
# This cell needs names that the notebook otherwise only defines in later
# cells, so bring in what can be brought in here.
# NOTE: valid() is defined in the "Evaluate the Model" section; run that cell first.
from seqeval.metrics import classification_report

EPOCHS = 3

# id -> tag lookup used to turn predicted ids back into NER tag strings
New_NerDict = {number: tag for tag, number in Ner}

for epoch in range(EPOCHS):

    print(f"Training epoch: {epoch + 1}")
    train(epoch)
    labels, predictions = valid(model, testing_loader)

    # seqeval expects a list of tag sequences; everything is passed as one sequence
    labels_value = [[New_NerDict[i.item()] for i in labels]]
    pred_value = [[New_NerDict[i.item()] for i in predictions]]

    print(classification_report(labels_value, pred_value,digits=4))


Training epoch: 1
Training loss per 100 training steps: 2.2886404991149902
Training loss per 100 training steps: 0.8377493709915935
--- 132.19996309280396 seconds ---
Training loss per 100 training steps: 0.6041384562108647
--- 131.6453320980072 seconds ---
Training loss per 100 training steps: 0.4790204551033799
--- 133.02992749214172 seconds ---
Training loss per 100 training steps: 0.40123000084666394
--- 130.93287420272827 seconds ---
Training loss per 100 training steps: 0.3511638321752052
--- 131.74452304840088 seconds ---
Training loss per 100 training steps: 0.31391230177098634
--- 132.75093364715576 seconds ---
Training loss per 100 training steps: 0.28538044925862804
--- 132.82799410820007 seconds ---
Training loss per 100 training steps: 0.2634756712969067
--- 130.9553461074829 seconds ---
Training loss per 100 training steps: 0.24499407883234256
--- 132.77265453338623 seconds ---
Training loss per 100 training steps: 0.23024931896550507
--- 131.6167459487915 seconds ---
Tra

### 2) Evaluate the Model

In [None]:
def valid(model, testing_loader):
    """Evaluate ``model`` on every batch of ``testing_loader`` without gradient tracking.

    Prints the running mean loss every 100 batches, then the mean loss and
    the mean per-batch token accuracy. Positions labelled ``-100`` are left
    out of the accuracy and of the returned lists.

    Args:
        model: token-classification model whose output holds the loss at
            index 0 and the logits at index 1, and which has ``num_labels``.
        testing_loader: iterable of batches with ``'input_ids'``,
            ``'attention_mask'`` and ``'labels'`` tensors.

    Returns:
        ``(eval_labels, eval_preds)``: two lists of zero-dimensional tensors
        holding the gold and predicted label ids of all active positions.
    """
    # put model in evaluation mode
    model.eval()

    running_loss = 0
    accuracy_sum = 0
    step_count = 0
    gold_all, pred_all = [], []

    with torch.no_grad():
        for step, batch in enumerate(testing_loader):
            input_ids = batch['input_ids'].to(device, dtype = torch.long)
            attention = batch['attention_mask'].to(device, dtype = torch.long)
            targets = batch['labels'].to(device, dtype = torch.long)

            result = model(input_ids=input_ids, attention_mask=attention, labels=targets)
            batch_loss, logits = result[0], result[1]

            running_loss += batch_loss.item()
            step_count += 1

            if step % 100 == 0:
                loss_step = running_loss / step_count
                print(f"Validation loss per 100 evaluation steps: {loss_step}")

            # flatten to one row per token position and take the best class
            flat_targets = targets.view(-1)
            flat_preds = torch.argmax(logits.view(-1, model.num_labels), axis=1)

            # keep only positions that carry a real label
            keep = flat_targets != -100
            gold = torch.masked_select(flat_targets, keep)
            guessed = torch.masked_select(flat_preds, keep)

            gold_all.extend(gold)
            pred_all.extend(guessed)

            accuracy_sum += accuracy_score(gold.cpu().numpy(), guessed.cpu().numpy())

    eval_loss = running_loss / step_count
    eval_accuracy = accuracy_sum / step_count
    print(f"Validation Loss: {eval_loss}")
    print(f"Validation Accuracy: {eval_accuracy}")

    return gold_all, pred_all


In [None]:
from seqeval.metrics import classification_report

# Restore the saved weights. map_location lets a checkpoint written on a GPU
# load on whatever `device` this runtime has (including the CPU fallback).
model.load_state_dict(torch.load("/content/drive/MyDrive/NLP/Final Project/Code/Model Ensemble/ensemble_uncased_Conll2003_HF (data)/model_weights_3.pth", map_location=device))
model.to(device)

# id -> tag lookup used to turn label ids back into NER tag strings
New_NerDict = dict((v,k) for k,v in dict(Ner).items())

labels, predictions = valid(model, testing_loader)

# seqeval expects a list of tag sequences; everything is passed as one sequence
labels_value = [[New_NerDict[i.item()] for i in labels]]
pred_value = [[New_NerDict[i.item()] for i in predictions]]

print(classification_report(labels_value, pred_value,digits=4))

Validation loss per 100 evaluation steps: 0.0027766586281359196
Validation loss per 100 evaluation steps: 0.15524056149866503
Validation loss per 100 evaluation steps: 0.14751979589306047
Validation loss per 100 evaluation steps: 0.1387258027119897
Validation loss per 100 evaluation steps: 0.1383085971629361
Validation loss per 100 evaluation steps: 0.12925377590327916
Validation loss per 100 evaluation steps: 0.12609747562778473
Validation loss per 100 evaluation steps: 0.13111245162518428
Validation loss per 100 evaluation steps: 0.1304963320599631
Validation loss per 100 evaluation steps: 0.1265003492870661
Validation loss per 100 evaluation steps: 0.12939825952942322
Validation loss per 100 evaluation steps: 0.1280090639924307
Validation loss per 100 evaluation steps: 0.1257178423247153
Validation loss per 100 evaluation steps: 0.12639208758910656
Validation loss per 100 evaluation steps: 0.1301280092606395
Validation loss per 100 evaluation steps: 0.12929564502079474
Validation lo

In [None]:
from seqeval.metrics import classification_report

# Restore the saved weights. map_location lets a checkpoint written on a GPU
# load on whatever `device` this runtime has (including the CPU fallback).
model.load_state_dict(torch.load("/content/drive/MyDrive/NLP/Final Project/Code/Model Ensemble/ensemble_uncased_Conll2003_HF (data)/model_weights_2.pth", map_location=device))
model.to(device)

# id -> tag lookup used to turn label ids back into NER tag strings
New_NerDict = dict((v,k) for k,v in dict(Ner).items())

labels, predictions = valid(model, testing_loader)

# seqeval expects a list of tag sequences; everything is passed as one sequence
labels_value = [[New_NerDict[i.item()] for i in labels]]
pred_value = [[New_NerDict[i.item()] for i in predictions]]

print(classification_report(labels_value, pred_value,digits=4))

Validation loss per 100 evaluation steps: 0.017757471650838852
Validation loss per 100 evaluation steps: 0.08469126398109204
Validation loss per 100 evaluation steps: 0.07809033597149395
Validation loss per 100 evaluation steps: 0.0934187232423303
Validation loss per 100 evaluation steps: 0.10198358428577993
Validation loss per 100 evaluation steps: 0.11075056659021952
Validation loss per 100 evaluation steps: 0.10700300819012522
Validation loss per 100 evaluation steps: 0.10849449036317577
Validation loss per 100 evaluation steps: 0.10439855224685017
Validation loss per 100 evaluation steps: 0.1055319395327852
Validation loss per 100 evaluation steps: 0.11058808769564087
Validation loss per 100 evaluation steps: 0.11172829193431728
Validation loss per 100 evaluation steps: 0.11182309398852168
Validation loss per 100 evaluation steps: 0.11308905004093422
Validation loss per 100 evaluation steps: 0.11674190448808508
Validation loss per 100 evaluation steps: 0.12168450026227796
Validatio

In [None]:
len(testing_loader)

1727

In [None]:
# Baseline model: build the architecture, then overwrite it with saved weights.
# NOTE(review): this uses 'bert-base-cased' while the tokenizer and `model` above
# are 'bert-base-uncased'; the recorded RuntimeError may be a shape mismatch
# between the two vocabularies -- confirm which checkpoint the file was saved from.
model_1 = BertForTokenClassification.from_pretrained('bert-base-cased', num_labels=len(Ner_Tag))
# map_location lets a checkpoint written on a GPU load on the current `device`.
model_1.load_state_dict(torch.load("/content/drive/MyDrive/NLP/Final Project/Code/Baseline/Baseline Saved Model/pytorch_model.bin", map_location=device))
model_1.to(device)

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForTokenClassification: ['cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cas

RuntimeError: ignored

In [None]:
pip install seqeval



In [None]:
from seqeval.metrics import classification_report

# Invert the (tag, id) pairs into an id -> tag lookup and display it.
New_NerDict = {number: tag for tag, number in dict(Ner).items()}
New_NerDict

{0: 'O',
 1: 'B-PER',
 2: 'I-PER',
 3: 'B-ORG',
 4: 'I-ORG',
 5: 'B-LOC',
 6: 'I-LOC',
 7: 'B-MISC',
 8: 'I-MISC'}

In [None]:
# Score the baseline model on the test loader and report per-entity metrics.
labels, predictions = valid(model_1, testing_loader)

# id -> tag lookup, rebuilt here from the (tag, id) pairs
New_NerDict = {number: tag for tag, number in dict(Ner).items()}
New_NerDict

def _ids_to_tags(id_tensors):
    """Map an iterable of zero-dimensional id tensors to their tag strings."""
    return [New_NerDict[t.item()] for t in id_tensors]

# seqeval expects a list of tag sequences; everything is passed as one sequence
labels_value = [_ids_to_tags(labels)]
pred_value = [_ids_to_tags(predictions)]

print(classification_report(labels_value, pred_value,digits=4))

              precision    recall  f1-score   support

         LOC     0.9334    0.9166    0.9249      1666
        MISC     0.7435    0.7721    0.7575       702
         ORG     0.8414    0.9073    0.8731      1661
         PER     0.9547    0.9653    0.9600      1615

   micro avg     0.8870    0.9098    0.8983      5644
   macro avg     0.8682    0.8903    0.8789      5644
weighted avg     0.8888    0.9098    0.8989      5644



In [None]:
from datasets import load_metric
metric = load_metric("seqeval")

### 3) Save Model

In [None]:
import os

directory = "./model"

# exist_ok=True creates the folder only when missing, without a separate
# exists() check that could race with another process.
os.makedirs(directory, exist_ok=True)

# save vocabulary of the tokenizer
tokenizer.save_vocabulary(directory)
# save the model weights and its configuration file
model.save_pretrained(directory)
print('All files saved')

In [None]:
#torch.save(model, 'model.pth')

#torch.save(model.state_dict(), 'model_weights.pth')

In [None]:
# Mount Google Drive at /content/drive.
# NOTE: the checkpoint-loading cells earlier in this notebook read files under
# /content/drive/MyDrive/..., so this cell has to be run before them.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive
