In [None]:
# Install the third-party packages used by this notebook.
# NOTE(review): none of the installs is version-pinned, so a fresh run may resolve
# releases whose APIs differ from the ones used below; the recorded output of this
# cell shows torchtext 0.9.1 and transformers 4.6.1 - consider pinning.
!pip install torchtext --upgrade
!pip install transformers
!pip install pytorch_lightning
!pip install xmltodict

Requirement already up-to-date: torchtext in /usr/local/lib/python3.7/dist-packages (0.9.1)
Collecting transformers
[?25l  Downloading https://files.pythonhosted.org/packages/d5/43/cfe4ee779bbd6a678ac6a97c5a5cdeb03c35f9eaebbb9720b036680f9a2d/transformers-4.6.1-py3-none-any.whl (2.2MB)
[K     |████████████████████████████████| 2.3MB 7.6MB/s 
[?25hCollecting sacremoses
[?25l  Downloading https://files.pythonhosted.org/packages/75/ee/67241dc87f266093c533a2d4d3d69438e57d7a90abb216fa076e7d475d4a/sacremoses-0.0.45-py3-none-any.whl (895kB)
[K     |████████████████████████████████| 901kB 43.9MB/s 
Collecting huggingface-hub==0.0.8
  Downloading https://files.pythonhosted.org/packages/a1/88/7b1e45720ecf59c6c6737ff332f41c955963090a18e72acbcbeac6b25e86/huggingface_hub-0.0.8-py3-none-any.whl
Collecting tokenizers<0.11,>=0.10.1
[?25l  Downloading https://files.pythonhosted.org/packages/d4/e2/df3543e8ffdab68f5acc73f613de9c2b155ac47f162e725dcac87c521c11/tokenizers-0.10.3-cp37-cp37m-manylinux_2_

In [None]:
# Download the MAMS repository; the XML splits read below live in MAMS-for-ABSA/data/MAMS-ATSA/raw
!git clone https://github.com/siat-nlp/MAMS-for-ABSA.git

Cloning into 'MAMS-for-ABSA'...
remote: Enumerating objects: 91, done.[K
remote: Total 91 (delta 0), reused 0 (delta 0), pack-reused 91[K
Unpacking objects: 100% (91/91), done.


In [None]:
import os

# Folder holding the raw MAMS-ATSA XML splits inside the cloned repository.
data_folder = os.path.join("MAMS-for-ABSA", "data", "MAMS-ATSA", "raw")

# Every split is wrapped in a one-element list because MyDataset takes a list of paths.
training_file = [os.path.join(data_folder, "train.xml")]
dev_file = [os.path.join(data_folder, "val.xml")]
test_file = [os.path.join(data_folder, "test.xml")]


In [None]:
# here go all the imports
import torch
from torch import nn
from torch.utils.data import Dataset
from torchtext import data
from torchtext.vocab import Vectors

from transformers import BertTokenizer, BertModel


from pprint import pprint
from tqdm import tqdm
from torchtext.vocab import Vocab
from collections import Counter, defaultdict
import random
import numpy as np

from typing import *

import json
import re

import pytorch_lightning as pl
from torch.utils.data import DataLoader
import torch.optim as optim

from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.callbacks.model_checkpoint import ModelCheckpoint

from sklearn.metrics import f1_score
from sklearn.metrics import classification_report 

import xmltodict

Set the random seeds and request deterministic cuDNN algorithms so that the results are reproducible.

In [None]:
# Single seed shared by every random number generator used in the notebook.
SEED = 96

# Seed the Python, NumPy and PyTorch generators.
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Ask cuDNN for deterministic kernels and switch off its auto-tuner: with
# benchmarking enabled cuDNN may pick a different algorithm from one run to the next.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [None]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

#### Load Data

In [None]:
class MyDataset(Dataset):
    """Aspect-term sentiment samples read from a MAMS-style XML file.

    Every (sentence, aspect term) pair becomes one sample: the aspect term is
    replaced by the ``<target-term>`` marker, the sentence is lower-cased and the
    polarity is stored as a one-hot float tensor (positive, negative, neutral).
    """

    def __init__(self, 
                 input_files, 
                 device="cpu"):
        """
        Args:
            input_files (list of strings or paths): paths to the datasets; only the
                first element is read.
            device (string): device where to put the label tensors (cpu or cuda).
        """
        # open the file and load it to a list of dictionaries
        self.input_files = input_files
        with open(input_files[0]) as fin:
            self.data = xmltodict.parse(fin.read())["sentences"]["sentence"]

        # xmltodict returns a single dict (not a list) when an element occurs only once
        if isinstance(self.data, dict):
            self.data = [self.data]

        # associate labels to one-hot lists
        self.classification_labels = {
            "positive" : [1,0,0],
            "negative" : [0,1,0],
            "neutral" : [0,0,1]
        }

        self.device = device

        # count for each class the number of entries
        self.classes_number = {"negative":0, "positive":0, "neutral":0}

        self.__init_data__()

    @staticmethod
    def _aspect_targets(sentence):
        """Return ``[[from, to], term, polarity]`` for every aspect term of ``sentence``.

        A sentence without aspect terms yields an empty list and a sentence with a
        single aspect term (a dict instead of a list) is handled like the others.
        """
        container = sentence.get("aspectTerms") or {}
        aspect_terms = container.get("aspectTerm") or []
        if isinstance(aspect_terms, dict):
            aspect_terms = [aspect_terms]

        return [
            [[int(aspect_term["@from"]), int(aspect_term["@to"])],
             aspect_term["@term"],
             aspect_term["@polarity"]]
            for aspect_term in aspect_terms
        ]

    def __init_data__(self):
        """Fill ``self.encoded_data`` and ``self.classes_number`` from ``self.data``."""
        self.encoded_data = list()

        for sentence in self.data:
            raw_text = sentence["text"]

            for (start, end), _term, polarity in self._aspect_targets(sentence):
                assert polarity in ["negative", "positive", "conflict", "neutral"], \
                    "unexpected polarity: {!r}".format(polarity)

                # "conflict" is a legal annotation but has no one-hot encoding and no
                # counter (the classifier is three-way), so such targets are left out
                if polarity not in self.classification_labels:
                    continue

                # The offsets refer to the original text, so the target is cut out
                # before lower-casing (lower-casing can change the string length).
                text_with_target = (raw_text[:start] + " <target-term> " + raw_text[end:]).lower()

                # update the counter of the obtained class
                self.classes_number[polarity] += 1

                # one-hot label as a float tensor on the requested device
                label = torch.tensor(self.classification_labels[polarity], dtype=torch.float).to(self.device)

                self.encoded_data.append({"inputs":text_with_target, 
                                          "outputs":label})

    def get_classes_number(self):
        """Return the dict mapping each polarity to its number of samples."""
        return self.classes_number

    def __len__(self):
        """Number of (sentence, aspect term) samples."""
        return len(self.encoded_data)

    def __getitem__(self, idx):
        """Return the sample at ``idx`` as ``{"inputs": str, "outputs": tensor}``."""
        return self.encoded_data[idx]
    

In [None]:
# Parse the training split once, only to obtain the number of samples per class
# (no device is passed, so the label tensors of this instance stay on "cpu").
dataset = MyDataset(training_file)
classes_number = dataset.get_classes_number()

In [None]:
# Loss weights: each class gets 1 - (its share of the training entries), so that
# classes with fewer entries cost more when misclassified.
# The order follows the one-hot layout of the labels: positive, negative, neutral.
_weighted_labels = ("positive", "negative", "neutral")
total_entries = sum(classes_number[_label] for _label in _weighted_labels)
weights = [(total_entries - classes_number[_label]) / total_entries for _label in _weighted_labels]

In [None]:
class MyModel(nn.Module):
    """BERT encoder with a linear classifier on top of the ``<target-term>`` token.

    The hidden states produced at the target position by the last
    ``NUM_CONCAT_LAYERS`` encoder layers are concatenated, passed through dropout
    and projected onto the classes.

    Args:
        hparams: object exposing ``dropout`` and ``num_classes`` as attributes.
        tokenizer: the tokenizer used to tokenize the sentences; its length is used
            to resize the embedding matrix.
    """

    # number of final encoder layers whose outputs are concatenated
    NUM_CONCAT_LAYERS = 4

    def __init__(self, hparams, tokenizer):
        super(MyModel, self).__init__()

        # download the bert pretrained model
        self.bert = BertModel.from_pretrained('bert-large-uncased', output_hidden_states=True)

        # resize the embeddings since a new token (<target-term>) was added
        self.bert.resize_token_embeddings(len(tokenizer))

        self.dropout = nn.Dropout(hparams.dropout)

        # The classifier input is the concatenation of NUM_CONCAT_LAYERS hidden
        # states; the width is read from the encoder config instead of hard-coding
        # 1024 (the hidden size of bert-large).
        hidden_size = self.bert.config.hidden_size
        self.lin1 = torch.nn.Linear(self.NUM_CONCAT_LAYERS * hidden_size, hparams.num_classes)

    def forward(self, input, indices):
        """Return the class logits, one row per sentence.

        Args:
            input: mapping unpacked into the BERT call; must contain ``input_ids``.
            indices: for each sentence, the position of the ``<target-term>`` word piece.
        """
        bert_out = self.bert(**input)

        # one row index per sentence, paired with that sentence's target position
        batch_rows = range(input["input_ids"].shape[0])

        # target-position output of the last layers, ordered from the last one backwards
        target_states = [
            bert_out.hidden_states[-depth][batch_rows, indices, :]
            for depth in range(1, self.NUM_CONCAT_LAYERS + 1)
        ]

        features = torch.cat(target_states, dim=-1)

        return self.lin1(self.dropout(features))

        

#### Pytorch Lightning Module (Train, Dev and Test code)

In [None]:
class MyLightningModule(pl.LightningModule):
    """Lightning module that trains, validates and tests ``MyModel``.

    Args:
        hparams: mapping with ``num_classes``, ``dropout`` and ``weights`` (the
            per-class weights of the cross-entropy loss).
        tokenizer: tokenizer forwarded to ``MyModel``.

    Every batch is a dict containing
        inputs: tokenized sentences (sequence of integers) padded, and attention mask
        outputs: correct labelling (one-hot)
        indices: position of the <target-term> word piece in each sentence
    """

    # Class ids in one-hot order (positive, negative, neutral). Passed to scikit-learn
    # so the per-class F1 vector always has three entries, even if a class never occurs.
    CLASS_IDS = [0, 1, 2]

    def __init__(self, hparams, tokenizer, *args, **kwargs):
        super(MyLightningModule, self).__init__(*args, **kwargs)

        # save hparams
        self.save_hyperparameters(hparams)

        # weighted CrossEntropyLoss for multiclass classification
        self.loss_function = nn.CrossEntropyLoss(weight=self.hparams.weights)

        # the model we want to train
        self.model = MyModel(self.hparams, tokenizer)

        # predictions collected over one test run (reset at the first test batch)
        self.logits = []
        self.labels_indices = []
        self.arg_maxs = []

    def forward(self, x, indices):
        """Forward pass of the model; returns the predicted logits.

        Args:
            x: dictionary containing the input ids and the attention mask (the parameters of bert).
            indices: the position of the target term for each sentence.
        """
        return self.model(x, indices)

    def _predict(self, batch):
        """Return ``(logits, predicted class ids, gold class ids)`` for a batch."""
        logits = self.forward(batch['inputs'], batch['indices'])

        # softmax is monotonic, so the arg-max of the logits is the predicted class
        predicted = torch.argmax(logits, dim=-1)
        # the gold labels are one-hot encoded
        gold = torch.argmax(batch['outputs'], dim=-1)

        return logits, predicted, gold

    def training_step(self, batch, batch_nb):
        """Compute and log the training loss of one batch."""
        logits, _, gold = self._predict(batch)

        loss = self.loss_function(logits, gold)

        self.log('train_loss', loss, prog_bar=True)

        return loss

    def validation_step(self, batch, batch_nb):
        """Log the loss and the macro F1 of one validation batch.

        Both values are logged per batch; the F1 is therefore computed on the batch
        only, not on the whole validation set.
        """
        logits, predicted, gold = self._predict(batch)

        valid_loss = self.loss_function(logits, gold)
        sample_f1 = f1_score(gold.detach().cpu(), predicted.detach().cpu(), average="macro")

        self.log('valid_loss', valid_loss, prog_bar=True)
        self.log('valid_f1', sample_f1, prog_bar=True)

    def test_step(self, batch, batch_nb):
        """Collect the predictions of one test batch; metrics are computed at epoch end."""
        # a new test run starts at batch 0: drop what a previous run left behind
        if batch_nb == 0:
            self.logits = []
            self.labels_indices = []
            self.arg_maxs = []

        logits, predicted, gold = self._predict(batch)

        self.logits += logits.detach().cpu().tolist()
        self.labels_indices += gold.detach().cpu().tolist()
        self.arg_maxs += predicted.detach().cpu().tolist()

    def test_epoch_end(self, outputs):
        """Compute, log and print the metrics over the whole test set.

        Doing this once here (PyTorch Lightning 1.x epoch-end hook) rather than in
        every ``test_step`` makes the logged value the F1 of the full test set
        instead of an average of partial, running F1 scores.
        """
        if not self.labels_indices:
            return

        sample_f1 = f1_score(self.labels_indices, self.arg_maxs,
                             labels=self.CLASS_IDS, average="macro")
        f1_per_class = f1_score(self.labels_indices, self.arg_maxs,
                                labels=self.CLASS_IDS, average=None)

        self.log('test_f1', sample_f1, prog_bar=True)
        self.log('f1 positive', f1_per_class[0], prog_bar=True)
        self.log('f1 negative', f1_per_class[1], prog_bar=True)
        # NOTE(review): the key is misspelled ("neutal"); kept as is so that metric
        # names stay compatible with results that were already logged
        self.log('f1 neutal', f1_per_class[2], prog_bar=True)

        print(classification_report(self.labels_indices, self.arg_maxs))

    def configure_optimizers(self):
        """Adam over all the parameters with learning rate 1e-5."""
        return optim.Adam(self.parameters(), lr=0.000_01)


In [None]:
# The aspect term of every sentence is replaced by the "<target-term>" marker, so the
# marker is registered as an additional special token of the BERT tokenizer.
tokenizer = BertTokenizer.from_pretrained(
    'bert-large-uncased',
    additional_special_tokens=["<target-term>"],
)

# Id of the marker: encode the marker alone and read position 1 of the first (and
# only) sequence - presumably position 0 holds the [CLS] token the tokenizer prepends.
_marker_encoding = tokenizer("<target-term>", return_tensors="pt")
target_id = _marker_encoding["input_ids"][0][1]

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=231508.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=28.0, style=ProgressStyle(description_w…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=466062.0, style=ProgressStyle(descripti…

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.





In [None]:
def collate_fn(data):
    """Merge a list of dataset samples into one padded batch.

    Args:
        data: list of dictionaries containing each
            inputs : the sentence where the aspect term has been replaced with the special token
            outputs: the true label (tensor)

    Returns:
        dict with
            inputs : the tokenizer output for all the sentences, padded
            outputs: the true labels stacked row-wise
            indices: list holding, for each sentence, a one-element tensor with the
                     position of the target token

    Raises:
        ValueError: if a sentence does not contain the target token exactly once
            (the model reads exactly one position per sentence).
    """
    # build a list of sentences, tokenize and pad them
    sentences = [entry["inputs"] for entry in data]
    X = tokenizer(sentences, return_tensors="pt", padding=True)

    # stack the true labels
    y = torch.vstack([entry["outputs"] for entry in data])

    # get, for each sentence, the index where the target term is
    index = []
    for row, token_ids in enumerate(X["input_ids"]):
        positions = (token_ids == target_id).nonzero(as_tuple=True)[0]
        if positions.numel() != 1:
            raise ValueError(
                "sentence {} of the batch contains the target token {} time(s), expected exactly 1".format(
                    row, positions.numel()))
        index.append(positions)

    return {"inputs":X, "outputs":y, "indices":index}

In [None]:
class MyLightningDataModule(pl.LightningDataModule):
    """Data module providing the train, dev and test loaders.

    Args:
        training_file: list of training file paths, passed as is to MyDataset
        dev_file: list of dev file paths, passed as is to MyDataset
        test_file: list of test file paths, passed as is to MyDataset
        collate_fn: the function that merges a list of samples into a batch
        device: device handed to MyDataset for the label tensors
        train_batch_size: batch size of the (shuffled) training loader
        val_batch_size: batch size of the dev loader
        test_batch_size: batch size of the test loader; when None it is a fifth of
            the dev set size (the original behaviour), but never less than 1
    """
    def __init__(self, training_file, dev_file, test_file, collate_fn, device="cpu",
                 train_batch_size=32, val_batch_size=256, test_batch_size=None):
        super().__init__()
        self.training_file = training_file
        self.dev_file = dev_file
        self.test_file = test_file
        self.collate_fn = collate_fn
        self.device = device
        self.train_batch_size = train_batch_size
        self.val_batch_size = val_batch_size
        self.test_batch_size = test_batch_size

    def setup(self, stage=None):
        """Build the three datasets (``stage`` is ignored: all of them are always built)."""
        self.trainingset = MyDataset(self.training_file, device=self.device)
        self.devset = MyDataset(self.dev_file, device=self.device)
        self.testset = MyDataset(self.test_file, device=self.device)

    def train_dataloader(self):
        """Shuffled loader over the training set."""
        return DataLoader(self.trainingset, batch_size=self.train_batch_size,
                          collate_fn=self.collate_fn, shuffle=True)

    def val_dataloader(self):
        """Loader over the dev set."""
        return DataLoader(self.devset, batch_size=self.val_batch_size, collate_fn=self.collate_fn)

    def test_dataloader(self):
        """Loader over the test set."""
        batch_size = self.test_batch_size
        if batch_size is None:
            # NOTE(review): the default is derived from the dev set, not the test set;
            # kept for backward compatibility. max() avoids a batch size of 0 on tiny dev sets.
            batch_size = max(1, len(self.devset) // 5)
        return DataLoader(self.testset, batch_size=batch_size, collate_fn=self.collate_fn)
    

#### Define Hyperparams and start Training

In [None]:
# params of the model:
#   num_classes - size of the classifier output (positive, negative, neutral)
#   dropout     - dropout probability applied before the final linear layer
#   weights     - per-class weights of the cross-entropy loss, computed above from
#                 the class counts of the training split
hparams = { 
    'num_classes': 3, 
    'dropout': 0.3,
    'weights': torch.tensor(weights)
    }

In [None]:
# Early stopping: give up once "valid_loss" has stopped decreasing (patience of 3).
early_stop_callback = EarlyStopping(monitor="valid_loss", min_delta=0.00, patience=3, verbose=False, mode="min")

# Checkpointing: keep the weights with the lowest "valid_loss" as checkpoints/best_model.
checkpoint_callback = ModelCheckpoint(dirpath="checkpoints", filename="best_model", monitor="valid_loss", mode="min")

# Data module serving the three splits.
data_module = MyLightningDataModule(training_file, dev_file, test_file, collate_fn, device=device)

# One GPU when running on cuda, none otherwise; validation runs once per epoch.
_gpus = 1 if device != torch.device('cpu') else None
trainer = pl.Trainer(
    val_check_interval=1.0,
    max_epochs=20,
    gpus=_gpus,
    callbacks=[early_stop_callback, checkpoint_callback],
)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores


In [None]:
# Initialize the Lightning module (this loads the pretrained bert-large-uncased
# weights) and train it; early stopping and checkpointing are handled by the
# callbacks registered on the trainer.
model = MyLightningModule(hparams, tokenizer=tokenizer)
trainer.fit(model, datamodule=data_module)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=571.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1344997306.0, style=ProgressStyle(descr…




Some weights of the model checkpoint at bert-large-uncased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type             | Params
---------------------------------------------------
0 | loss_func

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validation sanity check', layout=Layout…



HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…




#### Model Evaluation

In [None]:
# test the model as left by the last training epoch (the "overfitted" model, as
# opposed to the min-validation-loss checkpoint that is reloaded and tested below)
trainer.test(model, test_dataloaders=data_module.test_dataloader())

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Testing', layout=Layout(flex='2'), max=…

              precision    recall  f1-score   support

           0       0.84      0.81      0.82        83
           1       0.74      0.80      0.77        65
           2       0.86      0.85      0.85       118

    accuracy                           0.82       266
   macro avg       0.81      0.82      0.82       266
weighted avg       0.83      0.82      0.82       266

              precision    recall  f1-score   support

           0       0.82      0.79      0.80       151
           1       0.79      0.78      0.78       144
           2       0.82      0.85      0.83       237

    accuracy                           0.81       532
   macro avg       0.81      0.80      0.81       532
weighted avg       0.81      0.81      0.81       532

              precision    recall  f1-score   support

           0       0.82      0.79      0.81       238
           1       0.77      0.80      0.79       194
           2       0.85      0.84      0.84       366

    accuracy        

[{'f1 negative': 0.7867175936698914,
  'f1 neutal': 0.8440611362457275,
  'f1 positive': 0.8152337074279785,
  'test_f1': 0.8153374195098877}]

In [None]:
# load the best model (the one with the lowest validation loss)
# The hyper-parameters are restored from the checkpoint itself (they were stored by
# save_hyperparameters), so only the tokenizer has to be supplied. They must not be
# passed positionally: the second positional parameter of load_from_checkpoint is
# map_location, not the module's hparams.
chk_path = "checkpoints/best_model.ckpt"
model = MyLightningModule.load_from_checkpoint(chk_path, tokenizer=tokenizer)

Some weights of the model checkpoint at bert-large-uncased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [None]:
# test the best model (the checkpoint reloaded above) with the same trainer and test split
trainer.test(model, test_dataloaders=data_module.test_dataloader())

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Testing', layout=Layout(flex='2'), max=…

              precision    recall  f1-score   support

           0       0.83      0.82      0.82        83
           1       0.77      0.78      0.78        65
           2       0.83      0.83      0.83       118

    accuracy                           0.82       266
   macro avg       0.81      0.81      0.81       266
weighted avg       0.82      0.82      0.82       266

              precision    recall  f1-score   support

           0       0.80      0.81      0.81       151
           1       0.79      0.76      0.78       144
           2       0.80      0.81      0.81       237

    accuracy                           0.80       532
   macro avg       0.80      0.80      0.80       532
weighted avg       0.80      0.80      0.80       532

              precision    recall  f1-score   support

           0       0.80      0.83      0.81       238
           1       0.78      0.79      0.78       194
           2       0.84      0.81      0.82       366

    accuracy        

[{'f1 negative': 0.7735458612442017,
  'f1 neutal': 0.8203343749046326,
  'f1 positive': 0.816323459148407,
  'test_f1': 0.8034012317657471}]

Min Loss:
              
              precision    recall  f1-score   support

           0       0.80      0.83      0.82       400
           1       0.75      0.78      0.77       329
           2       0.84      0.80      0.82       607

        acc                            0.81      1336
        M avg      0.80      0.81      0.80      1336
        W avg      0.81      0.81      0.81      1336

Last Model:
              
              precision    recall  f1-score   support

           0       0.83      0.81      0.82       400
           1       0.76      0.82      0.79       329
           2       0.85      0.84      0.84       607

        acc                            0.82      1336
        M avg      0.82      0.82      0.82      1336
        W avg      0.83      0.82      0.82      1336

In [None]:
# load the TensorBoard notebook extension (it provides the %tensorboard magic)
%load_ext tensorboard

In [None]:
# open TensorBoard on lightning_logs/ - presumably where the Trainer's default logger
# wrote the train/validation/test metrics; verify the directory exists after training
%tensorboard --logdir lightning_logs/
