**Install required dependencies and extensions**

In [2]:
!pip install transformers
!pip install sklearn
!pip install PyDrive
%load_ext tensorboard
!pip install tensorboard

Collecting transformers
[?25l  Downloading https://files.pythonhosted.org/packages/98/87/ef312eef26f5cecd8b17ae9654cdd8d1fae1eb6dbd87257d6d73c128a4d0/transformers-4.3.2-py3-none-any.whl (1.8MB)
[K     |████████████████████████████████| 1.8MB 14.4MB/s 
Collecting sacremoses
[?25l  Downloading https://files.pythonhosted.org/packages/7d/34/09d19aff26edcc8eb2a01bed8e98f13a1537005d31e95233fd48216eed10/sacremoses-0.0.43.tar.gz (883kB)
[K     |████████████████████████████████| 890kB 55.5MB/s 
Collecting tokenizers<0.11,>=0.10.1
[?25l  Downloading https://files.pythonhosted.org/packages/71/23/2ddc317b2121117bf34dd00f5b0de194158f2a44ee2bf5e47c7166878a97/tokenizers-0.10.1-cp37-cp37m-manylinux2010_x86_64.whl (3.2MB)
[K     |████████████████████████████████| 3.2MB 52.3MB/s 
Building wheels for collected packages: sacremoses
  Building wheel for sacremoses (setup.py) ... [?25l[?25hdone
  Created wheel for sacremoses: filename=sacremoses-0.0.43-cp37-none-any.whl size=893262 sha256=b15abea26f

**Import required libs**

In [3]:
import os
import math
import logging
import numpy as np

from sklearn import metrics

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, random_split
from torch.nn.init import xavier_uniform_
from torch.optim import Adam
from torch.utils.data import Dataset

from torch.utils.tensorboard import SummaryWriter

from transformers import BertModel
from transformers import BertTokenizer
from transformers.models.bert.modeling_bert import BertPooler, BertSelfAttention

from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

from google.colab import drive

**Define data utils**

In [4]:
def pad_and_truncate(sequence, maxlen, dtype='int64', padding='post', truncating='post', value=0):
    """Return ``sequence`` as a 1-D NumPy array of exactly ``maxlen`` elements.

    Args:
        sequence: Sliceable sequence of numbers (e.g. a list of token ids).
        maxlen: Length of the returned array.
        dtype: NumPy dtype of the returned array.
        padding: ``'post'`` puts the data at the start and pads the end;
            any other value pads the start instead.
        truncating: ``'pre'`` keeps the last ``maxlen`` items of an over-long
            sequence; any other value keeps the first ``maxlen`` items.
        value: Fill value used for the padded positions.

    Returns:
        ``numpy.ndarray`` of shape ``(maxlen,)`` and the requested ``dtype``.
    """
    x = (np.ones(maxlen) * value).astype(dtype)
    if maxlen == 0:
        # ``sequence[-0:]`` would keep the WHOLE sequence, so bail out early.
        return x
    if truncating == 'pre':
        trunc = sequence[-maxlen:]
    else:
        trunc = sequence[:maxlen]
    trunc = np.asarray(trunc, dtype=dtype)
    if len(trunc) == 0:
        # Nothing to copy; ``x[-0:]`` would select the whole array and the
        # assignment of an empty array would fail to broadcast.
        return x
    if padding == 'post':
        x[:len(trunc)] = trunc
    else:
        x[-len(trunc):] = trunc
    return x

class Tokenizer4Bert:
    """Wraps a pretrained ``BertTokenizer`` and emits fixed-length id arrays."""

    def __init__(self, max_seq_len, pretrained_bert_name):
        """Load the tokenizer named ``pretrained_bert_name`` and remember ``max_seq_len``."""
        self.tokenizer = BertTokenizer.from_pretrained(pretrained_bert_name)
        self.max_seq_len = max_seq_len

    def text_to_sequence(self, text, reverse=False, padding='post', truncating='post'):
        """Tokenize ``text`` and return its ids padded/truncated to ``max_seq_len``.

        A text that yields no tokens is represented by the single id ``0``.
        With ``reverse=True`` the id order is flipped before padding.
        ``padding`` and ``truncating`` are forwarded to ``pad_and_truncate``.
        """
        tokens = self.tokenizer.tokenize(text)
        token_ids = self.tokenizer.convert_tokens_to_ids(tokens)
        if len(token_ids) == 0:
            token_ids = [0]
        ordered_ids = token_ids[::-1] if reverse else token_ids
        return pad_and_truncate(
            ordered_ids,
            self.max_seq_len,
            padding=padding,
            truncating=truncating,
        )


class SADataset(Dataset):
    """Aspect-based sentiment dataset read from a three-lines-per-record text file.

    Each record consists of:
      1. the sentence, with the aspect term replaced by the placeholder ``$T$``;
      2. the aspect term;
      3. the polarity as an integer (shifted by +1 when stored, so that it can be
         used as a class index — presumably raw labels are -1/0/1; verify against
         the data files).

    Every item is a dict with the keys ``polarity``, ``concat_bert_indexes``,
    ``concat_segments_indexes``, ``text_bert_indexes`` and ``aspect_bert_indexes``.
    """

    def __init__(self, fname, tokenizer):
        """Read ``fname`` and pre-compute all model inputs.

        Args:
            fname: Path of the raw data file.
            tokenizer: Object exposing ``text_to_sequence(text)`` (returning a
                fixed-length NumPy id array) and ``max_seq_len``.
        """
        with open(fname, 'r', encoding='utf-8', newline='\n', errors='ignore') as fin:
            lines = fin.readlines()

        all_data = []
        for i in range(0, len(lines), 3):
            text_left, _, text_right = [s.lower().strip() for s in lines[i].partition("$T$")]
            aspect = lines[i + 1].lower().strip()
            polarity = int(lines[i + 2].strip()) + 1

            sentence = text_left + " " + aspect + " " + text_right

            # Token counts are taken from the non-padding (non-zero) ids.
            text_indexes = tokenizer.text_to_sequence(sentence)
            # The aspect has to be tokenized before counting: comparing the raw
            # string with 0 is always True and would yield a length of 1.
            aspect_indexes = tokenizer.text_to_sequence(aspect)
            text_len = int(np.sum(text_indexes != 0))
            aspect_len = int(np.sum(aspect_indexes != 0))

            concat_bert_indexes = tokenizer.text_to_sequence(
                '[CLS] ' + sentence + ' [SEP] ' + aspect + " [SEP]")
            # Segment 0: "[CLS] sentence [SEP]"; segment 1: "aspect [SEP]".
            concat_segments_indexes = [0] * (text_len + 2) + [1] * (aspect_len + 1)
            concat_segments_indexes = pad_and_truncate(concat_segments_indexes, tokenizer.max_seq_len)

            text_bert_indexes = tokenizer.text_to_sequence("[CLS] " + sentence + " [SEP]")
            aspect_bert_indexes = tokenizer.text_to_sequence("[CLS] " + aspect + " [SEP]")

            all_data.append({
                'polarity': polarity,
                'concat_bert_indexes': concat_bert_indexes,
                'concat_segments_indexes': concat_segments_indexes,
                'text_bert_indexes': text_bert_indexes,
                'aspect_bert_indexes': aspect_bert_indexes
            })
        self.data = all_data

    def __getitem__(self, index):
        """Return the pre-computed feature dict at ``index``."""
        return self.data[index]

    def __len__(self):
        """Return the number of records."""
        return len(self.data)


**Load train/test data**

ABSC (aspect-based sentiment classification) Twitter dataset

In [5]:
# Make Google Drive available under /content/gdrive (Colab only).
drive.mount('/content/gdrive')

# Locations of the raw train/test files on the mounted drive.
train_path = "/content/gdrive/MyDrive/ML/sentiment/dataset/ABSA/train.raw"
test_path = "/content/gdrive/MyDrive/ML/sentiment/dataset/ABSA/test.raw"

Mounted at /content/gdrive


**Bert models**

1. BERT-based.
  This model takes the BERT pooler output of shape (batch_size, embedding_size) and passes it through a dense layer to get the prediction.
2. Local Context Focus (LCF) BERT model.
This model computes representations of both the global context and the local context, then concatenates them to get better results for aspect-based sentiment classification. For more information, read the original paper: https://www.researchgate.net/publication/335238076_LCF_A_Local_Context_Focus_Mechanism_for_Aspect-Based_Sentiment_Classification

In [6]:
class BertBased(nn.Module):
  """Baseline classifier: BERT pooled output -> dropout -> linear layer."""

  def __init__(self, bert, opt):
    """Store the encoder and build the head from ``opt``.

    ``opt`` supplies ``dropout``, ``bert_dim`` and ``polarities_dim``.
    """
    super().__init__()
    self.bert = bert
    self.dropout = nn.Dropout(opt['dropout'])
    self.dense = nn.Linear(opt['bert_dim'], opt['polarities_dim'])

  def forward(self, inputs):
    """Return class logits for ``inputs = [token_ids, segment_ids, ...]``."""
    token_ids = inputs[0]
    segment_ids = inputs[1]
    encoded = self.bert(token_ids, token_type_ids=segment_ids)
    return self.dense(self.dropout(encoded.pooler_output))

In [7]:
class SelfAttention(nn.Module):
  """Applies a ``BertSelfAttention`` layer with an all-zero mask, followed by tanh."""

  def __init__(self, config, opt):
    """Build the attention layer from a BERT ``config``.

    ``opt`` is stored for backward compatibility; ``forward`` no longer reads it.
    """
    super(SelfAttention, self).__init__()
    self.opt = opt
    self.config = config
    self.SA = BertSelfAttention(config)
    self.tanh = torch.nn.Tanh()

  def forward(self, inputs):
    """Return ``tanh`` of the first element of the attention layer's output.

    Args:
      inputs: Tensor whose first two dimensions are (batch, sequence length).
    """
    # Build the zero mask straight from the input so that it always matches the
    # actual batch size, sequence length, dtype and device (no NumPy round-trip,
    # no dependency on opt['max_seq_len'] / opt['device']).
    attention_mask = inputs.new_zeros((inputs.size(0), 1, 1, inputs.size(1)))
    SA_out = self.SA(inputs, attention_mask)
    return self.tanh(SA_out[0])

# Local Context Focus
class LFC_BERT(nn.Module):
  """Local Context Focus BERT classifier.

  Combines a "global" encoding of the sentence/aspect pair with a "local"
  encoding of the sentence in which every token further than ``opt['SRD']``
  positions from the aspect term is zeroed out.
  """

  def __init__(self, bert, opt):
    """Build the model around the encoder ``bert``.

    ``opt`` supplies ``dropout``, ``bert_dim``, ``polarities_dim``, ``SRD``,
    ``max_seq_len`` and ``device``.
    """
    super(LFC_BERT, self).__init__()

    self.bert_spc = bert
    self.opt = opt

    # Same module object as ``bert_spc``: both branches share one set of weights.
    self.bert_local = bert
    self.dropout = nn.Dropout(opt['dropout'])
    self.bert_SA = SelfAttention(bert.config, opt)
    self.linear_double = nn.Linear(opt['bert_dim'] * 2, opt['bert_dim'])
    # Not used by ``forward``; kept so existing checkpoints keep loading.
    self.linear_single = nn.Linear(opt['bert_dim'], opt['bert_dim'])
    self.bert_pooler = BertPooler(bert.config)
    self.dense = nn.Linear(opt['bert_dim'], opt['polarities_dim'])

  def feature_dymanic_mask(self, text_local_indexes, aspect_indexes):
    """Build the 0/1 feature mask that keeps only the aspect's local context.

    Args:
      text_local_indexes: Integer tensor (batch, seq) of sentence token ids.
      aspect_indexes: Integer tensor (batch, seq) of aspect token ids; position 1
        is taken as the first aspect token and the non-zero count minus 2 as the
        aspect length (i.e. "[CLS] aspect [SEP]" with zero padding is assumed).

    Returns:
      float32 tensor of shape (batch, opt['max_seq_len'], opt['bert_dim']) on
      ``opt['device']``. Rows whose first aspect token does not occur in the
      sentence are left unmasked (all ones).
    """
    texts = text_local_indexes.cpu().numpy()
    asps = aspect_indexes.cpu().numpy()
    mask_len = self.opt['SRD']
    max_seq_len = self.opt['max_seq_len']
    masked_text_raw_indexes = np.ones(
        (text_local_indexes.size(0), max_seq_len, self.opt['bert_dim']), dtype=np.float32)

    for row, (text_ids, asp_ids) in enumerate(zip(texts, asps)):
      asp_len = np.count_nonzero(asp_ids) - 2
      hits = np.argwhere(text_ids == asp_ids[1])
      if len(hits) == 0:
        # Aspect token not present (e.g. truncated away): keep the row unmasked.
        continue
      asp_begin = int(hits[0][0])
      mask_begin = max(asp_begin - mask_len, 0)
      mask_end = max(asp_begin + asp_len + mask_len, 0)
      # Zero everything outside [mask_begin, mask_end).
      masked_text_raw_indexes[row, :mask_begin] = 0.0
      masked_text_raw_indexes[row, mask_end:] = 0.0
    masked_text_raw_indexes = torch.from_numpy(masked_text_raw_indexes)
    return masked_text_raw_indexes.to(self.opt['device'])

  def forward(self, inputs):
    """Return class logits.

    ``inputs`` is ``[concat ids, concat segment ids, sentence ids, aspect ids]``.
    """
    text_bert_indexes = inputs[0]
    bert_segments_ids = inputs[1]
    text_local_indexes = inputs[2]
    aspect_indexes = inputs[3]

    # Global branch: sentence/aspect pair.
    bert_spc_out = self.bert_spc(text_bert_indexes, token_type_ids=bert_segments_ids)
    bert_spc_out = self.dropout(bert_spc_out.last_hidden_state)

    # Local branch: sentence only, masked around the aspect.
    bert_local_out = self.bert_local(text_local_indexes)
    bert_local_out = self.dropout(bert_local_out.last_hidden_state)

    masked_local_text_vec = self.feature_dymanic_mask(text_local_indexes, aspect_indexes)
    bert_local_out = torch.mul(bert_local_out, masked_local_text_vec)

    # Fuse both branches and classify.
    out_cat = torch.cat((bert_local_out, bert_spc_out), dim=-1)
    mean_pool = self.linear_double(out_cat)
    self_attention_out = self.bert_SA(mean_pool)
    pooled_out = self.bert_pooler(self_attention_out)
    dense_out = self.dense(pooled_out)

    return dense_out

**Create Train Task**

In [17]:
logging.basicConfig(level=logging.INFO)

class TrainTask:
    """Fine-tunes and evaluates one model configuration described by ``opt``."""

    def __init__(self, opt):
        """Build the tokenizer, model and datasets.

        Reads the notebook-level globals ``train_path`` and ``test_path``.
        ``opt`` supplies ``model_name``, ``model_class``, ``max_seq_len``,
        ``pretrained_bert_name`` and ``device`` (plus whatever the model reads).
        """
        self.opt = opt
        self.summary_writer = SummaryWriter(comment=opt['model_name'])

        tokenizer = Tokenizer4Bert(opt['max_seq_len'], opt['pretrained_bert_name'])
        bert = BertModel.from_pretrained(opt['pretrained_bert_name'])
        self.model = opt['model_class'](bert, opt).to(opt['device'])

        self.trainset = SADataset(train_path, tokenizer)
        self.testset = SADataset(test_path, tokenizer)
        # NOTE(review): the test set doubles as the validation set, so the
        # checkpoint is selected on the same data the final score is reported on.
        self.valset = self.testset

    def train(self, criterion, optimizer, train_data_loader, val_data_loader):
        """Run ``opt['num_epoch']`` epochs and checkpoint on validation accuracy.

        Returns:
            Path of the last saved (best-accuracy) checkpoint, or ``None`` when
            no epoch achieved a validation accuracy above 0.
        """
        max_val_acc = 0
        global_step = 0
        path = None
        for i_epoch in range(self.opt['num_epoch']):
            logging.info('>' * 100)
            logging.info('epoch: {}'.format(i_epoch))
            n_correct, n_total, loss_total = 0, 0, 0
            # switch model to training mode
            self.model.train()
            for batch in train_data_loader:
                global_step += 1
                # clear gradient accumulators
                optimizer.zero_grad()

                inputs = [batch[col].to(self.opt['device']) for col in self.opt['input_columns']]
                outputs = self.model(inputs)
                targets = batch['polarity'].to(self.opt['device'])

                loss = criterion(outputs, targets)
                loss.backward()
                optimizer.step()

                # Running (per-epoch) training statistics.
                n_correct += (torch.argmax(outputs, -1) == targets).sum().item()
                n_total += len(outputs)
                loss_total += loss.item() * len(outputs)
                if global_step % self.opt['log_step'] == 0:
                    train_acc = n_correct / n_total
                    train_loss = loss_total / n_total
                    logging.info('loss: {:.4f}, acc: {:.4f}'.format(train_loss, train_acc))

            train_acc, train_f1 = self.evaluate_acc_f1(train_data_loader)
            val_acc, val_f1 = self.evaluate_acc_f1(val_data_loader)

            self.summary_writer.add_scalar("Accuracy/train", train_acc, i_epoch + 1)
            self.summary_writer.add_scalar("F1/train", train_f1, i_epoch + 1)

            self.summary_writer.add_scalar("Accuracy/test", val_acc, i_epoch + 1)
            self.summary_writer.add_scalar("F1/test", val_f1, i_epoch + 1)

            logging.info('Accuracy: {}. F1: {}'.format(val_acc, val_f1))

            if val_acc > max_val_acc:
                max_val_acc = val_acc
                model_name = self.opt['model_name']
                accuracy = round(val_acc, 4)
                path = F"/content/gdrive/MyDrive/ML/sentiment/models/{model_name}_{accuracy}"
                torch.save(self.model.state_dict(), path)
                logging.info('>> saved: {}'.format(path))

        # Make sure every logged scalar reaches the event file.
        self.summary_writer.flush()
        return path

    def evaluate_acc_f1(self, data_loader):
        """Return ``(accuracy, macro-F1)`` of the model on ``data_loader``.

        An empty loader yields ``(0.0, 0.0)``.
        """
        n_correct, n_total = 0, 0
        target_chunks, prediction_chunks = [], []
        # switch model to evaluation mode
        self.model.eval()
        with torch.no_grad():
            for t_batch in data_loader:
                t_inputs = [t_batch[col].to(self.opt['device']) for col in self.opt['input_columns']]
                t_targets = t_batch['polarity'].to(self.opt['device'])
                t_predictions = torch.argmax(self.model(t_inputs), -1)

                n_correct += (t_predictions == t_targets).sum().item()
                n_total += len(t_predictions)

                # Collect per-batch results; concatenate once after the loop.
                target_chunks.append(t_targets)
                prediction_chunks.append(t_predictions)

        if n_total == 0:
            return 0.0, 0.0

        acc = n_correct / n_total
        f1 = metrics.f1_score(torch.cat(target_chunks, dim=0).cpu(),
                              torch.cat(prediction_chunks, dim=0).cpu(),
                              labels=[0, 1, 2], average='macro')
        return acc, f1

    def run(self):
        """Train, reload the best checkpoint (if any) and log the test metrics."""
        # Loss and Optimizer
        criterion = nn.CrossEntropyLoss()
        optimizer = Adam(self.model.parameters(), lr=0.00005)

        train_data_loader = DataLoader(dataset=self.trainset, batch_size=self.opt['batch_size'], shuffle=True)
        test_data_loader = DataLoader(dataset=self.testset, batch_size=self.opt['batch_size'], shuffle=False)
        val_data_loader = DataLoader(dataset=self.valset, batch_size=self.opt['batch_size'], shuffle=False)

        best_model_path = self.train(criterion, optimizer, train_data_loader, val_data_loader)
        if best_model_path is not None:
            # map_location keeps the reload working if the device differs
            # from the one the checkpoint was saved on.
            state_dict = torch.load(best_model_path, map_location=self.opt['device'])
            self.model.load_state_dict(state_dict)
        else:
            logging.warning('>> no checkpoint was saved; evaluating the current weights')

        test_acc, test_f1 = self.evaluate_acc_f1(test_data_loader)
        logging.info('>> test_acc: {:.4f}, test_f1: {:.4f}'.format(test_acc, test_f1))

**Start training task**

In [None]:
# Train on the GPU when one is available.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Fixed seed so that weight initialisation, dropout and shuffling are repeatable.
SEED = 42

# Model name -> model class.
models = {
  'bert': BertBased,
  'lcf_bert': LFC_BERT
}

# Model name -> dataset columns fed to the model, in the order `forward` expects.
input_columns = {
  'bert': ['concat_bert_indexes', 'concat_segments_indexes'],
  'lcf_bert': ['concat_bert_indexes', 'concat_segments_indexes', 'text_bert_indexes', 'aspect_bert_indexes']
}

models_to_evaluate = ['bert', 'lcf_bert']

for model_name in models_to_evaluate:
  # Re-seed per model so each run is reproducible regardless of the order.
  torch.manual_seed(SEED)
  np.random.seed(SEED)

  opt = {
    'model_name': model_name,
    'model_class': models[model_name],
    'input_columns': input_columns[model_name],
    'hidden_dim': 300,
    'embed_dim': 100,
    'num_epoch': 10,
    'batch_size': 16,
    'log_step': 10,
    'polarities_dim': 3,
    'max_seq_len': 85,
    'pretrained_bert_name': 'bert-base-uncased',
    'dropout': 0.1,
    'bert_dim': 768,
    'device': device,
    'SRD': 3
  }

  train_task = TrainTask(opt)
  train_task.run()

INFO:root:>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
INFO:root:epoch: 0
INFO:root:loss: 1.1098, acc: 0.4125
INFO:root:loss: 1.0646, acc: 0.4781
INFO:root:loss: 1.0338, acc: 0.5125
INFO:root:loss: 1.0380, acc: 0.5141
INFO:root:loss: 1.0336, acc: 0.5175
INFO:root:loss: 1.0351, acc: 0.5146
INFO:root:loss: 1.0387, acc: 0.5116
INFO:root:loss: 1.0460, acc: 0.5000
INFO:root:loss: 1.0467, acc: 0.4993
INFO:root:loss: 1.0489, acc: 0.4963
INFO:root:loss: 1.0481, acc: 0.4977
INFO:root:loss: 1.0480, acc: 0.4969
INFO:root:loss: 1.0491, acc: 0.4962
INFO:root:loss: 1.0499, acc: 0.4938
INFO:root:loss: 1.0487, acc: 0.4963
INFO:root:loss: 1.0497, acc: 0.4957
INFO:root:loss: 1.0474, acc: 0.4985
INFO:root:loss: 1.0445, acc: 0.5017
INFO:root:loss: 1.0450, acc: 0.5020
INFO:root:loss: 1.0477, acc: 0.4984
INFO:root:loss: 1.0493, acc: 0.4967
INFO:root:loss: 1.0499, acc: 0.4955
INFO:root:loss: 1.0511, acc: 0.4935
INFO:root:loss: 1.0510, acc: 0.4932
INFO:r

In [15]:
# Open TensorBoard on the `runs` directory (presumably where the SummaryWriter
# created without an explicit log_dir writes its event files — confirm).
%tensorboard --logdir runs

test.raw  train.raw
