In [1]:
# Install dependencies
!pip uninstall -y tensorflow
!pip install transformers

Uninstalling tensorflow-2.2.0:
  Successfully uninstalled tensorflow-2.2.0
Collecting transformers
[?25l  Downloading https://files.pythonhosted.org/packages/27/3c/91ed8f5c4e7ef3227b4119200fc0ed4b4fd965b1f0172021c25701087825/transformers-3.0.2-py3-none-any.whl (769kB)
[K     |████████████████████████████████| 778kB 3.5MB/s 
Collecting sacremoses
[?25l  Downloading https://files.pythonhosted.org/packages/7d/34/09d19aff26edcc8eb2a01bed8e98f13a1537005d31e95233fd48216eed10/sacremoses-0.0.43.tar.gz (883kB)
[K     |████████████████████████████████| 890kB 9.7MB/s 
[?25hCollecting sentencepiece!=0.1.92
[?25l  Downloading https://files.pythonhosted.org/packages/d4/a4/d0a884c4300004a78cca907a6ff9a5e9fe4f090f5d95ab341c53d28cbc58/sentencepiece-0.1.91-cp36-cp36m-manylinux1_x86_64.whl (1.1MB)
[K     |████████████████████████████████| 1.1MB 16.5MB/s 
Collecting tokenizers==0.8.1.rc1
[?25l  Downloading https://files.pythonhosted.org/packages/40/d0/30d5f8d221a0ed981a186c8eb986ce1c94e3a6e87f994e

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [3]:
import json
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
import transformers
from tqdm import tqdm
from sklearn.metrics import f1_score, accuracy_score
from transformers import BertModel, BertTokenizer

import logging
logging.basicConfig(level=logging.ERROR)

import warnings
warnings.filterwarnings("ignore")

In [4]:
class SentimentClassifier(nn.Module):
  """
  This class defines the model architecture which is simply a fully-connected
  layer on top of a pre-trained BERT model. 
  """

  def __init__(self, BERT_MODEL):
    super(SentimentClassifier, self).__init__()
    self.bert = BertModel.from_pretrained(BERT_MODEL)
    self.drop = nn.Dropout(p=0.3)
    self.out = nn.Linear(self.bert.config.hidden_size, 3) # Number of output classes = 3

  def forward(self, ids, mask, token_type_ids):
    last_hidden_state, pooled_output = self.bert(ids, attention_mask=mask, token_type_ids=token_type_ids)
    output = self.drop(pooled_output)
    return self.out(output)

# Inference on BERT-pair NLI-M

In [None]:
class SentiHood:
  """
  This class tokenizes the input text using the pre-trained BERT tokenizer 
  (wordpiece) and returns the corresponding tensors.
  """
  
  def __init__(self, opinions_id, text, auxiliary_sentence, targets, tokenizer, max_len):
    self.opinions_id = opinions_id
    self.text = text
    self.auxiliary_sentence = auxiliary_sentence
    self.tokenizer = tokenizer
    self.max_len = max_len
    self.targets = targets

  def __len__(self):
    return len(self.targets)

  def __getitem__(self, item):
    opinions_id = self.opinions_id[item]
    text = str(self.text[item])
    auxiliary_sentence = str(self.auxiliary_sentence[item])
    targets = self.targets[item]

    text = text + ' ' + auxiliary_sentence

    inputs = self.tokenizer.encode_plus(
        text,
        add_special_tokens = True,
        max_length = self.max_len,
        pad_to_max_length = True
    )

    ids = inputs["input_ids"]
    mask = inputs["attention_mask"]
    token_type_ids = inputs["token_type_ids"]

    return {
        "ids": torch.tensor(ids, dtype=torch.long),
        "mask": torch.tensor(mask, dtype=torch.long),
        "token_type_ids": torch.tensor(token_type_ids, dtype=torch.long),
        "targets": torch.tensor(targets, dtype=torch.long),
        "opinions_id": torch.tensor(opinions_id, dtype=torch.long)
    }

In [None]:
def infer_loop_function(data_loader, model, device):
  """
  This function performs the inference on testing sets and stores the predicted
  values.
  """

  model.eval()

  df_pred = pd.DataFrame({"id": [], "predicted": [], "actual": []})

  ii = 0
  for bi, d in tqdm(enumerate(data_loader), total=len(data_loader), ncols=80):
    opinions_id = d["opinions_id"]
    ids = d["ids"]
    mask = d["mask"]
    token_type_ids = d["token_type_ids"]
    targets = d["targets"]

    opinions_id = opinions_id.to(device, dtype=torch.long)
    ids = ids.to(device, dtype=torch.long)
    mask = mask.to(device, dtype=torch.long)
    token_type_ids = token_type_ids.to(device, dtype=torch.long)
    targets = targets.to(device, dtype=torch.long)

    outputs = model(ids=ids, mask=mask, token_type_ids=token_type_ids)
    _, predicted = torch.max(outputs, 1)
    
    predicted = predicted.detach().cpu().numpy()
    targets = targets.detach().cpu().numpy()
    opinions_id = opinions_id.detach().cpu().numpy()

    for k in range(len(predicted)):
      df_pred.loc[ii] = [str(opinions_id[k]), str(predicted[k]), str(targets[k])]
      ii += 1

    df_pred.to_csv('/content/drive/My Drive/SentiHood/Bert-pair/NLI-M/PredictedValues.csv', index=False)

In [None]:
def run():
  """
  This function defines the necessary hyperparameters and models. It also 
  loads and tokenizes the testing dataset and execute the inference procedure.
  """

  TRAIN_MAX_LEN = 160
  TRAIN_BATCH_SIZE = 16
  BERT_MODEL = 'bert-base-uncased'

  testing_set_path = '/content/drive/My Drive/SentiHood/Bert-pair/NLI-M/Datasets/testing_set.csv'

  df_test = pd.read_csv(testing_set_path)
  sentiment_mapping = {
      'Positive': 0,
      'Negative': 1,
      'None': 2
  }
  df_test['sentiment'] = df_test['sentiment'].map(sentiment_mapping)
  df_test = df_test.reset_index(drop=True)

  tokenizer = transformers.BertTokenizer.from_pretrained(BERT_MODEL)

  test_dataset = SentiHood(
      opinions_id = df_test['id'].values,
      text = df_test['text'].values,
      auxiliary_sentence = df_test['auxiliary_sentence'],
      targets = df_test['sentiment'].values,
      tokenizer = tokenizer,
      max_len = TRAIN_MAX_LEN
  )
  print(f"Training Set: {len(test_dataset)}")

  test_data_loader = torch.utils.data.DataLoader(
      test_dataset,
      batch_size = TRAIN_BATCH_SIZE,
      shuffle=False
  )

  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
  print(f"Device: {device}")

  model = torch.load('/content/drive/My Drive/SentiHood/Bert-pair/NLI-M/Models/best.bin')
  infer_loop_function(data_loader=test_data_loader, model=model, device=device)
      
if __name__ == "__main__":
  run()

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=231508.0, style=ProgressStyle(descripti…


Training Set: 22548
Device: cuda:0


  7%|██▉                                     | 105/1410 [00:19<04:04,  5.33it/s]

# Evaluation of BERT-pair NLI-M

In [None]:
def compute_sentiment_accuracy(df):
  """This function computes the sentiment classfication accuracy"""
  
  accuracy = df[df['predicted'] == df['actual']].shape[0]/df.shape[0] * 100
  return round(accuracy, 2)

df = pd.read_csv('/content/drive/My Drive/SentiHood/Bert-pair/NLI-M/PredictedValues.csv')
print(f'Sentiment Accuracy of BERT-pair QA-M = {compute_sentiment_accuracy(df)}%')

Sentiment Accuracy of BERT-pair QA-M = 97.28%


In [None]:
def compute_aspect_accuracy(df):
  """
  This function computes the strict aspect accuracy.
  0 => Represents that the aspect has not been detected.
  1 => Represents that the aspect has been detected.
  """
  
  df = df.replace([0, 1], 1).replace(2, 0)

  count = 0
  total = 0

  for i in range(0, df.shape[0], 12):
    true_values = df.iloc[i:i+12]['predicted']
    predicted_values = df.iloc[i:i+12]['actual']

    if (true_values == predicted_values).all():
      count += 1
    total += 1

  accuracy = float(count)/float(total)*100
  return round(accuracy, 2)

df = pd.read_csv('/content/drive/My Drive/SentiHood/Bert-pair/NLI-M/PredictedValues.csv')
print(f'Aspect Accuracy (strict) of BERT-pair NLI-M = {compute_aspect_accuracy(df)}%')

Aspect Accuracy (strict) of BERT-pair NLI-M = 74.77%


In [None]:
def compute_aspect_f1_score(df):
  """
  This function computest the macro F1 score of predicted aspects.
  0 => Represents that the aspect has not been detected.
  1 => Represents that the aspect has been detected.
  """
  
  df = df.replace([0, 1], 1).replace(2, 0)

  total_f1_score = 0
  total = 0
  
  for i in range(0, df.shape[0], 12):
    true_values = df.iloc[i:i+12]['predicted']
    predicted_values = df.iloc[i:i+12]['actual']

    total_f1_score += f1_score(true_values, predicted_values, average="macro")
    total += 1

  score = float(total_f1_score)/float(total)*100
  return round(score, 2)

df = pd.read_csv('/content/drive/My Drive/SentiHood/Bert-pair/NLI-M/PredictedValues.csv')
print(f"Aspect F1 score: {compute_aspect_f1_score(df)}")

Aspect F1 score: 90.86


# Prediction Result Analysis

This section analyses the predicted results to find the aspects and sentiments that are most and least accurate.

*Note*: Utilizing the fact that first 1491x12 entries in the loaded `df` are related to `location-1` and rest are related to `location-2`. 

In [5]:
df = pd.read_csv('/content/drive/My Drive/SentiHood/Bert-pair/NLI-M/PredictedValues.csv')

In [6]:
"""
Computes the positive_correct, positive_total, negative_correct, negative_total, 
none_correct, none_total corresponding to all the aspects of LOCATION1.
"""

aspects = ['dining', 'general', 'green-nature', 'live', 'multicultural', 'nightlife', 'price', 'quiet', 'safety', 'shopping', 'touristy', 'transit-location']
location1_aspects_result_analysis = {}

for i in range(12):
  location1_aspects_result_analysis[aspects[i]] = [[0 ,0], [0 ,0], [0 ,0]]

for i in tqdm(range(0, df['id'].unique().shape[0]*12-12, 12), ncols=80):
  for j in range(12):
    if df.loc[i+j]['actual'] == df.loc[i+j]['predicted']:
      location1_aspects_result_analysis[aspects[j]][int(df.loc[i+j]['actual'])][0] += 1
    
    location1_aspects_result_analysis[aspects[j]][int(df.loc[i+j]['actual'])][1] += 1

100%|███████████████████████████████████████| 1490/1490 [00:17<00:00, 82.91it/s]


In [8]:
df_location_aspect = pd.DataFrame({"location": [], "aspect": [], "positive_correct": [], "positive_total": [], "negative_correct": [], "negative_total": [], "none_correct": [], "none_total": [],})

ii = 0
for key in location1_aspects_result_analysis.keys():
  df_location_aspect.loc[ii] = ['LOCATION1', f"{key}", 
                                location1_aspects_result_analysis[key][0][0], 
                                location1_aspects_result_analysis[key][0][1], 
                                location1_aspects_result_analysis[key][1][0], 
                                location1_aspects_result_analysis[key][1][1], 
                                location1_aspects_result_analysis[key][2][0], 
                                location1_aspects_result_analysis[key][2][1]]
  ii += 1

In [9]:
"""
Computes the positive_correct, positive_total, negative_correct, negative_total, 
none_correct, none_total corresponding to all the aspects of LOCATION2.
"""

aspects = ['dining', 'general', 'green-nature', 'live', 'multicultural', 'nightlife', 'price', 'quiet', 'safety', 'shopping', 'touristy', 'transit-location']
location2_aspects_result_analysis = {}

for i in range(12):
  location2_aspects_result_analysis[aspects[i]] = [[0 ,0], [0 ,0], [0 ,0]]

for i in tqdm(range(df['id'].unique().shape[0]*12, df.shape[0]-12, 12), ncols=80):
  for j in range(12):
    if df.loc[i+j]['actual'] == df.loc[i+j]['predicted']:
      location2_aspects_result_analysis[aspects[j]][int(df.loc[i+j]['actual'])][0] += 1
    
    location2_aspects_result_analysis[aspects[j]][int(df.loc[i+j]['actual'])][1] += 1

100%|█████████████████████████████████████████| 387/387 [00:04<00:00, 85.23it/s]


In [10]:
for key in location2_aspects_result_analysis.keys():
  df_location_aspect.loc[ii] = ['LOCATION2', f"{key}", 
                                location2_aspects_result_analysis[key][0][0], 
                                location2_aspects_result_analysis[key][0][1], 
                                location2_aspects_result_analysis[key][1][0], 
                                location2_aspects_result_analysis[key][1][1], 
                                location2_aspects_result_analysis[key][2][0], 
                                location2_aspects_result_analysis[key][2][1]]
  ii += 1

In [11]:
"""
For every location-aspect pair, consider the percentage of only those sentiments
which have more than 10 datapoints in the entire testing set.
"""

df_location_aspect['positive_percentage'] = round(df_location_aspect['positive_correct']/df_location_aspect['positive_total']*100, 2)
df_location_aspect['negative_percentage'] = round(df_location_aspect['negative_correct']/df_location_aspect['negative_total']*100, 2)
df_location_aspect['none_percentage'] = round(df_location_aspect['none_correct']/df_location_aspect['none_total']*100, 2)

total_percentage = []
for i in range(df_location_aspect.shape[0]):
  total = 0
  value = 0

  positive_total = df_location_aspect.loc[i]['positive_total']
  negative_total = df_location_aspect.loc[i]['negative_total']
  none_total = df_location_aspect.loc[i]['none_total']

  if positive_total > 10:
    value += df_location_aspect.loc[i]['positive_percentage']
    total += 1
  else:
    df_location_aspect.loc[i, 'positive_percentage'] = 'NaN'

  if negative_total > 10:
    value += df_location_aspect.loc[i]['negative_percentage']
    total += 1
  else:
    df_location_aspect.loc[i, 'negative_percentage'] = 'NaN'
  
  if none_total > 10:
    value += df_location_aspect.loc[i]['none_percentage']
    total += 1
  else:
    df_location_aspect.loc[i, 'none_percentage'] = 'NaN'

  total_percentage.append(round(float(value)/total, 2))

df_location_aspect['total'] = total_percentage

In [12]:
df_location_aspect

Unnamed: 0,location,aspect,positive_correct,positive_total,negative_correct,negative_total,none_correct,none_total,positive_percentage,negative_percentage,none_percentage,total
0,LOCATION1,dining,29.0,30.0,0.0,2.0,1453.0,1458.0,96.67,,99.66,98.16
1,LOCATION1,general,296.0,359.0,80.0,113.0,946.0,1018.0,82.45,70.8,92.93,82.06
2,LOCATION1,green-nature,35.0,40.0,0.0,0.0,1443.0,1450.0,87.5,,99.52,93.51
3,LOCATION1,live,50.0,63.0,14.0,23.0,1375.0,1404.0,79.37,60.87,97.93,79.39
4,LOCATION1,multicultural,31.0,39.0,1.0,3.0,1444.0,1448.0,79.49,,99.72,89.6
5,LOCATION1,nightlife,57.0,62.0,1.0,2.0,1413.0,1426.0,91.94,,99.09,95.52
6,LOCATION1,price,69.0,81.0,105.0,116.0,1263.0,1293.0,85.19,90.52,97.68,91.13
7,LOCATION1,quiet,10.0,14.0,12.0,15.0,1458.0,1461.0,71.43,80.0,99.79,83.74
8,LOCATION1,safety,54.0,61.0,53.0,66.0,1350.0,1363.0,88.52,80.3,99.05,89.29
9,LOCATION1,shopping,58.0,62.0,1.0,1.0,1420.0,1427.0,93.55,,99.51,96.53


# Creating preds.jsonl

This section constructs the `preds.jsonl` file which contains model predictions and original annotations in the following json format.


```
{
  "opinions": [
    {
      "sentiment": "Positive",
      "aspect": "safety",
      "target_entity": "LOCATION1"
    }
  ],
  "id": 153,
  "text": " LOCATION1 is in Greater London and is a very safe place",
  "model_pred": [
    {
      "sentiment": ...,
      "aspect": ...,
      "target_entity":...
    },...
  ]
}
```

In [None]:
with open('/content/drive/My Drive/SentiHood/SentiHood Dataset/sentihood-test.json', 'r') as fp:
  testing_set = json.load(fp)

In [None]:
labels_to_sentiment_dict = {
    0: 'Positive',
    1: 'Negative',
    2: 'None'
}

In [None]:
BERT_MODEL = 'bert-base-uncased'
MAX_LEN = 160
locations = ['LOCATION1', 'LOCATION2']
aspects = ['dining', 'general', 'green-nature', 'live', 'multicultural', 'nightlife', 'price', 'quiet', 'safety', 'shopping', 'touristy', 'transit-location']

tokenizer = transformers.BertTokenizer.from_pretrained(BERT_MODEL)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Device: {device}")

model = torch.load('/content/drive/My Drive/SentiHood/Bert-pair/NLI-M/Models/best.bin')

for each_example in tqdm(testing_set, ncols=80):
  id = each_example['id']
  text = each_example['text'].strip()

  each_example['model_pred'] = []

  count_location = 1
  for location in locations:
    if location in text:
      # If "location" is present in the text, then utilize the trained model
      # to predict the aspects and their corresponding sentiment of the text.

      text = text.replace(location, 'location - ' + str(count_location))
      
      for aspect in aspects:
        auxiliary_sentence = f'location - {str(count_location)} - {aspect}'
        combined_text = text + ' ' + auxiliary_sentence
        
        inputs = tokenizer.encode_plus(
            combined_text,
            add_special_tokens = True,
            max_length = MAX_LEN,
            pad_to_max_length = True
        )
        ids = torch.tensor(inputs["input_ids"], dtype=torch.long).unsqueeze(0)
        mask = torch.tensor(inputs["attention_mask"], dtype=torch.long).unsqueeze(0)
        token_type_ids = torch.tensor(inputs["token_type_ids"], dtype=torch.long).unsqueeze(0)

        ids = ids.to(device, dtype=torch.long)
        mask = mask.to(device, dtype=torch.long)
        token_type_ids = token_type_ids.to(device, dtype=torch.long)

        outputs = model(ids=ids, mask=mask, token_type_ids=token_type_ids)
        _, predicted = torch.max(outputs, 1)

        predicted = predicted.detach().cpu().numpy()

         # If predicted sentiment is not None, then add it to the preds.jsonl.
         
        if predicted[0] != 2:
          result = {
              "sentiment": labels_to_sentiment_dict[predicted[0]],
              "aspect": aspect,
              "target_entity": location
          }
          each_example['model_pred'].append(result)
      
    count_location += 1

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=231508.0, style=ProgressStyle(descripti…


Device: cuda:0


100%|███████████████████████████████████████| 1491/1491 [05:20<00:00,  4.66it/s]


In [None]:
with open('/content/drive/My Drive/SentiHood/Bert-pair/NLI-M/pred.jsonl', mode='w', encoding='utf-8') as fp:
  for each in testing_set:
    json_record = json.dumps(each, ensure_ascii=False)
    fp.write(json_record + '\n')