Demo for aspect-based Sentiment Analysis shown at the jambit Meetup on 6th November 2019.

Based mainly on [Ben Trevett's PyTorch Sentiment Analysis](https://github.com/bentrevett/pytorch-sentiment-analysis). The training and testing data is a prepared csv version of the restaurant data from the SemEval 2014 task on Aspect-based Sentiment Analysis.

(c) Wiltrud Kessler


In [0]:
from google.colab import drive
drive.mount('/content/drive')

In [0]:
# Import pytorch and torchtext libraries
from torchtext import data
import torch
import torch.optim as optim
import torch.nn as nn
import time

In [0]:
# Task settings

# The data we use has sentiment polarity annotations and aspect annotations.
# There are 4 possible sentiment labels, '1', '-1', '0', and 'conflict'.
# There are two types of aspect annotations in the data, 'category' (5 different aspect categories) and 'terms' (actual aspect words).
# Chose here which setting to run: 
# Demo 1: Polarity classification -> labeltype 'category', use_aspect_label False. Play around with polarities, if you like.
# Demo 2: Aspect category classification -> labeltype 'category', use_aspect_label True.
# Demo 3: Aspect term classification -> labeltype 'term', use_aspect_label True.
labeltype = 'category'
polarities = ['1', '-1', '0', 'conflict']
use_aspect_label = True

In [4]:
# Load the data from csv

ID = data.Field()
TEXT = data.Field()
ASPECT = data.Field()
POLARITY = data.Field()
LABEL = data.LabelField()

# Select the columns of the csv file that we want to use
# field ->   sent.id          text         ex.id          aspect           polarity
if use_aspect_label:
  fields = [(None, None), ('text', TEXT), (None, None), ('label', LABEL), (None, None)] # Use aspect as label
else:
  fields = [(None, None), ('text', TEXT), (None, None), (None, None), ('label', LABEL)] # Use polarity as label

# The data is already split into training/validation/test to load with the corresponding names
prefix = 'semeval2014_restaurants_' + labeltype + "_" + ".".join(polarities)
print(f'Loading data from {prefix}')
train_data, valid_data, test_data = data.TabularDataset.splits(
                                        path = '/content/drive/My Drive/semeval',
                                        train = prefix + '_train.csv',
                                        validation = prefix + '_val.csv',
                                        test = prefix + '_test.csv',
                                        format = 'csv',
                                        fields = fields,
                                        skip_header = True
)

# Check if we loaded the right data by the number of examples
print(f'Number of training examples: {len(train_data)}')
print(f'Number of validation examples: {len(valid_data)}')
print(f'Number of testing examples: {len(test_data)}')

# Print some examples
print(vars(train_data.examples[1]))
print(vars(valid_data.examples[0]))
print(vars(test_data.examples[0]))

Loading data from semeval2014_restaurants_category_1.-1.0.conflict
Number of training examples: 2971
Number of validation examples: 742
Number of testing examples: 114
{'text': ['the', 'food', 'itself', 'was', 'just', 'ok', 'nothing', 'spectacular', 'but', 'the', 'service', 'was', 'awful'], 'label': 'service'}
{'text': ['i', 'love', 'the', 'fact', 'that', 'the', 'pizza', 'tastes', 'so', 'good', 'and', 'is', 'so', 'cheap'], 'label': 'food'}
{'text': ['all', 'the', 'appetizers', 'and', 'salads', 'were', 'fabulous', 'the', 'steak', 'was', 'mouth', 'watering', 'and', 'the', 'pasta', 'was', 'delicious'], 'label': 'food'}


In [5]:
# Build the vocabulary

BATCH_SIZE = 64
MAX_VOCAB_SIZE = 25_000
MAX_LABEL_SIZE = 25_000 # take all

# You can play around and throw out labels that are very rare in the dat
#MAX_LABEL_SIZE = 60 # include aspect words that occur over 10 times (roughly)
#MAX_LABEL_SIZE = 30 # include aspect words that occur over 20 times (roughly)
#MAX_LABEL_SIZE = 10 # include aspect words that occur over 50 times (roughly)

# Build the vocabulary only over the training data (test data is unknown)
TEXT.build_vocab(train_data, max_size = MAX_VOCAB_SIZE)
LABEL.build_vocab(train_data, max_size = MAX_LABEL_SIZE)

# Look at the numbers a bit
print(f"Unique tokens in TEXT vocabulary: {len(TEXT.vocab)}")
print(f"Most frequent vocabulary words: {TEXT.vocab.freqs.most_common(20)}")

print(f"Unique tokens in LABEL vocabulary: {len(LABEL.vocab)}")
print(f"Classes distribution: {LABEL.vocab.freqs}")

if use_aspect_label:
  print(f"Most frequent class words: {LABEL.vocab.freqs.most_common(MAX_LABEL_SIZE)}")
  num = sum([x[1] for x in LABEL.vocab.freqs.most_common(MAX_LABEL_SIZE)])
  if MAX_LABEL_SIZE < len(LABEL.vocab):
    print(f"Words with real label: {num} Words with default label {len(train_data)-num}")

# Move computations to GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# The networks needs to be able to iterate over the data later on,
# these iterators are defined here
train_iterator, valid_iterator, test_iterator = data.BucketIterator.splits(
    (train_data, valid_data, test_data), 
    batch_size = BATCH_SIZE,
    sort_key=lambda x: len(x.text),
    sort_within_batch=False,
    device = device)

Unique tokens in TEXT vocabulary: 4025
Most frequent vocabulary words: [('the', 2722), ('and', 1699), ('a', 1089), ('is', 931), ('to', 874), ('i', 873), ('was', 649), ('food', 619), ('of', 614), ('for', 593), ('it', 552), ('in', 519), ('you', 433), ('but', 383), ('we', 378), ('this', 361), ('service', 359), ('with', 348), ('great', 320), ('that', 305)]
Unique tokens in LABEL vocabulary: 5
Classes distribution: Counter({'food': 989, 'anecdotes miscellaneous': 890, 'service': 475, 'ambience': 362, 'price': 255})
Most frequent class words: [('food', 989), ('anecdotes miscellaneous', 890), ('service', 475), ('ambience', 362), ('price', 255)]


In [6]:
# Define the machine learning algorithm (Recurrent Neural Network)

INPUT_DIM = len(TEXT.vocab) # each word is an input dimension
EMBEDDING_DIM = 100 # this number falls from the sky and may be tuned ;)
HIDDEN_DIM = 256 # this number falls from the sky and may be tuned ;)
OUTPUT_DIM = len(LABEL.vocab) # each label is an output dimension

# The network itself
class RNN(nn.Module):
    def __init__(self, input_dim, embedding_dim, hidden_dim, output_dim):
        super().__init__()
        self.embedding = nn.Embedding(input_dim, embedding_dim)
        self.rnn = nn.RNN(embedding_dim, hidden_dim)
        self.fc = nn.Linear(hidden_dim, output_dim)
        
    def forward(self, text):
        embedded = self.embedding(text)
        output, hidden = self.rnn(embedded)
        assert torch.equal(output[-1,:,:], hidden.squeeze(0))
        return self.fc(hidden.squeeze(0))

# Just for debugging
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

# Get an instance of our model
model = RNN(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM)
print(f'The model has {count_parameters(model):,} trainable parameters')

# Set other parameters for the network (optimizer, loss function)
optimizer = optim.SGD(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()
model = model.to(device) # move to GPU, if we have one
criterion = criterion.to(device) # move to GPU, if we have one

# Define the metric for evaluation = accuracy
def categorical_accuracy(preds, y):
    max_preds = preds.argmax(dim = 1, keepdim = True) # get the index of the max probability
    correct = max_preds.squeeze(1).eq(y) # check if it is correct
    return correct.sum() / torch.FloatTensor([y.shape[0]])

# Define the actual training
def train(model, iterator, optimizer, criterion):
    
    epoch_loss = 0
    epoch_acc = 0
    model.train()

    for batch in iterator:
        optimizer.zero_grad()
        predictions = model(batch.text)
        loss = criterion(predictions, batch.label)
        acc = categorical_accuracy(predictions, batch.label)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        epoch_acc += acc.item()
        
    return epoch_loss / len(iterator), epoch_acc / len(iterator)

# Define the actual evaluation
def evaluate(model, iterator, criterion):
    
    epoch_loss = 0
    epoch_acc = 0    
    model.eval()
    
    with torch.no_grad():    
        for batch in iterator:
            predictions = model(batch.text)
            loss = criterion(predictions, batch.label)
            acc = categorical_accuracy(predictions, batch.label)
            epoch_loss += loss.item()
            epoch_acc += acc.item()
        
    return epoch_loss / len(iterator), epoch_acc / len(iterator)

The model has 495,433 trainable parameters


In [7]:
# Actually train the model

N_EPOCHS = 10 # Set to a higher value

# Debug function to show how much time one iteration takes
def epoch_time(start_time, end_time):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs

best_valid_loss = float('inf')

# Train the model a few times on the training data and evaluate it
# on the validation data. Save the best model.
for epoch in range(N_EPOCHS):

    start_time = time.time()
    
    train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)
    
    end_time = time.time()

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)

    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'absa1.pt')
    
    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')

Epoch: 01 | Epoch Time: 0m 2s
	Train Loss: 1.549 | Train Acc: 29.38%
	 Val. Loss: 1.562 |  Val. Acc: 28.67%
Epoch: 02 | Epoch Time: 0m 2s
	Train Loss: 1.503 | Train Acc: 32.88%
	 Val. Loss: 1.541 |  Val. Acc: 29.32%
Epoch: 03 | Epoch Time: 0m 2s
	Train Loss: 1.490 | Train Acc: 33.15%
	 Val. Loss: 1.532 |  Val. Acc: 29.45%
Epoch: 04 | Epoch Time: 0m 2s
	Train Loss: 1.492 | Train Acc: 33.06%
	 Val. Loss: 1.529 |  Val. Acc: 29.71%
Epoch: 05 | Epoch Time: 0m 2s
	Train Loss: 1.490 | Train Acc: 33.01%
	 Val. Loss: 1.527 |  Val. Acc: 28.80%
Epoch: 06 | Epoch Time: 0m 2s
	Train Loss: 1.487 | Train Acc: 33.16%
	 Val. Loss: 1.526 |  Val. Acc: 29.06%
Epoch: 07 | Epoch Time: 0m 2s
	Train Loss: 1.490 | Train Acc: 33.02%
	 Val. Loss: 1.525 |  Val. Acc: 29.19%
Epoch: 08 | Epoch Time: 0m 2s
	Train Loss: 1.489 | Train Acc: 32.95%
	 Val. Loss: 1.525 |  Val. Acc: 28.67%
Epoch: 09 | Epoch Time: 0m 2s
	Train Loss: 1.488 | Train Acc: 33.16%
	 Val. Loss: 1.524 |  Val. Acc: 29.06%
Epoch: 10 | Epoch Time: 0m 2

In [8]:
# Evaluate the model on the test data

model.load_state_dict(torch.load('absa1.pt'))

test_loss, test_acc = evaluate(model, test_iterator, criterion)

print(f'Test Accuracy: {test_acc*100:.2f}%')

Test Accuracy: 36.97%


In [9]:
# Try out a few examples by hand

from nltk.tokenize import WordPunctTokenizer

def custom_tokenize(text):
    tokenizer = WordPunctTokenizer()
    tokens = tokenizer.tokenize(text)
    words = [word for word in tokens if word.isalnum()]
    return words

def predict_sentiment(model, sentence):
    model.eval() # Put model in eval mode
    tokenized = custom_tokenize(sentence)
    indexed = [TEXT.vocab.stoi[t] for t in tokenized]
    length = [len(indexed)]
    tensor = torch.LongTensor(indexed).to(device)
    tensor = tensor.unsqueeze(1)
    predictions = torch.sigmoid(model(tensor))
    max_prediction = predictions.argmax(dim = 1)
    return predictions.tolist(), max_prediction.item()

# POLARITY examples
if not use_aspect_label:
  # Label here is human-readable, i.e. 0 is neutral, 1 is positive, -1 is negative
  sentences = [
      ("The food was delicious.", "1"),
      ("This is by far my favorite place in the neighborhood", "1"),
      ("The sushi was awful!", "-1"),
      ("Service was prompt, friendly and great.", "1"),
      ("The website and rating makes this place look wonderful but in reality it was very disappointing.", "conflict"),
      ("I know because I live nearby.", "0")
  ]

# ASPECT CATEGORY examples
if labeltype == 'category' and use_aspect_label:
   sentences = [
      ("The food was delicious.", "food"),
      ("This is by far my favorite place in the neighborhood", "anecdotes/miscellaneous"),
      ("The sushi was awful!", "food"),
      ("Service was prompt, friendly and great.", "service"),
      ("in the neighborhood it is well worth the price you pay for them.", "price")
   ]

# ASPECT TERM examples
if labeltype == 'term' and use_aspect_label:
   sentences = [
      ("The food was delicious.", "food"),
      ("The sushi was awful!", "sushi"),
      ("My pick for best pizza restaurant anywhere!", "pizza"),
      ("The atmosphere isn't the greatest , but I suppose that's how they keep the prices down .", "atmosphere"),
      ("The atmosphere isn't the greatest , but I suppose that's how they keep the prices down .", "prices"),
      ("the desert was good.", "desert")
   ]

for s in sentences:
   result = predict_sentiment(model, s[0])
   predicted_label = LABEL.vocab.itos[result[1]]
   print(f'Sentence: {s[0]}\n   {str(s[1]) == predicted_label}! - Gold label: {s[1]} Predicted label: {predicted_label} Probabilities: {result[0]}')

Sentence: The food was delicious.
   False! - Gold label: food Predicted label: anecdotes miscellaneous Probabilities: [[0.4999088943004608, 0.6480996012687683, 0.5152636170387268, 0.47140341997146606, 0.5139840841293335]]
Sentence: This is by far my favorite place in the neighborhood
   False! - Gold label: anecdotes/miscellaneous Predicted label: price Probabilities: [[0.40385672450065613, 0.5065933465957642, 0.5013625621795654, 0.35134658217430115, 0.5374458432197571]]
Sentence: The sushi was awful!
   False! - Gold label: food Predicted label: service Probabilities: [[0.4620286524295807, 0.5587091445922852, 0.5747478604316711, 0.5376664400100708, 0.465562641620636]]
Sentence: Service was prompt, friendly and great.
   False! - Gold label: service Predicted label: ambience Probabilities: [[0.4959174394607544, 0.5272541046142578, 0.4452691078186035, 0.5982636213302612, 0.575789749622345]]
Sentence: in the neighborhood it is well worth the price you pay for them.
   True! - Gold label