In [1]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules before
# each cell is executed, so edits to the project's .py files take effect
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

# 3. Sentiment Analysis - Evaluation

## Setup

First, set up the path to the preprocessed dataset:

In [2]:
# Locate the preprocessed dataset relative to the notebook's working directory
import os

# '__file__' is deliberately a string literal here: as a relative path it is
# resolved against the current working directory, so its dirname is that directory.
fileDir = os.path.dirname(os.path.realpath('__file__'))

# Join the relative location of the CSV onto that directory
# (the '..' segment is still present at this point).
absFilePathToPreprocessedDataset = os.path.join(
    fileDir,
    '../Data/training.1600000.processed.noemoticon_preprocessed.csv',
)

# Collapse the path into its final absolute form and show it.
pathToPreprocessedDataset = os.path.abspath(
    os.path.realpath(absFilePathToPreprocessedDataset)
)
print(pathToPreprocessedDataset)

c:\Users\nlp_workshop\Documents\PSIML-NLPWorkshop\Data\training.1600000.processed.noemoticon_preprocessed.csv


Choose the device to run the training on:

In [3]:
# Device identifier used for the model placement, training and evaluation calls in this notebook
device = "cpu"

Set the learning rate parameter:

In [4]:
# Learning rate handed to the Adam optimizer when it is created
learningRate = 0.001

## Initialization

In [5]:
import torch.nn as nn
import torch.optim as optim
from Common.TwitterDataset import TwitterDataset
from Models.ModelPerceptron import SentimentClassifierPerceptron

# Step #1: Load the dataset from the preprocessed CSV and fetch its vectorizer
dataset = TwitterDataset.load_dataset_and_make_vectorizer(pathToPreprocessedDataset)
vectorizer = dataset.get_vectorizer()

# Step #2: Build the perceptron classifier.
# Input width = size of the text vocabulary, output width = size of the
# target vocabulary; afterwards the model is moved to the chosen device.
model = SentimentClassifierPerceptron(
    num_features=len(vectorizer.text_vocabulary),
    output_dim=len(vectorizer.target_vocabulary),
)
model = model.to(device)

# Step #3: Cross-entropy loss
loss_func = nn.CrossEntropyLoss()

# Step #4: Adam optimizer over the model's parameters
optimizer = optim.Adam(model.parameters(), lr=learningRate)

## Training Loop

In [6]:
from Common.Trainer import Trainer

# Bundle the dataset, model, loss function and optimizer into one trainer object
trainer_components = dict(
    dataset=dataset,
    model=model,
    loss_func=loss_func,
    optimizer=optimizer,
)
sentiment_analysis_trainer = Trainer(**trainer_components)

In [7]:
# Training hyper-parameters: number of epochs and batch size
num_epochs = 50
batch_size = 64

# Run the training; whatever the trainer returns is kept in `report`
report = sentiment_analysis_trainer.train(
    num_epochs=num_epochs,
    batch_size=batch_size,
    device=device,
)

## Evaluate the results

In [8]:
def evaluate(split):
    """
    Print the loss and accuracy the trainer reports for one dataset split.

    Uses the notebook-level `sentiment_analysis_trainer`, `device` and
    `batch_size`.

    Args:
        split (str): name of the split to evaluate, e.g. "train"
    """
    split_loss, split_accuracy = sentiment_analysis_trainer.evaluate(
        split=split,
        device=device,
        batch_size=batch_size,
    )

    # Both metrics are shown with three decimals
    print(f"Loss: {split_loss:.3f}")
    print(f"Accuracy: {split_accuracy:.3f}")

#### Training Set

In [9]:
# Print loss and accuracy for the "train" split
evaluate(split="train")

Loss: 0.464
Accuracy: 0.831


#### Validation Set

In [10]:
# Print loss and accuracy for the "validation" split
evaluate(split="validation")

Loss: 0.582
Accuracy: 0.688


#### Test Set

In [11]:
# Print loss and accuracy for the "test" split
evaluate(split="test")

Loss: 0.636
Accuracy: 0.625


## Inference and classifying new data points

Let's run inference on new data. This is another way to evaluate the model: it lets us make a qualitative judgement about whether the model is working.

In [12]:
import torch

def predict(text, model, vectorizer):
    """
    Predict the sentiment of the tweet

    The forward pass runs in evaluation mode with gradient tracking disabled;
    the model's previous train/eval mode is restored before returning.

    Args:
        text (str): the text of the tweet
        model (SentimentClassifierPerceptron): the trained model
        vectorizer (TwitterVectorizer): the corresponding vectorizer
    Returns:
        tuple: (sentiment, probability). The sentiment is whatever
        `vectorizer.target_vocabulary.find_index` returns for the winning
        class index; the probability of that prediction is a Python float.
    """
    # vectorize the text of the tweet
    vectorized_text = vectorizer.vectorize(text)

    # the input has to live on the same device as the model's weights
    first_parameter = next(model.parameters(), None)
    if first_parameter is not None:
        model_device = first_parameter.device
    else:
        model_device = torch.device("cpu")

    # make a float32 tensor holding a batch of one example: shape (1, num_features)
    vectorized_text = torch.as_tensor(
        vectorized_text, dtype=torch.float32, device=model_device
    ).reshape(1, -1)

    # run the model on the vectorized text and apply softmax activation function
    # on the outputs; no gradients are needed for inference
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            result = model(vectorized_text, apply_softmax=True)
    finally:
        # leave the model in the mode the caller had it in
        model.train(was_training)

    # find the best class as the one with the highest probability
    probability_values, indices = result.max(dim=1)

    # take only value of the indices tensor
    index = indices.item()

    # decode the predicted target index into the sentiment, using target vocabulary
    predicted_target = vectorizer.target_vocabulary.find_index(index)

    # take only value of the probability_values tensor
    probability_value = probability_values.item()

    return predicted_target, probability_value

Let's try the model on some examples:

In [13]:
# Example tweet with positive wording
text = "This is a good day."

predict(text=text, model=model, vectorizer=vectorizer)

(1.0, 0.7102847695350647)

In [14]:
# Example tweet with negative wording
text = "I was very sad yesterday."

predict(text=text, model=model, vectorizer=vectorizer)

(0.0, 0.5928364396095276)

In [15]:
# Example sentence without obvious sentiment words
text = "This is a book."

predict(text=text, model=model, vectorizer=vectorizer)

(1.0, 0.5666758418083191)

### More detailed evaluation on the Test Set

In [16]:
from sklearn.metrics import classification_report, confusion_matrix

# labels of the test split
y_true = dataset.test_df.target

# run the model on every tweet from the test set, keeping only the predicted
# target (element 0 of the tuple returned by predict)
y_predicted = dataset.test_df.text.apply(
    lambda tweet: predict(text=tweet, model=model, vectorizer=vectorizer)[0]
)

# compare the predictions with the labels
print(classification_report(y_true=y_true, y_pred=y_predicted))

# print the confusion matrix as text (rows: true class, columns: predicted class)
print("Confusion matrix:")
print(confusion_matrix(y_true=y_true, y_pred=y_predicted))

# NOTE(review): in the saved outputs this per-tweet pass reports 0.67 accuracy
# over 100 rows, while evaluate(split="test") reports 0.625 (== 40/64 with
# batch_size 64). Presumably the trainer's batched evaluation does not cover
# every row - confirm in Common.Trainer.

              precision    recall  f1-score   support

         0.0       0.66      0.65      0.65        48
         1.0       0.68      0.69      0.69        52

    accuracy                           0.67       100
   macro avg       0.67      0.67      0.67       100
weighted avg       0.67      0.67      0.67       100

Consfusion matrix:
[[31 17]
 [16 36]]
