## Notebook exploring existing state of the art models

We evaluated existing pre-trained models on the IMDb test set to establish a baseline benchmark.

The following models were tested
1. Textblob
2. Vader
3. Flair
4. Hugging Face Models

In [1]:
import os
from tqdm import tqdm
from sklearn.metrics import precision_score, recall_score, accuracy_score, f1_score

### Data loading

In [3]:
#@save
def read_imdb(data_dir, is_train):
    """Read the IMDb review dataset text sequences and labels.

    Args:
        data_dir: Root folder of the dataset. It must contain ``train`` and
            ``test`` sub-folders, each holding ``neg`` and ``pos`` folders
            with one review per file.
        is_train: If truthy, read the ``train`` split; otherwise read ``test``.

    Returns:
        A ``(data, labels)`` tuple of two equally long lists: the review
        texts, and their labels (0 for ``neg``, 1 for ``pos``). All negative
        reviews come first, and files are read in sorted filename order.

    Raises:
        FileNotFoundError: If an expected split/label folder does not exist.
    """
    data = []
    labels = []
    data_folder = 'train' if is_train else 'test'

    for label, label_folder in enumerate(['neg', 'pos']):
        # Retrieve full path
        full_path = os.path.join(data_dir, data_folder, label_folder)
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # sample order identical from run to run and across machines.
        for text_file in sorted(os.listdir(full_path)):
            file_path = os.path.join(full_path, text_file)
            # Skip stray sub-directories (e.g. .ipynb_checkpoints), which
            # cannot be opened as a review file.
            if not os.path.isfile(file_path):
                continue
            with open(file_path, 'r', encoding='utf-8') as f:
                data.append(f.read())
            labels.append(label)

    return data, labels

# Root folder of the IMDb dataset, given relative to the notebook's working directory.
data_dir = "../data/aclImdb"
# (texts, labels) tuple for the test split; later cells read it as test_data[0] / test_data[1].
test_data = read_imdb(data_dir, is_train=False)

### Textblob
TextBlob provides both a NaiveBayesAnalyzer and a PatternAnalyzer.

We only test the PatternAnalyzer, because the NaiveBayesAnalyzer is too time-consuming (it runs only on the CPU).

In [6]:
from textblob import TextBlob
from textblob.sentiments import NaiveBayesAnalyzer,PatternAnalyzer

# Textblob Pattern Analyzer, returns (polarity=[-1,1], subjectivity=[0,1])
# Build the analyzer once and reuse it: constructing a new PatternAnalyzer for
# each of the reviews is loop-invariant work.
pattern_analyzer = PatternAnalyzer()
prediction_list = []

# Only the review text is needed to predict; the labels are used afterwards
# when scoring.
for sentence in tqdm(test_data[0]):
    polarity = TextBlob(sentence, analyzer=pattern_analyzer).sentiment.polarity
    # A polarity of exactly 0 (neutral) is counted as positive.
    if polarity >= 0:
        prediction_list.append(1) #positive
    else:
        prediction_list.append(0) #negative

print("Precision Score:", precision_score(test_data[1], prediction_list))
print("Recall Score:", recall_score(test_data[1], prediction_list))
print("Accuracy Score:", accuracy_score(test_data[1], prediction_list))
print("F1 Score:", f1_score(test_data[1], prediction_list))

100%|██████████| 25000/25000 [00:15<00:00, 1653.66it/s]

Precision Score: 0.626313288633124
Recall Score: 0.94904
Accuracy Score: 0.6914
F1 Score: 0.7546197640024173





### Vader

In [7]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# polarity_scores() yields a dict with 'neg', 'neu', 'pos' and 'compound' entries.
# 'compound' is the combined valence score, normalised to [-1, 1]
# (-1 for negative, 1 for positive).
sentiment = SentimentIntensityAnalyzer()

# A compound score of 0 or above maps to class 1 (positive), anything below to class 0 (negative).
prediction_list = [
    1 if sentiment.polarity_scores(review)['compound'] >= 0 else 0
    for review, _label in tqdm(zip(test_data[0], test_data[1]), total=len(test_data[0]))
]

# Report the same four metrics, in the same order, against the true labels.
for metric_title, metric_fn in (
    ("Precision Score:", precision_score),
    ("Recall Score:", recall_score),
    ("Accuracy Score:", accuracy_score),
    ("F1 Score:", f1_score),
):
    print(metric_title, metric_fn(test_data[1], prediction_list))

100%|██████████| 25000/25000 [00:59<00:00, 421.95it/s]

Precision Score: 0.6479836773883821
Recall Score: 0.86384
Accuracy Score: 0.69728
F1 Score: 0.7405019887532575





### Flair

In [8]:
from flair.data import Sentence
from flair.nn import Classifier

# Load Flair's pre-trained 'sentiment' classifier once, up front.
# After predict(), the Sentence carries a label whose value is compared to 'POSITIVE'.
sentiment = Classifier.load('sentiment')
prediction_list = []

for review, _label in tqdm(zip(test_data[0], test_data[1]), total=len(test_data[0])):
    flair_sentence = Sentence(review)  # wrap the raw text in a Flair Sentence
    sentiment.predict(flair_sentence)  # labels are read back from the Sentence below
    top_label = flair_sentence.labels[0].value
    # 'POSITIVE' maps to class 1; any other label maps to class 0.
    prediction_list.append(1 if top_label == 'POSITIVE' else 0)

# Report the same four metrics, in the same order, against the true labels.
for metric_title, metric_fn in (
    ("Precision Score:", precision_score),
    ("Recall Score:", recall_score),
    ("Accuracy Score:", accuracy_score),
    ("F1 Score:", f1_score),
):
    print(metric_title, metric_fn(test_data[1], prediction_list))

  attn_output = torch.nn.functional.scaled_dot_product_attention(
100%|██████████| 25000/25000 [03:12<00:00, 129.64it/s]


Precision Score: 0.9527124773960217
Recall Score: 0.84296
Accuracy Score: 0.90056
F1 Score: 0.8944821731748727


### HuggingFace DistilBert

In [9]:
import torch
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification

tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
model = DistilBertForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")

prediction_list = []
# DistilBert, returns class where 0 is negative, 1 is positive

# Only the review text is needed to predict; labels are used afterwards when scoring.
for sentence in tqdm(test_data[0]):
    # Reviews are scored one at a time, so there is no batch to pad against.
    # Padding each input to 512 tokens only adds masked positions the model
    # still has to process; the attention mask already excludes them, so the
    # logits should match up to floating-point noise.
    # Truncation still caps long reviews at 512 tokens.
    emb_sentence = tokenizer(sentence, truncation=True, max_length=512, return_tensors="pt")
    # Inference only: no gradient tracking needed
    with torch.no_grad():
        logits = model(**emb_sentence).logits
    # One row of logits (batch of 1): pick the index of the highest-scoring class
    predicted_class = logits.argmax(dim=-1).item()
    prediction_list.append(predicted_class)

print("Precision Score:", precision_score(test_data[1], prediction_list))
print("Recall Score:", recall_score(test_data[1], prediction_list))
print("Accuracy Score:", accuracy_score(test_data[1], prediction_list))
print("F1 Score:", f1_score(test_data[1], prediction_list))

100%|██████████| 25000/25000 [2:19:47<00:00,  2.98it/s]  

Precision Score: 0.9146010186757215
Recall Score: 0.86192
Accuracy Score: 0.89072
F1 Score: 0.8874794069192751





In [5]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

tokenizer = AutoTokenizer.from_pretrained("Ibrahim-Alam/finetuning-xlnet-base-cased-on-imdb")
model = AutoModelForSequenceClassification.from_pretrained("Ibrahim-Alam/finetuning-xlnet-base-cased-on-imdb")

prediction_list = []
# Finetuned XLNet, returns class where 0 is negative, 1 is positive

# Only the review text is needed to predict; labels are used afterwards when scoring.
for sentence in tqdm(test_data[0]):
    # Each review is truncated to at most 128 tokens.
    # NOTE(review): 128 tokens is likely shorter than many reviews - confirm
    # this limit is intentional.
    # Reviews are scored one at a time, so there is no batch to pad against.
    # Padding each input to max_length only adds masked positions the model
    # still has to process; the attention mask already excludes them, so the
    # logits should match up to floating-point noise.
    emb_sentence = tokenizer(sentence, truncation=True, max_length=128, return_tensors="pt")
    # Inference only: no gradient tracking needed
    with torch.no_grad():
        logits = model(**emb_sentence).logits
    # One row of logits (batch of 1): pick the index of the highest-scoring class
    predicted_class = logits.argmax(dim=-1).item()
    prediction_list.append(predicted_class)

print("Precision Score:", precision_score(test_data[1], prediction_list))
print("Recall Score:", recall_score(test_data[1], prediction_list))
print("Accuracy Score:", accuracy_score(test_data[1], prediction_list))
print("F1 Score:", f1_score(test_data[1], prediction_list))

100%|██████████| 25000/25000 [1:13:55<00:00,  5.64it/s]

Precision Score: 0.8946113495469719
Recall Score: 0.90048
Accuracy Score: 0.8972
F1 Score: 0.8975360816521808



