### Importing Libraries

In [7]:
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
#nltk.downloader.download('vader_lexicon')
import pandas as pd
import torch
from scipy.special import softmax
from transformers import AutoModelForSequenceClassification, AutoTokenizer

### Setting up the two approaches 

#### VADER

In [13]:
# Build the shared VADER analyzer. If the 'vader_lexicon' resource is not
# installed the constructor raises LookupError, so fetch it once and retry
# rather than depending on a manually executed download step.
try:
    sia = SentimentIntensityAnalyzer()
except LookupError:
    nltk.download('vader_lexicon')
    sia = SentimentIntensityAnalyzer()

def do_analysis_vader(text):
    """Score ``text`` with the module-level VADER analyzer ``sia``.

    Returns the object produced by ``sia.polarity_scores(text)`` unchanged.
    Elsewhere in this notebook that result is indexed with the keys
    "neg", "neu" and "pos".
    """
    polarity = sia.polarity_scores(text)
    return polarity

#### Encoder-Only Transformer Model

In [5]:
# Checkpoint identifier used for the transformer-based approach.
# (Plain string: the original f-string had no placeholders.)
MODEL_NAME = "cardiffnlp/twitter-roberta-base-sentiment-latest"
# Load the tokenizer and the sequence-classification model for that checkpoint.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

def do_analysis_transformer(text, max_length=512):
    """Return softmax-normalised class scores for ``text`` from ``model``.

    Parameters
    ----------
    text : str
        Text to classify.
    max_length : int, optional
        Upper bound on the number of tokens fed to the model. Longer inputs
        are truncated so that long documents do not exceed the model's
        position limit. Defaults to 512 (presumably the limit for this
        RoBERTa checkpoint; confirm against ``model.config``).

    Returns
    -------
    numpy.ndarray
        1-D array of scores that sum to 1, one entry per model class.
        NOTE(review): the class order is defined by the checkpoint; callers
        in this notebook treat indices 0/1/2 as negative/neutral/positive.
        Verify against ``model.config.id2label``.
    """
    # Explicit max_length: truncation=True alone relies on the tokenizer's
    # configured model_max_length, which may not be set for every checkpoint.
    tokens = tokenizer(
        text,
        return_tensors='pt',
        truncation=True,
        max_length=max_length,
    )
    # Inference only: skip autograd bookkeeping instead of detaching afterwards.
    with torch.no_grad():
        output = model(**tokens)
    # First output field (presumably the logits), first and only batch item.
    logits = output[0][0].numpy()
    return softmax(logits)

Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


### Method to Compare Approaches

In [25]:
def compare_approaches(text):
    """Print VADER and transformer sentiment scores for ``text`` side by side.

    Runs ``do_analysis_vader`` and ``do_analysis_transformer`` on the same
    text, lines their scores up in a three-row table (Negative, Neutral,
    Positive) and prints the text followed by the table. Returns nothing.

    NOTE(review): transformer indices 0, 1, 2 are labelled Negative, Neutral,
    Positive here; this assumes the model's class order and should be confirmed.
    """
    vader_scores = do_analysis_vader(text)
    transformer_scores = do_analysis_transformer(text)

    # One column per approach, rows aligned with the "Category" labels.
    comparison = pd.DataFrame({
        "Category": ["Negative", "Neutral", "Positive"],
        "VADER": [vader_scores[key] for key in ("neg", "neu", "pos")],
        "Transformer": [transformer_scores[idx] for idx in range(3)],
    })

    print("Text : ", text)
    print("\nResults\n", comparison)

### Comparison

#### Comparison using regular text messages

In [26]:
# Probe: a mishap described without emotionally charged words.
compare_approaches("I fell in the mud today")

Text :  I fell in the mud today

Results
    Category  VADER  Transformer
0  Negative    0.0     0.780168
1   Neutral    1.0     0.199560
2  Positive    0.0     0.020272


In [27]:
# Probe: an explicitly negative word ("hate").
compare_approaches("I hate my life")

Text :  I hate my life

Results
    Category  VADER  Transformer
0  Negative  0.649     0.854381
1   Neutral  0.351     0.114677
2  Positive  0.000     0.030941


In [28]:
# Probe: presumably sarcasm, with a positive word ("Great") next to a negative outcome.
compare_approaches("Oh Great! I lost")

Text :  Oh Great! I lost

Results
    Category  VADER  Transformer
0  Negative  0.299     0.844328
1   Neutral  0.130     0.118721
2  Positive  0.571     0.036952


In [31]:
# Probe: negation of a positive word ("not ... funny").
compare_approaches("You're really not that funny")

Text :  You're really not that funny

Results
    Category  VADER  Transformer
0  Negative    0.4     0.889141
1   Neutral    0.6     0.096598
2  Positive    0.0     0.014261


In [32]:
# Probe: emoticon-only input (smiley).
compare_approaches(":)")

Text :  :)

Results
    Category  VADER  Transformer
0  Negative    0.0     0.011871
1   Neutral    0.0     0.046397
2  Positive    1.0     0.941732


In [33]:
# Probe: emoticon-only input (frown).
compare_approaches(":(")

Text :  :(

Results
    Category  VADER  Transformer
0  Negative    1.0     0.654244
1   Neutral    0.0     0.266964
2  Positive    0.0     0.078792


In [34]:
# Probe: emoticon-only input (":D").
compare_approaches(":D")

Text :  :D

Results
    Category  VADER  Transformer
0  Negative    0.0     0.010884
1   Neutral    0.0     0.051635
2  Positive    1.0     0.937480


In [35]:
# Probe: mixed signals, with negative wording followed by a smiley emoticon.
compare_approaches("I hate myself :)")

Text :  I hate myself :)

Results
    Category  VADER  Transformer
0  Negative  0.481     0.891915
1   Neutral  0.130     0.087239
2  Positive  0.390     0.020846


#### Comparison using a data set of reviews