# A quick lil notebook to show how sentiment analysis works using the transformers package

In [12]:
# importing our handy dandy package
from transformers import pipeline
import pandas as pd

In [13]:
# configuration for the pipeline: which checkpoint to load and which task to run
# (honestly, this model was picked only because it's the default)
model_name = 'distilbert-base-uncased-finetuned-sst-2-english'
task = 'sentiment-analysis'

In [14]:
# building the classifier pipeline for the configured task and checkpoint
classifier = pipeline(task=task, model=model_name)

In [15]:
# a handful of short sentences for the model to score
text = [
    # same subject, explicit opinion words
    'this fast food is great',
    'this fast food is bad',
    # same subject, temperature words instead of opinion words
    'this fast food is warm',
    'this fast food is cold',
    # 'cold' again, but applied to a different item
    'this beer is cold',
    'this beer is nice and cold',
    # a statement with no opinion word at all
    'the lines were long'
]

In [16]:
# scoring every sentence with the classifier and displaying the raw results:
# one dict per input sentence, each holding a 'label' and a 'score'
responses = classifier(text)
responses

[{'label': 'POSITIVE', 'score': 0.9998577833175659},
 {'label': 'NEGATIVE', 'score': 0.9998078942298889},
 {'label': 'POSITIVE', 'score': 0.9998494386672974},
 {'label': 'NEGATIVE', 'score': 0.9992634654045105},
 {'label': 'NEGATIVE', 'score': 0.9995645880699158},
 {'label': 'POSITIVE', 'score': 0.9981977343559265},
 {'label': 'NEGATIVE', 'score': 0.9940511584281921}]

In [17]:
# loading the list of result dicts into a pandas DataFrame so it is easier to read;
# each dict becomes a row and its keys ('label', 'score') become the columns
df = pd.DataFrame(responses)
df

Unnamed: 0,label,score
0,POSITIVE,0.999858
1,NEGATIVE,0.999808
2,POSITIVE,0.999849
3,NEGATIVE,0.999263
4,NEGATIVE,0.999565
5,POSITIVE,0.998198
6,NEGATIVE,0.994051


In [18]:
# now adding our text sentences in as a new 'text' column.
# .assign returns a new DataFrame rather than mutating the one the previous
# cell already displayed; rebinding to `df` keeps later cells working as-is.
# The sentences are matched to rows by position, so `text` must have one
# entry per row (pandas raises ValueError on a length mismatch).
df = df.assign(text=text)
df

Unnamed: 0,label,score,text
0,POSITIVE,0.999858,this fast food is great
1,NEGATIVE,0.999808,this fast food is bad
2,POSITIVE,0.999849,this fast food is warm
3,NEGATIVE,0.999263,this fast food is cold
4,NEGATIVE,0.999565,this beer is cold
5,POSITIVE,0.998198,this beer is nice and cold
6,NEGATIVE,0.994051,the lines were long


In [19]:
# finally showing the columns in reading order: sentence first, then its label and score
# (this only changes what is displayed; `df` itself is left as it was)
df.loc[:, ['text', 'label', 'score']]

Unnamed: 0,text,label,score
0,this fast food is great,POSITIVE,0.999858
1,this fast food is bad,NEGATIVE,0.999808
2,this fast food is warm,POSITIVE,0.999849
3,this fast food is cold,NEGATIVE,0.999263
4,this beer is cold,NEGATIVE,0.999565
5,this beer is nice and cold,POSITIVE,0.998198
6,the lines were long,NEGATIVE,0.994051


In [20]:
# we can see that it's not perfect: 'this beer is cold' is labelled NEGATIVE,
# even though beer is more commonly drunk cold.
# I also noticed it gave weird predictions when I would submit a single letter
# or number such as 'a' or 3