In [4]:
# AutoTokenizer converts raw text into token ids.
# AutoModelForSequenceClassification loads the model together with its
# sequence-classification head (here: is a sentence positive or negative?).
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
)

# Name of the pretrained checkpoint shared by the tokenizer and the model.
checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"

# Load the model and its matching tokenizer from that same checkpoint so the
# token ids produced by `tokenizer` are the ones `model` expects.
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

In [16]:
# Example sentences (plain, untokenized text) that the later cells classify.
raw_inputs = [
    "Potato wedges probably are not best for relationships.",
    "No matter how beautiful the sunset, it saddened her knowing she was one day older.",
    "Everything was going so well until I was accosted by a purple giraffe.",
    "They say people remember important moments in their life well, yet no one even remembers their own birth.",
]

In [17]:
# Tokenize the whole batch so the text is in a format the model can read.
#   padding=True        -> pad shorter sentences up to the longest one in the batch
#   truncation=True     -> clip any sentence longer than the model's maximum length
#                          instead of letting it fail later in the forward pass
#   return_tensors='pt' -> return PyTorch tensors
inputs = tokenizer(raw_inputs, padding=True, truncation=True, return_tensors='pt')

# Display the encoded batch: input_ids plus the attention_mask that marks padding with 0.
inputs

{'input_ids': tensor([[  101, 14557, 17632,  2015,  2763,  2024,  2025,  2190,  2005,  6550,
          1012,   102,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0],
        [  101,  2053,  3043,  2129,  3376,  1996, 10434,  1010,  2009,  6517,
         24589,  2014,  4209,  2016,  2001,  2028,  2154,  3080,  1012,   102,
             0,     0],
        [  101,  2673,  2001,  2183,  2061,  2092,  2127,  1045,  2001, 16222,
         14122,  2098,  2011,  1037,  6379, 21025, 27528,  7959,  1012,   102,
             0,     0],
        [  101,  2027,  2360,  2111,  3342,  2590,  5312,  1999,  2037,  2166,
          2092,  1010,  2664,  2053,  2028,  2130, 17749,  2037,  2219,  4182,
          1012,   102]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0],
        [1

In [18]:
# Run the model on the tokenizer's output. `inputs` is dict-like, so ** unpacks
# input_ids and attention_mask as keyword arguments.
import torch

# This is inference only, so turn off gradient tracking. Without no_grad() every
# forward pass builds an autograd graph (it shows up as grad_fn=... on the
# result) that costs memory and is never used here.
with torch.no_grad():
    outputs = model(**inputs)

# The model returns logits: raw, unnormalized scores with one row per sentence
# and one column per class. Basically this isn't the final output yet, because
# no softmax has been applied.
print(outputs.logits)

tensor([[ 4.3587, -3.5323],
        [ 0.9353, -0.7998],
        [-0.7023,  0.8136],
        [ 3.2423, -2.6515]], grad_fn=<AddmmBackward0>)


In [19]:
# Final output: softmax over the last (class) dimension turns each row of
# logits into probabilities that sum to 1.
import torch

predictions = torch.softmax(outputs.logits, dim=-1)
predictions

tensor([[9.9963e-01, 3.7393e-04],
        [8.5006e-01, 1.4994e-01],
        [1.8006e-01, 8.1994e-01],
        [9.9725e-01, 2.7489e-03]], grad_fn=<SoftmaxBackward0>)

In [23]:
# The pipeline() helper bundles all of the steps above -- tokenization, the
# forward pass and the softmax -- into a single call that returns one
# label/score pair per sentence.
from transformers import pipeline

easy_model = pipeline(task="sentiment-analysis", model=checkpoint)

easy_model(raw_inputs)

[{'label': 'NEGATIVE', 'score': 0.9996260404586792},
 {'label': 'NEGATIVE', 'score': 0.8500564694404602},
 {'label': 'POSITIVE', 'score': 0.8199344277381897},
 {'label': 'NEGATIVE', 'score': 0.9972510933876038}]