In [None]:
# Run this to download necessary libraries (If you have done this in the past in any notebook, you don't need to do this)
!pip install transformers[sentencepiece] # HG Transformer’s Library
!pip install datasets # HG Dataset’s Library
!pip install huggingface_hub # HG sharing Library
!pip install torch torchvision -U # Pytorch
# Remember to turn on T4 GPU Accelerator under 'Runtime'->'Change runtime type'

### **`Instructions: Run Every Block of Code in Order!!!`**

In [1]:
# This is the program we are trying to replicate
from transformers import pipeline
classifier = pipeline("sentiment-analysis")
output = classifier(
    [
        "I've been waiting for a HuggingFace course my whole life.",
        "I hate this so much!",
    ]
)
print(output)
# Output
#[{'label': 'POSITIVE', 'score': 0.9598047137260437},
# {'label': 'NEGATIVE', 'score': 0.9994558095932007}]


No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

[{'label': 'POSITIVE', 'score': 0.9598048329353333}, {'label': 'NEGATIVE', 'score': 0.9994558691978455}]


## **`Step 1: Intialize Tokenizer and Tokenize the Input`**

In [2]:
# Step 1: Intialize Tokenizer and Tokenize the Input
from transformers import AutoTokenizer # Import AutoTokenizer
checkpoint = "distilbert-base-uncased-finetuned-sst-2-english" # Get Model Name
tokenizer = AutoTokenizer.from_pretrained(checkpoint) # Define tokenizer using AutoTokenizer.from_pretrained()
raw_inputs = ["I've been waiting for a HuggingFace course my whole life.","I hate this so much!"] # Define Inputs
inputs = tokenizer(raw_inputs, padding=True, truncation=True, return_tensors="pt") # Tokenize the inputs using defined Tokenizer
print(inputs) # Output is a dictionary, where 'input_ids' are the tokens



tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

{'input_ids': tensor([[  101,  1045,  1005,  2310,  2042,  3403,  2005,  1037, 17662, 12172,
          2607,  2026,  2878,  2166,  1012,   102],
        [  101,  1045,  5223,  2023,  2061,  2172,   999,   102,     0,     0,
             0,     0,     0,     0,     0,     0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]])}


## **`Step 2: Intialize the AutoModel* and feed tokens into the model`**

In [9]:
# Step 2: Intialize the AutoModel* and feed tokens from before into the model
from transformers import AutoModelForSequenceClassification # Import AutoModelForSequenceClassification
checkpoint = "distilbert-base-uncased-finetuned-sst-2-english" # Get Model from before
model = AutoModelForSequenceClassification.from_pretrained(checkpoint) # Initialize Model
outputs = model(**inputs) # Feed the outputs from the Tokenizer, unpacking the dictionary using “**”
print(outputs) # The Outputs are Logits that need to be post-processed


SequenceClassifierOutput(loss=None, logits=tensor([[-1.5607,  1.6123],
        [ 4.1692, -3.3464]], grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)


In [None]:
# NOT A STEP, but example of AutoModel in Use (IGNORE THIS If YOU WOULD LIKE)
from transformers import AutoModel # Import AutoModel
checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"  # Get Model Name
autoModel = AutoModel.from_pretrained(checkpoint) # Define model using AutoModel.from_pretrained()
autoModelOutputs = autoModel(**inputs) # ** means to unpack, as outputs is a dictionary
print(autoModelOutputs.last_hidden_state.shape)


### **`Step 3: Post-Processing the Logits to get outputs`**

In [2]:
# Step 3: Post-Processing the Logits to get outputs
import torch # Import Pytorch
# Use Pytorch’s softmax to convert logits to readable outputs
# dim = -1 means to apply it to the last dimension where the logits are located
predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
print(predictions)
# The Predictions out of standard notation are [.0402, .9580], [.9995, .0054], where the first number of each set shows how negative something is, and the second shows how positive.
