In [1]:
# Pipeline helps me to call a pretrained model
from transformers import pipeline
# this classifier has pre-trained model on sentiment analysis
classifier = pipeline("sentiment-analysis")

  from .autonotebook import tqdm as notebook_tqdm
No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


In [2]:
# Checking with a sample sentence
classifier(" ***aADS")


[{'label': 'NEGATIVE', 'score': 0.9825090169906616}]

In [3]:
classifier("I am happy")

[{'label': 'POSITIVE', 'score': 0.9998801946640015}]

In [4]:
# Whenever more than 1 sentences are passed, use list []
results = classifier(["Weare happy to show you the Transformers Library.","We hope you don't hate it."])

In [5]:
for result in results:
  print(f"label:{result['label']}, with score: {round(result['score'],4)}")

label:POSITIVE, with score: 0.9986
label:NEGATIVE, with score: 0.5309


In [6]:
# Downloading the model nlptown/bert-base-multilingual-uncased-sentiment
classifier = pipeline("sentiment-analysis", model="nlptown/bert-base-multilingual-uncased-sentiment")

In [7]:
# SENTIMENT ANALYSIS is FRENCH 
# Here we are getting results in form of stars, the less stars the more bad the sentence is.
# means food is bad
classifier("La nourriture est mauvaise")

[{'label': '1 star', 'score': 0.4531075358390808}]

In [8]:
# means food is good
classifier("La nourriture est la meilleure")

[{'label': '5 stars', 'score': 0.6410296559333801}]

In [9]:
# We hope you don't hate it.
classifier("Nous espérons que vous ne le détestez pas")

[{'label': '5 stars', 'score': 0.35093826055526733}]

In [11]:
# IMPORTING AutoTokenizer, TFAutoModelForSequenceClassification 
# Tokenizer take test data and convert into numerical data. like word embedding. 
# Different tokenizer for different models 
# In order to instantiate the model we need class AutoTokenizer 
# In order to to download the model itself we use TFAutoModelForSequenceClassification

from transformers import AutoTokenizer, TFAutoModelForSequenceClassification 

In [12]:
# For Local Use
model_name = "nlptown/bert-base-multilingual-uncased-sentiment"
# Thi model only exists in PyTorch, so we use the `from_pt` flag to import the model in TensorFlow. 

model = TFAutoModelForSequenceClassification.from_pretrained(model_name, from_pt = True)
tokenizer = AutoTokenizer.from_pretrained(model_name)
classifier = pipeline("sentiment-analysis", model = model, tokenizer = tokenizer)


All PyTorch model weights were used when initializing TFBertForSequenceClassification.

All the weights of TFBertForSequenceClassification were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertForSequenceClassification for predictions without further training.


In [13]:
# Sample Sentence
classifier("I am a good boy")

[{'label': '4 stars', 'score': 0.4229269027709961}]

In [14]:
# Converting an entire sentence into tokens. 
# Tokenizers generate indexes.
inputs = tokenizer("We are happy to show you the Transformers Library.")

In [15]:
print(inputs)

{'input_ids': [101, 11312, 10320, 19308, 10114, 11391, 10855, 10103, 58263, 13299, 119, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}


In [16]:
tf_batch = tokenizer(["We are happy to show you the Transformers Library.","We hope you don't hate it."],
                    #  padding is used for making the sentences equal
                     padding = True,
                    #  If there are any spaces, they will get removed when truncation is True
                     truncation = True,
                    #  Maximum length for a particular sentence will be 512.  
                     max_length = 512,
                     return_tensors = "tf"
                     )

In [17]:
# input_ids, attention_mask, token_type_ids
for key, value in tf_batch.items():
    print(f"{key}: {value.numpy().tolist()}")

input_ids: [[101, 11312, 10320, 19308, 10114, 11391, 10855, 10103, 58263, 13299, 119, 102], [101, 11312, 18763, 10855, 11530, 112, 162, 39487, 10197, 119, 102, 0]]
token_type_ids: [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]
attention_mask: [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0]]
