In [2]:
from PIL.ImagePalette import negative
from transformers import AutoModelForSequenceClassification, AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

In [None]:
from transformers import pipeline

In [3]:
from transformers import (
    SummarizationPipeline,
    TextClassificationPipeline,
    AudioClassificationPipeline,
    ImageSegmentationPipeline
    )

In [9]:
from transformers import pipeline
my_pipeline = pipeline(
    task="text-classification",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)
positive_input = "Hi, welcome to this course"
print(my_pipeline(positive_input))

negative_input = "Oh no!"
print(my_pipeline(negative_input))

Device set to use cpu


[{'label': 'POSITIVE', 'score': 0.9996154308319092}]
[{'label': 'NEGATIVE', 'score': 0.994263231754303}]


# NLP and tokenization

In [11]:
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
sample_text = "HOWDY, hoW aRe yoU?"
normalizer = tokenizer.backend_tokenizer.normalizer.normalize_str(sample_text)
normalizer

'howdy, how are you?'

In [13]:
# using GPT2Tokenizer
from transformers import GPT2Tokenizer, DistilBertTokenizer
sample_text = "HOWDY, hoW aRe yoU?"

gpt_tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
gpt_tokens = gpt_tokenizer.tokenize(sample_text)
print(f"GPT Tokenizer result {gpt_tokens}")

distil_tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
distil_tokens = distil_tokenizer.tokenize(sample_text)
print(f"Distil Tokenizer result {distil_tokens}")

GPT Tokenizer result ['HOW', 'D', 'Y', ',', 'Ġho', 'W', 'Ġa', 'Re', 'Ġyo', 'U', '?']
Distil Tokenizer result ['how', '##dy', ',', 'how', 'are', 'you', '?']


# Text classification
- Sentiment analysis
- Question Natural Language Inference (QNLI)
- Topic modeling
- Grammatical correctness

In [14]:
from transformers import pipeline
classifier = pipeline(task="text-classification")
text = "I love it"
classifier(text)

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
Device set to use cpu


[{'label': 'POSITIVE', 'score': 0.9998799562454224}]

In [16]:
classifier = pipeline(
    task="text-classification",
    model="abdulmatinomotoso/English_Grammar_Checker"
)
text = "i write love cheese book doing"
print(classifier(text)) # [{'label': 'LABEL_0', 'score': 0.9968376159667969}] >>> LABEL_0 means unacceptable

text = "I am studying at the moment."
print(classifier(text)) # [{'label': 'LABEL_1', 'score': 0.9997439980506897}] >> LABEL_1 means acceptable

Device set to use cpu


[{'label': 'LABEL_0', 'score': 0.9968376159667969}]
[{'label': 'LABEL_1', 'score': 0.9997439980506897}]


In [27]:
# QNLI
qnli_pipeline = pipeline(
    task="text-classification",
    model="bert-base-uncased"
)
question = "Where is the capital of Bulgaria?"
answer = "Sofia is the capital of Bulgaria."
result = qnli_pipeline(f"{question} {answer}")
print(result)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Device set to use cpu


[{'label': 'LABEL_1', 'score': 0.5798683166503906}]


In [29]:
# zero-shot classification
classifier = pipeline(
    task="zero-shot-classification",
    model="facebook/bart-large-mnli"
)

text = "There is a war in Africe"
labels = ['politics', 'science', 'technology']

classifier(text, labels)

Device set to use cpu


{'sequence': 'There is a war in Africe',
 'labels': ['politics', 'technology', 'science'],
 'scores': [0.6817732453346252, 0.1779291331768036, 0.14029757678508759]}

# Summarization

In [34]:
from transformers import pipeline
summarize = pipeline(
    task='summarization',
    model='sshleifer/distilbart-cnn-12-6',
    min_length=5,
    max_length=20,
)
text = "The transformers library, developed by Hugging Face, is used for natural language processing (NLP) tasks. It provides pre-trained models and tools for various NLP applications such as text classification, sentiment analysis, summarization, translation, and more. The library supports a wide range of transformer-based models like BERT, GPT, and T5, making it easier to implement and fine-tune these models for specific tasks."

summarize(text)

Device set to use cpu


[{'summary_text': ' The transformers library, developed by Hugging Face, is used for natural language processing'}]