In [1]:
!nvidia-smi

Wed Nov  5 12:22:31 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   54C    P8             12W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [2]:
!pip install transformers



# Hugging Face Task

In [3]:
from transformers import pipeline

#------------------------------------------------#
#                   NLP TASKS                    #
#------------------------------------------------#

'''
1. Text Classification: Assigning a category to a piece of text.
   Examples: Sentiment Analysis, Topic Classification, Spam Detection
'''
classifier = pipeline("text-classification")

'''
2. Token Classification: Assigning labels to individual tokens in a sequence.
   Examples: Named Entity Recognition (NER), Part-of-Speech Tagging
'''
token_classifier = pipeline("token-classification")

'''
3. Question Answering: Extracting an answer from a context based on a question.
'''
question_answer = pipeline("question-answering")

'''
4. Text Generation: Generating text based on a given prompt.
   Examples: Language Modeling, Story Generation
'''
text_generator = pipeline("text-generation")

'''
5. Summarization: Condensing long documents into shorter summaries.
'''
summarizer = pipeline("summarization")

'''
6. Translation: Translating text from one language to another.
'''
translator = pipeline("translation", model="Helsinki-NLP/opus-mt-en-fr")

'''
7. Text2Text Generation: General-purpose text transformation
   Examples: Summarization, Translation, Grammar Fixing, Paraphrasing
'''
text2text_generator = pipeline("text2text-generation")

'''
8. Fill-Mask: Predicting the missing (masked) token in a sentence.
'''
fill_mask = pipeline("fill-mask")


#------------------------------------------------#
#             SPEECH PROCESSING TASKS            #
#------------------------------------------------#

'''
1. Automatic Speech Recognition (ASR): Convert speech → text.
'''
asr = pipeline("automatic-speech-recognition")

'''
2. Text-to-Speech (TTS): Convert text → spoken audio.
(Only works with models that support TTS)
'''
# Example model: "facebook/fastspeech2-en-ljspeech"
# text_to_speech = pipeline("text-to-speech", model="facebook/fastspeech2-en-ljspeech")


#------------------------------------------------#
#             COMPUTER VISION TASKS              #
#------------------------------------------------#

'''
1. Image Classification: Assigning a label to an image.
'''
image_classifier = pipeline("image-classification")

'''
2. Object Detection: Detecting objects in an image with bounding boxes.
'''
object_detector = pipeline("object-detection")

'''
3. Image Segmentation: Classifying each pixel in an image.
(semantic or instance segmentation)
'''
image_segmenter = pipeline("image-segmentation")

'''
4. Image-to-Text (Captioning): Generate a sentence describing an image.
'''
image_captioner = pipeline("image-to-image")  # or vision-to-text

'''
5. Zero-Shot Image Classification: Classify images without training for that class.
'''
zero_shot_image = pipeline("zero-shot-image-classification")


#------------------------------------------------#
#              MULTI-MODAL TASKS                 #
#------------------------------------------------#

'''
1. Visual Question Answering (VQA): Answer a question about an image.
'''
# vqa = pipeline("visual-question-answering")

'''
2. Document Question Answering: Extract answers from PDFs/Scanned Docs.
'''
doc_qa = pipeline("document-question-answering")

'''
3. Multi-Modal Embeddings: Text + Image embedding for retrieval/search.
'''
# multi_embedding = pipeline("feature-extraction", model="openai/clip-vit-base-patch32")


#------------------------------------------------#
#                OTHER USEFUL TASKS              #
#------------------------------------------------#

'''
1. Zero-Shot Text Classification: Classify text into labels not seen during training.
'''
zero_shot_classifier = pipeline("zero-shot-classification")

'''
2. Feature Extraction: Converts text into numerical embeddings (vector form).
Used for similarity, clustering, retrieval, etc.
'''
features = pipeline("feature-extraction")

'''
3. Conversational AI / Chat Models: Dialogue-based generation.
'''
chatbot = pipeline("conversational")

'''
4. Text Similarity / Semantic Search (using embeddings + cosine similarity)
(No direct pipeline, done via vector models like sentence-transformers)
'''

'''
5. Code Generation / Code Completion (LLM for programming tasks)
Example models: Salesforce/codegen, StarCoder, etc.
'''


#------------------------------------------------#
#              END OF TASK DEFINITIONS           #
#------------------------------------------------#


No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

Device set to use cuda:0
No model was supplied, defaulted to dbmdz/bert-large-cased-finetuned-conll03-english and revision 4c53496 (https://huggingface.co/dbmdz/bert-large-cased-finetuned-conll03-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


config.json:   0%|          | 0.00/998 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.33G [00:00<?, ?B/s]

Some weights of the model checkpoint at dbmdz/bert-large-cased-finetuned-conll03-english were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


tokenizer_config.json:   0%|          | 0.00/60.0 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

Device set to use cuda:0
No model was supplied, defaulted to distilbert/distilbert-base-cased-distilled-squad and revision 564e9b5 (https://huggingface.co/distilbert/distilbert-base-cased-distilled-squad).
Using a pipeline without specifying a model name and revision in production is not recommended.


config.json:   0%|          | 0.00/473 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/261M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Device set to use cuda:0
No model was supplied, defaulted to openai-community/gpt2 and revision 607a30d (https://huggingface.co/openai-community/gpt2).
Using a pipeline without specifying a model name and revision in production is not recommended.


config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Device set to use cuda:0
No model was supplied, defaulted to sshleifer/distilbart-cnn-12-6 and revision a4f8f3e (https://huggingface.co/sshleifer/distilbart-cnn-12-6).
Using a pipeline without specifying a model name and revision in production is not recommended.


config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

Device set to use cuda:0


config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/301M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/301M [00:00<?, ?B/s]

source.spm:   0%|          | 0.00/778k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/802k [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

Device set to use cuda:0
No model was supplied, defaulted to google-t5/t5-base and revision a9723ea (https://huggingface.co/google-t5/t5-base).
Using a pipeline without specifying a model name and revision in production is not recommended.


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/892M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Device set to use cuda:0
No model was supplied, defaulted to distilbert/distilroberta-base and revision fb53ab8 (https://huggingface.co/distilbert/distilroberta-base).
Using a pipeline without specifying a model name and revision in production is not recommended.


config.json:   0%|          | 0.00/480 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/331M [00:00<?, ?B/s]

Some weights of the model checkpoint at distilbert/distilroberta-base were not used when initializing RobertaForMaskedLM: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Device set to use cuda:0
No model was supplied, defaulted to facebook/wav2vec2-base-960h and revision 22aad52 (https://huggingface.co/facebook/wav2vec2-base-960h).
Using a pipeline without specifying a model name and revision in production is not recommended.


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/378M [00:00<?, ?B/s]

Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


tokenizer_config.json:   0%|          | 0.00/163 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/291 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/85.0 [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/159 [00:00<?, ?B/s]

Device set to use cuda:0
No model was supplied, defaulted to google/vit-base-patch16-224 and revision 3f49326 (https://huggingface.co/google/vit-base-patch16-224).
Using a pipeline without specifying a model name and revision in production is not recommended.


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/160 [00:00<?, ?B/s]

Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.
Device set to use cuda:0
No model was supplied, defaulted to facebook/detr-resnet-50 and revision 1d5f47b (https://huggingface.co/facebook/detr-resnet-50).
Using a pipeline without specifying a model name and revision in production is not recommended.


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/167M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/102M [00:00<?, ?B/s]

Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


preprocessor_config.json:   0%|          | 0.00/290 [00:00<?, ?B/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
Device set to use cuda:0
No model was supplied, defaulted to facebook/detr-resnet-50-panoptic and revision d53b52a (https://huggingface.co/facebook/detr-resnet-50-panoptic).
Using a pipeline without specifying a model name and revision in production is not recommended.


config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/172M [00:00<?, ?B/s]

Some weights of the model checkpoint at facebook/detr-resnet-50-panoptic were not used when initializing DetrForSegmentation: ['detr.model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'detr.model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'detr.model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'detr.model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForSegmentation from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForSegmentation from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


preprocessor_config.json:   0%|          | 0.00/289 [00:00<?, ?B/s]

Device set to use cuda:0
No model was supplied, defaulted to caidas/swin2SR-classical-sr-x2-64 and revision cee1c92 (https://huggingface.co/caidas/swin2SR-classical-sr-x2-64).
Using a pipeline without specifying a model name and revision in production is not recommended.


config.json:   0%|          | 0.00/772 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/172M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/48.5M [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/152 [00:00<?, ?B/s]

`self.pad_size` attribute is deprecated and will be removed in v5. Use `self.size_divisor` instead
Device set to use cuda:0
No model was supplied, defaulted to openai/clip-vit-base-patch32 and revision 3d74acf (https://huggingface.co/openai/clip-vit-base-patch32).
Using a pipeline without specifying a model name and revision in production is not recommended.


config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/605M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/592 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/605M [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/389 [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/316 [00:00<?, ?B/s]

Device set to use cuda:0
No model was supplied, defaulted to impira/layoutlm-document-qa and revision beed3c4 (https://huggingface.co/impira/layoutlm-document-qa).
Using a pipeline without specifying a model name and revision in production is not recommended.


config.json:   0%|          | 0.00/789 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/511M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/315 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

Device set to use cuda:0
No model was supplied, defaulted to facebook/bart-large-mnli and revision d7645e1 (https://huggingface.co/facebook/bart-large-mnli).
Using a pipeline without specifying a model name and revision in production is not recommended.


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Device set to use cuda:0
No model was supplied, defaulted to distilbert/distilbert-base-cased and revision 6ea8117 (https://huggingface.co/distilbert/distilbert-base-cased).
Using a pipeline without specifying a model name and revision in production is not recommended.


config.json:   0%|          | 0.00/465 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/263M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Device set to use cuda:0


KeyError: "Unknown task conversational, available tasks are ['audio-classification', 'automatic-speech-recognition', 'depth-estimation', 'document-question-answering', 'feature-extraction', 'fill-mask', 'image-classification', 'image-feature-extraction', 'image-segmentation', 'image-text-to-text', 'image-to-image', 'image-to-text', 'keypoint-matching', 'mask-generation', 'ner', 'object-detection', 'question-answering', 'sentiment-analysis', 'summarization', 'table-question-answering', 'text-classification', 'text-generation', 'text-to-audio', 'text-to-speech', 'text2text-generation', 'token-classification', 'translation', 'video-classification', 'visual-question-answering', 'vqa', 'zero-shot-audio-classification', 'zero-shot-classification', 'zero-shot-image-classification', 'zero-shot-object-detection', 'translation_XX_to_YY']"

In [4]:
from transformers import pipeline
classifier = pipeline("sentiment-analysis")
result = classifier("I was not so happy with the Last Mission Impossible Movie")
print(result)

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
Device set to use cuda:0


[{'label': 'NEGATIVE', 'score': 0.9996588230133057}]


In [5]:
pipeline(task = "sentiment-analysis")("I was confused with the Barbie Movie")

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
Device set to use cuda:0


[{'label': 'NEGATIVE', 'score': 0.9992005228996277}]

In [7]:
pipeline(task = "sentiment-analysis")\
("Everyday lots of LLMs paper are published about LLMs Evaluation. \
Lots of them looks very promising. \
I am not sure if we can actually evaluate LLMs. \
There is still a lot to do. \
Don't you think?")

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
Device set to use cuda:0


[{'label': 'POSITIVE', 'score': 0.9575052857398987}]

In [8]:
pipeline(task = "sentiment-analysis", model = "BAAI/bge-reranker-v2-m3")\
                                                                        ("Everyday lots of LLMs paper are published about LLMs Evaluation. \
                                                                        Lots of them looks very promising. \
                                                                        I am not sure if we can actually evaluate LLMs. \
                                                                        There is still a lot to do. \
                                                                        Don't you think?")

config.json:   0%|          | 0.00/795 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.27G [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/964 [00:00<?, ?B/s]

Device set to use cuda:0


[{'label': 'LABEL_0', 'score': 0.00028980919159948826}]

In [10]:
pipeline(task = "sentiment-analysis", model = "textattack/facebook-bart-large-MNLI")\
                                                                        ("Everyday lots of LLMs paper are published about LLMs Evaluation. \
                                                                        Lots of them looks very promising. \
                                                                        I am not sure if we can actually evaluate LLMs. \
                                                                        There is still a lot to do. \
                                                                        Don't you think?")

config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/150 [00:00<?, ?B/s]

Device set to use cuda:0


[{'label': 'LABEL_2', 'score': 0.9789328575134277}]

# Batch Sentiment Analysis

In [16]:
classifier = pipeline(task = "sentiment-analysis")

task_list = ["I really like Autoencoder, best models for Anomaly Detection. I can not like it more than that. For me those are the best models.", \
             "I am not sure if we CAN actually Evaluate LLMs.", \
             "Passive Agressive is the name of a Linear regression Model that so many people do not know.", \
             "I hate long meeting."]
classifier(task_list)

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
Device set to use cuda:0


[{'label': 'POSITIVE', 'score': 0.999657392501831},
 {'label': 'NEGATIVE', 'score': 0.9995476603507996},
 {'label': 'NEGATIVE', 'score': 0.9984076619148254},
 {'label': 'NEGATIVE', 'score': 0.9881011247634888}]

In [17]:
classifier = pipeline(task = "sentiment-analysis", model= "SamLowe/roberta-base-go_emotions")

task_list = ["I really like Autoencoder, best models for Anomaly Detection. I can not like it more than that. For me those are the best models.", \
             "I am not sure if we CAN actually Evaluate LLMs.", \
             "Passive Agressive is the name of a Linear regression Model that so many people do not know.", \
             "I hate long meeting."]
classifier(task_list)

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/380 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/280 [00:00<?, ?B/s]

Device set to use cuda:0


[{'label': 'admiration', 'score': 0.7806829214096069},
 {'label': 'confusion', 'score': 0.9066852331161499},
 {'label': 'neutral', 'score': 0.7646916508674622},
 {'label': 'anger', 'score': 0.7925534844398499}]

# Text Generation

In [26]:
# Use a Pipeline as a high-level helper
from transformers import pipeline
text_generator  = pipeline("text-generation", model = "distilbert/distilgpt2")
generated_text = text_generator("Today, is a rainy day in Lagos. How is the weather at your side?",
                                truncation = True,
                                num_return_sequences = 2)
print("Generated_text:\n ", generated_text[0]['generated_text'])

Device set to use cuda:0
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Generated_text:
  Today, is a rainy day in Lagos. How is the weather at your side?

The weather is very clear from the sun, with a clear green sky of the sky.
You can see the sunshine moving very rapidly in the evening.
The sun is also active in the evening.
The sun is also active in the morning.
Your body is cool to the touch and no matter how cold you are, when you are sleeping, you will have an excellent feel.
You can see the sun moving in the dark.
As you are sleeping, you will notice the sun moving in the morning.
A good night will help you regain your confidence in your body.
The sun is also active in the evening.
How do you feel?
On the outside, you will see the sunlight at the edge of your eye. It is also possible to feel the sun moving in the deep and dark clouds in the night.
The sun appears very bright in the morning.
The sun is also active in the evening.
Where will you see the sun?
The sun is active in the evening.
The sun is active in the morning.
How can you feel?
The su

# Question and Answering

In [21]:
qa_model = pipeline("question-answering")
question = "What is my job, besides what is the size of your apartment?"
context = " I am developing AI models with Python. And I live is a small apartment for now!"
qa_model(question = question, context = context)

No model was supplied, defaulted to distilbert/distilbert-base-cased-distilled-squad and revision 564e9b5 (https://huggingface.co/distilbert/distilbert-base-cased-distilled-squad).
Using a pipeline without specifying a model name and revision in production is not recommended.
Device set to use cuda:0


{'score': 0.6192188858985901,
 'start': 6,
 'end': 26,
 'answer': 'developing AI models'}

# Tokenization

In [25]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, DistilBertTokenizer, DistilBertForSequenceClassification
model_name2 = "nlptown/bert-base-multilingual-uncased-sentiment"
mymodel2 = AutoModelForSequenceClassification.from_pretrained(model_name2)
mytokenizer2 = AutoTokenizer.from_pretrained(model_name2)

classifier = pipeline("sentiment-analysis", model = mymodel2, tokenizer = mytokenizer2)
res = classifier("I was not so happy with the Last Mission Impossible Movie")
print(res)

Device set to use cuda:0


[{'label': '2 stars', 'score': 0.44449105858802795}]


In [28]:
# Load a pre-traine tokenizer
tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")

# Example Text
text = ["I was not so happy with the Last Mission Impossible Movie.",\
       "But I really like the movie Titled 'The Forge'."]

# Tokenize the text
tokens = tokenizer.tokenize(text)
print("Tokens :", tokens)


Tokens : ['i', 'was', 'not', 'so', 'happy', 'with', 'the', 'last', 'mission', 'impossible', 'movie', '.', 'but', 'i', 'really', 'like', 'the', 'movie', 'titled', "'", 'the', 'forge', "'", '.']


In [29]:
# Load a pre-traine tokenizer
tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")

# Example Text
text = ["I was not so happy with the Last Mission Impossible Movie.",\
       "But I really like the movie Titled 'The Forge'."]

# Tokenize the text
tokens = tokenizer.tokenize(text)
print("Tokens :", tokens)


# Convert tokens to input IDs
input_ids = tokenizer.convert_tokens_to_ids(tokens)
print("Input_IDs :", input_ids)

# Encode the text (tokenization + converting to input IDs)
encoded_input = tokenizer(text)
print("Encoded Input: ", encoded_input)

# Decode the Text
decoded_output = tokenizer.decode(input_ids)
print("Decoded Output: ", decoded_output)

Tokens : ['i', 'was', 'not', 'so', 'happy', 'with', 'the', 'last', 'mission', 'impossible', 'movie', '.', 'but', 'i', 'really', 'like', 'the', 'movie', 'titled', "'", 'the', 'forge', "'", '.']
Input_IDs : [1045, 2001, 2025, 2061, 3407, 2007, 1996, 2197, 3260, 5263, 3185, 1012, 2021, 1045, 2428, 2066, 1996, 3185, 4159, 1005, 1996, 15681, 1005, 1012]
Encoded Input:  {'input_ids': [[101, 1045, 2001, 2025, 2061, 3407, 2007, 1996, 2197, 3260, 5263, 3185, 1012, 102], [101, 2021, 1045, 2428, 2066, 1996, 3185, 4159, 1005, 1996, 15681, 1005, 1012, 102]], 'token_type_ids': [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], 'attention_mask': [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]}
Decoded Output:  i was not so happy with the last mission impossible movie. but i really like the movie titled ' the forge '.
