In [1]:
!pip install transformers



## **HuggingFace Tasks**

In [3]:
from transformers import pipeline
#---------------------------------------------------#
#                     NLP TASKS                     #
#---------------------------------------------------#

'''
1. Text Classification: Assigning a category to a piece of text.
Sentiment Analysis
Topic Classification
Spam Detection '''

classifier = pipeline("text-classification")

'''
2. Token Classification: Assigning labels to individual tokens in a sequence.
Named Entity Recognition (NER)
Part-of-Speech Tagging
'''

token_classifier = pipeline("token-classification")

'''
3. Question Answering: Extracting an answer from a given context based on a question.
'''
question_answerer = pipeline("question-answering")

'''
4. Text Generation: Generating text based on a given prompt.
Language Modeling
Story Generation

'''

text_generator = pipeline("text-generation")

'''
5. Summarization: Condensing long documents into shorter summaries.
'''

summarizer = pipeline("summarization")

'''
Translation: Translating text from one language to another.
'''

translator = pipeline("translation",
                      model="Helsinki-NLP/opus-mt-en-fr")

'''
6. Text2Text Generation: General-purpose text transformation, including summarization and translation.
'''

text2text_generator = pipeline("text2text-generation")

'''
7. Fill-Mask: Predicting the masked token in a sequence.
'''

fill_mask = pipeline("fill-mask")

'''
8. Feature Extraction: Extracting hidden states or features from text.
'''

feature_extractor = pipeline("feature-extraction")

#---------------------------------------------------#
#             Computer Vision TASKS                 #
#---------------------------------------------------#

'''
1. Image Classification: Classifying the main content of an image.

'''

image_classifier = pipeline("image-classification")

'''
2. Object Detection: Identifying objects within an image and their bounding boxes.
'''

object_detector = pipeline("object-detection")

'''
3. Image Segmentation: Segmenting different parts of an image into classes.
'''

image_segmenter = pipeline("image-segmentation")

'''
4. Image Generation: Generating images from textual descriptions (using DALL-E or similar models).
'''

#---------------------------------------------------#
#             Speech Processing TASKS               #
#---------------------------------------------------#

'''
1. utomatic Speech Recognition (ASR): Converting spoken language into text.
'''

speech_recognizer = pipeline("automatic-speech-recognition")

'''
2. Speech Translation: Translating spoken language from one language to another.
3. Audio Classification: Classifying audio signals into predefined categories.
'''

#---------------------------------------------------#
#                   Multimodal TASKS                #
#---------------------------------------------------#

'''
1. Image Captioning: Generating a textual description of an image.
'''
image_captioner = pipeline("image-to-text")
'''
2. Visual Question Answering (VQA): Answering questions about the content of an image.
'''

#---------------------------------------------------#
#                     Other TASKS                   #
#---------------------------------------------------#
'''
1. Table Question Answering: Answering questions based on tabular data.
'''
table_qa = pipeline("table-question-answering")

'''
2. Document Question Answering: Extracting answers from documents like PDFs.

'''
doc_qa = pipeline("document-question-answering")
'''
3. Time Series Forecasting: Predicting future values in time series data (not directly supported in the main Transformers library but available through extensions).
'''

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
No model was supplied, defaulted to dbmdz/bert-large-cased-finetuned-conll03-english and revision 4c53496 (https://huggingface.co/dbmdz/bert-large-cased-finetuned-conll03-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
Some weights of the model checkpoint at dbmdz/bert-large-cased-finetuned-conll03-english were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a Bert

model.safetensors:   0%|          | 0.00/301M [00:00<?, ?B/s]

No model was supplied, defaulted to google/vit-base-patch16-224 and revision 3f49326 (https://huggingface.co/google/vit-base-patch16-224).
Using a pipeline without specifying a model name and revision in production is not recommended.


config.json:   0%|          | 0.00/69.7k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/160 [00:00<?, ?B/s]

Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.
No model was supplied, defaulted to facebook/detr-resnet-50 and revision 1d5f47b (https://huggingface.co/facebook/detr-resnet-50).
Using a pipeline without specifying a model name and revision in production is not recommended.


config.json:   0%|          | 0.00/4.59k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/167M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/102M [00:00<?, ?B/s]

Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


preprocessor_config.json:   0%|          | 0.00/290 [00:00<?, ?B/s]

No model was supplied, defaulted to facebook/detr-resnet-50-panoptic and revision d53b52a (https://huggingface.co/facebook/detr-resnet-50-panoptic).
Using a pipeline without specifying a model name and revision in production is not recommended.


config.json:   0%|          | 0.00/11.6k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/172M [00:00<?, ?B/s]

Some weights of the model checkpoint at facebook/detr-resnet-50-panoptic were not used when initializing DetrForSegmentation: ['detr.model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'detr.model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'detr.model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'detr.model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForSegmentation from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForSegmentation from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


preprocessor_config.json:   0%|          | 0.00/289 [00:00<?, ?B/s]

No model was supplied, defaulted to facebook/wav2vec2-base-960h and revision 22aad52 (https://huggingface.co/facebook/wav2vec2-base-960h).
Using a pipeline without specifying a model name and revision in production is not recommended.


config.json:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/378M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/172M [00:00<?, ?B/s]

Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


tokenizer_config.json:   0%|          | 0.00/163 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/291 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/85.0 [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/159 [00:00<?, ?B/s]

No model was supplied, defaulted to ydshieh/vit-gpt2-coco-en and revision 5bebf1e (https://huggingface.co/ydshieh/vit-gpt2-coco-en).
Using a pipeline without specifying a model name and revision in production is not recommended.


config.json:   0%|          | 0.00/4.34k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/982M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/982M [00:00<?, ?B/s]

Config of the encoder: <class 'transformers.models.vit.modeling_vit.ViTModel'> is overwritten by shared encoder config: ViTConfig {
  "architectures": [
    "ViTModel"
  ],
  "attention_probs_dropout_prob": 0.0,
  "encoder_stride": 16,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "image_size": 224,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "model_type": "vit",
  "num_attention_heads": 12,
  "num_channels": 3,
  "num_hidden_layers": 12,
  "patch_size": 16,
  "qkv_bias": true,
  "transformers_version": "4.46.2"
}

Config of the decoder: <class 'transformers.models.gpt2.modeling_gpt2.GPT2LMHeadModel'> is overwritten by shared decoder config: GPT2Config {
  "activation_function": "gelu_new",
  "add_cross_attention": true,
  "architectures": [
    "GPT2LMHeadModel"
  ],
  "attn_pdrop": 0.1,
  "bos_token_id": 50256,
  "decoder_start_token_id": 50256,
  "embd_pdrop": 0.1,
  "eos_token_id": 50256,
  "initializer_rang

tokenizer_config.json:   0%|          | 0.00/236 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/120 [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/211 [00:00<?, ?B/s]

No model was supplied, defaulted to google/tapas-base-finetuned-wtq and revision e3dde19 (https://huggingface.co/google/tapas-base-finetuned-wtq).
Using a pipeline without specifying a model name and revision in production is not recommended.


config.json:   0%|          | 0.00/1.66k [00:00<?, ?B/s]

TAPAS models are not usable since `tensorflow_probability` can't be loaded. It seems you have `tensorflow_probability` installed with the wrong tensorflow version. Please try to reinstall it following the instructions here: https://github.com/tensorflow/probability.


pytorch_model.bin:   0%|          | 0.00/443M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/490 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/262k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/154 [00:00<?, ?B/s]

No model was supplied, defaulted to impira/layoutlm-document-qa and revision beed3c4 (https://huggingface.co/impira/layoutlm-document-qa).
Using a pipeline without specifying a model name and revision in production is not recommended.


config.json:   0%|          | 0.00/789 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/511M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/443M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/315 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

'\n3. Time Series Forecasting: Predicting future values in time series data (not directly supported in the main Transformers library but available through extensions).\n'

###**Sentiment Analysis**

In [4]:
classifier = pipeline('sentiment-analysis')
result = classifier("I am so happy, I love to eat this food, It tastes amazing")
print(result)

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


[{'label': 'POSITIVE', 'score': 0.9998908042907715}]


In [5]:
pipeline(task = "sentiment-analysis")("I hate this movie, wasted my time watching this")

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


[{'label': 'NEGATIVE', 'score': 0.9997802376747131}]

In [6]:
pipeline(task = "sentiment-analysis", model = "facebook/bart-large-mnli")\
("The product was an absolute disaster. From the moment it arrived, it was clear that the quality was subpar. The packaging was damaged, and the item itself felt cheap and poorly made. It didn’t work as advertised, and attempts to fix it were a waste of time. Customer service was equally disappointing, with long wait times and unhelpful responses. Overall, this experience was frustrating and a complete waste of money. I wouldn’t recommend this to anyone.")

config.json:   0%|          | 0.00/1.15k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

[{'label': 'neutral', 'score': 0.9871661067008972}]

### **Batch Sentiment Analysis**

In [7]:
classifier = pipeline(task = "sentiment-analysis")

task_list = [
"The service at the restaurant was outstanding, and the food was absolutely delicious!",  \
"The movie was too long, the plot was boring, and the acting was mediocre at best.", \
"The package arrived on time, but I haven’t opened it yet to check the contents.", \
"I love how easy this app is to use! It has made my life so much more convenient.", \
"The laptop runs slower than expected and overheats constantly. Definitely not worth the price.", \
"I attended the workshop yesterday, and it was okay. Nothing special, but not bad either.", \
]

classifier(task_list)

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


[{'label': 'POSITIVE', 'score': 0.9998849630355835},
 {'label': 'NEGATIVE', 'score': 0.9997510313987732},
 {'label': 'NEGATIVE', 'score': 0.997811496257782},
 {'label': 'POSITIVE', 'score': 0.983859658241272},
 {'label': 'NEGATIVE', 'score': 0.999792754650116},
 {'label': 'POSITIVE', 'score': 0.9967029690742493}]

In [12]:
classifier = pipeline(task="text-classification", model="SamLowe/roberta-base-go_emotions", top_k=None)

task_list = [
"The service at the restaurant was outstanding, and the food was absolutely delicious!"
]

classifier(task_list)

[[{'label': 'admiration', 'score': 0.9546917080879211},
  {'label': 'approval', 'score': 0.03330536186695099},
  {'label': 'neutral', 'score': 0.013748474419116974},
  {'label': 'joy', 'score': 0.012229196727275848},
  {'label': 'excitement', 'score': 0.010493631474673748},
  {'label': 'gratitude', 'score': 0.006832646671682596},
  {'label': 'love', 'score': 0.005225492175668478},
  {'label': 'realization', 'score': 0.004484329838305712},
  {'label': 'pride', 'score': 0.004032491240650415},
  {'label': 'optimism', 'score': 0.0030854446813464165},
  {'label': 'surprise', 'score': 0.0029579787515103817},
  {'label': 'annoyance', 'score': 0.0029180794954299927},
  {'label': 'curiosity', 'score': 0.0028620522934943438},
  {'label': 'disapproval', 'score': 0.0028166130650788546},
  {'label': 'amusement', 'score': 0.002296942286193371},
  {'label': 'disappointment', 'score': 0.002189986640587449},
  {'label': 'sadness', 'score': 0.0017001095693558455},
  {'label': 'caring', 'score': 0.001660

### **Text Generation**

In [26]:
text_generator = pipeline("text-generation", model = "distilbert/distilgpt2")
generated_text = text_generator("Today im feeling very hot, due to sunny day",
                                truncation = True,
                                num_return_sequences = 2)
print("Generated Text:\n", generated_text[0]['generated_text'])

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Generated Text:
 Today im feeling very hot, due to sunny day, after the rain, I want to sleep like nothing of my life this weekend. And I want to take a good picture of myself in the sun.... this is my first chance... and I can


In [25]:
generated_text[1]['generated_text']

'Today im feeling very hot, due to sunny day to be back...it was the best. a lot of love as well, to be honest, to have been waiting for that for so long.\n\nYou are looking forward to seeing her next'

### **Question Answering**

In [27]:
qa_model = pipeline("question-answering")
question = "What is programming Language ?"
context = "Talking with computers"
qa_model(question = question, context = context)

No model was supplied, defaulted to distilbert/distilbert-base-cased-distilled-squad and revision 564e9b5 (https://huggingface.co/distilbert/distilbert-base-cased-distilled-squad).
Using a pipeline without specifying a model name and revision in production is not recommended.


{'score': 0.9227562546730042,
 'start': 0,
 'end': 22,
 'answer': 'Talking with computers'}