In [1]:
!pip install transformers accelerate sentencepiece sacremoses torch --upgrade


Collecting transformers
  Using cached transformers-4.57.6-py3-none-any.whl.metadata (43 kB)
Collecting torch
  Using cached torch-2.9.1-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (30 kB)
Collecting nvidia-cudnn-cu12==9.10.2.21 (from torch)
  Using cached nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl.metadata (1.8 kB)
Collecting nvidia-cufft-cu12==11.3.3.83 (from torch)
  Using cached nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.7 kB)
Collecting nvidia-cusolver-cu12==11.7.3.90 (from torch)
  Using cached nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl.metadata (1.8 kB)
Using cached transformers-4.57.6-py3-none-any.whl (12.0 MB)
Using cached torch-2.9.1-cp312-cp312-manylinux_2_28_x86_64.whl (899.7 MB)
Using cached nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl (706.8 MB)
Using cached nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (193.1 MB)
Using

In [2]:
from transformers import pipeline
from sentence_transformers import SentenceTransformer, util

# 1) Sentiment Analysis
# The checkpoint and revision are pinned explicitly: they are the ones the
# pipeline was selecting implicitly, so the result is unchanged, but the run is
# now reproducible and no longer depends on the library's default model.
sentiment = pipeline(
    "sentiment-analysis",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
    revision="714eb0f",
)
print("Sentiment:", sentiment("I absolutely loved the movie. The story was gripping!"))

# 2) NLP Task: Named Entity Recognition (NER)
# - The checkpoint and revision are pinned to the ones the pipeline was picking
#   implicitly, so the notebook does not depend on the library's default model.
# - aggregation_strategy="simple" is the supported replacement for the deprecated
#   grouped_entities=True flag; it merges word pieces into whole entity spans.
ner = pipeline(
    "ner",
    model="dbmdz/bert-large-cased-finetuned-conll03-english",
    revision="4c53496",
    aggregation_strategy="simple",
)
text = "Elon Musk founded SpaceX and visited India in January 2024."
print("\nNER:", ner(text))

# 3) Translation (English -> French)
# Build the English-to-French pipeline on the Helsinki-NLP OPUS-MT checkpoint,
# translate one sentence, then print the pipeline's raw return value.
translator = pipeline(
    "translation_en_to_fr",
    model="Helsinki-NLP/opus-mt-en-fr",
)
translation = translator("I am learning transformers and it is powerful.")
print("\nTranslation:", translation)

# 4) Summarization
# Text to condense; the leading and trailing newlines are part of the input.
long_text = """
Transformers are deep learning models that changed NLP by enabling parallel processing and attention mechanisms.
They improved tasks like translation, summarization, question answering, and text generation.
Modern LLMs are built on transformer architectures and can be adapted to many real-world applications.
"""

summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")

# Sampling is disabled and the summary length is bounded to 25-60 tokens.
summary = summarizer(
    long_text,
    max_length=60,
    min_length=25,
    do_sample=False,
)
print("\nSummary:", summary)

# 5) Sentence Embeddings + Similarity
# Encode a small corpus and a query with the same model, then rank the corpus
# sentences by cosine similarity to the query (highest score first).
model = SentenceTransformer("all-MiniLM-L6-v2")

sentences = [
    "I love studying artificial intelligence.",
    "Machine learning is a subfield of AI.",
    "The pizza was delicious and cheesy.",
]
query = "AI and ML are related"

emb = model.encode(sentences, convert_to_tensor=True)
q_emb = model.encode(query, convert_to_tensor=True)

# cos_sim returns a (queries x sentences) matrix; row 0 belongs to the single query.
scores = util.cos_sim(q_emb, emb)[0]

# Pair each score with its sentence index and order the pairs best-match-first.
ranked = sorted(
    enumerate(scores),
    key=lambda pair: float(pair[1]),
    reverse=True,
)

print("\nSentence Embedding Similarity (query -> closest sentences):")
for position, similarity in ranked:
    print(f"{sentences[position]}  | score = {float(similarity):.4f}")


No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

Device set to use cpu
No model was supplied, defaulted to dbmdz/bert-large-cased-finetuned-conll03-english and revision 4c53496 (https://huggingface.co/dbmdz/bert-large-cased-finetuned-conll03-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


Sentiment: [{'label': 'POSITIVE', 'score': 0.9998846054077148}]


config.json:   0%|          | 0.00/998 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.33G [00:00<?, ?B/s]

Some weights of the model checkpoint at dbmdz/bert-large-cased-finetuned-conll03-english were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


tokenizer_config.json:   0%|          | 0.00/60.0 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

Device set to use cpu



NER: [{'entity_group': 'PER', 'score': np.float32(0.99830234), 'word': 'Elon Musk', 'start': 0, 'end': 9}, {'entity_group': 'ORG', 'score': np.float32(0.9986968), 'word': 'SpaceX', 'start': 18, 'end': 24}, {'entity_group': 'LOC', 'score': np.float32(0.999785), 'word': 'India', 'start': 37, 'end': 42}]


config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/301M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/301M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

source.spm:   0%|          | 0.00/778k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/802k [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

Device set to use cpu



Translation: [{'translation_text': "J'apprends les transformateurs et c'est puissant."}]


config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

Device set to use cpu



Summary: [{'summary_text': ' Transformers are deep learning models that changed NLP by enabling parallel processing . They improved tasks like translation, summarization, question answering, and text generation . Modern LLMs are built on transformer architectures and can be adapted to many real-world applications .'}]


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]


Sentence Embedding Similarity (query -> closest sentences):
Machine learning is a subfield of AI.  | score = 0.6148
I love studying artificial intelligence.  | score = 0.4819
The pizza was delicious and cheesy.  | score = -0.0251
