In [1]:
!pip install transformers torch sentence-transformers accelerate

Collecting sentence-transformers
  Downloading sentence_transformers-5.2.0-py3-none-any.whl.metadata (16 kB)
Collecting accelerate
  Downloading accelerate-1.12.0-py3-none-any.whl.metadata (19 kB)
Downloading sentence_transformers-5.2.0-py3-none-any.whl (493 kB)
Downloading accelerate-1.12.0-py3-none-any.whl (380 kB)
Installing collected packages: accelerate, sentence-transformers
[2K   [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2/2[0m [sentence-transformers]/2[0m [sentence-transformers]
[1A[2KSuccessfully installed accelerate-1.12.0 sentence-transformers-5.2.0


## Encoder-Only Transformer (Text Classification / NER)
### Example: Text Classification (BERT)

In [3]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

model_name = "bert-base-uncased"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

text = "This judgment was not followed by the High Court."

inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
outputs = model(**inputs)

logits = outputs.logits
predicted_class = torch.argmax(logits, dim=1)

print("Predicted class:", predicted_class.item())

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Predicted class: 1


## Decoder-Only Transformer (Text Generation / LLM)
### Example: Text Generation (GPT-2 / LLaMA-style)

In [4]:
from transformers import AutoTokenizer, AutoModelForCausalLM

model_name = "gpt2"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

prompt = "Explain transformer models in simple words"

inputs = tokenizer(prompt, return_tensors="pt")

output = model.generate(
    **inputs,
    max_length=100,
    temperature=0.7,
    top_p=0.9
)

print(tokenizer.decode(output[0], skip_special_tokens=True))


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Explain transformer models in simple words.

The following is a list of the most common transformer models in the language.

The following is a list of the most common transformer models in the language. The following is a list of the most common transformer models in the language. The following is a list of the most common transformer models in the language. The following is a list of the most common transformer models in the language. The following is a list of the most common transformer models in the


## Encoder–Decoder Transformer (Summarization / Translation)
### Example: Summarization (T5)

In [5]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

model_name = "t5-small"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

text = """
The Supreme Court observed that the prior ruling was not followed 
due to differing factual circumstances...
"""

input_text = "summarize: " + text

inputs = tokenizer(input_text, return_tensors="pt", truncation=True)

summary_ids = model.generate(
    **inputs,
    max_length=60,
    min_length=20
)

print("Summary:", tokenizer.decode(summary_ids[0], skip_special_tokens=True))


Summary: the prior ruling was not followed due to differing factual circumstances.... the supreme court observed that the prior ruling was not followed due to differing factual circumstances...


## Sentence Transformers (Embeddings / Semantic Search)
### Example: Generate Embeddings (Best for RAG)

In [7]:
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("all-MiniLM-L6-v2")

sentences = [
    "The case was cited by the High Court.",
    "This judgment was overruled later."
]

embeddings = model.encode(sentences)

print("Embedding shape:", embeddings.shape)
print(embeddings)

Embedding shape: (2, 384)
[[-3.92743051e-02  7.77428225e-02  3.10510863e-03 -3.67719941e-02
   6.08768454e-03  2.79506464e-02  2.81491615e-02  1.01967551e-01
   4.00985498e-03  4.17825729e-02  9.42809880e-02  1.25331953e-01
   2.58782748e-02 -6.40802756e-02  3.72433811e-02  3.81202064e-02
   9.40848738e-02  3.11784819e-02 -1.74372438e-02  2.60961466e-02
  -2.99948407e-03  2.77408566e-02  2.01642560e-03  3.53095192e-03
  -1.96987279e-02 -4.48261872e-02 -3.28719765e-02 -5.29057644e-02
   2.23430656e-02  4.84758839e-02 -3.27321514e-02  1.03338681e-01
  -4.17378219e-03  1.51731388e-03 -1.80717297e-02  8.04686453e-03
   2.69205198e-02 -9.96926706e-03  5.83176166e-02  8.69004708e-03
   6.22390658e-02 -1.11081405e-02  4.77364846e-02  3.57239209e-02
  -8.49945024e-02  3.48009504e-02 -1.09764915e-02  3.07311527e-02
  -3.39951292e-02  3.61061953e-02 -6.07582740e-02 -2.11984403e-02
   5.02368324e-02  3.38226967e-02 -1.19538195e-02 -4.25465442e-02
   2.96754427e-02  2.07581799e-02  4.30237092e-02 

### Code-Focused Transformer (Code Understanding)
## Example: Code Embeddings (CodeBERT)

In [8]:
from transformers import AutoTokenizer, AutoModel
import torch

tokenizer = AutoTokenizer.from_pretrained("microsoft/codebert-base")
model = AutoModel.from_pretrained("microsoft/codebert-base")

code = "def add(a, b): return a + b"

inputs = tokenizer(code, return_tensors="pt")
outputs = model(**inputs)

embedding = outputs.last_hidden_state.mean(dim=1)
print("Code embedding shape:", embedding.shape)


Code embedding shape: torch.Size([1, 768])


In [9]:
outputs

BaseModelOutputWithPoolingAndCrossAttentions(last_hidden_state=tensor([[[-0.1635,  0.3595,  0.0261,  ..., -0.2831, -0.2772,  0.3455],
         [-1.0306,  0.1121,  0.5334,  ..., -0.6773, -0.0715,  0.6466],
         [-0.7064,  0.2068, -0.1867,  ..., -0.6525, -0.2080,  0.6185],
         ...,
         [-0.1911,  0.0171, -0.0912,  ...,  0.1607, -0.5508,  0.4795],
         [-0.5272,  0.5221,  0.4231,  ..., -0.7133, -0.6207,  0.9018],
         [-0.1643,  0.3608,  0.0268,  ..., -0.2839, -0.2790,  0.3472]]],
       grad_fn=<NativeLayerNormBackward0>), pooler_output=tensor([[ 0.4756, -0.3428, -0.5196,  0.1175,  0.3530,  0.0713,  0.4972, -0.3266,
          0.0949, -0.2564,  0.4359,  0.0017, -0.2382,  0.1062,  0.0036,  0.5640,
          0.4242, -0.4990,  0.1064,  0.3437, -0.3256,  0.5128,  0.3463,  0.0351,
         -0.0508,  0.2522,  0.1491,  0.1061,  0.5450,  0.1033,  0.1372,  0.0893,
          0.1787,  0.0130, -0.3124, -0.0641, -0.5261,  0.1648,  0.6857, -0.2942,
         -0.3850,  0.0743,  0.02