In [1]:
import transformers

In [2]:
#Task-1 : Loaded google/flan-t5-small Pretrained Model 

In [3]:
from transformers import T5Tokenizer, T5ForConditionalGeneration

tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-small")
model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-small", device_map="auto")

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [4]:
#Task-2: Text Summarizer

In [5]:
# Input text
input_text = """
Once upon a time, a farmer had a goose that laid a golden egg every day. The farmer used to sell that egg 
and earn enough money to meet their family's day-to-day needs. One day, the farmer thought that if he could 
get more such golden eggs and make a lot of money and become a wealthy person. The farmer decided to cut the 
goose and remove all the golden eggs from its stomach. As soon as they killed the bird and opened the goose’s 
stomach, they found no eggs. The foolish farmer realized they had destroyed their last resource out of greed.

"""

# Tokenize and generate summary
inputs = tokenizer("summarize: " + input_text, return_tensors="pt", max_length=2048, truncation=True).input_ids.to("cuda")
print(inputs, type(inputs))
summary_ids = model.generate(inputs, max_length = 1024)

# Decode and print the summary
summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
print("Original Text:\n", input_text)
print("\nGenerated Summary:\n", summary)

tensor([[21603,    10,  1447,  1286,     3,     9,    97,     6,     3,     9,
         17766,   141,     3,     9, 29669,    24,  7245,     3,     9,  7069,
          6182,   334,   239,     5,    37, 17766,   261,    12,  1789,    24,
          6182,    11,  3807,   631,   540,    12,   942,    70,   384,    31,
             7,   239,    18,   235,    18,  1135,   523,     5,   555,   239,
             6,     8, 17766,   816,    24,     3,    99,     3,    88,   228,
           129,    72,   224,  7069,  5875,    11,   143,     3,     9,   418,
            13,   540,    11,   582,     3,     9, 18407,   568,     5,    37,
         17766,  1500,    12,  1340,     8, 29669,    11,  2036,    66,     8,
          7069,  5875,    45,   165,  9883,     5,   282,  1116,    38,    79,
          4792,     8,  5963,    11,  2946,     8, 29669,    22,     7,  9883,
             6,    79,   435,   150,  5875,     5,    37, 27539, 17766,  5723,
            79,   141, 10932,    70,   336,  3487,  

In [6]:
#Task-3 Q/A

In [7]:
context_qa = """ Vedanta's chairman Anil Agarwal earlier this week announced the biggest investment of ₹1.54 lakh crore for setting 
up the country's first-ever semiconductor chip plant in Gujarat. This led to a strong buying on stock exchanges that drove Vedanta 
to rise nearly 18% this week. However, on the last trading day of the current week, Vedanta shares pulled back and slipped by at least 
nearly 9% on BSE after the company said, the semiconductor plant project is not under their ambit but will be undertaken by Volcan Investments.
"""
question_qa = "Anil agarwal announced how much investment ?"
input_text_qa = f"question: {question_qa} context: {context_qa}"
input_ids_qa = tokenizer(input_text_qa, return_tensors="pt").input_ids.to("cuda")
outputs_qa = model.generate(input_ids_qa)
print("Question:", question_qa)
print("Q&A Output:", tokenizer.decode(outputs_qa[0]))



Question: Anil agarwal announced how much investment ?
Q&A Output: <pad> <unk> 1.54 lakh crore</s>


In [8]:
#Task-4 Translate from english to french

In [9]:
input_text = "Translate the following English text to French: What way is it to the museum/train station? "
input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to("cuda")

outputs = model.generate(input_ids)
print(tokenizer.decode(outputs[0]))

<pad>Quels sont les mesures à la station de train et à la station


In [10]:
#Task-5 Printing model layers and dimensions 

In [11]:
for name, param in model.named_parameters():
    print(f"Layer Name: {name}, Dimensions: {param.size()}")


Layer Name: shared.weight, Dimensions: torch.Size([32128, 512])
Layer Name: encoder.block.0.layer.0.SelfAttention.q.weight, Dimensions: torch.Size([384, 512])
Layer Name: encoder.block.0.layer.0.SelfAttention.k.weight, Dimensions: torch.Size([384, 512])
Layer Name: encoder.block.0.layer.0.SelfAttention.v.weight, Dimensions: torch.Size([384, 512])
Layer Name: encoder.block.0.layer.0.SelfAttention.o.weight, Dimensions: torch.Size([512, 384])
Layer Name: encoder.block.0.layer.0.SelfAttention.relative_attention_bias.weight, Dimensions: torch.Size([32, 6])
Layer Name: encoder.block.0.layer.0.layer_norm.weight, Dimensions: torch.Size([512])
Layer Name: encoder.block.0.layer.1.DenseReluDense.wi_0.weight, Dimensions: torch.Size([1024, 512])
Layer Name: encoder.block.0.layer.1.DenseReluDense.wi_1.weight, Dimensions: torch.Size([1024, 512])
Layer Name: encoder.block.0.layer.1.DenseReluDense.wo.weight, Dimensions: torch.Size([512, 1024])
Layer Name: encoder.block.0.layer.1.layer_norm.weight, Dime

In [12]:
# Task - 6: Printing total number of parameters/weights in the model

In [13]:
total_params = sum(p.numel() for p in model.parameters())
print(f"Total Parameters in the Model: {total_params}")

Total Parameters in the Model: 76961152


In [14]:
# Task-7: Setting the tensor in the final layer to all zeros - decoder.final_layer_norm.weight,

In [19]:
#model.decoder.final_layer_norm.weight.data.fill_(0.0)
model.decoder.final_layer_norm.weight.data.zero_()

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 

In [20]:
# Task-8: Verifying Q&A task after resetting weights

In [21]:
outputs_qa_after_reset = model.generate(input_ids_qa)
print("Q&A Output After Reset:", tokenizer.decode(outputs_qa_after_reset[0]))

Q&A Output After Reset: <pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>


In [22]:
# Task- 9: Replace the decoder.final_layer_norm.weight with a layer of smaller dimensions (256)

In [4]:
import torch
#new_dim = 256  and the default is 512


In [None]:
# Replace the final layer normalization weight with a layer of smaller dimensions which is 256
new_final_layer_norm_size = model.decoder.final_layer_norm.weight.data.shape[0] // 2
new_final_layer_norm = torch.nn.LayerNorm(new_final_layer_norm_size, device=model.device)
model.decoder.final_layer_norm = new_final_layer_norm

# Adjusting dependent layers diemnsions in the decoder
for name, param in model.decoder.named_parameters():
    print(name)
    if "final_layer_norm" in name:
        continue

    if "SelfAttention" in name or "EncDecAttention" in name or "DenseReluDense" in name:
        param.data = param.data[:, :new_final_layer_norm_size]
