In [4]:
import transformers
from transformers import T5ForConditionalGeneration, T5Tokenizer

In [5]:


content = '''
A HARE one day ridiculed the short feet and slow pace of the
Tortoise, who replied, laughing:  "Though you be swift as the
wind, I will beat you in a race."  The Hare, believing her
assertion to be simply impossible, assented to the proposal; and
they agreed that the Fox should choose the course and fix the
goal.  On the day appointed for the race the two started
together.  The Tortoise never for a moment stopped, but went on
with a slow but steady pace straight to the end of the course.
The Hare, lying down by the wayside, fell fast asleep.  At last
waking up, and moving as fast as he could, he saw the Tortoise
had reached the goal, and was comfortably dozing after her
fatigue.  Slow but steady wins the race. '''


### 1. Use a pre-trained google/flan-t5-small as the model.

In [6]:
model_name = "google/flan-t5-small"

In [7]:
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


model.safetensors:   0%|          | 0.00/308M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

### 2. Verify if the summariza'on task works.




In [5]:


text_to_summarize = 'Summarize the given content: ' + content

inputs = tokenizer.encode(text_to_summarize, return_tensors="pt", max_length=512, truncation=True)
outputs = model.generate(inputs, max_length=150, min_length=40, length_penalty=1.0, num_beams=4, early_stopping=True)

summary = tokenizer.decode(outputs[0])
print("Summary:", summary)


Summary: <pad> The Tortoise and the Tortoise were able to beat each other in the final round of the Giro d'Italia, but the Tortoise was unable to win the race.</s>


### 3. Verify if the Q&A task works.

In [6]:
question = "What is the moral of the story?"

inputs = tokenizer.encode("question: " + question + " context: " + content, return_tensors="pt", max_length=512, truncation=True)
outputs = model.generate(inputs, max_length=32,  num_beams=4, early_stopping=True)

answer = tokenizer.decode(outputs[0])
print("Answer:", answer)

Answer: <pad> The Tortoise won the race.</s>


### 4. Verify if English to French transla'on task works.

In [7]:
english_text = content

inputs = tokenizer.encode("translate English to French: " + english_text, return_tensors="pt", max_length=512, truncation=True)
outputs = model.generate(inputs, max_length=512, num_beams=4, early_stopping=True)

french_translation = tokenizer.decode(outputs[0])
print("French Translation:", french_translation)

French Translation: <pad>A HARE à l'heure de l'heure de l'heure de Tortoise, qui a répondu à l'horse et à l'horse de l'horse de l'horse de l'horse de l'horse de l'horse de l'horse de l'horse de l'horse de l'horse de l'horse de l'horse de l'horse de l'horse de l'horse de l'horse de l'horse de l'horse de l'horse de l'horse de l'horse de l'horse de l'horse de l'horse de l'horse de l'horse de l'horse de l'horse de l'horse de l'horse de l'horse de l'horse de l'horse de l'horse de l'horse de l'horse de l'horse de l'horse de l'horse de l'horse de l'horse de l'horse de l'horse de l'horse de l'horse de l'horse de l'horse de l'horse de l'horse de l'horse de l'horse de l'horse de l'horse de l'horse de l'horse de l'horse de l'horse de l'horse de l'


In [8]:
french_text = french_translation

inputs = tokenizer.encode("translate French to English: " + french_text, return_tensors="pt", max_length=512, truncation=True)
outputs = model.generate(inputs, max_length=512, num_beams=4, early_stopping=True)

rev_english_translation = tokenizer.decode(outputs[0])
print("Reverse English Translation:", rev_english_translation)

Reverse English Translation: <pad>A HARE at the hour of the hour of the hour of Tortoise, who answered at the horse and at the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the horse of the ho

### 5. Programmatically print the names of all the model layers and their dimensions.

In [9]:
for name, param in model.named_parameters():
    print(f"Layer: {name}, Size: {param.size()}")

Layer: shared.weight, Size: torch.Size([32128, 512])
Layer: encoder.block.0.layer.0.SelfAttention.q.weight, Size: torch.Size([384, 512])
Layer: encoder.block.0.layer.0.SelfAttention.k.weight, Size: torch.Size([384, 512])
Layer: encoder.block.0.layer.0.SelfAttention.v.weight, Size: torch.Size([384, 512])
Layer: encoder.block.0.layer.0.SelfAttention.o.weight, Size: torch.Size([512, 384])
Layer: encoder.block.0.layer.0.SelfAttention.relative_attention_bias.weight, Size: torch.Size([32, 6])
Layer: encoder.block.0.layer.0.layer_norm.weight, Size: torch.Size([512])
Layer: encoder.block.0.layer.1.DenseReluDense.wi_0.weight, Size: torch.Size([1024, 512])
Layer: encoder.block.0.layer.1.DenseReluDense.wi_1.weight, Size: torch.Size([1024, 512])
Layer: encoder.block.0.layer.1.DenseReluDense.wo.weight, Size: torch.Size([512, 1024])
Layer: encoder.block.0.layer.1.layer_norm.weight, Size: torch.Size([512])
Layer: encoder.block.1.layer.0.SelfAttention.q.weight, Size: torch.Size([384, 512])
Layer: enco

### 6. Programma'cally print the total number of parameters/weights in this model.

In [10]:
total_params = sum(p.numel() for p in model.parameters())
print("Total number of parameters/weights in the model:", total_params)

Total number of parameters/weights in the model: 76961152


### 7. Set the tensor in final layer (decoder.final_layer_norm.weight) to all zeros.

In [11]:
# Set the final layer's weight to all zeros
print("Final layer's weight tensor before setting to zeros:")
print(model.decoder.final_layer_norm.weight)
model.decoder.final_layer_norm.weight.data.fill_(0)

# Verify the final layer's weight has been set to zeros
print("Final layer's weight tensor after setting to zeros:")
print(model.decoder.final_layer_norm.weight)

Final layer's weight tensor before setting to zeros:
Parameter containing:
tensor([ 1.5583e-01,  1.6458e-01,  1.8197e-01,  2.0792e-01,  1.5886e-01,
         1.4222e-01,  1.5845e-01,  1.4269e-01,  1.3648e-01,  1.5702e-01,
         1.6670e-01,  1.3271e-01,  1.7980e-01,  3.2683e-01,  2.0897e-01,
         2.6234e-01,  1.8381e-01,  1.8566e-01,  1.8115e-01,  1.9588e-01,
         1.5456e-01,  2.1353e-01,  1.5126e-01,  1.6348e-01,  1.8062e-01,
         1.4414e-01,  1.7974e-01,  2.0646e-01,  1.7899e-01,  2.0434e-01,
         1.6415e-01,  1.4987e-01,  1.3866e-01,  2.2488e-01,  1.7041e-01,
         6.1698e-01,  1.8228e-01,  1.7578e-01,  1.6113e-01,  2.4024e-01,
         1.6280e-01,  2.2871e-01,  1.6127e-01,  1.8426e-01,  2.1641e-01,
         2.6774e-01,  1.8475e-01,  1.5955e-01,  2.5002e-01,  1.9592e-01,
         1.5467e-01,  2.0025e-01,  1.7020e-01,  1.4393e-01,  1.9788e-01,
         1.5900e-01,  1.4895e-01,  1.5042e-01,  2.6026e-01,  1.5933e-01,
         1.5081e-01,  2.0102e-01,  1.9843e-01,  1

### 8. Verify if the Q&A task works after resettng the weights of the above layer.



In [12]:
context = content
question = "What is the moral of the story?"

inputs = tokenizer.encode("question: " + question + " context: " + context, return_tensors="pt", max_length=512, truncation=True)
outputs = model.generate(inputs, max_length=32, num_beams=4, early_stopping=True)

answer = tokenizer.decode(outputs[0])
print("Answer:", answer)

Answer: <pad> sss</s>


### 9. Replace the decoder.final_layer_norm.weight with a layer of smaller dimensions and adjust all the dependent layers to match the dimension

In [10]:
import torch
import torch.nn as nn
from transformers import T5ForConditionalGeneration, T5Config

# Define the new dimension for the final layer weight
new_final_layer_dim = 256

# Load the original T5 model configuration
model_name = "google/flan-t5-small"
config = T5Config.from_pretrained(model_name)

# Adjust the final layer dimensions in the configuration
config.d_model = new_final_layer_dim

# Define a new T5 model with the modified configuration
class CustomT5ForConditionalGeneration(T5ForConditionalGeneration):
    def __init__(self, config):
        super().__init__(config)
        self.decoder.final_layer_norm = nn.LayerNorm(new_final_layer_dim)

# Instantiate the custom model
model = CustomT5ForConditionalGeneration(config)

# Verify the change in dimension of final layer weight
print("New decoder final layer weight dimension:", model.decoder.final_layer_norm.weight.size())


for name, param in model.named_parameters():
    print(f"Layer: {name}, Size: {param.size()}")

context = content
question = "What is the moral of the story?"

inputs = tokenizer.encode("question: " + question + " context: " + context, return_tensors="pt", max_length=512, truncation=True)
outputs = model.generate(inputs, max_length=32, num_beams=4, early_stopping=True)

answer = tokenizer.decode(outputs[0])
print("Answer:", answer)

New decoder final layer weight dimension: torch.Size([256])
Layer: shared.weight, Size: torch.Size([32128, 256])
Layer: encoder.block.0.layer.0.SelfAttention.q.weight, Size: torch.Size([384, 256])
Layer: encoder.block.0.layer.0.SelfAttention.k.weight, Size: torch.Size([384, 256])
Layer: encoder.block.0.layer.0.SelfAttention.v.weight, Size: torch.Size([384, 256])
Layer: encoder.block.0.layer.0.SelfAttention.o.weight, Size: torch.Size([256, 384])
Layer: encoder.block.0.layer.0.SelfAttention.relative_attention_bias.weight, Size: torch.Size([32, 6])
Layer: encoder.block.0.layer.0.layer_norm.weight, Size: torch.Size([256])
Layer: encoder.block.0.layer.1.DenseReluDense.wi_0.weight, Size: torch.Size([1024, 256])
Layer: encoder.block.0.layer.1.DenseReluDense.wi_1.weight, Size: torch.Size([1024, 256])
Layer: encoder.block.0.layer.1.DenseReluDense.wo.weight, Size: torch.Size([256, 1024])
Layer: encoder.block.0.layer.1.layer_norm.weight, Size: torch.Size([256])
Layer: encoder.block.1.layer.0.Self