In [None]:
! pip install --quiet transformers torchviz
! pip install --quiet accelerate sentencepiece datasets evaluate bitsandbytes tqdm
!pip install --quiet pytorch-lightning # pytorch wrapper
!pip install --quiet torchtext # text utilities

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m17.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m268.8/268.8 kB[0m [31m24.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m45.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m49.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for torchviz (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.2/244.2 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m7.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m519.3/519.3 kB[0m [31m13.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━

In [None]:
import torch
from torchviz import make_dot
import matplotlib.pyplot as plt

In [None]:
# Import necessary libraries
from transformers import T5Tokenizer, T5ForConditionalGeneration

# Load the tokenizer and the pretrained FLAN-T5 (small) checkpoint.
tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-small")
model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-small")

# Upper bound on the number of tokens generated per answer. Without an explicit
# limit, generate() falls back to the library's short default length cap, which
# can cut an answer off mid-sentence.
MAX_NEW_TOKENS = 64


def run_prompt(prompt, max_new_tokens=MAX_NEW_TOKENS):
    """Generate a text completion for a single prompt.

    Uses the notebook-level ``tokenizer`` and ``model``.

    Args:
        prompt: Full input text, including any task instruction/prefix.
        max_new_tokens: Maximum number of tokens to generate.

    Returns:
        The decoded output string with special tokens removed.
    """
    encoded = tokenizer(prompt, return_tensors="pt")
    output_ids = model.generate(
        input_ids=encoded.input_ids,
        attention_mask=encoded.attention_mask,
        max_new_tokens=max_new_tokens,
    )
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)


# Passage shared by the summarization and question-answering tasks.
context = "ServiceNow, Inc. provides enterprise information technology (IT) management software. The Company designs, develops, and markets a cloud computing platform to help companies manage digital workflows for enterprise operations."

# Task 2: Summarization
# The instruction is given once, as the "summarize: " prefix; the input itself
# holds only the text to summarize (previously the instruction was sent twice).
summarization_input = context
summary = run_prompt("summarize: " + summarization_input)
print("Summarization:")
print(summary)

# Task 3: Question Answering
question = "What is ServiceNow known for?"
input_text_qa = f"question: {question} context: {context}"
answer = run_prompt(input_text_qa)
print("\nQuestion Answering:")
print(answer)

# Task 4: English to French Translation
# As above, the instruction appears only once, as the task prefix.
translation_input = "Hello, how are you?"
translation = run_prompt("translate English to French: " + translation_input)
print("\nTranslation:")
print(translation)





Summarization:
ServiceNow, Inc. is a company that provides IT management software for enterprise operations.

Question Answering:
IT management software

Translation:
Hello, c'est-ce-t-t-t-t-t-


In [None]:
# Show the module tree of the loaded model (printing a torch module lists its
# nested sub-modules and their configuration).
print(model)

T5ForConditionalGeneration(
  (shared): Embedding(32128, 512)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 512)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=384, bias=False)
              (k): Linear(in_features=512, out_features=384, bias=False)
              (v): Linear(in_features=512, out_features=384, bias=False)
              (o): Linear(in_features=384, out_features=512, bias=False)
              (relative_attention_bias): Embedding(32, 6)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseGatedActDense(
              (wi_0): Linear(in_features=512, out_features=1024, bias=False)
              (wi_1): Linear(in_features=512, out_features=1024, bias=False)
              (wo): 

In [None]:
# List every block of the encoder stack, one printed entry per block.
print("\nModel Layers:")
for block_index, encoder_block in enumerate(model.encoder.block):
    print(f"Block {block_index}: {encoder_block}")


# Task 6: count all scalar weights across every parameter tensor of the model.
total_parameters = sum(map(lambda tensor: tensor.numel(), model.parameters()))
print("\nTotal Parameters:", total_parameters)


Model Layers:
Block 0: T5Block(
  (layer): ModuleList(
    (0): T5LayerSelfAttention(
      (SelfAttention): T5Attention(
        (q): Linear(in_features=512, out_features=384, bias=False)
        (k): Linear(in_features=512, out_features=384, bias=False)
        (v): Linear(in_features=512, out_features=384, bias=False)
        (o): Linear(in_features=384, out_features=512, bias=False)
        (relative_attention_bias): Embedding(32, 6)
      )
      (layer_norm): T5LayerNorm()
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (1): T5LayerFF(
      (DenseReluDense): T5DenseGatedActDense(
        (wi_0): Linear(in_features=512, out_features=1024, bias=False)
        (wi_1): Linear(in_features=512, out_features=1024, bias=False)
        (wo): Linear(in_features=1024, out_features=512, bias=False)
        (dropout): Dropout(p=0.1, inplace=False)
        (act): NewGELUActivation()
      )
      (layer_norm): T5LayerNorm()
      (dropout): Dropout(p=0.1, inplace=False)
    )
  )
)


## Set the weight tensor of the decoder's final layer (`decoder.final_layer_norm.weight`) to all zeros

In [None]:


# Set the weights (the scale vector) of the decoder's final layer norm to zeros.
# The in-place write is done under torch.no_grad() so autograd does not track
# it, rather than going through the legacy `.data` attribute.
with torch.no_grad():
    model.decoder.final_layer_norm.weight.zero_()

# Verify whether the Q&A task still works after zeroing the weights.
# Reuses `context` defined in the earlier task cell.
question = "What is ServiceNow?"
input_text_qa = f"question: {question} context: {context}"
input_ids_qa = tokenizer(input_text_qa, return_tensors="pt").input_ids
answer_ids = model.generate(input_ids_qa)
print(input_text_qa)
print(input_ids_qa)
answer = tokenizer.decode(answer_ids[0], skip_special_tokens=True)
print("\nQuestion Answering After Resetting Weights:")
print(answer)

## Question answering does not work after zeroing the weights: the decoded answer is empty.
## The zeroed tensor is the scale of the decoder's final layer norm. T5's layer norm only
## multiplies the normalized hidden state by this weight (it has no bias term), so every
## decoder output vector becomes all zeros regardless of the input. The bias-free output
## head then assigns the same logit to every vocabulary token, so the prediction no longer
## depends on the question or the context.

question: What is ServiceNow? context: ServiceNow, Inc. provides enterprise information technology (IT) management software. The Company designs, develops, and markets a cloud computing platform to help companies manage digital workflows for enterprise operations.
tensor([[  822,    10,   363,    19,  1387, 17527,    58,  2625,    10,  1387,
         17527,     6,  1542,     5,   795,  5399,   251,   748,    41,  3177,
            61,   758,   889,     5,    37,  1958,  2888,     6,  1344,     7,
             6,    11,  3212,     3,     9,  3126, 10937,  1585,    12,   199,
           688,  1865,  1125, 16101,     7,    21,  5399,  2673,     5,     1]])

Question Answering After Resetting Weights:



In [None]:
import torch.nn as nn

# Width of the replacement normalization layer. It must equal the model's
# hidden size (d_model): the layer is applied to the decoder's hidden states and
# feeds the output head, so any other value fails with a shape mismatch. The
# layer therefore cannot be made narrower without resizing the rest of the model.
new_dim = model.config.d_model

# Replace the decoder's final layer norm with a freshly initialized
# nn.LayerNorm. NOTE: this is not a like-for-like swap -- nn.LayerNorm
# mean-centers its input and carries a bias, and it starts from default
# (untrained) parameters instead of the pretrained scale.
new_final_layer_norm = nn.LayerNorm(new_dim)
model.decoder.final_layer_norm = new_final_layer_norm

# The former `model.decoder.block[i].linear1/linear2 = nn.Linear(...)`
# assignments were removed: a T5Block only contains its `layer` ModuleList, so
# those attributes were never used in the forward pass and merely added unused
# parameters to the model.

# Verify if the Q&A task works after modifying the model
question = "What is Hugging Face known for?"
context = "Hugging Face is known for its contributions to NLP research and its transformer-based models."
input_text_qa = f"question: {question} context: {context}"

# Cap the prompt at 256 tokens (sequence length, not a model dimension). Letting
# the tokenizer truncate keeps the end-of-sequence token on over-long inputs,
# which slicing the id tensor afterwards would cut off.
input_ids_qa = tokenizer(
    input_text_qa,
    return_tensors="pt",
    truncation=True,
    max_length=256,
).input_ids

print(input_ids_qa)

answer_ids = model.generate(input_ids_qa)
answer = tokenizer.decode(answer_ids[0], skip_special_tokens=True)
print("\nQuestion Answering After Modifying the Model:")
print(answer)


tensor([[  822,    10,   363,    19, 11560,  3896,  8881,   801,    21,    58,
          2625,    10, 11560,  3896,  8881,    19,   801,    21,   165,  7548,
            12,   445,  6892,   585,    11,   165, 19903,    18,   390,  2250,
             5,     1]])

Question Answering After Modifying the Model:
transformer-based models
