<a href="https://colab.research.google.com/github/AbhiRathore/llmtasks/blob/main/flanModel_basicTasks.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Upgrade pip inside the kernel's environment before the project requirements are installed.
%pip install --upgrade pip

Collecting pip
  Downloading pip-23.3.2-py3-none-any.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m19.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 23.1.2
    Uninstalling pip-23.1.2:
      Successfully uninstalled pip-23.1.2
Successfully installed pip-23.3.2


In [None]:
# Mount Google Drive at /content/drive so files stored under MyDrive can be read and written.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!cp -r  /content/drive/MyDrive/llmflan_Tasks/ /content/llmflan_Tasks/

In [None]:
!pip install --disable-pip-version-check --quiet -r /content/llmflan_Tasks/requirements.txt

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m887.5/887.5 MB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.6/4.6 MB[0m [31m91.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.8/6.8 MB[0m [31m23.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m468.7/468.7 kB[0m [31m29.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m849.3/849.3 kB[0m [31m41.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m557.1/557.1 MB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m317.1/317.1 MB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m21.0/21.0 MB[0m [31m76.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━

In [None]:
from datasets import load_dataset
from transformers import AutoModelForSeq2SeqLM
from transformers import AutoTokenizer
from transformers import GenerationConfig
from transformers import T5Tokenizer, T5ForConditionalGeneration
import yaml
import torch
import torch.nn as nn
import warnings
warnings.filterwarnings("ignore")

The YAML file loaded below holds user configuration such as sample prompts, the target language, etc., so nothing needs to be hard-coded, which simplifies model deployment and usage.

In [None]:
# Load the user configuration from the YAML file into dict2use.
# dict2use stays an empty dict when the file is empty or cannot be parsed,
# so a later emptiness check can report the problem instead of crashing.
dict2use = {}
with open("/content/llmflan_Tasks/basicTasks/config.yml", "r") as stream:
    try:
        # yaml.safe_load returns None for an empty document; normalise that to {}
        # so len(dict2use) remains valid.
        dict2use = yaml.safe_load(stream) or {}
    except yaml.YAMLError as exc:
        # Best effort: show the parse error and continue with the empty default.
        print(exc)

In [None]:

# Report whether the loaded configuration contains any entries.
print("good to use" if len(dict2use) > 0 else "empty dictionary, plz check yml file")

good to use


In [None]:
# Display the configuration that was loaded from the YAML file.
dict2use

{'task': 'Translate',
 'Translate': {'baselang': 'English', 'targetlang': 'German'}}

## Use a pre-trained google/flan-t5-small as the model.

In [None]:
def set_seed(seed):
  """Seed PyTorch's random number generators with `seed`.

  Always seeds via torch.manual_seed; when torch.cuda.is_available() reports
  True, torch.cuda.manual_seed_all is called with the same seed as well.
  """
  torch.manual_seed(seed)
  if not torch.cuda.is_available():
    return
  torch.cuda.manual_seed_all(seed)



In [None]:
# Seed the RNGs first, then load the pretrained checkpoint named by model_name
# together with its (fast) tokenizer.
set_seed(27122023)
model_name='google/flan-t5-small'

model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)

In [None]:
# tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)

## Verify if the summarization task works.

In [None]:
# Sample passage that is interpolated into the summarization prompt.
dialogue = 'A dollar is a unit of money used in many countries, including the United States, Australia, Canada, and New Zealand. It is represented by the symbol $ and is divided into 100 smaller units called cents'

In [None]:
# Wrap `dialogue` in a summarization prompt and sample a short summary from the model.
prompt = f"""
Summarize the following conversation by understanding the context.

{dialogue}

Summary:
"""
inputs = tokenizer(prompt, return_tensors='pt')
# Sampling-based decoding, capped at 10 new tokens. The attention mask is passed
# explicitly, and the checkpoint's own pad token is used (no override with the EOS id).
generated_ids = model.generate(
    inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    max_new_tokens=10,
    temperature=0.8,
    do_sample=True,
    top_k=100,
    top_p=0.7,
    num_return_sequences=1,
)
# Strip special tokens so markers such as <pad> do not leak into the printed summary.
output = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
print(prompt, output)



Summarize the following conversation by understanding the context. 

A dollar is a unit of money used in many countries, including the United States, Australia, Canada, and New Zealand. It is represented by the symbol $ and is divided into 100 smaller units called cents

Summary:
 <pad> A dollar is a small unit of money.


## Verify if the English to German translation task works (the target language is read from config.yml).

In [None]:
# Read the translation target language from the loaded configuration instead of hard-coding it.
targetlang = dict2use['Translate']['targetlang']

In [None]:
# Build a translation instruction for the configured target language and tokenize it.
input_text = f"translate English to {targetlang}: How old are you?"
input_ids = tokenizer(input_text, return_tensors="pt").input_ids

# Generate with the model's default generation settings; decode() is called without
# skip_special_tokens, so special-token markers remain in the printed text.
outputs = model.generate(input_ids)
print(tokenizer.decode(outputs[0]))

<pad> Wie ich er bitten?</s>


## Verify if the Q&A task works

In [None]:
# Ask the model a factual question using a plain-text instruction prefix.
input_text = "answer the following question : who is the president of USA?"
input_ids = tokenizer(input_text, return_tensors="pt").input_ids

# Generate with the model's default generation settings and print the raw decoded text
# (special-token markers are not stripped).
outputs = model.generate(input_ids)
print(tokenizer.decode(outputs[0]))

<pad> John McCain</s>


## Programmatically print the names of all the model layers and their dimensions.

In [None]:
# List every named parameter of `model` with a 1-based position and its size.
for position, (param_name, tensor) in enumerate(model.named_parameters(), start=1):
    print(f"Layer {position}: {param_name}, Size: {tensor.size()}")

Layer 1: shared.weight, Size: torch.Size([32128, 512])
Layer 2: encoder.block.0.layer.0.SelfAttention.q.weight, Size: torch.Size([384, 512])
Layer 3: encoder.block.0.layer.0.SelfAttention.k.weight, Size: torch.Size([384, 512])
Layer 4: encoder.block.0.layer.0.SelfAttention.v.weight, Size: torch.Size([384, 512])
Layer 5: encoder.block.0.layer.0.SelfAttention.o.weight, Size: torch.Size([512, 384])
Layer 6: encoder.block.0.layer.0.SelfAttention.relative_attention_bias.weight, Size: torch.Size([32, 6])
Layer 7: encoder.block.0.layer.0.layer_norm.weight, Size: torch.Size([512])
Layer 8: encoder.block.0.layer.1.DenseReluDense.wi_0.weight, Size: torch.Size([1024, 512])
Layer 9: encoder.block.0.layer.1.DenseReluDense.wi_1.weight, Size: torch.Size([1024, 512])
Layer 10: encoder.block.0.layer.1.DenseReluDense.wo.weight, Size: torch.Size([512, 1024])
Layer 11: encoder.block.0.layer.1.layer_norm.weight, Size: torch.Size([512])
Layer 12: encoder.block.1.layer.0.SelfAttention.q.weight, Size: torch.S

## Set the tensor in final layer (decoder.final_layer_norm.weight) to all zeros

In [None]:
# Overwrite decoder.final_layer_norm.weight in place with a constant value.
new_tensor = 0  # value written to every element of the weight

for layer_num, (name, params) in enumerate(model.named_parameters()):
    if name == "decoder.final_layer_norm.weight":
        print(f"Layer {layer_num + 1}: {name}, Size: {params.size()}")
        # no_grad allows the in-place write on a parameter that requires gradients;
        # fill_ sets all elements in a single vectorized call instead of a Python loop.
        with torch.no_grad():
            params.fill_(new_tensor)
        break  # the target parameter has been handled; no need to scan the rest



Layer 189: decoder.final_layer_norm.weight, Size: torch.Size([512])


In [None]:
# Print the current values of decoder.final_layer_norm.weight with its 1-based position.
wanted = "decoder.final_layer_norm.weight"
for position, (param_name, tensor) in enumerate(model.named_parameters(), start=1):
    if param_name != wanted:
        continue
    print(f"Layer {position}: {param_name}, param: {tensor}")

Layer 189: decoder.final_layer_norm.weight, param: Parameter containing:
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,

## Verify if the Q&A task works after resetting the weights of the above layer

In [None]:
# Repeat the Q&A check on `model` after decoder.final_layer_norm.weight was overwritten.
input_text = "answer the following question in detail by applying a prefix of question: who is the president of USA?"
input_ids = tokenizer(input_text, return_tensors="pt").input_ids

# Default generation settings; the raw decoded text (special tokens included) is printed.
outputs = model.generate(input_ids)
print(tokenizer.decode(outputs[0]))

<pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>


### QnA is not working: with the final layer-norm weights set to zero, the model generates only `<pad>` tokens

## Replace the decoder.final_layer_norm.weight with a layer of smaller dimensions and adjust all the dependent layers to match the dimension


In [None]:
# Re-seed the RNGs, then load a second copy of the checkpoint (model2adjust)
# to use for the layer-replacement experiment.
set_seed(27122023)

model2adjust = AutoModelForSeq2SeqLM.from_pretrained(model_name)

In [None]:
# List every named parameter of model2adjust with a 1-based position and its size.
for position, (param_name, tensor) in enumerate(model2adjust.named_parameters(), start=1):
    print(f"Layer {position}: {param_name}, Size: {tensor.size()}")

Layer 1: shared.weight, Size: torch.Size([32128, 512])
Layer 2: encoder.block.0.layer.0.SelfAttention.q.weight, Size: torch.Size([384, 512])
Layer 3: encoder.block.0.layer.0.SelfAttention.k.weight, Size: torch.Size([384, 512])
Layer 4: encoder.block.0.layer.0.SelfAttention.v.weight, Size: torch.Size([384, 512])
Layer 5: encoder.block.0.layer.0.SelfAttention.o.weight, Size: torch.Size([512, 384])
Layer 6: encoder.block.0.layer.0.SelfAttention.relative_attention_bias.weight, Size: torch.Size([32, 6])
Layer 7: encoder.block.0.layer.0.layer_norm.weight, Size: torch.Size([512])
Layer 8: encoder.block.0.layer.1.DenseReluDense.wi_0.weight, Size: torch.Size([1024, 512])
Layer 9: encoder.block.0.layer.1.DenseReluDense.wi_1.weight, Size: torch.Size([1024, 512])
Layer 10: encoder.block.0.layer.1.DenseReluDense.wo.weight, Size: torch.Size([512, 1024])
Layer 11: encoder.block.0.layer.1.layer_norm.weight, Size: torch.Size([512])
Layer 12: encoder.block.1.layer.0.SelfAttention.q.weight, Size: torch.S

In [None]:
# Layer surgery on model2adjust: swap decoder.final_layer_norm for a narrower
# linear layer and rebuild lm_head so its input width matches.

config = model2adjust.config
# Output width of the replacement layer.
low_dim_size = 128
# Linear map from the model width (config.d_model) to low_dim_size features.
low_dim_layer = nn.Linear(config.d_model, low_dim_size)

# Put the low-dimensional layer where the decoder's final layer norm used to be.
model2adjust.decoder.final_layer_norm = low_dim_layer

# Rebuild lm_head to accept low_dim_size input features while keeping the row count
# of the existing lm_head weight as its output size.
# NOTE(review): both nn.Linear modules are newly constructed here, so presumably their
# weights are untrained — confirm before relying on the generated text.
outputDim = model2adjust.lm_head.weight.data.shape[0]
model2adjust.lm_head = nn.Linear(low_dim_size, outputDim)
# Print the parameter list again so the new sizes can be inspected.
for layer_num, (name, params) in enumerate(model2adjust.named_parameters()):
    print(f"Layer {layer_num + 1}: {name}, Size: {params.size()}")

Layer 1: shared.weight, Size: torch.Size([32128, 512])
Layer 2: encoder.block.0.layer.0.SelfAttention.q.weight, Size: torch.Size([384, 512])
Layer 3: encoder.block.0.layer.0.SelfAttention.k.weight, Size: torch.Size([384, 512])
Layer 4: encoder.block.0.layer.0.SelfAttention.v.weight, Size: torch.Size([384, 512])
Layer 5: encoder.block.0.layer.0.SelfAttention.o.weight, Size: torch.Size([512, 384])
Layer 6: encoder.block.0.layer.0.SelfAttention.relative_attention_bias.weight, Size: torch.Size([32, 6])
Layer 7: encoder.block.0.layer.0.layer_norm.weight, Size: torch.Size([512])
Layer 8: encoder.block.0.layer.1.DenseReluDense.wi_0.weight, Size: torch.Size([1024, 512])
Layer 9: encoder.block.0.layer.1.DenseReluDense.wi_1.weight, Size: torch.Size([1024, 512])
Layer 10: encoder.block.0.layer.1.DenseReluDense.wo.weight, Size: torch.Size([512, 1024])
Layer 11: encoder.block.0.layer.1.layer_norm.weight, Size: torch.Size([512])
Layer 12: encoder.block.1.layer.0.SelfAttention.q.weight, Size: torch.S

### A low-dimensional (size 128) linear layer has replaced the decoder's final layer norm

Testing the functionality after changing the dimension

In [None]:
# Run a short greedy generation through model2adjust to check that the modified
# architecture still executes end to end.
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
dialogue = "the apple is a big country but apple is also a fruit and a mobile"
prompt = f"""
Summarize the following conversation.

{dialogue}

Summary:
"""
inputs = tokenizer(prompt, return_tensors='pt')
generated = model2adjust.generate(inputs["input_ids"], max_new_tokens=5)
# Decode only the first returned sequence, dropping special tokens.
output = tokenizer.decode(generated[0], skip_special_tokens=True)
print(output)


violent într visibility Timişoara rel


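The modified model runs end to end (generation completes without shape errors), but the generated text is not meaningful because the replacement layers have not been trained.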

### A single class that lets the user perform translation, summarization or QnA

In [None]:
# Reload the checkpoint and tokenizer so `model` no longer carries the weights
# that were overwritten earlier in the notebook.
model_name='google/flan-t5-small'
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)


In [None]:
class llmFlan:
  """Run one prompt-based task (translation, summarization or QnA) on a seq2seq model.

  Args:
    model: object exposing ``generate(input_ids, **kwargs)``.
    tokenizer: callable that encodes a string (called with ``return_tensors``) and
      exposes ``decode``.
    task: name of the task to run: ``'translation'``, ``'summarization'`` or ``'QnA'``.
    text: input text the task operates on.
  """

  def __init__(self, model,tokenizer,task,text):
    self.model = model
    self.tokenizer = tokenizer
    self.task = task
    self.text = text


  def summarization(self):
    """Wrap ``self.text`` in a summarization prompt, sample a short output, print and return it."""
    prompt = f"""
    Summarize the following conversation by understanding the context.

    {self.text}

    Summary:
    """
    inputs = self.tokenizer(prompt, return_tensors='pt')
    # Sampling-based decoding capped at 10 new tokens; only the first sequence is decoded.
    output = self.tokenizer.decode(
            self.model.generate(
                inputs["input_ids"],
                max_new_tokens=10,temperature = .9,
                do_sample=True,
                top_k=30,
                top_p=0.9,
                num_return_sequences=1
            )[0],
            skip_special_tokens=False
        )
    print(prompt,output)

    return output

  def translation(self,targetlang='German'):
    """Translate ``self.text`` from English to ``targetlang`` and return the decoded output."""
    input_text = f"translate English to {targetlang}: {self.text}"
    input_ids = self.tokenizer(input_text, return_tensors="pt").input_ids

    outputs = self.model.generate(input_ids)
    return self.tokenizer.decode(outputs[0])


  def QnA(self):
    """Treat ``self.text`` as a question and return the decoded answer."""
    print("please ask a Question")
    input_text = f"answer the following question : {self.text}"
    input_ids = self.tokenizer(input_text, return_tensors="pt").input_ids

    outputs = self.model.generate(input_ids)
    return self.tokenizer.decode(outputs[0])

  def run(self):
    """Dispatch to the method selected by ``self.task`` and return its result.

    Raises:
      ValueError: if ``self.task`` is not 'translation', 'summarization' or 'QnA'.
    """
    # Dispatch on the instance's own task, not on a global variable named `task`.
    if self.task == 'translation':
      return self.translation()
    elif self.task == 'summarization':
      return self.summarization()
    elif self.task == 'QnA':
      return self.QnA()
    # Fail loudly instead of silently returning None for an unknown task.
    raise ValueError(
        f"unknown task {self.task!r}; expected 'translation', 'summarization' or 'QnA'"
    )




In [None]:
# Task name and input passage handed to llmFlan in the next cell.
task = 'summarization'
text = 'A dollar is a unit of money used in many countries, including the United States, Australia, Canada, and New Zealand. It is represented by the symbol $ and is divided into 100 smaller units called cents'

In [None]:
# llmFlan's constructor takes (model, tokenizer, task, text). Pass task and text by
# keyword so they cannot be swapped and the passage (not the task name) is summarized.
llmmain = llmFlan(model, tokenizer, task=task, text=text)
llmmain.run()


    Summarize the following conversation by understanding the context.  

    summarization

    Summary:
     <pad> W: This is the most common method to track


'<pad> W: This is the most common method to track'

### Some hallucination is present in the output and needs to be addressed

In [None]:
# Copy the local basicTasks folder back to the project folder on Drive.
# The source path is relative, so this depends on the notebook's current working directory.
!cp -r llmflan_Tasks/basicTasks/ /content/drive/MyDrive/llmflan_Tasks

In [None]:
# Store a copy of this notebook next to the task files on Drive.
!cp '/content/drive/MyDrive/Colab Notebooks/flanModel_basicTasks.ipynb' /content/drive/MyDrive/llmflan_Tasks/basicTasks