# Flan-T5

The T5 Tokenizer uses the SentencePiece Library, so install it first.
To use INT8 weights, we have to install ´accelerate´ and ´bitsandbytes´

In [1]:
!pip install sentencepiece
!pip install datasets --quiet
!pip install transformers --quiet
!pip install evaluate --quiet
!pip install torch --quiet
!pip install wandb --quiet
!pip install scikit-learn --quiet
!pip install accelerate
!pip install bitsandbytes

Collecting sentencepiece
  Using cached sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
Installing collected packages: sentencepiece
Successfully installed sentencepiece-0.1.99
Collecting accelerate
  Using cached accelerate-0.19.0-py3-none-any.whl (219 kB)
Installing collected packages: accelerate
Successfully installed accelerate-0.19.0
Collecting bitsandbytes
  Using cached bitsandbytes-0.38.1-py3-none-any.whl (104.3 MB)
Installing collected packages: bitsandbytes
Successfully installed bitsandbytes-0.38.1


## flan-t5-small FP32

In [2]:
from transformers import T5Tokenizer, T5ForConditionalGeneration

import torch

from datasets import load_dataset
import numpy as np

import scoring

from tqdm.auto import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [4]:
tokenizer_small = T5Tokenizer.from_pretrained("google/flan-t5-small")
model_small = T5ForConditionalGeneration.from_pretrained("google/flan-t5-small")

model_small.to(device)

T5ForConditionalGeneration(
  (shared): Embedding(32128, 512)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 512)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=384, bias=False)
              (k): Linear(in_features=512, out_features=384, bias=False)
              (v): Linear(in_features=512, out_features=384, bias=False)
              (o): Linear(in_features=384, out_features=512, bias=False)
              (relative_attention_bias): Embedding(32, 6)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseGatedActDense(
              (wi_0): Linear(in_features=512, out_features=1024, bias=False)
              (wi_1): Linear(in_features=512, out_features=1024, bias=False)
              (wo): 

In [5]:
ds = load_dataset("OpenAssistant/oasst1")
df_test = ds['validation'].to_pandas()

df_english = df_test.query(f"lang == 'en'").sort_values("created_date")

df_english_prompts = df_english.query(f"role == 'prompter'")
df_english_assistants = df_english.query(f"role == 'assistant'")

Found cached dataset parquet (/home/jovyan/.cache/huggingface/datasets/OpenAssistant___parquet/OpenAssistant--oasst1-2960c57d7e52ab15/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
100%|██████████| 2/2 [00:00<00:00, 305.12it/s]


In [6]:
def compute_f1_score(model, tokenizer, df_prompts, df_assistants):
    f1_scores = []
    model.eval()
    with torch.no_grad():
        for index, prompt in df_prompts.iterrows():
            inputs = tokenizer(prompt["text"], return_tensors="pt").input_ids.to("cuda")
            outputs = model.generate(inputs, max_new_tokens=100, no_repeat_ngram_size=3)
            assistants = df_assistants.query(f'parent_id == "{prompt["message_id"]}"')
            if assistants.size > 0:
                assistant = assistants.sort_values('rank').iloc[0]
            f1_scores.append(scoring.compute_f1(tokenizer.decode(outputs[0]), assistant["text"]))

    return f1_scores

In [7]:
small_f1_scores = compute_f1_score(model_small, tokenizer_small, df_english_prompts, df_english_assistants)

Token indices sequence length is longer than the specified maximum sequence length for this model (2053 > 512). Running this sequence through the model will result in indexing errors


## flan-t5-base FP32

In [9]:
tokenizer_base = T5Tokenizer.from_pretrained("google/flan-t5-base")
model_base = T5ForConditionalGeneration.from_pretrained("google/flan-t5-base")
model_base.to(device)

T5ForConditionalGeneration(
  (shared): Embedding(32128, 768)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 768)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=768, out_features=768, bias=False)
              (k): Linear(in_features=768, out_features=768, bias=False)
              (v): Linear(in_features=768, out_features=768, bias=False)
              (o): Linear(in_features=768, out_features=768, bias=False)
              (relative_attention_bias): Embedding(32, 12)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseGatedActDense(
              (wi_0): Linear(in_features=768, out_features=2048, bias=False)
              (wi_1): Linear(in_features=768, out_features=2048, bias=False)
              (wo):

In [10]:
base_f1_scores = compute_f1_score(model_base, tokenizer_base, df_english_prompts, df_english_assistants)

Token indices sequence length is longer than the specified maximum sequence length for this model (2053 > 512). Running this sequence through the model will result in indexing errors


In [11]:
large_tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-large")
large_model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-large", device_map="auto")

In [12]:
large_f1_score = compute_f1_score(large_model, large_tokenizer, df_english_prompts, df_english_assistants)

Token indices sequence length is longer than the specified maximum sequence length for this model (2053 > 512). Running this sequence through the model will result in indexing errors


In [13]:
print("small f1", np.array(small_f1_scores).mean())
print("base f1", np.array(base_f1_scores).mean())
print("large f1", np.array(large_f1_score).mean())

small f1 0.03732750489253997
base f1 0.03903745924302759
large f1 0.04091182158302516
