# Exploring T5

In [1]:
from transformers import T5Tokenizer, T5ForConditionalGeneration, AutoTokenizer

In [2]:
tokenizer = T5Tokenizer.from_pretrained("t5-small")
model = T5ForConditionalGeneration.from_pretrained("t5-small")

Downloading (…)okenizer_config.json:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

## Exploring Tokenizer

In [3]:
input_ids = tokenizer("translate English to German: The house is wonderful.", return_tensors="pt").input_ids

In [4]:
input_ids

tensor([[13959,  1566,    12,  2968,    10,    37,   629,    19,  1627,     5,
             1]])

In [5]:
type(input_ids)

torch.Tensor

In [6]:
tokenizer.convert_ids_to_tokens(input_ids[0])

['▁translate',
 '▁English',
 '▁to',
 '▁German',
 ':',
 '▁The',
 '▁house',
 '▁is',
 '▁wonderful',
 '.',
 '</s>']

In [7]:
tokenizer.convert_ids_to_tokens([i for i in range(4)])

['<pad>', '</s>', '<unk>', '▁']

In [8]:
for i in input_ids[0]:
    print(tokenizer.decode(i))

translate
English
to
German
:
The
house
is
wonderful
.
</s>


In [9]:
tokenizer.decode(token_ids = input_ids[0])

'translate English to German: The house is wonderful.</s>'

In [10]:
input_ids = tokenizer("translate English to German: I am working on my machine learning skills.", 
                      return_tensors="pt").input_ids

tokenizer.convert_ids_to_tokens(input_ids[0])

['▁translate',
 '▁English',
 '▁to',
 '▁German',
 ':',
 '▁I',
 '▁am',
 '▁working',
 '▁on',
 '▁my',
 '▁machine',
 '▁learning',
 '▁skills',
 '.',
 '</s>']

## Encode Decoder

In [12]:
outputs = model.generate(input_ids)
outputs

tensor([[    0,  1674,     3,  5269,    15,    46,     3, 12289, 22622,  9588,
         22416,     7,    35,     5,     1]])

In [19]:
model.encoder

T5Stack(
  (embed_tokens): Embedding(32128, 512)
  (block): ModuleList(
    (0): T5Block(
      (layer): ModuleList(
        (0): T5LayerSelfAttention(
          (SelfAttention): T5Attention(
            (q): Linear(in_features=512, out_features=512, bias=False)
            (k): Linear(in_features=512, out_features=512, bias=False)
            (v): Linear(in_features=512, out_features=512, bias=False)
            (o): Linear(in_features=512, out_features=512, bias=False)
            (relative_attention_bias): Embedding(32, 8)
          )
          (layer_norm): T5LayerNorm()
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (1): T5LayerFF(
          (DenseReluDense): T5DenseActDense(
            (wi): Linear(in_features=512, out_features=2048, bias=False)
            (wo): Linear(in_features=2048, out_features=512, bias=False)
            (dropout): Dropout(p=0.1, inplace=False)
            (act): ReLU()
          )
          (layer_norm): T5LayerNorm()
          (dr

In [28]:
model.decoder

T5Stack(
  (embed_tokens): Embedding(32128, 512)
  (block): ModuleList(
    (0): T5Block(
      (layer): ModuleList(
        (0): T5LayerSelfAttention(
          (SelfAttention): T5Attention(
            (q): Linear(in_features=512, out_features=512, bias=False)
            (k): Linear(in_features=512, out_features=512, bias=False)
            (v): Linear(in_features=512, out_features=512, bias=False)
            (o): Linear(in_features=512, out_features=512, bias=False)
            (relative_attention_bias): Embedding(32, 8)
          )
          (layer_norm): T5LayerNorm()
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (1): T5LayerCrossAttention(
          (EncDecAttention): T5Attention(
            (q): Linear(in_features=512, out_features=512, bias=False)
            (k): Linear(in_features=512, out_features=512, bias=False)
            (v): Linear(in_features=512, out_features=512, bias=False)
            (o): Linear(in_features=512, out_features=512, bias=Fa

In [29]:
encoder_output = model.encoder(input_ids)
encoder_output

BaseModelOutputWithPastAndCrossAttentions(last_hidden_state=tensor([[[ 0.0184,  0.1329,  0.0284,  ..., -0.0143,  0.0362,  0.0983],
         [ 0.0560, -0.0842, -0.0689,  ...,  0.0856,  0.0203,  0.0782],
         [ 0.0104, -0.0200,  0.0024,  ...,  0.0013, -0.0042, -0.0387],
         ...,
         [ 0.1203, -0.0776, -0.2231,  ...,  0.0585, -0.0211, -0.1241],
         [-0.0280, -0.0638,  0.0094,  ...,  0.1404, -0.1971, -0.0281],
         [ 0.0989,  0.0318, -0.0382,  ..., -0.0037, -0.0696,  0.0254]]],
       grad_fn=<MulBackward0>), past_key_values=None, hidden_states=None, attentions=None, cross_attentions=None)

In [37]:
encoder_output.__dict__

{'last_hidden_state': tensor([[[ 0.0184,  0.1329,  0.0284,  ..., -0.0143,  0.0362,  0.0983],
          [ 0.0560, -0.0842, -0.0689,  ...,  0.0856,  0.0203,  0.0782],
          [ 0.0104, -0.0200,  0.0024,  ...,  0.0013, -0.0042, -0.0387],
          ...,
          [ 0.1203, -0.0776, -0.2231,  ...,  0.0585, -0.0211, -0.1241],
          [-0.0280, -0.0638,  0.0094,  ...,  0.1404, -0.1971, -0.0281],
          [ 0.0989,  0.0318, -0.0382,  ..., -0.0037, -0.0696,  0.0254]]],
        grad_fn=<MulBackward0>),
 'past_key_values': None,
 'hidden_states': None,
 'attentions': None,
 'cross_attentions': None}

## Explore Prefixes

### ColA

In [74]:
input_info = "cola sentence: John made Bill master of himself."
input_ids = tokenizer(input_info, return_tensors="pt").input_ids
output_ids = model.generate(input_ids)
raw_output = tokenizer.decode(output_ids[0])
output = tokenizer.decode(output_ids[0], skip_special_tokens=True)
output

'acceptable'

In [79]:
input_info = "cola sentence: nwlqwne."
input_ids = tokenizer(input_info, return_tensors="pt").input_ids
output_ids = model.generate(input_ids)
raw_output = tokenizer.decode(output_ids[0])
output = tokenizer.decode(output_ids[0], skip_special_tokens=True)
output

'acceptable'

### RTE

In [80]:
input_info = "rte sentence1: A smaller proportion of Yugoslavia’s Italians were settled in Slovenia (at the 1991 national census, some 3000 inhabitants of Slovenia declared themselves as ethnic Italians). sentence2: Slovenia has 3,000 inhabitants."
input_ids = tokenizer(input_info, return_tensors="pt").input_ids
output_ids = model.generate(input_ids)
raw_output = tokenizer.decode(output_ids[0])
output = tokenizer.decode(output_ids[0], skip_special_tokens=True)
output

'not_entailment'