<a href="https://colab.research.google.com/github/JonkeyGuan/ragbook-notebooks/blob/main/notebooks/Chapter%2002%20-%20Understanding_Transformer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install -q transformers accelerate bitsandbytes

In [2]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

In [3]:
# Load OPT-1.3B with 8-bit quantized weights. The quantization settings are passed
# as a `BitsAndBytesConfig` through `quantization_config`, because the bare
# `load_in_8bit=True` keyword is deprecated and slated for removal.
OPT = AutoModelForCausalLM.from_pretrained(
    "facebook/opt-1.3b",
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


In [4]:
# Fetch the tokenizer published under the same checkpoint name as the model.
tokenizer = AutoTokenizer.from_pretrained(
    "facebook/opt-1.3b",
)

In [5]:
# Tokenize one sample sentence into PyTorch tensors, then show the shape of the
# token-id tensor followed by the complete tokenizer output.
inp = "The quick brown fox jumps over the lazy dog"
inp_tokenized = tokenizer(inp, return_tensors="pt")
print(inp_tokenized["input_ids"].shape)
print(inp_tokenized)

torch.Size([1, 10])
{'input_ids': tensor([[    2,   133,  2119,  6219, 23602, 13855,    81,     5, 22414,  2335]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}


In [6]:
print( OPT.model )

OPTModel(
  (decoder): OPTDecoder(
    (embed_tokens): Embedding(50272, 2048, padding_idx=1)
    (embed_positions): OPTLearnedPositionalEmbedding(2050, 2048)
    (final_layer_norm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
    (layers): ModuleList(
      (0-23): 24 x OPTDecoderLayer(
        (self_attn): OPTAttention(
          (k_proj): Linear8bitLt(in_features=2048, out_features=2048, bias=True)
          (v_proj): Linear8bitLt(in_features=2048, out_features=2048, bias=True)
          (q_proj): Linear8bitLt(in_features=2048, out_features=2048, bias=True)
          (out_proj): Linear8bitLt(in_features=2048, out_features=2048, bias=True)
        )
        (activation_fn): ReLU()
        (self_attn_layer_norm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
        (fc1): Linear8bitLt(in_features=2048, out_features=8192, bias=True)
        (fc2): Linear8bitLt(in_features=8192, out_features=2048, bias=True)
        (final_layer_norm): LayerNorm((2048,), eps=1e-05, e

In [12]:
# Move every tokenizer output tensor onto the device that holds the model.
# Reading the device from the model instead of hard-coding "cuda" keeps the
# inputs and the weights together whichever device the model was loaded onto.
inp_tokenized = {
    name: tensor.to(OPT.device) for name, tensor in inp_tokenized.items()
}

In [13]:
# Look up the token-embedding vectors for the sample ids, then report the layer,
# the shape of the result, and the values themselves.
token_embedding_layer = OPT.model.decoder.embed_tokens
embedded_input = token_embedding_layer(inp_tokenized["input_ids"])
print("Layer:\t", token_embedding_layer)
print("Size:\t", embedded_input.shape)
print("Output:\t", embedded_input)

Layer:	 Embedding(50272, 2048, padding_idx=1)
Size:	 torch.Size([1, 10, 2048])
Output:	 tensor([[[-0.0407,  0.0519,  0.0574,  ..., -0.0263, -0.0355, -0.0260],
         [-0.0371,  0.0220, -0.0096,  ...,  0.0265, -0.0166, -0.0030],
         [-0.0455, -0.0236, -0.0121,  ...,  0.0043, -0.0166,  0.0193],
         ...,
         [ 0.0007,  0.0267,  0.0257,  ...,  0.0622,  0.0421,  0.0279],
         [-0.0126,  0.0347, -0.0352,  ..., -0.0393, -0.0396, -0.0102],
         [-0.0115,  0.0319,  0.0274,  ..., -0.0472, -0.0059,  0.0341]]],
       device='cuda:0', dtype=torch.float16, grad_fn=<EmbeddingBackward0>)


In [14]:
# Compute the learned positional embeddings for the sample. The layer is called
# with the attention mask rather than the token ids (presumably it derives the
# positions from the mask; confirm against the layer's implementation).
position_embedding_layer = OPT.model.decoder.embed_positions
embed_pos_input = position_embedding_layer(inp_tokenized["attention_mask"])
print("Layer:\t", position_embedding_layer)
print("Size:\t", embed_pos_input.shape)
print("Output:\t", embed_pos_input)

Layer:	 OPTLearnedPositionalEmbedding(2050, 2048)
Size:	 torch.Size([1, 10, 2048])
Output:	 tensor([[[-8.1406e-03, -2.6221e-01,  6.0768e-03,  ...,  1.7273e-02,
          -5.0621e-03, -1.6220e-02],
         [-8.0585e-05,  2.5000e-01, -1.6632e-02,  ..., -1.5419e-02,
          -1.7838e-02,  2.4948e-02],
         [-9.9411e-03, -1.4978e-01,  1.7557e-03,  ...,  3.7117e-03,
          -1.6434e-02, -9.9087e-04],
         ...,
         [ 3.6979e-04, -7.7454e-02,  1.2955e-02,  ...,  3.9330e-03,
          -1.1642e-02,  7.8506e-03],
         [-2.6779e-03, -2.2446e-02, -1.6754e-02,  ..., -1.3142e-03,
          -7.8583e-03,  2.0096e-02],
         [-8.6288e-03,  1.4233e-01, -1.9012e-02,  ..., -1.8463e-02,
          -9.8572e-03,  8.7662e-03]]], device='cuda:0', dtype=torch.float16,
       grad_fn=<EmbeddingBackward0>)


In [15]:
# Sum the token and positional embeddings, then run that sum through the first
# decoder layer's self-attention module on its own.
embed_position_input = embedded_input + embed_pos_input
# Keep only the first element of the returned tuple (the attention output).
# Indexing, instead of unpacking into exactly three names, keeps this cell working
# if the number of returned values differs between transformers releases, and it
# avoids overwriting IPython's `_` last-output variable.
# NOTE(review): no attention mask is passed here, so this call presumably attends
# over all positions rather than causally; confirm against the full decoder
# forward pass before comparing values with real model activations.
hidden_states = OPT.model.decoder.layers[0].self_attn( embed_position_input )[0]
print( "Layer:\t", OPT.model.decoder.layers[0].self_attn )
print( "Size:\t", hidden_states.size() )
print( "Output:\t", hidden_states )

Layer:	 OPTAttention(
  (k_proj): Linear8bitLt(in_features=2048, out_features=2048, bias=True)
  (v_proj): Linear8bitLt(in_features=2048, out_features=2048, bias=True)
  (q_proj): Linear8bitLt(in_features=2048, out_features=2048, bias=True)
  (out_proj): Linear8bitLt(in_features=2048, out_features=2048, bias=True)
)
Size:	 torch.Size([1, 10, 2048])
Output:	 tensor([[[-0.0136, -0.0095,  0.0012,  ...,  0.0067, -0.0018,  0.0131],
         [-0.0131, -0.0100,  0.0022,  ...,  0.0088,  0.0003,  0.0124],
         [-0.0131, -0.0060,  0.0038,  ...,  0.0099,  0.0021,  0.0141],
         ...,
         [-0.0121, -0.0099,  0.0051,  ...,  0.0095,  0.0016,  0.0098],
         [-0.0120, -0.0103,  0.0052,  ...,  0.0094,  0.0012,  0.0091],
         [-0.0119, -0.0110,  0.0056,  ...,  0.0095,  0.0013,  0.0093]]],
       device='cuda:0', dtype=torch.float16, grad_fn=<MatMul8bitLtBackward>)
