In [20]:
import torch
from transformers import Data2VecTextForCausalLM, AutoTokenizer, GenerationConfig
from PIL import Image
import requests

In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=0)

In [10]:
version = "facebook/data2vec-text-base"
text = "Studies have been shown that owning a dog is good for you"

# AutoTokenizer

In [11]:
tokenizer: AutoTokenizer = AutoTokenizer.from_pretrained(version)
tokenizer

RobertaTokenizerFast(name_or_path='facebook/data2vec-text-base', vocab_size=50265, model_max_length=512, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'bos_token': AddedToken("<s>", rstrip=False, lstrip=False, single_word=False, normalized=True), 'eos_token': AddedToken("</s>", rstrip=False, lstrip=False, single_word=False, normalized=True), 'unk_token': AddedToken("<unk>", rstrip=False, lstrip=False, single_word=False, normalized=True), 'sep_token': AddedToken("</s>", rstrip=False, lstrip=False, single_word=False, normalized=True), 'pad_token': AddedToken("<pad>", rstrip=False, lstrip=False, single_word=False, normalized=True), 'cls_token': AddedToken("<s>", rstrip=False, lstrip=False, single_word=False, normalized=True), 'mask_token': AddedToken("<mask>", rstrip=False, lstrip=True, single_word=False, normalized=True)}, clean_up_tokenization_spaces=True)

## processor

In [12]:
inputs = tokenizer(
    text = text,                # 可以为列表或单个string
    return_tensors = "pt",      # 返回数据格式 np pt tf jax
    padding = True,             # 填充方式选择 [True, 'longest', 'max_length', 'do_not_pad']
    # max_length = max_length,  # 如果使用max_length要将padding设置为 "max_length"
    add_special_tokens = True,  # text添加特殊key
).to(device, torch.float16)
inputs

{'input_ids': tensor([[    0, 46000,    33,    57,  2343,    14, 15487,    10,  2335,    16,
           205,    13,    47,     2]], device='cuda:0'), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]], device='cuda:0')}

In [13]:
inputs["input_ids"]

tensor([[    0, 46000,    33,    57,  2343,    14, 15487,    10,  2335,    16,
           205,    13,    47,     2]], device='cuda:0')

# Data2VecTextForCausalLM

Data2VecText Model with a language modeling head on top for CLM fine-tuning.

In [14]:
model: Data2VecTextForCausalLM = Data2VecTextForCausalLM.from_pretrained(version, torch_dtype=torch.float16).to(device)
model

If you want to use `Data2VecTextLMHeadModel` as a standalone, add `is_decoder=True.`


Data2VecTextForCausalLM(
  (data2vec_text): Data2VecTextModel(
    (embeddings): Data2VecTextForTextEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): Data2VecTextEncoder(
      (layer): ModuleList(
        (0-11): 12 x Data2VecTextLayer(
          (attention): Data2VecTextAttention(
            (self): Data2VecTextSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): Data2VecTextSelfOutput(
              (dense): Linear(in_features=768, out

## model()

In [15]:
model.eval()
with torch.inference_mode():
    outputs = model(**inputs)
outputs

CausalLMOutputWithCrossAttentions(loss=None, logits=tensor([[[ 0.1393,  0.0000,  1.0835,  ..., -0.0165,  0.0973,  0.2165],
         [ 1.2129,  0.0000,  1.5141,  ..., -0.3814,  0.0653, -1.0900],
         [ 0.0293,  0.0000,  1.0880,  ..., -0.0131,  0.0710,  0.2964],
         ...,
         [ 1.1673,  0.0000,  1.5146,  ..., -0.3858,  0.0763, -1.1848],
         [-0.6855,  0.0000, -0.8657,  ..., -0.2378, -0.1951, -0.5437],
         [-0.6442,  0.0000,  0.0362,  ...,  0.0658, -0.0889,  0.3703]]],
       device='cuda:0'), past_key_values=None, hidden_states=None, attentions=None, cross_attentions=None)

In [16]:
outputs.logits.shape

torch.Size([1, 14, 50265])

## model.generate()

In [17]:
model.eval()
with torch.inference_mode():
    outputs = model.generate(
        **inputs,
        # num_beams: beam search num
        generation_config = GenerationConfig(num_beams=1, min_length=0, max_new_tokens=100),
    )
outputs



tensor([[    0, 46000,    33,    57,  2343,    14, 15487,    10,  2335,    16,
           205,    13,    47,     2,  2560, 21748, 21748, 21748, 21748, 21748]],
       device='cuda:0')

In [22]:
tokenizer.batch_decode(outputs, skip_special_tokens=True)

['Studies have been shown that owning a dog is good for youla lib lib lib lib lib']