# In [None]:
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BertModel,
    BertTokenizer,
    GPT2Model,
    GPT2Tokenizer,
)

# BERT tokenizer. Loaded here but not referenced again in this section.
bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# GPT-2 tokenizer and the bare GPT-2 backbone with cross-attention enabled.
# GPT2Model has no language-modelling head, so it cannot be used with
# generate(); below it only supplies the hidden size via its config.
gpt2_tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
gpt2_model = GPT2Model.from_pretrained('gpt2', add_cross_attention=True)

# Causal-LM GPT-2 (backbone + LM head) and its tokenizer. This is the model
# used for every generate() call in this section, so it is loaded up front.
# NOTE(review): the cross-attention layers are not part of the pretrained
# 'gpt2' checkpoint, so they are presumably freshly initialised - confirm.
model = AutoModelForCausalLM.from_pretrained("gpt2", add_cross_attention=True)
tokenizer = AutoTokenizer.from_pretrained("gpt2")

# Special tokens
bos = gpt2_tokenizer.bos_token
eos = gpt2_tokenizer.eos_token
# GPT-2 ships without a pad token, so reuse EOS. The previous value '-100'
# looks like the loss ignore-index rather than a real vocabulary token.
gpt2_tokenizer.pad_token = eos

# Random stand-in for encoder outputs: (batch=1, encoder_len=16, hidden size)
encoder_hidden_states = torch.rand(1, 16, gpt2_model.config.n_embd)

# Tokenize the prompt, padded to 16 tokens. Decoder-only generation continues
# from the last position, so the padding must sit on the left; the tokenizer's
# original padding side is restored afterwards so later code is unaffected.
txt = bos + 'He want you ask ' + eos
previous_padding_side = gpt2_tokenizer.padding_side
gpt2_tokenizer.padding_side = "left"
try:
    encoded = gpt2_tokenizer(
        txt,
        padding="max_length",
        max_length=16,
        add_special_tokens=True,
        return_tensors="pt",
    )
finally:
    gpt2_tokenizer.padding_side = previous_padding_side
input_ids = encoded['input_ids']

# Sample a continuation conditioned on the random encoder states.
# The attention mask tells the model to ignore the pad positions.
# NOTE(review): whether generate() forwards encoder_hidden_states to every
# decoding step depends on the installed transformers version - confirm.
gen_tokens = model.generate(
    input_ids,
    attention_mask=encoded['attention_mask'],
    encoder_hidden_states=encoder_hidden_states,
    do_sample=True,
    temperature=0.9,
    max_length=100,
    pad_token_id=gpt2_tokenizer.pad_token_id,
)

# Greedy generation from a bare BOS prompt with all-zero encoder states.
prompt = bos
prompt_encoding = tokenizer(prompt, return_tensors="pt")
input_ids = prompt_encoding.input_ids
gen_tokens = model.generate(
    input_ids,
    attention_mask=prompt_encoding.attention_mask,
    encoder_hidden_states=encoder_hidden_states*0,
    # Greedy decoding: temperature only applies when sampling, so it is omitted.
    do_sample=False,
    max_length=10,
    pad_token_id=tokenizer.eos_token_id,
)
gen_text = tokenizer.batch_decode(gen_tokens)[0]

def tokenize(processor, target):
    """Encode ``target`` with the processor's tokenizer and return the first row of ids.

    Args:
        processor: Object exposing a callable ``tokenizer`` attribute.
        target: Text handed to the tokenizer.

    Returns:
        Element 0 of the ``input_ids`` produced by calling the tokenizer with
        ``return_tensors="pt"``.
    """
    encoding = processor.tokenizer(target, return_tensors="pt")
    id_rows = encoding.input_ids
    return id_rows[0]

def untokenize(processor, tokens):
    """Decode label token ids back into text.

    Every position equal to -100 is replaced by the tokenizer's pad token id
    before decoding. The replacement is done on a copy, so the caller's
    ``tokens`` is left untouched.

    Args:
        processor: Object exposing ``tokenizer.pad_token_id`` and
            ``decode(ids, skip_special_tokens=...)``.
        tokens: Token ids supporting ``== -100`` masking and boolean-mask
            assignment, and providing ``clone()`` or ``copy()`` (for example
            a torch tensor or a NumPy array).

    Returns:
        The result of ``processor.decode`` on the cleaned ids, called with
        ``skip_special_tokens=True``.
    """
    # Copy first: assigning through the mask would otherwise overwrite the
    # -100 entries in the caller's own tensor.
    labels = tokens.clone() if hasattr(tokens, "clone") else tokens.copy()
    labels[labels == -100] = processor.tokenizer.pad_token_id
    return processor.decode(labels, skip_special_tokens=True)