In [1]:
import tiktoken
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Tokenize a sample prompt with tiktoken's GPT-2 encoding and print the token ids.
enc = tiktoken.get_encoding('gpt2')


def encode(s):
    """Return the token ids `enc.encode_ordinary` produces for the string `s`."""
    return enc.encode_ordinary(s)


# Public accessor for the '<|endoftext|>' id; avoids reaching into the
# private `_special_tokens` mapping of the encoding object.
eot = enc.eot_token

tokens = encode("What is the capital of France?")

# Emit every id on a single line, each one followed by a space.
for token in tokens:
    print(token, end=' ')

2061 318 262 3139 286 4881 30 

In [3]:
def generate_text(tokenizer, model, prompt, max_length=64):
    """Greedily generate a continuation of `prompt` and decode it to text.

    Args:
        tokenizer: Callable invoked as `tokenizer(prompt, return_tensors="pt")`;
            the result must support `.to(device)` and expose the keys
            "input_ids" and "attention_mask". Must also provide
            `eos_token_id` and `decode(...)`.
        model: Object providing `generate(...)`. If it has a `device`
            attribute, the encoded prompt is moved to that device.
        prompt: Text to continue.
        max_length: Forwarded unchanged as `max_length` to `model.generate`
            (per the Transformers documentation this bounds prompt tokens
            plus newly generated tokens).

    Returns:
        A tuple `(input_ids, text)`: the encoded prompt ids exactly as passed
        to `model.generate`, and the first generated sequence decoded with
        `skip_special_tokens=True`.
    """
    # Place the inputs on the device the model actually lives on. Guessing from
    # CUDA availability alone breaks when the model was left on the CPU of a
    # CUDA-capable machine. Fall back to the old guess for objects that do not
    # expose `.device`.
    device = getattr(model, "device", None)
    if device is None:
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
    inputs = tokenizer(prompt, return_tensors="pt").to(device)

    outputs = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_length=max_length,
        # Sampling is disabled, so no top_p / top_k settings are passed.
        do_sample=False,
        # The EOS id doubles as the padding id for generation.
        pad_token_id=tokenizer.eos_token_id,
    )
    return inputs["input_ids"], tokenizer.decode(outputs[0], skip_special_tokens=True)


In [6]:
# Load the tokenizer and causal LM from the converted checkpoint directory.
# With CUDA available the weights are loaded as bfloat16 and moved to the GPU;
# otherwise they are loaded as float32 and kept on the CPU.
MODEL_DIR = "../converted_model"
has_cuda = torch.cuda.is_available()

tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_DIR,
    torch_dtype=torch.bfloat16 if has_cuda else torch.float32,
)
model = model.to('cuda' if has_cuda else 'cpu')

In [None]:
# Generate a continuation for one prompt, then print the prompt's token ids
# (space-terminated, on one line) followed by the decoded text.
# NOTE(review): the saved output under this cell shows the prompt
# "What is the capital of France?", which does not match the prompt below;
# re-run the cell to refresh it.
input_ids, generated_text = generate_text(
    tokenizer,
    model,
    "Name three types of renewable energy.",
)
print("".join(f"{token_id} " for token_id in input_ids[0].tolist()))

print(generated_text)

2061 318 262 3139 286 4881 30 
What is the capital of France?

The capital of France is Paris.

The capital of France is Paris.

The capital of France is Paris.

The capital of France is Paris.

The capital of France is Paris.

The capital of France is Paris.

The
