In [2]:
import torch
import numpy as np
from transformers import GPT2Tokenizer, GPT2LMHeadModel, top_k_top_p_filtering

# load tokenizer and model
tokenizer = GPT2Tokenizer.from_pretrained('sshleifer/tiny-gpt2')
model = GPT2LMHeadModel.from_pretrained('sshleifer/tiny-gpt2')


# define prefix
prefix = "hel"

# tokenize prefix and convert to tensor
input_ids = torch.tensor(tokenizer.encode(prefix)).unsqueeze(0)

# generate completions
max_length = len(prefix) + 5 # set maximum length of completion
temperature = 1.0 # set sampling temperature
top_k = 10 # set top-k sampling value
top_p = 0.95 # set top-p sampling value
eos_token_id = tokenizer.eos_token_id # get end-of-sequence token id

# generate logits for next token using model
logits = model(input_ids)[0][:, -1, :]
# apply top-k and top-p filtering to logits
filtered_logits = top_k_top_p_filtering(logits, top_k=top_k, top_p=top_p)
# sample next token from filtered logits
probabilities = torch.softmax(filtered_logits / temperature, dim=-1)
next_token = torch.multinomial(probabilities, num_samples=1)

# append next token to output tensor
output = torch.cat((input_ids, next_token), dim=1)

# generate sequence of tokens
for i in range(max_length):
    # generate logits for next token using model
    logits = model(output)[0][:, -1, :]
    # apply top-k and top-p filtering to logits
    filtered_logits = top_k_top_p_filtering(logits, top_k=top_k, top_p=top_p)
    # sample next token from filtered logits
    probabilities = torch.softmax(filtered_logits / temperature, dim=-1)
    next_token = torch.multinomial(probabilities, num_samples=1)
    # append next token to output tensor
    output = torch.cat((output, next_token), dim=1)
    # check if end-of-sequence token is generated
    if next_token == eos_token_id:
        break

# decode and print completion
completion = tokenizer.decode(output[0], skip_special_tokens=True)
print(completion)

hel stairs intermittent Habit hauled hauledRocketdit Habit vendors


In [3]:
import torch
import numpy as np
from transformers import GPT2Tokenizer, GPT2LMHeadModel, top_k_top_p_filtering

# load tokenizer and model
tokenizer = GPT2Tokenizer.from_pretrained('sshleifer/tiny-gpt2')
model = GPT2LMHeadModel.from_pretrained('sshleifer/tiny-gpt2')

# define prefix
prefix = "hel"

# tokenize prefix and convert to tensor
input_ids = torch.tensor(tokenizer.encode(prefix)).unsqueeze(0)

# generate completions
max_length = len(prefix) + 10 # set maximum length of completion
temperature = 1.0 # set sampling temperature
top_k = 50 # set top-k sampling value
top_p = 0.95 # set top-p sampling value
eos_token_id = tokenizer.eos_token_id # get end-of-sequence token id

for i in range(10): # generate 10 completions
    output = input_ids # initialize output tensor
    for _ in range(max_length):
        # generate logits for next token using model
        logits = model(output)[0][:, -1, :]
        # apply top-k and top-p filtering to logits
        filtered_logits = top_k_top_p_filtering(logits, top_k=top_k, top_p=top_p)
        # sample next token from filtered logits
        probabilities = torch.softmax(filtered_logits / temperature, dim=-1)
        next_token = torch.multinomial(probabilities, num_samples=1)
        # append next token to output tensor
        output = torch.cat((output, next_token), dim=1)
        # check if end-of-sequence token is generated
        if next_token == eos_token_id:
            break
    # decode and print completion
    completion = tokenizer.decode(output[0], skip_special_tokens=True)
    print(completion)


hel Prob Participation directly credibility circumcisedmediately conservation ProbJD heir TAmediately intermittent
hel Observoho ONE Participation Hancock autonomy hauled ONE Observ Prob trilogy circumcised Rh
hel reviewingScene Rhdit intermittent credibilityRocket Brew Jr stairs circumcised ESV heir
hel confiriken intermittent credibility trilogy trilogy scalppress Hancock TAreement subst ONE
helmediately TAhibit ONE hauled stairsoother circumciseddit Brew Habit intermittentdit
hel antibioticmediately stairs conservationSher004 Prob antibiotic antibioticting circumcised Participation Motorola
hel vendors trilogy Brew directlyRocket heir Observ conservation intermittentatisf hauledootherpress
hel TA TA pawn Observpress ObservJDootherpress autonomyoho Moneyimura
hel Jratisf antibioticreement vendors reviewing antibioticRocket directly Observoother scalp Observ
heltingSheratisf hauled confirJDRocketmediately reviewing Rh confir Hancock ONE


In [4]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained('sshleifer/tiny-gpt2')
model = AutoModelForCausalLM.from_pretrained('sshleifer/tiny-gpt2')

# define prefix
prefix = "hel"

# tokenize prefix and convert to tensor
input_ids = tokenizer.encode(prefix, return_tensors='pt')

# generate completions
max_length = len(prefix) + 10 # set maximum length of completion
temperature = 1.0 # set sampling temperature
top_k = 50 # set top-k sampling value
top_p = 0.95 # set top-p sampling value

output = model.generate(
    input_ids=input_ids,
    max_length=max_length,
    temperature=temperature,
    top_k=top_k,
    top_p=top_p,
    do_sample=True,
    num_return_sequences=10,
)

# decode and print completions
completions = tokenizer.batch_decode(output, skip_special_tokens=True)
print(completions)


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


['helimura subst Prob Daniel reviewing dispatchpressdit stairs directlySherdit', 'hel Observ ESV pawn substimura dispatch Observ antibiotic directly ESV Motorola hauled', 'hel MotorolaRocketoho Jroho Rh Brew hauled Observ ONE confiroho', 'helhibit TA hauled confir ProbJDikenootherpress Money scalp heir', 'helhibitatisf Brew Brew hauledSceneScene autonomy HabitRocketimuraSher', 'hel ESV Habit TA Brew Prob004 autonomy Jr vendorsatisf Hancock ESV', 'hel Habit Motorola reviewingoother subst Jr Daniel subst stairs antibioticikenScene', 'hel Daniel directlyRocket intermittent autonomy dispatch dispatch004press confir Observ conservation', 'hel004 heirhibit confir autonomypressiken MotorolaJD Danielhibit confir', 'hel intermittentpress confir Hancockditreement ESV conservation Jr stairs vendors intermittent']
