# Text Generation
`prompt_tuner` is set up for convenient text generation with the generation and sampling tools from Hugging Face `transformers`.

You can load a `SoftPrompt` object and simply concatenate it into your context string, where it shows up as a human-readable tag that tokenizes to the same number of tokens as the underlying soft prompt.

In [1]:
#@title Setup for Colab only
# Install Hugging Face transformers, which provides the pipeline and
# generation tools used in the rest of this notebook.
%pip install transformers
# Install prompt_tuner directly from its GitHub repository.
# '--log PIP_LOG' makes pip write a log of the install to the file PIP_LOG.
%pip install git+https://github.com/AlpinDale/prompt-tuner.git#egg=master --log PIP_LOG

In [2]:
from transformers.pipelines import pipeline
from prompt_tuner.inference import LlamaSoftPromptLM
from prompt_tuner.tokenizers import LlamaSPTokenizerFast
from prompt_tuner.soft_prompt import SoftPrompt
import torch

In [3]:
# You'll need to instantiate prompt_tuner's own model and tokenizer classes
# (the ones imported above), not the stock transformers classes.
# The model and the tokenizer are loaded from the same checkpoint.
model = LlamaSoftPromptLM.from_pretrained("NousResearch/Llama-2-7b-hf")
tokenizer = LlamaSPTokenizerFast.from_pretrained("NousResearch/Llama-2-7b-hf")

# A standard transformers text-generation pipeline built on top of them.
generator = pipeline('text-generation', model=model, tokenizer=tokenizer)

In [4]:
# A SoftPrompt can be created from several kinds of source, for example:
# sp = SoftPrompt.from_json(json_str)
# sp = SoftPrompt.from_inputs_embeds(inputs_embeds)
# sp = SoftPrompt.from_tuning_model(model)
#
# Here one is built from a plain string, using the model and tokenizer
# instantiated earlier.
sp = SoftPrompt.from_string(
    "With the court firmly balkanized into three distinct factions, Princess Charlotte had her work cut out for her.",
    model=model,
    tokenizer=tokenizer,
)

#sp = SoftPrompt.from_file("sample_sps/finetune/neuromancer_NousResearch/Llama-2-7b-hf.json")

In [None]:
# Printing a SoftPrompt displays information about it.
print(sp)

In [6]:
# Use 'get_tag_str()' to get a tag string that you can add to your context.
# Here the tag is placed first and the ordinary text ("She") follows it.
prompt = sp.get_tag_str() + "She"

# The addition operator also works on the SoftPrompt object itself:
# prompt = sp + "Armitage"

In [None]:
# The tag string conveniently contains the SP's name, part of its GUID,
# and a series of '@'s, each of which represents an individual soft token.
print(prompt)

In [None]:
# The tag string tokenizes to the correct length, so token counts taken from
# the tokenizer can be used to budget your context.
prompt_len = len(tokenizer.encode(prompt))
soft_prompt_len = len(tokenizer.encode(sp.get_tag_str()))

print(f"Length of soft prompt: {soft_prompt_len}")
print(f"Length of full prompt: {prompt_len}")

In [None]:
# Generating through the pipeline works exactly as it does without a
# soft prompt: pass the prompt string plus the usual sampling options.
output = generator(
    prompt,
    do_sample=True,
    # min_length and max_length are both the prompt length plus 100 tokens.
    min_length=prompt_len + 100,
    max_length=prompt_len + 100,
    repetition_penalty=1.7,
    top_p=0.8,
    temperature=0.7,
    use_cache=True,
    return_full_text=True,
)

# The pipeline returns a list of results; show the text of the first one.
print(output[0]['generated_text'])

In [None]:
# It is also fine to use generate() instead of a pipeline.
# generate() works on token ids, so tokenize the prompt into a PyTorch tensor.
input_ids = tokenizer(prompt, return_tensors="pt").input_ids

# This also demonstrates multiple return sequences.
# NOTE: 'return_full_text' is an option of the text-generation pipeline, not
# of generate(), so it is not passed here.
output = model.generate(
    input_ids,
    do_sample=True,
    # min_length and max_length are both the prompt length plus 100 tokens.
    min_length=prompt_len + 100,
    max_length=prompt_len + 100,
    repetition_penalty=1.7,
    top_p=0.8,
    temperature=0.7,
    use_cache=True,
    num_return_sequences=3,
)

# Decode and print every returned sequence (num_return_sequences of them).
for sequence in output:
    print(tokenizer.decode(sequence))

In [None]:
# If you decide to write your own sampler instead of using a pipeline,
# be aware that the logits of the special tokens will be very high.
# model.generate() and pipelines already account for this, but a custom
# sampler must exclude the special tokens itself when sampling.

# Direct forward pass on the tokenized prompt; sampling from this output
# is up to you.
output = model(input_ids)