In [26]:
# !pip install langchain -q
# !pip install openai -q
# !pip install tiktoken -q
# !pip install faiss-gpu -q

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.5/85.5 MB[0m [31m11.0 MB/s[0m eta [36m0:00:00[0m
[?25h

In [22]:
from langchain.llms import OpenAI
from langchain import PromptTemplate
from langchain.prompts import load_prompt
from langchain import FewShotPromptTemplate
from langchain.prompts.example_selector import LengthBasedExampleSelector, MaxMarginalRelevanceExampleSelector, SemanticSimilarityExampleSelector
from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate, AIMessagePromptTemplate, ChatMessagePromptTemplate
from langchain.schema import AIMessage, HumanMessage, SystemMessage
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings

In [16]:
## Setting API keys

with open("openai_api.txt", "r") as f:
    OPENAI_API = f.read()

type(OPENAI_API)

str

## Power of Prompt Engineering

Traditionaly we would fine-tune a Language Model to solve a downstream taks. This implies that our models can only solve the task they have been trained on. LangChain and Large Language Models (LLM) give us a new perspective.

We can, now, `prompt` a LLM to solve any dawnstream task. It's possible due to the way those model have beed trained. They have been trained to receive text as input and output once again text.

So programming models is as simple as prompting the LLM to solve the task we are aiming for. A `prompt` refers to the input to the model. This input is often constructed from multiple components, like a `PromptTemplate`, which is responsible for the construction of this input. LangChain provides several classes and functions to make constructing and working with prompts easy.

In this new age of LLMs, `prompts` are kings. Bad prompts produce bad outputs, and good prompts are unreasonably powerful. Constructing good prompts is a crucial skill for those building with LLMs.

## Prompt Structure

A prompt is typically composed of multiple parts:

* `Instructions` tell the model what to do, how to use external information if provided, what to do with the query, and how to construct the output.

* `External information` (context) act as an additional source of knowledge for the model. These can be manually inserted into the prompt, retrieved via a vector database (retrieval augmentation), or pulled in via other means (APIs, calculations, etc.).

* `User input` (query) is typically (but not always) a query input into the system by a human user (the prompter).

* `Output indicator` marks the beginning of the to-be-generated text. If generating Python code, we may use import to indicate to the model that it must begin writing Python code.

Each component is usually placed in the prompt in this order. Starting with instructions, external information (where applicable), prompter input, and finally, the output indicator.

## Prompt Template

A `prompt template` refers to a reproducible way to generate a prompt. It contains a text string (“the template”), that can take in a set of parameters from the end user and generate a prompt.

The prompt template may contain:
* `instructions` to the LLM
* a set of few shot `examples` to help the LLM generate better responses
* a `question` to the LLM

In [None]:
## Setting the Template

template = """
I want you to act as a naming consultant for new companies.
What is a good name for a company that makes {product}?
"""

## Creating the Prompt Object

prompt = PromptTemplate(
    template = template,
    input_variables = ["product"]
)

## Seeing the Prompt Components

for k in prompt:
    print(k)

## Filling `product`

print(prompt.format(product = "AI Software"))

('input_variables', ['product'])
('output_parser', None)
('partial_variables', {})
('template', '\nI want you to act as a naming consultant for new companies.\nWhat is a good name for a company that makes {product}?\n')
('template_format', 'f-string')
('validate_template', True)

I want you to act as a naming consultant for new companies.
What is a good name for a company that makes AI Software?



In [None]:
## We can also create a Prompt Template only using a Template

template = """
I want you to act as a naming consultant for new companies.
What is a good name for a company that makes {product}?
"""

prompt = PromptTemplate.from_template(template)

for k in prompt:
    print(k)

## Filling `product`

print(prompt.format(product = "AI Software"))

('input_variables', ['product'])
('output_parser', None)
('partial_variables', {})
('template', '\nI want you to act as a naming consultant for new companies.\nWhat is a good name for a company that makes {product}?\n')
('template_format', 'f-string')
('validate_template', True)

I want you to act as a naming consultant for new companies.
What is a good name for a company that makes AI Software?



In [3]:
prompt_template = PromptTemplate.from_template(
    "Tell me a {adjective} joke about {content}."
)
prompt_template.format(adjective="funny", content="chickens")

'Tell me a funny joke about chickens.'

By default `PromptTemplate` will validate the template string by checking whether the `input_variables` match the variables defined in `template`

In [None]:
## We can disable that using:

prompt = PromptTemplate(
    template=template,
    input_variables=["product", "foo"],
    validate_template=False
)

for k in prompt:
    print(k)

print(prompt.format(product = "AI Software"))

('input_variables', ['product', 'foo'])
('output_parser', None)
('partial_variables', {})
('template', '\nI want you to act as a naming consultant for new companies.\nWhat is a good name for a company that makes {product}?\n')
('template_format', 'f-string')
('validate_template', False)

I want you to act as a naming consultant for new companies.
What is a good name for a company that makes AI Software?



In [None]:
## Saving and Loading a Prompt Template

prompt.save("simple_prompt.json")

In [None]:
loadded_prompt = load_prompt("simple_prompt.json")

assert loadded_prompt == prompt

## Few Shot Examples

The success of LLMs comes from their large size and ability to store `knowledge` within the model parameter. However, there are more ways to pass knowledge to an LLM. The two primary methods are:
* `Parametric knowledge` — anything that has been learned by the model during training time and is stored within the model parameters.

* `Source knowledge` — any knowledge provided to the model at inference time via the input prompt.

Langchain's `FewShotPromptTemplate` caters to source knowledge input. The idea is to "train" the model with some examples to guide how it should think to generate better responses.

`FewShotPromptTemplate` takes in a `PromptTemplate` and a list of few shot examples. It then formats the prompt template with the few shot examples.

Few-shot learning is perfect when our model needs help understanding what we're asking it to do.

In [None]:
## Setting Examples

examples = [
    {"product": "colourful socks", "name": "Cute Socks"},
    {"product": "AI Software", "name": "BrainAI"},
]

## Creating the Prompt for each Example

example_template = """
Product: {product}
Name: {name}
"""

example_prompt = PromptTemplate(
    template = example_template,
    input_variables = ["product", "name"]
)

## Creating the Few Shot Example Object

few_shot_prompt = FewShotPromptTemplate(
    examples = examples,
    example_prompt = example_prompt,
    prefix = "Provide a name to a company according to the product it is developing",
    suffix = "Product: {input}\nName: ",
    input_variables = ["input"],
    example_separator = "\n"
)


## Printing the Prompt

for k in few_shot_prompt:
    print(k)

print()
print(few_shot_prompt.format(input="Suits"))

('input_variables', ['input'])
('output_parser', None)
('partial_variables', {})
('examples', [{'product': 'colourful socks', 'name': 'Cute Socks'}, {'product': 'AI Software', 'name': 'BrainAI'}])
('example_selector', None)
('example_prompt', PromptTemplate(input_variables=['product', 'name'], output_parser=None, partial_variables={}, template='\nProduct: {product}\nName: {name}\n', template_format='f-string', validate_template=True))
('suffix', 'Product: {input}\nName: ')
('example_separator', '\n')
('prefix', 'Provide a name to a company according to the product it is developing')
('template_format', 'f-string')
('validate_template', True)

Provide a name to a company according to the product it is developing

Product: colourful socks
Name: Cute Socks


Product: AI Software
Name: BrainAI

Product: Suits
Name: 


## Select Examples

If you have a large number of examples, you can use the `ExampleSelector` to select a subset of examples that will be most informative for the Language Model.

This is useful when you are worried about constructing a prompt that will go over the length of the context window. For longer inputs, it will select fewer examples to include, while for shorter inputs it will select more.

In [None]:
## Setting Examples

examples = [
    {"product": "cars", "name": "carX"},
    {"product": "airplanes", "name": "BlueSky"},
    {"product": "batteries", "name": "Energetic"},
    {"product": "AI software", "name": "SonicAI"},
    {"product": "computers", "name": "FututeCMP"},
]

## Creating Example Prompt
example_template = """
Product: {product}
Name: {name}
"""
example_prompt = PromptTemplate(
    template = example_template,
    input_variables = ["product", "name"]
)

## Creating an Example Selector

example_selector = LengthBasedExampleSelector(
    examples=examples,
    example_prompt=example_prompt,
    max_length=25 # `get_text_length` function, default is `Callable[[str], int] = lambda x: len(re.split("\n| ", x))`
    # we can set ourselfs this function (e.g. according to the tokens)
)

for k in example_selector:
    print(k)
print()

## Creating the final Template

dynamic_prompt = FewShotPromptTemplate(
    example_selector=example_selector,
    example_prompt=example_prompt,
    prefix = "Provide a name to a company according to the product it is developing",
    suffix = "Product: {input}\nName: ",
    input_variables = ["input"],
    example_separator = "\n"
)

for k in dynamic_prompt:
    print(k)
print()

## Seeing in Action

long_string = "apples, oranges, lemons, computers, cars, airplanes, web-sites, all kind of software and books"

print(dynamic_prompt.format(input = long_string))

('examples', [{'product': 'cars', 'name': 'carX'}, {'product': 'airplanes', 'name': 'BlueSky'}, {'product': 'batteries', 'name': 'Energetic'}, {'product': 'AI software', 'name': 'SonicAI'}, {'product': 'computers', 'name': 'FututeCMP'}])
('example_prompt', PromptTemplate(input_variables=['product', 'name'], output_parser=None, partial_variables={}, template='\nProduct: {product}\nName: {name}\n', template_format='f-string', validate_template=True))
('get_text_length', <function _get_length_based at 0x7f97724dfac0>)
('max_length', 25)
('example_text_lengths', [6, 6, 6, 7, 6])

('input_variables', ['input'])
('output_parser', None)
('partial_variables', {})
('examples', None)
('example_selector', LengthBasedExampleSelector(examples=[{'product': 'cars', 'name': 'carX'}, {'product': 'airplanes', 'name': 'BlueSky'}, {'product': 'batteries', 'name': 'Energetic'}, {'product': 'AI software', 'name': 'SonicAI'}, {'product': 'computers', 'name': 'FututeCMP'}], example_prompt=PromptTemplate(input

there are a lot more Example Selectors that we can use, some are `SemanticSimilarityExampleSelector`, `MaxMarginalRelevanceExampleSelector` and `NGramOverlapExampleSelector`.

In [28]:
## We will see the `MaxMarginalRelevanceExampleSelector`

example_prompt = PromptTemplate(
    input_variables=["input", "output"],
    template="Input: {input}\nOutput: {output}",
)

examples = [
    {"input": "happy", "output": "sad"},
    {"input": "tall", "output": "short"},
    {"input": "energetic", "output": "lethargic"},
    {"input": "sunny", "output": "gloomy"},
    {"input": "windy", "output": "calm"},
]

example_selector = MaxMarginalRelevanceExampleSelector.from_examples(
    examples = examples,
    # The embedding class used to produce embeddings which are used to measure semantic similarity.
    embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API),
    # The VectorStore class that is used to store the embeddings and do a similarity search over.
    vectorstore_cls = FAISS,
    # The number of examples to produce.
    k = 2,
)
mmr_prompt = FewShotPromptTemplate(
    # We provide an ExampleSelector instead of examples.
    example_selector=example_selector,
    example_prompt=example_prompt,
    prefix="Give the antonym of every input",
    suffix="Input: {adjective}\nOutput:",
    input_variables=["adjective"]
)

# Input is a feeling, so should select the happy/sad example as the first one
print(mmr_prompt.format(adjective="worried"))

Give the antonym of every input

Input: happy
Output: sad

Input: windy
Output: calm

Input: worried
Output:


In [30]:
# Let's compare this to what we would just get if we went solely off of similarity
example_selector = SemanticSimilarityExampleSelector.from_examples(
    # The list of examples available to select from.
    examples = examples,
    # The embedding class used to produce embeddings which are used to measure semantic similarity.
    embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API),
    # The VectorStore class that is used to store the embeddings and do a similarity search over.
    vectorstore_cls = FAISS,
    # The number of examples to produce.
    k=2,
)

similar_prompt = FewShotPromptTemplate(
    # We provide an ExampleSelector instead of examples.
    example_selector=example_selector,
    example_prompt=example_prompt,
    prefix="Give the antonym of every input",
    suffix="Input: {adjective}\nOutput:",
    input_variables=["adjective"],
)
print(similar_prompt.format(adjective="worried"))

Give the antonym of every input

Input: happy
Output: sad

Input: sunny
Output: gloomy

Input: worried
Output:


## Generating Predictions

In [None]:
## Initializing a LLM

davinci = OpenAI(
    model_name="text-davinci-003",
    openai_api_key=OPENAI_API
)

In [None]:
template = """
I want you to act as a naming consultant for new companies.
What is a good name for a company that makes {product}?
"""
prompt = PromptTemplate(
    template = template,
    input_variables = ["product"]
)

print(davinci(prompt.format(product = "AI Software")).strip())
print(davinci(prompt.format(product = "Super Cars")).strip())

Intellimind Technologies.
Speed Admirals.


In [None]:
print(davinci(few_shot_prompt.format(query = "I like to wear suits")))

 Oh, so you're the snappy dresser type?


In [None]:
long_string = "apples, oranges, lemons, computers, cars, airplanes, web-sites, all kind of software and books"

print(davinci(dynamic_prompt.format(input = long_string)))

 AllThingsTech


## Chat Prompt Template

`Chat Models` takes a list of chat messages as input - this list commonly referred to as `prompt`. These chat messages differ from raw string (which you would pass into a LLM model) in that every message is associated with a `role`.

A role can be AI, Human or System. he model is supposed to follow instruction from system chat message more closely.

To create a message template associated with a role, you use `MessagePromptTemplate`.

In [7]:
## Creating System Prompt
template="You are a helpful assistant that translates {input_language} to {output_language}."
system_message_prompt = SystemMessagePromptTemplate.from_template(template)

## Creating a Human Prompt
human_template="{text}"
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)

In [8]:
## More directly we can pass a Prompt Tempate to those Prompts

prompt=PromptTemplate(
    template="You are a helpful assistant that translates {input_language} to {output_language}.",
    input_variables=["input_language", "output_language"],
)
system_message_prompt_2 = SystemMessagePromptTemplate(prompt=prompt)

assert system_message_prompt == system_message_prompt_2

In [None]:
## Then we can create a Chat Prompt Template from one or more Message Prompt Templates

chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])

## Printing the Prompt

for k in chat_prompt:
    print(k)
print()

## Completing the Prompt
chat_prompt.format_prompt(input_language="English", output_language="French", text="I love programming.")

('input_variables', ['text', 'input_language', 'output_language'])
('output_parser', None)
('partial_variables', {})
('messages', [SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input_language', 'output_language'], output_parser=None, partial_variables={}, template='You are a helpful assistant that translates {input_language} to {output_language}.', template_format='f-string', validate_template=True), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['text'], output_parser=None, partial_variables={}, template='{text}', template_format='f-string', validate_template=True), additional_kwargs={})])



ChatPromptValue(messages=[SystemMessage(content='You are a helpful assistant that translates English to French.', additional_kwargs={}), HumanMessage(content='I love programming.', additional_kwargs={}, example=False)])

In [None]:
## Specify Role

template = "May the {subject} be with you"

chat_message_prompt = ChatMessagePromptTemplate.from_template(role="Jedi", template=template)

chat_message_prompt.format(subject="force")

ChatMessage(content='May the force be with you', additional_kwargs={}, role='Jedi')

In [6]:
## A Complete Example

template = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful AI bot. Your name is {name}."),
    ("human", "Hello, how are you doing?"),
    ("ai", "I'm doing well, thanks!"),
    ("human", "{user_input}")
])

messages = template.format_messages(
    name="Bob",
    user_input="What is your name?"
)

messages

[SystemMessage(content='You are a helpful AI bot. Your name is Bob.'),
 HumanMessage(content='Hello, how are you doing?'),
 AIMessage(content="I'm doing well, thanks!"),
 HumanMessage(content='What is your name?')]