In [2]:
# Setup: uncomment and run these lines once if the packages are not installed yet.
# !pip install langchain -q
# !pip install openai -q
# !pip install tiktoken -q

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m20.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.0/40.0 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.4/49.4 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.0/77.0 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m15.9 MB/s[0m eta [36m0:00:00[0m
[?25h

In [3]:
import langchain
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage
from langchain.cache import InMemoryCache

## Models Overview

One of the core value props of LangChain is that it provides a standard `interface` to models. This allows you to swap easily between models. At a high level, there are two main types of models:
* `Language Models` -- good for text generation
    * LLMs: wrap APIs that take text as input and return text as output.
    * ChatModels: wrap models that take chat messages as input and return a chat message as output.

* `Text Embedding Models` -- good for turning text into numerical representation.

In [None]:
# Read the OpenAI API key once and share it between both wrappers.
# The `with` block closes the file handle deterministically (the bare
# `open(...).read()` never closed it), and `.strip()` removes surrounding
# whitespace such as a trailing newline so it does not become part of the key.
with open("openai_api.txt", "r") as key_file:
    openai_api_key = key_file.read().strip()

llm = OpenAI(openai_api_key=openai_api_key)      # default is `text-davinci-003`
chat = ChatOpenAI(openai_api_key=openai_api_key) # default is `gpt-3.5-turbo`

In [None]:
## Text -> Text

# `predict` takes a plain string and returns a string on both wrappers.
# The LLM also accepts the string through `__call__()`; the chat model's
# `__call__()` only works if we input a list of messages.
for model in (llm, chat):
    print(model.predict("Say Hi!").strip())

Hi there!
Hello!


In [None]:
## Messages -> Message

# `predict_messages` takes a list of messages on both wrappers.
# For the LLM this does not work through `__call__()`; for the chat model it does.
for model in (llm, chat):
    print(model.predict_messages([HumanMessage(content="say hi!")]))

content='\n\nRobot: Hi there! How can I help you?' additional_kwargs={} example=False
content='Hi!' additional_kwargs={} example=False


## Chat Models



In [None]:
from langchain.chat_models import ChatOpenAI
from langchain import PromptTemplate, LLMChain
from langchain.prompts.chat import ChatPromptTemplate, SystemMessagePromptTemplate, AIMessagePromptTemplate, HumanMessagePromptTemplate
from langchain.schema import AIMessage, HumanMessage, SystemMessage

### Messages

We get chat completions by `passing` one or more messages to the model and the result will also be a `message`.

LangChain supports:
* **AIMessage** -- the response of the AI to one of our questions | also used for few-shot examples.

* **HumanMessage** -- the human input | the query that we pass to the model.

* **SystemMessage** -- the context of the model that we are passing to guide its behaviour.

In [None]:
# Send a single human message; the chat model is called with a list of messages.
translation_request = HumanMessage(
    content="Translate this sentence from English to French. I love programming."
)
chat([translation_request])

AIMessage(content="J'aime programmer.", additional_kwargs={}, example=False)

In [None]:
# The system message sets the task; the human message carries the text to translate.
system_instruction = SystemMessage(
    content="You are a helpful assistant that translates English to French."
)
user_sentence = HumanMessage(content="I love programming.")
messages = [system_instruction, user_sentence]

chat(messages)

AIMessage(content="J'adore la programmation.", additional_kwargs={}, example=False)

In [None]:
## For multiple Messages

# Each inner list is one conversation: the same system instruction followed by
# a different sentence to translate.
translator_role = "You are a helpful assistant that translates English to French."
sentences = ["I love programming.", "I love artificial intelligence."]

batch_messages = [
    [SystemMessage(content=translator_role), HumanMessage(content=sentence)]
    for sentence in sentences
]

# `generate` accepts the whole batch at once; this doesn't work with `__call__()`.
result = chat.generate(batch_messages)

result



LLMResult(generations=[[ChatGeneration(text="J'adore programmer.", generation_info=None, message=AIMessage(content="J'adore programmer.", additional_kwargs={}, example=False))], [ChatGeneration(text="J'adore l'intelligence artificielle.", generation_info=None, message=AIMessage(content="J'adore l'intelligence artificielle.", additional_kwargs={}, example=False))]], llm_output={'token_usage': {'prompt_tokens': 57, 'completion_tokens': 18, 'total_tokens': 75}, 'model_name': 'gpt-3.5-turbo'})

We can see that the result exposes a lot of information we can access, such as the individual generations and the token usage.

### Prompt Templates

We can make use of templating by using `MessagePromptTemplate`. We can make a **ChatPromptTemplate** from one or more **MessagePromptTemplate**.

In [None]:
# Raw template strings: placeholders in braces are filled in when the prompt is formatted.
template = "You are a helpful assistant that translates {input_language} to {output_language}."
human_template = "{text}"

# Wrap each string in the message-prompt template for its role.
system_message_prompt = SystemMessagePromptTemplate.from_template(template)
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)

In [None]:
# Combine the per-role templates, in conversation order, into one chat prompt.
chat_prompt = ChatPromptTemplate.from_messages(
    [
        system_message_prompt,
        human_message_prompt,
    ]
)

In [None]:
# Fill in the template variables, convert the result to a list of messages,
# and send that list to the chat model.
formatted_prompt = chat_prompt.format_prompt(
    input_language="English",
    output_language="French",
    text="I love programming!",
)
chat(formatted_prompt.to_messages())

AIMessage(content="J'adore la programmation !", additional_kwargs={}, example=False)

### Chains



In [None]:
# Alternative to `from_template`: build the PromptTemplate explicitly, naming
# its input variables, and wrap it in a system-message prompt template.
prompt = PromptTemplate(
    input_variables=["input_language", "output_language"],
    template="You are a helpful assistant that translates {input_language} to {output_language}.",
)

system_message_prompt = SystemMessagePromptTemplate(prompt=prompt)

In [None]:
# Rebuild the chat prompt from the current system/human templates so that this
# cell actually uses the `system_message_prompt` defined just above and does not
# depend on whichever `chat_prompt` happens to be left in the kernel.
chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])

# An LLMChain ties the prompt to the chat model so both run with one call.
chain = LLMChain(llm=chat, prompt=chat_prompt)

In [None]:
# Values for the three template variables used by the chain's prompt.
translation_inputs = {
    "input_language": "English",
    "output_language": "French",
    "text": "I love programming!",
}
chain.run(**translation_inputs)

"J'adore la programmation!"

### Few Shot Examples

In [None]:
## Using `AIMessage`

# Template strings for the system instruction and the real user input.
template = "You are a helpful assistant that translates english to pirate."
human_template = "{text}"

system_message_prompt = SystemMessagePromptTemplate.from_template(template)

# One example exchange: a human turn and the AI reply we want the model to imitate.
example_human = HumanMessagePromptTemplate.from_template("Hi")
example_ai = AIMessagePromptTemplate.from_template("Argh me mateys")

human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)

In [None]:
# Order matters: system instruction, the example exchange, then the real input.
few_shot_messages = [system_message_prompt, example_human, example_ai, human_message_prompt]
chat_prompt = ChatPromptTemplate.from_messages(few_shot_messages)
chain = LLMChain(prompt=chat_prompt, llm=chat)

chain.run("I love programming.")

"I be lovin' programmin', arrr!"

In [None]:
## Using `SystemMessage`

# Template strings for the system instruction and the real user input.
template = "You are a helpful assistant that translates english to pirate."
human_template = "{text}"

system_message_prompt = SystemMessagePromptTemplate.from_template(template)

# Here both example turns are system messages; the `name` entry in
# `additional_kwargs` labels which side of the example each one represents.
example_human = SystemMessagePromptTemplate.from_template(
    "Hi", additional_kwargs={"name": "example_user"}
)
example_ai = SystemMessagePromptTemplate.from_template(
    "Argh me mateys", additional_kwargs={"name": "example_assistant"}
)

human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)

In [None]:
# Order matters: system instruction, the two example messages, then the real input.
few_shot_messages = [system_message_prompt, example_human, example_ai, human_message_prompt]
chat_prompt = ChatPromptTemplate.from_messages(few_shot_messages)
chain = LLMChain(prompt=chat_prompt, llm=chat)

chain.run("I love programming.")

"I be lovin' the art of code, me hearty."

## Text Embedding Models

The Embedding class is a class designed for `interfacing` with embeddings. There are lots of Embedding providers (OpenAI, Cohere, Hugging Face, etc) - this class is designed to provide a `standard` interface for all of them.

Embeddings create a `vector representation` of a piece of text. This is useful because it means we can think about text in the `vector space`, and do things like semantic search where we look for pieces of text that are most similar in the vector space.


The base Embedding class in LangChain exposes two methods: `embed_documents` and `embed_query`. The largest difference is that these two methods have different interfaces: `embed_documents` works over multiple documents, while `embed_query` works over a single piece of text.

In [None]:
from langchain.embeddings import OpenAIEmbeddings

In [None]:
# Read the API key inside a `with` block so the file handle is closed (the bare
# `open(...).read()` never closed it); `.strip()` removes surrounding whitespace
# such as a trailing newline so it does not become part of the key.
with open("openai_api.txt", "r") as key_file:
    openai_api_key = key_file.read().strip()

embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key) # default is `text-embedding-ada-002`

In [None]:
text = "This is a test document."
query_result = embeddings.embed_query(text)

# Show the container type, the vector length, and the first three components.
preview = query_result[:3]
print(type(query_result), len(query_result), preview)

<class 'list'> 1536 [-0.0031265460420399904, 0.01113363541662693, -0.004037691745907068]


In [None]:
# `embed_documents` takes a list of texts; here the list holds the single `text`.
doc_result = embeddings.embed_documents([text])

# Show the container type, the number of vectors, then the length and first
# three components of the first vector.
first_vector = doc_result[0]
print(type(doc_result), len(doc_result), len(first_vector), first_vector[:3])

<class 'list'> 1 1536 [-0.0031362669868111224, 0.011212612618712038, -0.004014022224356029]


## Memory Cache

Caching LLM responses can save you money by reducing the number of API calls you make to the LLM provider, if you're often requesting the same completion multiple times. It can also speed up your application, because a repeated request no longer needs a new API call to the LLM provider.

In [7]:
# Read the API key inside a `with` block so the file handle is closed (the bare
# `open(...).read()` never closed it); `.strip()` removes surrounding whitespace
# such as a trailing newline so it does not become part of the key.
with open("openai_api.txt", 'r') as key_file:
    openai_api_key = key_file.read().strip()

# Rebind `llm` to an instance with an explicit model name and `n` / `best_of` settings.
llm = OpenAI(model_name="gpt-3.5-turbo-instruct", n=2, best_of=2, openai_api_key=openai_api_key)

# Install an in-memory cache on the `langchain` module; the following cells
# repeat the same prompt to show the effect.
langchain.llm_cache = InMemoryCache()

In [8]:
# First request for this prompt: nothing is cached yet, so it should take longer.
joke_prompt = "Tell me a joke"
llm.predict(joke_prompt)

"\n\nWhy don't scientists trust atoms?\n\nBecause they make up everything."

In [9]:
# Same prompt again: this time it is in the cache, so it goes faster.
joke_prompt = "Tell me a joke"
llm.predict(joke_prompt)

"\n\nWhy don't scientists trust atoms?\n\nBecause they make up everything."