In [1]:
import os
openai_api_key = os.environ["OPENAI_API_KEY"]

## Chat Messages
Like text, but specified with a message type (System, Human, AI)

- System - Helpful background context that tell the AI what to do
- Human - Messages that are intented to represent the user
- AI - Messages that show what the AI responded with
- For more, see OpenAI's documentation

In [2]:
from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage, SystemMessage, AIMessage

chat = ChatOpenAI(temperature=.7, openai_api_key=openai_api_key)
chat(
    [
        SystemMessage(content="You are a nice AI bot that helps a user figure out what excercise to do in one short sentence"),
        HumanMessage(content="I want to build biceps, what excercise should I do?")
    ]
)

AIMessage(content='You should do bicep curls with dumbbells or a barbell.', additional_kwargs={}, example=False)

In [3]:
chat(
    [
        SystemMessage(content="You are a nice AI bot that helps a user figure out what excercise to do in one short sentence"),
        HumanMessage(content="I want to build biceps, what excercise should I do?"),
        AIMessage(content="You should do bicep curls with dumbbells or a barbell."),
        HumanMessage(content="What else should I do more give me detail answer?")
    ]
)

AIMessage(content='In addition to bicep curls, you can also incorporate exercises like hammer curls, chin-ups, and preacher curls to target different parts of your biceps and promote overall muscle growth.', additional_kwargs={}, example=False)

In [32]:
batch_messages = [
    [
        SystemMessage(content="You are a helpful assistant that translates English to French."),
        HumanMessage(content="I love programming.")
    ],
    [
        SystemMessage(content="You are a helpful assistant that translates English to French."),
        HumanMessage(content="I love artificial intelligence.")
    ],
]
result = chat.generate(batch_messages)
result

LLMResult(generations=[[ChatGeneration(text="J'adore la programmation.", generation_info=None, message=AIMessage(content="J'adore la programmation.", additional_kwargs={}, example=False))], [ChatGeneration(text="J'adore l'intelligence artificielle.", generation_info=None, message=AIMessage(content="J'adore l'intelligence artificielle.", additional_kwargs={}, example=False))]], llm_output={'token_usage': {'prompt_tokens': 53, 'completion_tokens': 20, 'total_tokens': 73}, 'model_name': 'gpt-3.5-turbo'}, run=[RunInfo(run_id=UUID('fae350c7-b304-4d24-bdfe-aa624212edca')), RunInfo(run_id=UUID('b41ec074-bdd4-4361-b134-e07cd1568874'))])

In [None]:
dir(result)

In [28]:
# result.schema()
# result.json()
result

LLMResult(generations=[[ChatGeneration(text="J'adore la programmation.", generation_info=None, message=AIMessage(content="J'adore la programmation.", additional_kwargs={}, example=False))], [ChatGeneration(text="J'adore l'intelligence artificielle.", generation_info=None, message=AIMessage(content="J'adore l'intelligence artificielle.", additional_kwargs={}, example=False))]], llm_output={'token_usage': {'prompt_tokens': 53, 'completion_tokens': 20, 'total_tokens': 73}, 'model_name': 'gpt-3.5-turbo'}, run=[RunInfo(run_id=UUID('49356043-d4f1-43b3-aa46-cc0f509956ff')), RunInfo(run_id=UUID('04066d6a-6566-4e09-90c6-03afbb3e899d'))])

In [33]:
data = result.dict()
generations = data['generations']
llm_output = data['llm_output']
token_usage = llm_output['token_usage']
model_name = llm_output['model_name']
runs = data['run']

print("Generations:")
for generation in generations:
    for res in generation:
        message_content = res['message']['content']
        print(message_content)

print("\nToken Usage:")
print("Prompt Tokens:", token_usage['prompt_tokens'])
print("Completion Tokens:", token_usage['completion_tokens'])
print("Total Tokens:", token_usage['total_tokens'])

print("\nModel Name:", model_name)

print("\nRuns:")
for run in runs:
    run_id = run['run_id']
    print(run_id)


Generations:
J'adore la programmation.
J'adore l'intelligence artificielle.

Token Usage:
Prompt Tokens: 53
Completion Tokens: 20
Total Tokens: 73

Model Name: gpt-3.5-turbo

Runs:
fae350c7-b304-4d24-bdfe-aa624212edca
b41ec074-bdd4-4361-b134-e07cd1568874


## Documents
An object that holds a piece of text and metadata (more information about that text)


In [35]:
from langchain.schema import Document

Document(page_content="This is my document. It is full of text that I've gathered from other places",
         metadata={
             'my_document_id' : 234234,
             'my_document_source' : "The LangChain Papers",
             'my_document_create_time' : 1680013019
         })

Document(page_content="This is my document. It is full of text that I've gathered from other places", metadata={'my_document_id': 234234, 'my_document_source': 'The LangChain Papers', 'my_document_create_time': 1680013019})

# Models - The interface to the AI brains
## Language Model
A model that does text in ➡️ text out!

Check out how I changed the model I was using from the default one to ada-001. See more models here

In [None]:
!pip install tiktoken

In [46]:
import tiktoken
def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Returns the number of tokens in a text string."""
    encoding = tiktoken.get_encoding(encoding_name)
    num_tokens = len(encoding.encode(string))
    return num_tokens

In [47]:
encoding = tiktoken.get_encoding("cl100k_base")

num_tokens_from_string("hello", "cl100k_base")

1

In [37]:
from langchain.chat_models import ChatOpenAI
import tiktoken

llm = ChatOpenAI(temperature=7, model="gpt-3.5-turbo", openai_api_key=openai_api_key)
llm("What day comes after Friday?")



'Saturday comes after Friday.'

## Chat Model
A model that takes a series of messages and returns a message output

In [49]:
from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage, SystemMessage, AIMessage

chat = ChatOpenAI(temperature=1, openai_api_key=openai_api_key)
output = chat(
    [
        SystemMessage(content="You are an unhelpful AI bot that makes a joke at whatever the user says"),
        HumanMessage(content="I would like to go to New York, how should I do this?")
    ]
)

In [50]:
dir(output)

['Config',
 '__abstractmethods__',
 '__annotations__',
 '__class__',
 '__class_vars__',
 '__config__',
 '__custom_root_type__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__exclude_fields__',
 '__fields__',
 '__fields_set__',
 '__format__',
 '__ge__',
 '__get_validators__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__include_fields__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__json_encoder__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__post_root_validators__',
 '__pre_root_validators__',
 '__pretty__',
 '__private_attributes__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__repr_args__',
 '__repr_name__',
 '__repr_str__',
 '__rich_repr__',
 '__schema_cache__',
 '__setattr__',
 '__setstate__',
 '__signature__',
 '__sizeof__',
 '__slots__',
 '__str__',
 '__subclasshook__',
 '__try_update_forward_refs__',
 '__validators__',
 '_abc_impl',
 '_calculate_keys',
 '_copy_and_set_values',
 '_decompose_class',
 '_enfo

In [51]:
output.dict()

{'content': "Why don't you just teleport there? It's the fastest and cheapest way to travel, on a scale from totally impossible to completely nonexistent. Have fun!",
 'additional_kwargs': {},
 'example': False}

## Text Embedding Model
Change your text into a vector (a series of numbers that hold the semantic 'meaning' of your text). Mainly used when comparing two pieces of text together.

BTW: Semantic means 'relating to meaning in language or logic.'


In [52]:

from langchain.embeddings import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
text = "Hi! It's time for the beach"
text_embedding = embeddings.embed_query(text)
print (f"Your embedding is length {len(text_embedding)}")
print (f"Here's a sample: {text_embedding[:5]}...")

Your embedding is length 1536
Here's a sample: [-0.00011466221621958539, -0.0031506523955613375, -0.0007831145194359124, -0.019504327327013016, -0.015125557780265808]...


## Prompts - Text generally used as instructions to your model
Prompt
What you'll pass to the underlying model

In [58]:
# managing prompts
from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI

llm = ChatOpenAI(temperature=1, model="gpt-3.5-turbo", openai_api_key=openai_api_key)


prompts = PromptTemplate(input_variables=["movname"],
                        template="Tell me about the {movname} new inspiring movie idea")

movres = llm.predict(prompts.format(movname="avengers"))
print(llm.predict(prompts.format(movname="avengers")))

Title: "Avengers: Legacy's Revival"

Synopsis:
"Avengers: Legacy's Revival" is an epic, inspiring movie that follows Earth's mightiest heroes, the Avengers, as they face unprecedented challenges while striving to protect the planet and the universe itself. In this installment, the Avengers must confront their deepest fears, reconnect with their true purpose, and rediscover their collective power to save humanity.

Plot:
The movie begins with the Avengers scattered and disheartened after their devastating defeat against their arch-nemesis, Thanos. The world is still reeling from the enormous loss and desperately needs the heroes to return. However, each Avenger now struggles with their personal struggles and a crisis of faith in their role as protectors.

A new threat looms as a mysterious, malevolent force, known as The Forgotten One, begins to sow chaos and destruction across the planet. The Avengers, including Iron Man, Captain America, Thor, Black Widow, Hulk, and Hawkeye, must put 

In [59]:
movres

'Title: "Avengers: Dawn of Destiny"\n\nPlot Summary:\n"Avengers: Dawn of Destiny" brings together Earth\'s mightiest heroes once again, as they face their greatest challenge yet. The movie opens with the Avengers scattered and disheartened, struggling to cope with their recent losses at the hands of a formidable villain, Dark Matter.\n\nFeeling responsible for the world\'s current state, Tony Stark gathers the remaining Avengers to devise a plan to restore hope and inspiration to humanity. They realize that they must not only  defeat Dark Matter but also rekindle the world\'s belief in heroes. In a surprising turn of events, they come across an ancient prophecy that foretells the appearance of a chosen one, who possesses the power to bring balance back to the universe.\n\nThe Avengers embark on a global quest to find this chosen one, encountering new allies and tackling powerful adversaries along the way. Each character confronts personal and emotional challenges, ultimately realizing 

## Example Selectors
An easy way to select from a series of examples that allow you to dynamic place in-context information into your prompt. Often used when your task is nuanced or you have a large list of examples.

Check out different types of example selectors here

If you want an overview on why examples are important (prompt engineering), check out this video

In [60]:
from langchain.prompts.example_selector import SemanticSimilarityExampleSelector
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
from langchain.prompts import FewShotPromptTemplate, PromptTemplate
from langchain.llms import OpenAI

llm = OpenAI(model_name="text-davinci-003", openai_api_key=openai_api_key)

example_prompt = PromptTemplate(
    input_variables=["input", "output"],
    template="Example Input: {input}\nExample Output: {output}",
)

# Examples of locations that nouns are found
examples = [
    {"input": "pirate", "output": "ship"},
    {"input": "pilot", "output": "plane"},
    {"input": "driver", "output": "car"},
    {"input": "tree", "output": "ground"},
    {"input": "bird", "output": "nest"},
]

In [65]:
!pip install faiss-cpu

Collecting faiss-cpu
  Downloading faiss_cpu-1.7.4-cp310-cp310-win_amd64.whl (10.8 MB)
     ---------------------------------------- 10.8/10.8 MB 3.3 MB/s eta 0:00:00
Installing collected packages: faiss-cpu
Successfully installed faiss-cpu-1.7.4


In [66]:
# SemanticSimilarityExampleSelector will select examples that are similar to your input by semantic meaning

example_selector = SemanticSimilarityExampleSelector.from_examples(
    # This is the list of examples available to select from.
    examples, 
    
    # This is the embedding class used to produce embeddings which are used to measure semantic similarity.
    OpenAIEmbeddings(openai_api_key=openai_api_key), 
    
    # This is the VectorStore class that is used to store the embeddings and do a similarity search over.
    FAISS, 
    
    # This is the number of examples to produce.
    k=2
)

In [67]:
similar_prompt = FewShotPromptTemplate(
    # The object that will help select examples
    example_selector=example_selector,
    
    # Your prompt
    example_prompt=example_prompt,
    
    # Customizations that will be added to the top and bottom of your prompt
    prefix="Give the location an item is usually found in",
    suffix="Input: {noun}\nOutput:",
    
    # What inputs your prompt will receive
    input_variables=["noun"],
)
# Select a noun!
my_noun = "student"

print(similar_prompt.format(noun=my_noun))  

Give the location an item is usually found in

Example Input: driver
Example Output: car

Example Input: pilot
Example Output: plane

Input: student
Output:


In [68]:
print(llm(similar_prompt.format(noun=my_noun)))  

 classroom


## Output Parsers
A helpful way to format the output of a model. Usually used for structured output.

Two big concepts:

1. Format Instructions - A autogenerated prompt that tells the LLM how to format it's response based off your desired result

2. Parser - A method which will extract your model's text output into a desired structure (usually json)

In [74]:
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
from langchain.llms import OpenAI
llm = OpenAI(model_name="text-davinci-003", openai_api_key=openai_api_key)

In [70]:
# How you would like your response structured. This is basically a fancy prompt template
response_schemas = [
    ResponseSchema(name="bad_string", description="This a poorly formatted user input string"),
    ResponseSchema(name="good_string", description="This is your response, a reformatted response")
]

# How you would like to parse your output
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
# See the prompt template you created for formatting
format_instructions = output_parser.get_format_instructions()
print (format_instructions)

The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```":

```json
{
	"bad_string": string  // This a poorly formatted user input string
	"good_string": string  // This is your response, a reformatted response
}
```


In [71]:
template = """
You will be given a poorly formatted string from a user.
Reformat it and make sure all the words are spelled correctly

{format_instructions}

% USER INPUT:
{user_input}

YOUR RESPONSE:
"""

prompt = PromptTemplate(
    input_variables=["user_input"],
    partial_variables={"format_instructions": format_instructions},
    template=template
)

promptValue = prompt.format(user_input="welcom to califonya!")

print(promptValue)


You will be given a poorly formatted string from a user.
Reformat it and make sure all the words are spelled correctly

The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```":

```json
{
	"bad_string": string  // This a poorly formatted user input string
	"good_string": string  // This is your response, a reformatted response
}
```

% USER INPUT:
welcom to califonya!

YOUR RESPONSE:



In [78]:
llm_output = llm(promptValue)
llm_output

'```json\n{\n\t"bad_string": "welcom to califonya!",\n\t"good_string": "Welcome to California!"\n}\n```'

In [77]:
llm_output = llm(promptValue)

output_parser.parse(llm_output)

OutputParserException: Got invalid JSON object. Error: Expecting ',' delimiter: line 3 column 2 (char 41)

## Indexes - Structuring documents to LLMs can work with them
Document Loaders
Easy ways to import data from other sources. Shared functionality with OpenAI Plugins specifically retrieval plugins

See a big list of document loaders here. A bunch more on Llama Index as well.


In [79]:

from langchain.document_loaders import HNLoader
loader = HNLoader("https://news.ycombinator.com/item?id=34422627")
data = loader.load()
print (f"Found {len(data)} comments")
print (f"Here's a sample:\n\n{''.join([x.page_content[:150] for x in data[:2]])}")

Found 76 comments
Here's a sample:

Ozzie_osman 5 months ago  
             | next [–] 

LangChain is awesome. For people not sure what it's doing, large language models (LLMs) are very Ozzie_osman 5 months ago  
             | parent | next [–] 

Also, another library to check out is GPT Index (https://github.com/jerryjliu/gpt_index)


## Text Splitters
Often times your document is too long (like a book) for your LLM. You need to split it up into chunks. Text splitters help with this.

There are many ways you could split your text into chunks, experiment with different ones to see which is best for you.



In [80]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
# This is a long document we can split up.
with open('../worked.txt') as f:
    pg_work = f.read()
    
print (f"You have {len([pg_work])} document")

text_splitter = RecursiveCharacterTextSplitter(
    # Set a really small chunk size, just to show.
    chunk_size = 150,
    chunk_overlap  = 20,
)

texts = text_splitter.create_documents([pg_work])
print (f"You have {len(texts)} documents")

You have 1 document
You have 82 documents


In [81]:
print ("Preview:")
print (texts[0].page_content, "\n")
print (texts[1].page_content)

Preview:
Skip to main content
🦜️🔗 LangChain
JS/TS Docs
GitHub


Search...
CTRL
K
Get started
Introduction
Installation
Quickstart
Tutorials
Modules
Model I/​O 

Data connection

Document loaders

Document transformers


## Retrievers
Easy way to combine documents with language models.

There are many different types of retrievers, the most widely supported is the VectoreStoreRetriever

In [83]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings

loader = TextLoader('..//worked.txt')
documents = loader.load()
# Get your splitter ready
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)

# Split your docs into texts
texts = text_splitter.split_documents(documents)

# Get embedding engine ready
embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

# Embedd your texts
db = FAISS.from_documents(texts, embeddings)
# Init your retriever. Asking for just 1 document back
retriever = db.as_retriever()
retriever

VectorStoreRetriever(tags=None, metadata=None, vectorstore=<langchain.vectorstores.faiss.FAISS object at 0x0000013E140B1480>, search_type='similarity', search_kwargs={})