In [4]:
#chat
from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage, SystemMessage, AIMessage

chat = ChatOpenAI(temperature=0.7, openai_api_key="...openai_api_key...")

In [5]:
chat(
    [
SystemMessage(content="You are a nice AI bot that helps a user figure out what to eat in one short sentence"),
HumanMessage(content="I like tomatoes, what should I eat?")
]
)

AIMessage(content='You could try a Caprese salad with fresh tomatoes, mozzarella, and basil.', additional_kwargs={}, example=False)

In [7]:
chat([
    SystemMessage(content="You are a nice AI bot that helps a user figure out where to travel in one short"),
    HumanMessage(content="I like the beaches where should I go?"),
    AIMessage(content="You should go to Nice,France"),
    HumanMessage(content="What else should I do when I am there")
])

AIMessage(content='Nice is a beautiful city located on the French Riviera, and there are many things to do besides enjoying the beaches. Here are some suggestions:\n\n1. Walk along the Promenade des Anglais, a famous beachside promenade that stretches for miles.\n2. Explore the winding streets of Old Town, where you can find charming shops, cafes, and restaurants.\n3. Visit the Musée Matisse, which houses a large collection of works by the famous French artist.\n4. Take a day trip to Monaco, which is just a short train ride away.\n5. Enjoy the local cuisine, which features fresh seafood and delicious regional dishes.\n6. Visit the Russian Orthodox Cathedral, a stunning architectural masterpiece.\n7. Take a sunset cruise along the coast to see the city from a different perspective.\n8. Go hiking in the nearby hills for stunning views of the city and the sea.', additional_kwargs={}, example=False)

In [8]:
#document
from langchain.schema import Document

In [9]:
Document(page_content="This is my document. It is full of text that I've gathered from other places",metadata={'my_document_id':234234,
                                                                                                              'my_doument_source':"The Langchain Papers",
                                                                                                              'my_document_create_time':1680013019})

Document(page_content="This is my document. It is full of text that I've gathered from other places", metadata={'my_document_id': 234234, 'my_doument_source': 'The Langchain Papers', 'my_document_create_time': 1680013019})

In [11]:
#language model - text in text out

from langchain.llms import OpenAI

llm = OpenAI(model_name="text-ada-001",openai_api_key="...openai_api_key...")

In [13]:
llm("What day comes after Friday?")

Retrying langchain.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised APIConnectionError: Error communicating with OpenAI: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')).


'\n\nSaturday.'

In [16]:
#chat model - a model that takes a series of messages and returns message output
from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage, SystemMessage, AIMessage

chat = ChatOpenAI(temperature=1,openai_api_key="...openai_api_key...")

In [19]:
chat([
    SystemMessage(content="You are an unhelpful AI bot that makes a joke at whatever the user says"),
     #SystemMessage(content="You are the most helpful indian AI bot that does its best whatever user says"),
    HumanMessage(content="I would like to go to New York, how should I do this?")
])

AIMessage(content="Oh, just flap your arms really hard and maybe you'll fly there! Ha ha ha ha!", additional_kwargs={}, example=False)

In [20]:
# Text Embedding Model - Change your text into a vector(a series of numbers that hold the semantic 'meaning of your text'), Mainly used when comparing two pieces of text together.
from langchain.embeddings import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(openai_api_key="...openai_api_key...")

In [21]:
text = "Hi! It's time for the beach"

In [24]:
text_embedding = embeddings.embed_query(text)
print(f"Your embeddings is length {len(text_embedding)}")
print(f"Here's a sample: {text_embedding[:5]}...")

Your embeddings is length 1536
Here's a sample: [-0.00015641732898075134, -0.003165106289088726, -0.000814014405477792, -0.019451458007097244, -0.01518280804157257]...


In [25]:
#Prompt - Text generally used as instructions to your model

from langchain.llms import OpenAI

llm = OpenAI(model_name="text-davinci-003",openai_api_key="...openai_api_key...")

# I like to use three double quotation marks for my prompts because it's easier to read
prompt = """
Today is Monday, tomorrow is Wednesday.

What is wrong with that statement?
"""

llm(prompt)


Retrying langchain.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised APIConnectionError: Error communicating with OpenAI: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')).


'\nThe statement is incorrect because tomorrow is Tuesday, not Wednesday.'

In [27]:
# Prompt Template - An object that helps create prompts based on a combination of user input, other non- static information and a fixed template string.
from langchain.llms import OpenAI
from langchain import PromptTemplate

llm = OpenAI(model_name="text-davinci-003", openai_api_key="...openai_api_key...")

# Notice "location" below, that is a placeholder for another value later
template = """
I really want to travel to {location}. What should I do there?

Respond in one short sentence

"""

prompt = PromptTemplate(
input_variables = ["location"],
template= template,
)

final_prompt = prompt.format(location="Rome")

print(f"Final Prompt: {final_prompt}")
print("----------------")
print(f"LLM Output: {llm(final_prompt)}") 

Final Prompt: 
I really want to travel to Rome. What should I do there?

Respond in one short sentence


----------------


Retrying langchain.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised APIConnectionError: Error communicating with OpenAI: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')).


LLM Output: 
Explore the Colosseum, the Roman Forum, and the Pantheon to take in the ancient history of the city.


In [11]:
# Example Selectors 
# An easy way to select from a series of examples that allow you to dynamic place in-context information into your prompt. Often used when your task is nuanced or you have a large list of exmaples.

from langchain.prompts.example_selector import SemanticSimilarityExampleSelector
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
from langchain.prompts import FewShotPromptTemplate, PromptTemplate
from langchain.llms import OpenAI

llm = OpenAI(model_name="text-davinci-003",openai_api_key="...openai_api_key...")

example_prompt = PromptTemplate(
input_variables = ["input","output"],
template="Example Input: {input}\nExample Output: {output}",    
)

# Example of location that nouns are found
examples = [
    {"input": "pirate", "output": "ship"},
    {"input": "pilot", "output": "plane"},
    {"input": "driver", "output" :"car"},
    {"input": "tree", "output" :"sky"},
    {"input": "bird", "output": "nest"}
]

In [None]:
# SemanticsSimilarityExampleSelector will select examples that are similar to your input by semantics

example_selector = SemanticSimilarityExampleSelector.from_examples(
    #This is the list of examples available to select from.
    examples,
    
    # This is the embedding class used to produce embeddings which are used to measure semantic
    OpenAIEmbeddings(openai_api_key="...openai_api_key..."),
    
    # This is the VectorStore class that is used to store the embeddings and do a similarity semantic
    FAISS,
    
    # This is the number of examples to produce.
    k=2
    
)

In [39]:
pip install faiss-cpu

Defaulting to user installation because normal site-packages is not writeable
Collecting faiss-cpu
  Downloading faiss_cpu-1.7.4-cp310-cp310-win_amd64.whl (10.8 MB)
     ---------------------------------------- 10.8/10.8 MB 3.9 MB/s eta 0:00:00
Installing collected packages: faiss-cpu
Successfully installed faiss-cpu-1.7.4
Note: you may need to restart the kernel to use updated packages.


In [40]:
# SemanticsSimilarityExampleSelector will select examples that are similar to your input by semantics

example_selector = SemanticSimilarityExampleSelector.from_examples(
    #This is the list of examples available to select from.
    examples,
    
    # This is the embedding class used to produce embeddings which are used to measure semantic
    OpenAIEmbeddings(openai_api_key="...openai_api_key..."),
    
    # This is the VectorStore class that is used to store the embeddings and do a similarity semantic
    FAISS,
    
    # This is the number of examples to produce.
    k=2
    
)

In [42]:
similar_prompt = FewShotPromptTemplate(
# The object that will help select examples
    example_selector= example_selector,
 
    # Your prompt
    example_prompt=example_prompt,
    
    #Customizations that will be added to the top and bottom of your prompt
    prefix= "Give the location an item is usually found in",
    suffix="Input: {noun}\nOutput",
    
    #What inputs your prompt will receive
    input_variables=["noun"]
)

In [47]:
#select a noun!
my_noun = "student"
print(similar_prompt.format(noun=my_noun))

Retrying langchain.embeddings.openai.embed_with_retry.<locals>._embed_with_retry in 4.0 seconds as it raised APIConnectionError: Error communicating with OpenAI: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')).


Give the location an item is usually found in

Example Input: driver
Example Output: car

Example Input: pilot
Example Output: plane

Input: student
Output


In [46]:
llm(similar_prompt.format(noun=my_noun))

': garden'

In [4]:
# Output Parser - A helpful way to format the output of a model. Usually used for structured output.
# Two big concepts: 1. Format Instructions - An autogenerated prompts that tells the LLM how to format it's response based off your desired result.
# 2. Parser - A method which will extract your model's text output into a desired structure(usually json)

from langchain.output_parsers import StructuredOutputParser, ResponseSchema
from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
from langchain.llms import OpenAI

In [5]:
llm = OpenAI(model_name="text-davinci-003", openai_api_key="...openai_api_key...")

In [7]:
# How you would like your response structured. This is basically a fancy prompt template
response_schemas= [
    ResponseSchema(name="bad_string", description=" This a poorly formatted user input string"),
    ResponseSchema(name="good_string", description="This is your response, a reformatted response")
]

# How you would like to parse your output
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)


In [9]:
# see the prompt template you created for formatting
format_instructions = output_parser.get_format_instructions()
print(format_instructions)

The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "\`\`\`json" and "\`\`\`":

```json
{
	"bad_string": string  //  This a poorly formatted user input string
	"good_string": string  // This is your response, a reformatted response
}
```


In [13]:
template = """
You will be given a poorly formatted string from a user.
Reformat it and make sure all the words are spelled correctly

{format_instructions}

% USER_INPUT
{user_input}

YOUR RESPONSE:
"""

prompt = PromptTemplate(
    input_variables = ["user_input"],
    partial_variables={"format_instructions": format_instructions},
    template=template
)

promptValue = prompt.format(user_input="welcom to califonya!")

print(promptValue)


You will be given a poorly formatted string from a user.
Reformat it and make sure all the words are spelled correctly

The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "\`\`\`json" and "\`\`\`":

```json
{
	"bad_string": string  //  This a poorly formatted user input string
	"good_string": string  // This is your response, a reformatted response
}
```

% USER_INPUT
welcom to califonya!

YOUR RESPONSE:



In [14]:
llm_output = llm(promptValue)
llm_output

'```json\n{\n\t"bad_string": "welcom to califonya!",\n\t"good_string": "Welcome to California!"\n}\n```'

In [15]:
output_parser.parse(llm_output)

{'bad_string': 'welcom to califonya!', 'good_string': 'Welcome to California!'}

## Indexes - Structuring documents to LLMs can work with them

### Document Loaders
Easy ways to import data from other sources.

In [16]:
from langchain.document_loaders import HNLoader

In [17]:
loader = HNLoader("https://news.ycombinator.com/item?id=34422627")

In [18]:
data = loader.load()

In [None]:
# print(f"Found {len(data)} comments")
print(f"Here's a sample:\n\n{''.join([x.page_content[:150] for x in data[:2]])}")

## Text Splitters
Often times your document is too long(like a book) for your LLM. You need to split it up into chunks. Text splitters help with this.

In [20]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [30]:
# This is a long document we can split up.
with open('data/PaulGrahamEssays/worked.txt') as f:
    pg_work = f.read()
    
print(f"You have {len([pg_work])} document")    

You have 1 document


In [39]:
text_splitter = RecursiveCharacterTextSplitter(
    # Set a really small chunk size, just to show.
    chunk_size = 1000,
    chunk_overlap = 20,
)

texts = text_splitter.create_documents({pg_work})

In [40]:
print(f"You have {len(texts)} documents")

You have 78 documents


In [41]:
print("Preview:")
print(texts[0].page_content, "\n")
print(texts[1].page_content)

Preview:
February 2021Before college the two main things I worked on, outside of school,
were writing and programming. I didn't write essays. I wrote what
beginning writers were supposed to write then, and probably still
are: short stories. My stories were awful. They had hardly any plot,
just characters with strong feelings, which I imagined made them
deep.The first programs I tried writing were on the IBM 1401 that our
school district used for what was then called "data processing."
This was in 9th grade, so I was 13 or 14. The school district's
1401 happened to be in the basement of our junior high school, and
my friend Rich Draves and I got permission to use it. It was like
a mini Bond villain's lair down there, with all these alien-looking
machines Â— CPU, disk drives, printer, card reader Â— sitting up
on a raised floor under bright fluorescent lights.The language we used was an early version of Fortran. You had to
type programs on punch cards, then stack them in the card reader 

## Retrievers
Easy way to combine document with language models.
There are many different types of retrievers, the most widely supported is the vectorStoreRetriever

In [43]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings

loader = TextLoader('data/PaulGrahamEssays/worked.txt')
documents = loader.load()

In [44]:
# Get your splitter ready
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=50)

# Split your docs into texts
texts = text_splitter.split_documents(documents)

# Get embedding engine ready
embeddings = OpenAIEmbeddings(openai_api_key="...openai_api_key...")

# Embedd your texts
db = FAISS.from_documents(texts, embeddings)

In [45]:
# Init your retriever. Asking for just 1 document back
retriever = db.as_retriever()

In [46]:
retriever

VectorStoreRetriever(vectorstore=<langchain.vectorstores.faiss.FAISS object at 0x000002F83763DA20>, search_type='similarity', search_kwargs={})

In [49]:
docs = retriever.get_relevant_documents("What types of things did the author want to build?")

Retrying langchain.embeddings.openai.embed_with_retry.<locals>._embed_with_retry in 4.0 seconds as it raised APIConnectionError: Error communicating with OpenAI: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')).


In [50]:
print("\n\n".join([x.page_content[:200] for x in docs[:2]]))

standards; what was the point? No one else wanted one either, so
off they went. That was what happened to systems work.I wanted not just to build things, but to build things that would
last.In this di

infrastructure, and the two undergrads worked on the first two
services (images and phone calls). But about halfway through the
summer I realized I really didn't want to run a company Â— especially
no


## VectorStores
Database to store vectors, Most popular ones are Pine & Weaviate
Conceptually, think of them as tables q/ a column for embeddings(vectors) and a column for metadata

In [52]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings

loader = TextLoader("data/PaulGrahamEssays/worked.txt")
documents = loader.load()

# Get your splitter ready
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)

# Split your docs into texts
texts = text_splitter.split_documents(documents)

# Get embeddings engine ready
embeddings = OpenAIEmbeddings(openai_api_key="...openai_api_key...")