In [1]:
openai_api_key = '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'
serpapi_api_key = ''

### Schema - Nuts and Bolts of Working with LLMs

#### Text

In [2]:
my_text = 'What day comes after Friday'

#### Chat Messages

In [3]:
from langchain.chat_models import ChatOpenAI 
from langchain.schema import HumanMessage, SystemMessage, AIMessage 

In [4]:
chat = ChatOpenAI(temperature=0.7, openai_api_key=openai_api_key)

In [5]:
chat([
    SystemMessage(content='You are a nice AI bot that helps a user figure out what to eat in one short sentence'),
    HumanMessage(content='I like tomatoes, what should I eat?')
])

AIMessage(content='You could try a Caprese salad with fresh tomatoes, mozzarella cheese, and basil.', additional_kwargs={}, example=False)

In [6]:
chat([
    SystemMessage(content='You are a nice AI bot that helps a user figure out what to eat in one short sentence'),
    HumanMessage(content='I like the beaches where should I go?'),
    AIMessage(content='You should go to Nice, France'),
    HumanMessage(content='What else should I do when I\'m there?')
])

AIMessage(content='You should visit the Promenade des Anglais, explore the Old Town, and enjoy the local cuisine and wine.', additional_kwargs={}, example=False)

#### Documents

In [7]:
from langchain.schema import Document

In [8]:
Document(
    page_content='This is my document. It is full of text that I\'ve gathered from other places',
    metadata={
        'my_document_id': 234234, 
        'my_document_score': 'The LangChain Papers',
        'my_document_create_time': 1680013019
    }
)

Document(page_content="This is my document. It is full of text that I've gathered from other places", metadata={'my_document_id': 234234, 'my_document_score': 'The LangChain Papers', 'my_document_create_time': 1680013019})

### Models - The interface to the AI Brain

#### Language Model

In [9]:
from langchain.llms import OpenAI 

llm = OpenAI(model_name='text-ada-001', openai_api_key=openai_api_key)

In [10]:
llm(my_text)

'?\n\nSaturday.'

#### Chat Model

In [11]:
from langchain.chat_models import ChatOpenAI 
from langchain.schema import HumanMessage, SystemMessage, AIMessage 

In [12]:
chat([
    SystemMessage(content='You are an unhelpful AI bot that makes a joke at whatever the user says'),
    HumanMessage(content='I would like to go to New York, how should I do this?')
])

AIMessage(content="Oh, you could try walking there! It might take a while, but it's good exercise.", additional_kwargs={}, example=False)

#### Text Embedding Model

In [13]:
from langchain.embeddings import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

In [14]:
text = 'Hi! It\'s time for the beach'

In [15]:
text_embedding = embeddings.embed_query(text)
print(f'Your embedding is length {len(text_embedding)}')
print(f'Here\'s a sample: {text_embedding}')

Your embedding is length 1536
Here's a sample: [-0.00011533180804690346, -0.0032023766543716192, -0.0008074173820205033, -0.019493253901600838, -0.015150638297200203, 0.031223172321915627, -0.016011882573366165, -0.011675331741571426, 0.00926141906529665, -0.01357370987534523, 0.004994615912437439, 0.01350092887878418, 0.00897635892033577, -0.021337047219276428, -0.0009226544643752277, -0.021264266222715378, 0.03017997369170189, -0.003924124408513308, 0.014883773401379585, 0.002093978226184845, -0.012785245664417744, -0.024066345766186714, 0.028263399377465248, -0.004042393993586302, -0.009916449896991253, -0.00816969946026802, 0.010644263587892056, -0.01576927863061428, -0.0013365979539230466, -0.031053349375724792, 0.03971432149410248, 0.0018362111877650023, -0.0020757829770445824, -0.012566901743412018, -0.009886125102639198, -0.02424829825758934, -0.007890704087913036, -0.003129595424979925, 0.011184058152139187, -0.014641168527305126, -0.0062955813482403755, -0.021579651162028313,

### Prompts - Text generally Used as Instructions to your Model

#### Prompt

In [16]:
from langchain.llms import OpenAI 

llm = OpenAI(model_name='text-davinci-003', openai_api_key=openai_api_key)

prompt = """
Today is Monday, tomorrow is Wednesday.
What is wrong with that statement?
"""

llm(prompt)

'\nThe statement is incorrect because tomorrow is Tuesday, not Wednesday.'

#### Prompt Template

In [17]:
from langchain.llms import OpenAI 
from langchain import PromptTemplate 

llm = OpenAI(model_name='text-davinci-003', openai_api_key=openai_api_key)

template = """
I really want to travel to {location}. What should I do there?
Respond in one short sentence
"""

prompt = PromptTemplate(
    input_variables=['location'],
    template=template
)

final_prompt = prompt.format(location='Rome')

print(f'Final Prompt: {final_prompt}')
print('-----------------------')
print(f'LLM Output: {llm(final_prompt)}')

Final Prompt: 
I really want to travel to Rome. What should I do there?
Respond in one short sentence

-----------------------
LLM Output: Explore the city's ancient ruins, historic architecture, and vibrant culture.


#### Example Selectors

In [18]:
from langchain.prompts.example_selector import SemanticSimilarityExampleSelector 
from langchain.vectorstores import FAISS 
from langchain.embeddings import OpenAIEmbeddings 
from langchain.prompts import FewShotPromptTemplate, PromptTemplate 
from langchain.llms import OpenAI

In [19]:
llm = OpenAI(model_name='text-davinci-003', openai_api_key=openai_api_key)

example_prompt = PromptTemplate(
    input_variables=['input', 'output'],
    template='Example Input: {input}\nExample Output: {output}'
)

examples = [
    {'input': 'pirate', 'output': 'ship'},
    {'input': 'pilot', 'output': 'plane'},
    {'input': 'driver', 'output': 'car'},
    {'input': 'tree', 'output': 'ground'},
    {'input': 'bird', 'output': 'nest'}
]

In [20]:
example_selector =  SemanticSimilarityExampleSelector.from_examples(
    examples,
    OpenAIEmbeddings(openai_api_key=openai_api_key),
    FAISS,
    k=2
)

In [21]:
similar_prompt = FewShotPromptTemplate(
    example_selector=example_selector,
    example_prompt=example_prompt,
    prefix='Give the location an item is usually found in',
    suffix='Input: {noun}\nOutput:',
    input_variables=['noun']
)

In [22]:
my_noun = 'student'

print(similar_prompt.format(noun=my_noun))

Give the location an item is usually found in

Example Input: driver
Example Output: car

Example Input: pilot
Example Output: plane

Input: student
Output:


In [23]:
llm(similar_prompt.format(noun=my_noun))

' classroom'

#### Output Parsers

In [24]:
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
from langchain.llms import OpenAI 

In [25]:
llm = OpenAI(model_name='text-davinci-003', openai_api_key=openai_api_key)

In [26]:
response_schemas = [
    ResponseSchema(name='bad_string', description='This is a poorly formatted user input string'),
    ResponseSchema(name='good_string', description='This is your response, a reformatted response')
]

output_parser = StructuredOutputParser.from_response_schemas(response_schemas)

In [27]:
format_instructions = output_parser.get_format_instructions()
print(format_instructions)

The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "\`\`\`json" and "\`\`\`":

```json
{
	"bad_string": string  // This is a poorly formatted user input string
	"good_string": string  // This is your response, a reformatted response
}
```


In [28]:
template = """
You will be given a poorly formatted string from a user.
Reformat it and make sure all the words are spelled correctly

{format_instructions}

% USER INPUT:
{user_input}

YOUR RESPONSE:
"""

prompt = PromptTemplate(
    input_variables=['user_input'],
    partial_variables={'format_instructions': format_instructions},
    template=template
)

promptValue = prompt.format(user_input='Welcom to californya!')

print(promptValue)


You will be given a poorly formatted string from a user.
Reformat it and make sure all the words are spelled correctly

The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "\`\`\`json" and "\`\`\`":

```json
{
	"bad_string": string  // This is a poorly formatted user input string
	"good_string": string  // This is your response, a reformatted response
}
```

% USER INPUT:
Welcom to californya!

YOUR RESPONSE:



In [29]:
llm_output = llm(promptValue)
llm_output

'```json\n{\n\t"bad_string": "Welcom to californya!"\n\t"good_string": "Welcome to California!"\n}\n```'

In [30]:
output_parser.parse(llm_output)

OutputParserException: Got invalid JSON object. Error: Expecting ',' delimiter: line 3 column 2 (char 42)

### Indexes - Structuring Documents to LLMs can Work with them

#### Document Loaders

In [31]:
from langchain.document_loaders import HNLoader

In [32]:
loader = HNLoader('https://news.ycombinator.com/item?id=34422627')

In [33]:
data = loader.load()

In [34]:
print(f'Found {len(data)} comments')
print(f"Here is a sample:\n\n{''.join([x.page_content[:150] for x in data[:2]])}")

Found 76 comments
Here is a sample:

Ozzie_osman 3 months ago  
             | next [–] 

LangChain is awesome. For people not sure what it's doing, large language models (LLMs) are very Ozzie_osman 3 months ago  
             | parent | next [–] 

Also, another library to check out is GPT Index (https://github.com/jerryjliu/gpt_index)


#### Text Splitters

In [35]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [36]:
with open('Data/worked.txt') as f:
    pg_work = f.read() 
    print(f'You have {len([pg_work])} document')

You have 1 document


In [37]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 150, 
    chunk_overlap = 20
)

texts = text_splitter.create_documents([pg_work])

In [38]:
print(f'You have {len(texts)} documents')

You have 606 documents


In [39]:
print('Preview:')
print(texts[0].page_content, '\n')
print(texts[1].page_content)

Preview:
February 2021Before college the two main things I worked on, outside of school,
were writing and programming. I didn't write essays. I wrote what 

beginning writers were supposed to write then, and probably still
are: short stories. My stories were awful. They had hardly any plot,


#### Retrievers

In [40]:
from langchain.document_loaders import TextLoader 
from langchain.text_splitter import RecursiveCharacterTextSplitter 
from langchain.vectorstores import FAISS 
from langchain.embeddings import OpenAIEmbeddings 

loader = TextLoader('Data/worked.txt')
documents = loader.load()

In [41]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)

texts = text_splitter.split_documents(documents)

embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

db = FAISS.from_documents(texts, embeddings)

In [42]:
retriever = db.as_retriever()

retriever

VectorStoreRetriever(vectorstore=<langchain.vectorstores.faiss.FAISS object at 0x000002855F1FF820>, search_type='similarity', search_kwargs={})

In [43]:
docs = retriever.get_relevant_documents('What types of things did the author want to build?')

In [44]:
print('\n\n'.join([x.page_content[:200] for x in docs[:2]]))

standards; what was the point? No one else wanted one either, so
off they went. That was what happened to systems work.I wanted not just to build things, but to build things that would
last.In this di

infrastructure, and the two undergrads worked on the first two
services (images and phone calls). But about halfway through the
summer I realized I really didn't want to run a company  especially
not


#### VcetorStores

In [45]:
from langchain.document_loaders import TextLoader 
from langchain.text_splitter import RecursiveCharacterTextSplitter 
from langchain.vectorstores import FAISS 
from langchain.embeddings import OpenAIEmbeddings 

loader = TextLoader('Data/worked.txt')
documents = loader.load()

In [46]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)

texts = text_splitter.split_documents(documents)

embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

db = FAISS.from_documents(texts, embeddings)

In [47]:
print(f'You have {len(texts)} documents')

You have 78 documents


In [48]:
embedding_list = embeddings.embed_documents([text.page_content for text in texts])

In [49]:
print(f'You have {len(embedding_list)} embeddings')
print(f'Here is a sample of one: {embedding_list[0]}')

You have 78 embeddings
Here is a sample of one: [-0.0011257503838977905, -0.011114791224843644, -0.012860921430454106, -0.028743992117557838, -0.005987212163793924, 0.027145609721667005, -0.03723287273244722, -0.015459968971608905, -0.008905264700539164, -0.021087881536613198, 0.01990588602033992, 0.042847350969084014, -0.011517743776093695, -0.009785046036205555, 0.006561420503930902, 0.030570712460889275, 0.023169806621475057, -0.008367994011324036, 0.003619862549155486, -0.006631937456513373, -0.004123553703879345, 0.005775661771707806, 0.01113493852644324, -0.0049093124361024405, -0.01842839115933702, -0.013062398171740425, 0.012652728921967919, -0.023733940379489647, 0.011067779923121997, -0.006175256905019219, 0.013149704076661266, -0.018307505487094266, -7.277292148680993e-05, -0.00569842905192547, -0.02401600818981953, -0.008609765355809549, -0.024808481757919904, 0.011255824198685996, -0.006064444883576261, -0.018347800090293458, 0.03876409373104904, 0.02491593589576293, -0.00

### Memory

#### Chat Message History

In [50]:
from langchain.memory import ChatMessageHistory 
from langchain.chat_models import ChatOpenAI 

In [51]:
chat = ChatOpenAI(temperature=0, openai_api_key=openai_api_key)

history = ChatMessageHistory() 

history.add_ai_message('Hi!')
history.add_user_message('What is the capital of France?')

In [52]:
history.messages

[AIMessage(content='Hi!', additional_kwargs={}, example=False),
 HumanMessage(content='What is the capital of France?', additional_kwargs={}, example=False)]

In [53]:
ai_response = chat(history.messages)
ai_response

AIMessage(content='The capital of France is Paris.', additional_kwargs={}, example=False)

In [54]:
history.add_ai_message(ai_response.content)
history.messages

[AIMessage(content='Hi!', additional_kwargs={}, example=False),
 HumanMessage(content='What is the capital of France?', additional_kwargs={}, example=False),
 AIMessage(content='The capital of France is Paris.', additional_kwargs={}, example=False)]

### Chains

#### 1. Simple Sequential Chains

In [55]:
from langchain.llms import OpenAI 
from langchain.chains import LLMChain 
from langchain.prompts import PromptTemplate 
from langchain.chains import SimpleSequentialChain 

llm = OpenAI(temperature=1, openai_api_key=openai_api_key)

In [56]:
template = """
Your job is to come up with a classic dish from the area that the users suggests.
% USER LOCATION
{user_location}

YOUR RESPONSE:
"""

prompt_template = PromptTemplate(
    input_variables=['user_location'],
    template=template
)

location_chain = LLMChain(llm=llm, prompt=prompt_template)

In [57]:
template = template = """
Given a meal, give a short and simple recipe on how to make that dish at home.
% MEAL
{user_meal}

YOUR RESPONSE:
"""

prompt_template = PromptTemplate(
    input_variables=['user_meal'],
    template=template
)

meal_chain = LLMChain(llm=llm, prompt=prompt_template)

In [58]:
overall_chain = SimpleSequentialChain(
    chains=[location_chain, meal_chain],
    verbose=True
)

In [59]:
review = overall_chain.run('Rome')



[1m> Entering new SimpleSequentialChain chain...[0m
[36;1m[1;3mPasta alla Carbonara.[0m
[33;1m[1;3m
Ingredients:
- 5oz of spaghetti
- 2 eggs
- 2/3 cup Parmesan cheese
- 4oz bacon
- 1 teaspoon black pepper

Instructions:
1. Bring a pot of salted water to a boil. Add the spaghetti and cook until al dente. 
2. Cut bacon into pieces and cook in a skillet until crisp.
3. Whisk the eggs and Parmesan cheese in a mixing bowl.
4. When the spaghetti is done, drain it and put it back into the pot.
5. Remove the bacon from the skillet, add it to the pot with the spaghetti and mix.
6. Pour the egg mixture over the spaghetti and mix quickly.
7. Serve with freshly grated black pepper. Enjoy![0m

[1m> Finished chain.[0m


### Agents

In [60]:
from langchain.agents import load_tools 
from langchain.agents import initialize_agent 
from langchain.llms import OpenAI 
import json 

llm = OpenAI(temperature=0, openai_api_key=openai_api_key)

In [61]:
toolkit = load_tools(['serpapi'], llm=llm, serpapi_api_key=serpapi_api_key)

In [62]:
agent = initialize_agent(
    toolkit, llm, agent='zero-shot-react-description', 
    verbose=True, return_intermediate_steps=True
)

In [63]:
response = agent({
    'input': 'what was the first album of the'
    'band that Natalie Bergman is a part of ?'
})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m I should try to find out what band Natalie Bergman is a part of
Action: Search
Action Input: "Natalie Bergman band"[0m
Observation: [36;1m[1;3mNatalie Bergman is an American singer-songwriter. She is one half of the duo Wild Belle, along with her brother Elliot Bergman. Her debut solo album, Mercy, was released on Third Man Records on May 7, 2021. She is based in Los Angeles.[0m
Thought:[32;1m[1;3m I should search for the name of Wild Belle's first album
Action: Search
Action Input: "Wild Belle first album"[0m
Observation: [36;1m[1;3mIsles[0m
Thought:[32;1m[1;3m I now know the final answer
Final Answer: Isles is the first album of the band that Natalie Bergman is a part of.[0m

[1m> Finished chain.[0m


In [64]:
print(json.dumps(response['intermediate_steps'], indent=2))

[
  [
    [
      "Search",
      "Natalie Bergman band",
      " I should try to find out what band Natalie Bergman is a part of\nAction: Search\nAction Input: \"Natalie Bergman band\""
    ],
    "Natalie Bergman is an American singer-songwriter. She is one half of the duo Wild Belle, along with her brother Elliot Bergman. Her debut solo album, Mercy, was released on Third Man Records on May 7, 2021. She is based in Los Angeles."
  ],
  [
    [
      "Search",
      "Wild Belle first album",
      " I should search for the name of Wild Belle's first album\nAction: Search\nAction Input: \"Wild Belle first album\""
    ],
    "Isles"
  ]
]
