In [211]:
import os
import openai
from dotenv import find_dotenv, load_dotenv
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage

In [212]:
load_dotenv(find_dotenv())
openai.api_key = os.getenv("OPENAI_API_KEY")

In [14]:
llm_model = "gpt-3.5-turbo"
llm = OpenAI(temperature=0.7)
chat_model= ChatOpenAI(temperature=0.7)

In [11]:
print(llm.predict("What is the weather in Mumbai?"))



As an AI, I do not have access to live weather data. However, according to current forecasts, the weather in Mumbai is typically hot and humid with temperatures ranging from 25-35 degrees Celsius (77-95 degrees Fahrenheit). Monsoon season in Mumbai lasts from June to September, with heavy rainfall and occasional thunderstorms.


In [15]:
print(chat_model.predict("What is the weather in Mumbai?"))

Currently, the weather in Mumbai is partly cloudy with a temperature of around 32°C (90°F).


In [17]:
prompt = "How old is the universe?"
messages = [HumanMessage(content=prompt)]
print(chat_model.predict_messages(messages).content)

content='The universe is approximately 13.8 billion years old. This age is based on scientific observations and measurements of the cosmic microwave background radiation, as well as the expansion rate of the universe.' response_metadata={'token_usage': {'completion_tokens': 38, 'prompt_tokens': 13, 'total_tokens': 51}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None} id='run-d5af00f6-a0c2-4904-b35d-a71d76cea129-0'


## Prompt Templates

In [36]:
chat_model= ChatOpenAI(temperature=0.7, model=llm_model)

In [43]:
def get_completion(prompt, model=llm_model):
    messages = [{"role": "user", "content": prompt}]
    response = openai.chat.completions.create(
        model=model,
        messages=messages,
        temperature=0,
    )
    return response.choices[0].message.content

In [44]:
customer_review = """Your product is terrible! I don't know how you were able to get this to the market. I don't want this! Actually no one would want this. Seriously, Give me my money now!"""
tone = """Proper English in a nice, warm and respectful tone"""
language = """Korean"""

In [45]:
promp = f""" Rewrite the following {customer_review} in {tone} and then please translate the new review message into {language}"""

In [46]:
rewrite = get_completion(prompt = promp)
print(rewrite)

I am disappointed with your product. I am unsure how it was approved for the market. Unfortunately, it does not meet my needs and I believe it would not appeal to others as well. I kindly request a refund. Thank you.

Korean translation:
당신의 제품에 실망했습니다. 시장에 출시되었다는 것에 의문을 가지고 있습니다. 안타깝게도 제 요구를 충족시키지 못하며 다른 사람들에게도 매력적이지 않을 것으로 생각됩니다. 환불을 요청합니다. 감사합니다.


### Built-in Prompt Templates

In [47]:
from langchain.prompts import ChatPromptTemplate

In [49]:
template_string = """Translate the following text {customer_review} into Italian in a polite tone. Also, give it a {field}"""

prompt_template = ChatPromptTemplate.from_template(template_string)

translation_message = prompt_template.format_messages(
    customer_review = customer_review,
    field = "thumbs down"
)

In [50]:
response = chat_model(translation_message)
print(response.content)

Il tuo prodotto è terribile! Non so come sei riuscito a farlo arrivare sul mercato. Non lo voglio! In realtà, nessuno lo vorrebbe. Sul serio, restituiscimi i soldi adesso! 👎


## Langchain Parsers

In [51]:
chat = ChatOpenAI(temperature=0.0, model=llm_model)

In [52]:
email_response = """
Here's our itinerary for our upcoming trip to Europe.
We leave from Denver, Colorado airport at 8:45 pm, and arrive in Amsterdam 10 hours later
at Schipol Airport.
We'll grab a ride to our airbnb and maybe stop somewhere for breakfast before 
taking a nap.

Some sightseeing will follow for a couple of hours. 
We will then go shop for gifts 
to bring back to our children and friends.  

The next morning, at 7:45am we'll drive to to Belgium, Brussels - it should only take aroud 3 hours.
While in Brussels we want to explore the city to its fullest - no rock left unturned!

"""

In [53]:
email_template = """
From the following email, extract the following information:

leave_time: when are they leaving for vacation to Europe. If there's an actual
time written, use it, if not write unknown.

leave_from: where are they leaving from, the airport or city name and state if
available.

cities_to_visit: extract the cities they are going to visit. 
If there are more than one, put them in square brackets like '["cityone", "citytwo"].

Format the output as JSON with the following keys:
leave_time
leave_from
cities_to_visit

email: {email}
"""

In [54]:
prompt_template = ChatPromptTemplate.from_template(email_template)

In [55]:
messages = prompt_template.format_messages(email=email_response)
response = chat(messages)

In [56]:
print(response.content)

{
  "leave_time": "8:45 pm",
  "leave_from": "Denver, Colorado",
  "cities_to_visit": ["Amsterdam", "Brussels"]
}


### Langchain Parsers

In [57]:
print(type(response.content))

<class 'str'>


In [58]:
from langchain.output_parsers import ResponseSchema
from langchain.output_parsers import StructuredOutputParser

In [59]:
leave_time_schema = ResponseSchema(name="leave_time", description="When they are leaving. \
    It's usually a numerical time of the day. \
        If not available, write n/a.")

leave_from_schema = ResponseSchema(name="leave_from", description="Where they are leaving from. \
    It's a city, airport, state or province. \
        If not available, write n/a.")

cities_to_visit_schema = ResponseSchema(name="cities_to_visit", description="The cities or towns to be visited on the trip. \
    This needs to be a list. \
        If not available, write n/a.")

In [60]:
response_schema = [
    leave_time_schema,
    leave_from_schema,
    cities_to_visit_schema
]

In [61]:
output_parser = StructuredOutputParser.from_response_schemas(response_schema)

format_instructions = output_parser.get_format_instructions()

In [62]:
email_template_revised = """
From the following email, extract the following information:

leave_time: when are they leaving for vacation to Europe. If there's an actual
time written, use it, if not write unknown.

leave_from: where are they leaving from, the airport or city name and state if
available.

cities_to_visit: extract the cities they are going to visit. If there are more than 
one, put them in square brackets like '["cityone", "citytwo"].

Format the output as JSON with the following keys:
leave_time
leave_from
cities_to_visit

email: {email}
{format_instructions}
"""

In [64]:
updated_prompt = ChatPromptTemplate.from_template(template=email_template_revised)
messages = prompt_template.format_messages(email = email_response, format_instructions=format_instructions)

In [65]:
response = chat(messages)

In [67]:
print(type(response.content))

<class 'str'>


In [68]:
output_dict = output_parser.parse(response.content)
print(type(output_dict))
print(f"Cities:::: {output_dict['cities_to_visit'][0]}")

<class 'dict'>
Cities:::: Amsterdam


### Pydantic Parser

In [69]:
from langchain.output_parsers import PydanticOutputParser
from langchain.pydantic_v1 import BaseModel, Field, validator
from typing import List

In [71]:
class VacationInfo(BaseModel):
    leave_time: str = Field(description="When they are leaving.")
    leave_from: str = Field(description="Where they are leaving from. It's a city, airport, state or province.")
    cities_to_visit: List = Field(description="The cities, towns they will be visiting on their trip. This needs to be in a list")
    num_people: int = Field(description="this is an integer for a number of people on this trip")
    
    @validator('num_people')
    def check_num_people(cls, field):
        if field <=0:
            raise ValueError("Badly formatted number")
        return field

In [72]:
pydantic_parser = PydanticOutputParser(pydantic_object=VacationInfo)
format_instructions = pydantic_parser.get_format_instructions()

In [73]:
email_response = """
Here's our itinerary for our upcoming trip to Europe.
There will be 5 of us on this vacation trip.
We leave from Denver, Colorado airport at 8:45 pm, and arrive in Amsterdam 10 hours later
at Schipol Airport.
We'll grab a ride to our airbnb and maybe stop somewhere for breakfast before 
taking a nap.

Some sightseeing will follow for a couple of hours. 
We will then go shop for gifts 
to bring back to our children and friends.  

The next morning, at 7:45am we'll drive to to Belgium, Brussels - it should only take aroud 3 hours.
While in Brussels we want to explore the city to its fullest - no rock left unturned!

"""

In [74]:
email_template_revised = """
From the following email, extract the following information regarding 
this trip.

email: {email}

{format_instructions}
"""

In [75]:
updated_prompt = ChatPromptTemplate.from_template(template=email_template_revised)
messages = updated_prompt.format_messages(email=email_response, format_instructions=format_instructions)
format_response = chat(messages)

In [76]:
vacation = pydantic_parser.parse(format_response.content)
print(type(vacation))
# print(vacation.cities_to_visit)
for item in vacation.cities_to_visit:
    print(f"Cities: {item}")

<class '__main__.VacationInfo'>
Cities: Amsterdam
Cities: Brussels


## Memory Hands on

In [81]:
print(llm.predict("Hi! My favourite bands are BTS and Seventeen. Can you tell me about them?"))

 

Sure! BTS (also known as the Bangtan Boys) is a South Korean boy band that debuted in 2013 under Big Hit Entertainment. The group consists of seven members: RM, Jin, Suga, J-Hope, Jimin, V, and Jungkook. They are known for their powerful rap and vocal skills, as well as their socially conscious lyrics that tackle topics such as mental health, societal pressures, and self-love. BTS has gained a huge following worldwide, known as the BTS ARMY, and have broken numerous records in the music industry. They have also collaborated with many international artists and have won several awards, including multiple Billboard Music Awards and a Grammy nomination.

Seventeen is also a South Korean boy band that debuted in 2015 under Pledis Entertainment. The group consists of thirteen members: S.Coups, Jeonghan, Joshua, Jun, Hoshi, Wonwoo, Woozi, DK, Mingyu, The8, Seungkwan, Vernon, and Dino. Seventeen is known for their self-produced music, intricate choreography, and diverse concept styles. They

In [82]:
print(llm.predict("Great! What's my favourite band?"))



I'm sorry, I am not able to determine your personal preferences or favorites as I am an AI and do not have access to that information. 


In [83]:
from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferMemory

In [84]:
memory = ConversationBufferMemory()
conversation = ConversationChain(
    llm = llm,
    memory = memory,
    verbose = True
)

In [89]:
conversation.predict(input="Hi! My name is Siddhi")
conversation.predict(input="Great! What's my name?")



[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:
Human: Hi! My favourite bands are BTS and Seventeen. Can you tell me about them?
AI:  Hi there! BTS, also known as Bangtan Boys, is a South Korean boy band that debuted in 2013. They have seven members: RM, Jin, Suga, J-Hope, Jimin, V, and Jungkook. They are known for their energetic performances and socially conscious lyrics. They have released several albums, including "Map of the Soul: Persona" and "Love Yourself: Tear," which have both achieved global success. As for Seventeen, they are a 13-member boy group also from South Korea. They debuted in 2015 and are known for their self-produced music and synchronized choreography. Some of their popul

' Your name is Siddhi, as you mentioned earlier. Did you have any other questions?'

In [90]:
print(f"memory => {memory.buffer}")

memory => Human: Hi! My favourite bands are BTS and Seventeen. Can you tell me about them?
AI:  Hi there! BTS, also known as Bangtan Boys, is a South Korean boy band that debuted in 2013. They have seven members: RM, Jin, Suga, J-Hope, Jimin, V, and Jungkook. They are known for their energetic performances and socially conscious lyrics. They have released several albums, including "Map of the Soul: Persona" and "Love Yourself: Tear," which have both achieved global success. As for Seventeen, they are a 13-member boy group also from South Korea. They debuted in 2015 and are known for their self-produced music and synchronized choreography. Some of their popular songs include "Don't Wanna Cry" and "Very Nice." Both BTS and Seventeen have dedicated fan bases and have won numerous awards for their music. Is there anything else you would like to know about them?
Human: Hi! My favourite bands are BTS and Seventeen. Can you tell me about them?
AI:  Sure! As I mentioned, BTS has seven members 

## Chains

In [95]:
from langchain.chains import LLMChain
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate

In [92]:
open_ai = OpenAI(temperature=0.7)

In [96]:
prompt = PromptTemplate(
    input_variables = ["language"],
    template = "How do you say good morning in {language}"
)

In [97]:
chain = LLMChain(llm=open_ai, prompt=prompt)
print(chain.run(language="Korean"))



The most common way to say good morning in Korean is "안녕하세요" (annyeonghaseyo). However, there are also other ways to greet someone in the morning, such as "좋은 아침입니다" (joheun achimimnida) or "좋은 아침이에요" (joheun achimieyo).


In [98]:
template = """ 
 As a children's book writer, please come up with a simple and short (90 words)
 lullaby based on the location
 {location}
 and the main character {name}
 
 STORY:
"""

In [99]:
prompt = PromptTemplate(input_variables=["location", "name"], template=template)

In [100]:
chain_story = LLMChain(llm=open_ai, prompt=prompt, verbose=True)
story = chain_story({"location": "Spain", "name": "Maya"})
print(story['text'])



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m 
 As a children's book writer, please come up with a simple and short (90 words)
 lullaby based on the location
 Spain
 and the main character Maya
 
 STORY:
[0m

[1m> Finished chain.[0m

"Maya, mi niña, close your eyes
Underneath the Spanish skies
The stars above twinkle bright
As you drift off into the night

Dream of castles and flamenco dance
As the moonbeams cast their magic trance
Maya, mi amor, sleep tight
In this land of Spanish delight

Let the guitar strum a gentle tune
As you rest beneath the Spanish moon
Maya, mi querida, rest your head
In this beautiful land where dreams are fed

So sleep, my little one, with a smile
For in Spain, life is always worthwhile
Maya, mi niña, buenos noches
Until the morning, sleep and recharge" 


In [102]:
from langchain.chains import SequentialChain

In [112]:
chain_story = LLMChain(llm=open_ai, prompt=prompt, output_key="ogstory", verbose=True)

In [115]:
template_update = """Translate the {ogstory} into {language}.  Make sure 
the language is simple and fun.

TRANSLATION: """

In [116]:
prompt_translate = PromptTemplate(input_variables=["ogstory", "language"], template=template_update)

In [117]:
chain_translate = LLMChain(llm=open_ai, prompt=prompt_translate, output_key="translated")

In [118]:
overall_chain = SequentialChain(
    chains = [chain_story, chain_translate],
    input_variables= ["location", "name", "language"],
    output_variables= ["ogstory", "translated"],
    verbose=True
)

In [119]:
response = overall_chain({"location": "Mumbai", "name": "Karina", "language": "Hindi"})
print(f"English Version ==> {response['ogstory']} \n \n")
print(f"Translated Version ==> {response['translated']} \n \n")



[1m> Entering new SequentialChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m 
 As a children's book writer, please come up with a simple and short (90 words)
 lullaby based on the location
 Mumbai
 and the main character Karina
 
 STORY:
[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m
English Version ==> 
In Mumbai's bustling streets,
Lives a little girl named Karina.
She loves to dance and sing,
But at night, she needs some rest.

So her mother sings a lullaby,
Softly to her daughter's ears.
The city's noises fade away,
As Karina drifts off to sleep.

The stars above shine so bright,
As the moon watches over Mumbai.
Karina dreams of adventures,
In this magical city of lights.

So sleep now, my dear Karina,
In the heart of Mumbai's embrace.
Tomorrow is a new day,
Full of wonders and grace.

Goodnight, my little one,
Sleep tight in Mumbai's embrace. 
 

Translated Version ==> मुंबई की भीड़भाड़ वाली सड़कों में,
एक छोटी सी 

### Router Chain

In [120]:
from langchain.chains.router.multi_prompt_prompt import MULTI_PROMPT_ROUTER_TEMPLATE
from langchain.chains.router.llm_router import LLMRouterChain, RouterOutputParser
from langchain.chains.router import MultiPromptChain

In [121]:
biology_template = """You are a very smart biology professor. 
You are great at answering questions about biology in a concise and easy to understand manner. 
When you don't know the answer to a question you admit that you don't know.

Here is a question:
{input}"""


math_template = """You are a very good mathematician. You are great at answering math questions. 
You are so good because you are able to break down hard problems into their component parts, 
answer the component parts, and then put them together to answer the broader question.

Here is a question:
{input}"""

astronomy_template = """You are a very good astronomer. You are great at answering astronomy questions. 
You are so good because you are able to break down hard problems into their component parts, 
answer the component parts, and then put them together to answer the broader question.

Here is a question:
{input}"""

travel_agent_template = """You are a very good travel agent with a large amount
of knowledge when it comes to getting people the best deals and recommendations
for travel, vacations, flights and world's best destinations for vacation. 
You are great at answering travel, vacation, flights, transportation, tourist guides questions. 
You are so good because you are able to break down hard problems into their component parts, 
answer the component parts, and then put them together to answer the broader question.

Here is a question:
{input}"""

In [122]:
prompt_infos = [
    {
        "name": "Biology",
        "description": "Good for answering biology related questions",
        "prompt_template": biology_template,
    },
    {
        "name": "math",
        "description": "Good for answering math related questions",
        "prompt_template": math_template,
    },
    {
        "name": "astronomy",
        "description": "Good for answering astronomy questions",
        "prompt_template": astronomy_template,
    },
    {
        "name": "travel_agent",
        "description": "Good for answering travel, tourism and vacation questions",
        "prompt_template": travel_agent_template,
    },
]

In [123]:
destination_chains = {}
for info in prompt_infos:
    name = info["name"]
    prompt_template = info["prompt_template"]
    prompt = ChatPromptTemplate.from_template(template=prompt_template)
    chain = LLMChain(llm=chat, prompt = prompt)
    destination_chains[name] = chain

In [124]:
default_prompt = ChatPromptTemplate.from_template("{input}")
default_chain = LLMChain(llm=chat, prompt=default_prompt)

In [125]:
destinations = [f"{p['name']}: {p['description']}" for p in prompt_infos]
destinations_str =  "\n".join(destinations)

In [126]:
router_template = MULTI_PROMPT_ROUTER_TEMPLATE.format(destinations=destinations_str)

In [127]:
router_prompt = PromptTemplate(
    template = router_template,
    input_variables=["input"],
    output_parser=RouterOutputParser()
)

In [128]:
router_chain = LLMRouterChain.from_llm(
    llm=chat,
    prompt=router_prompt,
)

In [129]:
chain = MultiPromptChain(
    router_chain=router_chain,
    destination_chains=destination_chains,
    default_chain=default_chain,
    verbose=True
)

In [130]:
response = chain.run("How old as the stars?")
print(response)



[1m> Entering new MultiPromptChain chain...[0m
astronomy: {'input': 'How old are the stars?'}
[1m> Finished chain.[0m
Stars can vary greatly in age depending on their size and type. 

For example, smaller stars like red dwarfs can have lifespans of tens to hundreds of billions of years, while larger stars like blue giants may only live for a few million years. 

On average, most stars in our galaxy are thought to be around 10 billion years old, as the Milky Way itself is estimated to be around 13.6 billion years old. 

However, there are also much older stars known as Population II stars, which are believed to be remnants from the early universe and can be as old as 13.8 billion years. 

So, in general, stars can range in age from a few million years to over 13 billion years old.


In [131]:
response = chain.run("I need to go to Kenya for vacation, a family of four. Can you help me plan this trip?")
print(response)



[1m> Entering new MultiPromptChain chain...[0m
travel_agent: {'input': 'I need to plan a vacation to Kenya for a family of four. Can you help me with this trip?'}
[1m> Finished chain.[0m
Of course! I would be happy to help you plan a vacation to Kenya for your family of four. Kenya is a beautiful country with so much to offer in terms of wildlife, culture, and natural beauty.

First, I would recommend looking into the best time to visit Kenya based on your preferences. The dry season from June to October is typically the best time for wildlife viewing, while the wet season from November to May is great for bird watching and lush landscapes.

Next, I would suggest considering what activities and attractions you would like to experience in Kenya. From going on safari in the Maasai Mara or Amboseli National Park, to visiting the coastal city of Mombasa or exploring the Great Rift Valley, there are so many options to choose from.

In terms of accommodations, there are a variety of op

In [132]:
response = chain.run("Tell me about Attaca by Seventeen")
print(response)



[1m> Entering new MultiPromptChain chain...[0m
None: {'input': 'Tell me about Attaca by Seventeen'}
[1m> Finished chain.[0m
Attaca is a song by the South Korean boy group Seventeen. It is a track from their seventh mini album "Heng:garæ," which was released on June 22, 2020. The song is a high-energy dance track with a catchy chorus and powerful rap verses. The lyrics talk about breaking down barriers and pushing forward with determination and strength. The members of Seventeen showcase their impressive vocal and rap skills in this song, making it a fan favorite among their discography. Attaca has been praised for its dynamic production and energetic performance, solidifying Seventeen's reputation as one of the top K-pop groups in the industry.


## PDF Loading & Splitting

### PDF Loading

In [133]:
from langchain.document_loaders import PyPDFLoader

In [135]:
loader = PyPDFLoader("./data/react-paper.pdf")
pages = loader.load()

In [136]:
print(len(pages))

33


In [137]:
page = pages[0]
print(page.page_content[0:700])
print(page.metadata)

Published as a conference paper at ICLR 2023
REAC T: S YNERGIZING REASONING AND ACTING IN
LANGUAGE MODELS
Shunyu Yao∗*,1, Jeffrey Zhao2, Dian Yu2, Nan Du2, Izhak Shafran2, Karthik Narasimhan1, Yuan Cao2
1Department of Computer Science, Princeton University
2Google Research, Brain team
1{shunyuy,karthikn}@princeton.edu
2{jeffreyzhao,dianyu,dunan,izhak,yuancao}@google.com
ABSTRACT
While large language models (LLMs) have demonstrated impressive performance
across tasks in language understanding and interactive decision making, their
abilities for reasoning (e.g. chain-of-thought prompting) and acting (e.g. action
plan generation) have primarily been studied as separate topics. In this paper, we
{'source': './data/react-paper.pdf', 'page': 0}


### Document Splitting

In [138]:
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter

In [139]:
with open("./data/i-have-a-dream.txt") as paper:
    speech = paper.read()

In [144]:
text_splitter = CharacterTextSplitter(
    chunk_size = 50,
    chunk_overlap = 20,
    length_function = len,
    add_start_index=True
)

In [145]:
texts = text_splitter.create_documents([speech])
print(texts[0])

page_content='﻿As far as black Americans were concerned, the nation’s response to Brown was agonizingly slow, and neither state legislatures nor the Congress seemed willing to help their cause along. Finally, President John F. Kennedy recognized that only a strong civil rights bill would put teeth into the drive to secure equal protection of the laws for African Americans. On June 11, 1963, he proposed such a bill to Congress, asking for legislation that would provide “the kind of equality of treatment which we would want for ourselves.” Southern representatives in Congress managed to block the bill in committee, and civil rights leaders sought some way to build political momentum behind the measure.
A. Philip Randolph, a labor leader and longtime civil rights activist, called for a massive march on Washington to dramatize the issue. He welcomed the participation of white groups as well as black in order to demonstrate the multiracial backing for civil rights. The various elements of t

In [146]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 50,
    chunk_overlap = 20,
    length_function = len,
    add_start_index=True
)

In [148]:
docs = text_splitter.create_documents([speech])

In [149]:
print(len(docs))
print(f"Doc 1: {docs[0]}")
print(f"Doc 2: {docs[1]}")

353
Doc 1: page_content='﻿As far as black Americans were concerned, the' metadata={'start_index': 0}
Doc 2: page_content='were concerned, the nation’s response to Brown' metadata={'start_index': 27}


In [150]:
s1 = "abcdefghijklmnopqrstuvwxyz"
s = "Python can be easy to pick up whether you're a professional or a beginner."

text = text_splitter.split_text(s)
print(text)

["Python can be easy to pick up whether you're a", "up whether you're a professional or a beginner."]


## Vector Embeddings

In [155]:
from langchain.embeddings import OpenAIEmbeddings
embeddings = OpenAIEmbeddings()

In [152]:
#1. Load the file
loader = PyPDFLoader("./data/react-paper.pdf")
docs = loader.load()

In [154]:
#2. Split the document into chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 1500,
    chunk_overlap = 150
)
splits = text_splitter.split_documents(docs)
print(len(splits))

96


In [157]:
text1 = "Cat"
text2 = "Kpop"
text3 = "Kitten"

embed1 = embeddings.embed_query(text1)
embed2 = embeddings.embed_query(text2)
embed3 = embeddings.embed_query(text3)

print(f"Embed1 == {embed1}")

Embed1 == [-0.008174207879591734, -0.007511803310590743, -0.00995655437174355, -0.024788951157780095, -0.012790553094547429, 0.006654775143594856, -0.0015151649503578363, -0.03783217392596492, -0.014422662356334227, -0.026250339680779597, 0.017154227704543168, 0.046327340706031526, 0.0035646922858117093, 0.004240754467349556, -0.032287098019987186, -0.004592443287070655, 0.03955306057962428, 0.005261676778755394, 0.00789422251521935, -0.015501631209043845, -0.023723641081760536, 0.0053197228543978925, 0.014873371253461594, -0.012141805905252653, -0.006781109980413554, 0.004384161843708263, 0.01238764712186758, -0.013364181668659696, 0.005620194986821522, 0.0009731200272425828, 0.009881435756561042, -0.01653962559432841, -0.017796145505492914, -0.03922527229080438, -0.02947358559957327, -0.0003316290630643998, 0.011465743956545447, -0.007361567477209569, 0.02073257760289173, -0.01391732300905944, 0.009451215024468763, 0.009806318072701095, -0.013371010125682163, 0.011916451922350252, -0

In [159]:
import numpy as np
similarity1 = np.dot(embed1, embed3)
similarity2 = np.dot(embed1, embed2)
print(f"Similarity1 %: {similarity1*100}")
print(f"Similarity2 %: {similarity2*100}")

Similarity1 %: 88.31033101464733
Similarity2 %: 75.74192695239827


### with ChromaDB

In [160]:
from langchain.vectorstores import Chroma
persist_directory = "./data/db/chroma"

In [162]:
vectorstore = Chroma.from_documents(
    documents = splits,
    embedding = embeddings,
    persist_directory=persist_directory
)

In [163]:
print(vectorstore._collection.count())

96


In [164]:
query = "what do they say about ReAct prompting method?"

In [165]:
docs_resp = vectorstore.similarity_search(query=query, k=3)

In [167]:
print(len(docs_resp))
print(docs_resp[0].page_content)
vectorstore.persist()

3
Published as a conference paper at ICLR 2023
appear sparsely in the most relevant positions of a trajectory, so we let the language model decide the
asynchronous occurrence of thoughts and actions for itself.
Since decision making and reasoning capabilities are integrated into a large language model, ReAct
enjoys several unique features: A) Intuitive and easy to design : Designing ReAct prompts is
straightforward as human annotators just type down their thoughts in language on top of their actions
taken. No ad-hoc format choice, thought design, or example selection is used in this paper. We detail
prompt design for each task in Sections 3 and 4. B) General and ﬂexible : Due to the ﬂexible thought
space and thought-action occurrence format, ReAct works for diverse tasks with distinct action
spaces and reasoning needs, including but not limited to QA, fact veriﬁcation, text game, and web
navigation. C) Performant and robust :ReAct shows strong generalization to new task instances
while

## Retrievers

In [168]:
vector_store = Chroma(persist_directory=persist_directory,
                      embedding_function=embeddings)

In [169]:
retriever = vector_store.as_retriever(search_kwargs={"k":2})
docs = retriever.get_relevant_documents("Tell me more about ReAct Prompting")
print(docs[0].page_content)

Published as a conference paper at ICLR 2023
appear sparsely in the most relevant positions of a trajectory, so we let the language model decide the
asynchronous occurrence of thoughts and actions for itself.
Since decision making and reasoning capabilities are integrated into a large language model, ReAct
enjoys several unique features: A) Intuitive and easy to design : Designing ReAct prompts is
straightforward as human annotators just type down their thoughts in language on top of their actions
taken. No ad-hoc format choice, thought design, or example selection is used in this paper. We detail
prompt design for each task in Sections 3 and 4. B) General and ﬂexible : Due to the ﬂexible thought
space and thought-action occurrence format, ReAct works for diverse tasks with distinct action
spaces and reasoning needs, including but not limited to QA, fact veriﬁcation, text game, and web
navigation. C) Performant and robust :ReAct shows strong generalization to new task instances
while l

### RetrievalQA

In [170]:
from langchain.chains import RetrievalQA

In [171]:
qa_chain = RetrievalQA.from_chain_type(
    llm = llm,
    chain_type = "stuff",
    retriever = retriever,
    verbose = True,
    return_source_documents=True
)

In [172]:
def process_llm_response(llm_response):
    print(llm_response['result'])
    print('\n\nSources:')
    for source in llm_response["source_documents"]:
        print(source.metadata['source'])

In [173]:
query = "tell me more about ReAct prompting"
llm_response = qa_chain(query)
print(process_llm_response(llm_response=llm_response))



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m
 ReAct prompting is a novel paradigm that combines reasoning and acting in language models for general task solving. It allows human annotators to type down their thoughts in language on top of their actions taken, making it easy and intuitive to design. It is also flexible and general, working for diverse tasks with distinct action spaces and reasoning needs. ReAct is performant and robust, consistently outperforming baselines with only reasoning or acting across different domains. Additionally, ReAct promises an interpretable sequential decision making and reasoning process, where humans can easily inspect reasoning and factual correctness. It is limited in its support of reasoning and acting behaviors, but can potentially improve with additional training data and by combining with other paradigms like reinforcement learning.


Sources:
./data/react-paper.pdf
./data/react-paper.pdf
None


## Agents

### Simple Agents + Tools

In [175]:
from langchain.agents import Tool, initialize_agent, load_tools
from langchain.chains import LLMMathChain

In [174]:
llm = OpenAI(temperature=0.0)

In [178]:
llm_math = LLMMathChain.from_llm(llm=llm)
math_tool = Tool(
    name="Calculator",
    func = llm_math.run,
    description = "useful for when you need to answer questions related to Math."
)

tools = [math_tool]
print(tools[0].name, tools[0].description)


Calculator useful for when you need to answer questions related to Math.


In [181]:
agent = initialize_agent(
    agent = "zero-shot-react-description",
    tools =tools,
    llm=llm,
    verbose=True,
    max_iteration=3
)

In [182]:
print(agent("what is 3.1^2.1"))



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m I should use a calculator to solve this problem
Action: Calculator
Action Input: 3.1^2.1[0m
Observation: [36;1m[1;3mAnswer: 10.761171606099687[0m
Thought:[32;1m[1;3m I now know the final answer
Final Answer: 10.761171606099687[0m

[1m> Finished chain.[0m
{'input': 'what is 3.1^2.1', 'output': '10.761171606099687'}


In [183]:
tools = load_tools(
    ['llm-math'],
    llm=llm
)

In [184]:
print(tools[0].name, tools[0].description)

Calculator Useful for when you need to answer questions about math.


In [185]:
query = "If James is currently 45 years old, how old will he be in 50 years? \
    If he has 4 kids and adopted 7 more, how many children does he have?"
result = agent(query)
print(result['output'])



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m We need to use the calculator to solve this problem.
Action: Calculator
Action Input: 45 + 50[0m
Observation: [36;1m[1;3mAnswer: 95[0m
Thought:[32;1m[1;3m Now we need to add the number of kids he has.
Action: Calculator
Action Input: 4 + 7[0m
Observation: [36;1m[1;3mAnswer: 11[0m
Thought:[32;1m[1;3m I now know the final answer.
Final Answer: James will be 95 years old in 50 years and he will have 11 children.[0m

[1m> Finished chain.[0m
James will be 95 years old in 50 years and he will have 11 children.


### Two Tools

In [186]:
prompt = PromptTemplate(
    input_variables=["query"],
    template="{query}"
)

llm_chain = LLMChain(llm=llm, prompt=prompt)

In [187]:
llm_tool = Tool(
    name="Language Model",
    func = llm_chain.run,
    description = "Use this tool for general queries and logic"
)

In [188]:
tools = load_tools(
    ['llm-math'],
    llm=llm
)

tools.append(llm_tool)

In [189]:
agent = initialize_agent(
    agent="zero-shot-react-description",
    tools=tools,
    llm=llm,
    verbose=True,
    max_iterations=3
)

In [190]:
query = "if I have 54 eggs and Mary has 10, and 5 more people have 12 eggs each.  \
    How many eggs to we have in total?"
    
print(agent.agent.llm_chain.prompt.template)

result = agent(query)
print(result['output'])

Answer the following questions as best you can. You have access to the following tools:

Calculator(*args: Any, callbacks: Union[List[langchain_core.callbacks.base.BaseCallbackHandler], langchain_core.callbacks.base.BaseCallbackManager, NoneType] = None, tags: Optional[List[str]] = None, metadata: Optional[Dict[str, Any]] = None, **kwargs: Any) -> Any - Useful for when you need to answer questions about math.
Language Model(*args: Any, callbacks: Union[List[langchain_core.callbacks.base.BaseCallbackHandler], langchain_core.callbacks.base.BaseCallbackManager, NoneType] = None, tags: Optional[List[str]] = None, metadata: Optional[Dict[str, Any]] = None, **kwargs: Any) -> Any - Use this tool for general queries and logic

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [Calculator, Language Model]
Action Input: the input to the action
Observation: the result of the action

In [191]:
query = "What's the capital of China?"
    
print(agent.agent.llm_chain.prompt.template)

result = agent(query)
print(result['output'])

Answer the following questions as best you can. You have access to the following tools:

Calculator(*args: Any, callbacks: Union[List[langchain_core.callbacks.base.BaseCallbackHandler], langchain_core.callbacks.base.BaseCallbackManager, NoneType] = None, tags: Optional[List[str]] = None, metadata: Optional[Dict[str, Any]] = None, **kwargs: Any) -> Any - Useful for when you need to answer questions about math.
Language Model(*args: Any, callbacks: Union[List[langchain_core.callbacks.base.BaseCallbackHandler], langchain_core.callbacks.base.BaseCallbackManager, NoneType] = None, tags: Optional[List[str]] = None, metadata: Optional[Dict[str, Any]] = None, **kwargs: Any) -> Any - Use this tool for general queries and logic

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [Calculator, Language Model]
Action Input: the input to the action
Observation: the result of the action

### Conversational Agent

In [192]:
from langchain.memory import ConversationBufferMemory

In [193]:
memory = ConversationBufferMemory(memory_key = "chat_history")

In [194]:
conversational_agent = initialize_agent(
    agent = "conversational-react-description",
    tools=tools,
    llm=llm,
    verbose=True,
    max_iterations=3,
    memory=memory
)

In [195]:
query = "How old is a person born in 1917 in 2023"
    
query_two = "How old would that person be if their age is multiplied by 100?"
    
print(conversational_agent.agent.llm_chain.prompt.template)

result = conversational_agent(query)
results = conversational_agent(query_two)
# print(result['output'])

Assistant is a large language model trained by OpenAI.

Assistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.

Assistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. Additionally, Assistant is able to generate its own text based on the input it receives, allowing it to engage in discussions and provide explanations and descriptions on a wide range of topics.

Overall, Assistant is a powerful tool that can help with a wide range of tasks 

### Docstore

In [196]:
from langchain import Wikipedia
from langchain.agents.react.base import DocstoreExplorer

In [198]:
docstore = DocstoreExplorer(Wikipedia())

In [199]:
tools = [
    Tool(
        name="Search",
        func=docstore.search,
        description="search wikipedia"
    ),
    Tool(
        name="Lookup",
        func=docstore.lookup,
        description="lookup a term in wikipedia"
    )
]

In [200]:
docstore_agent = initialize_agent(
    tools,
    llm,
    agent="react-docstore",
    verbose=True,
    max_iterations=4
)

In [202]:
query = "What was Seventeen's latest single?"
result = docstore_agent.run(query)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to search Seventeen and find their latest single.
Action: Search[Seventeen][0m



  lis = BeautifulSoup(html).find_all('li')



Observation: [36;1m[1;3mCould not find [Seventeen]. Similar: ['17', 'At Seventeen', 'Seventeen Seconds', 'Seventeen (South Korean band)', 'Going Seventeen', 'Seventeen discography', 'Edge of Seventeen', 'The Edge of Seventeen', 'Going Seventeen (web series)', 'List of Seventeen live performances'][0m
Thought:[32;1m[1;3m Seventeen is a South Korean band, so I need to search Seventeen (South Korean band) and find their latest single.
Action: Search[Seventeen (South Korean band)][0m
Observation: [36;1m[1;3mSeventeen (Korean: 세븐틴; RR: Sebeuntin; stylized in all caps or as SVT) is a South Korean boy band formed by Pledis Entertainment. The group consists of thirteen members: S.Coups, Jeonghan, Joshua, Jun, Hoshi, Wonwoo, Woozi, DK, Mingyu, The8, Seungkwan, Vernon, and Dino. Seventeen has won a large audience since its debut and has grown into an internationally recognized K-pop group with signature music and performances.
The group debuted on May 26, 2015, with the extended play (E

### More agent work

In [203]:
import getpass
from langchain_core.output_parsers import StrOutputParser

In [204]:
model = ChatOpenAI(model="gpt-3.5-turbo")
prompt = ChatPromptTemplate.from_template(template="Create a social media post about {topic}")
parser = StrOutputParser()

chain = prompt | model | parser

In [205]:
chain.invoke({"topic": "generative AI"})

"Just discovered the fascinating world of generative AI! This technology is truly revolutionary, allowing machines to create art, music, and even write stories. The possibilities are endless and I can't wait to see how this technology continues to evolve. #GenerativeAI #ArtificialIntelligence #Innovation"

In [206]:
from langchain import hub
from langchain.agents import create_openai_functions_agent
from langchain.agents import AgentExecutor
from langchain.tools import DuckDuckGoSearchRun

In [209]:
llm_math_chain = LLMMathChain.from_llm(llm=model, verbose=True)
math_tool = Tool.from_function(
    func = llm_math_chain.run,
    name="Calculator",
    description="useful for when you need to enter questions about math. Only for math questions and nothing else."
)

search_tool = DuckDuckGoSearchRun()

tools = [math_tool, search_tool]

In [210]:
prompt = hub.pull("hwchase17/openai-functions-agent")
agent = create_openai_functions_agent(llm=model, tools=tools, prompt=prompt)
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)
result = agent_executor.invoke({"input": "what is the tallest building in the world and how many times would you need to stack this building to reach the moon?"})
print(result)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThe tallest building in the world is the Burj Khalifa in Dubai, United Arab Emirates, which stands at a height of 828 meters (2,717 feet).

The average distance from the Earth to the moon is about 384,400 kilometers (238,855 miles). To calculate how many times you would need to stack the Burj Khalifa to reach the moon, we can use the following calculation:

1 kilometer = 1000 meters
384,400 kilometers * 1000 = 384,400,000 meters

384,400,000 meters / 828 meters (height of the Burj Khalifa) = approximately 464,251.21

Therefore, you would need to stack the Burj Khalifa approximately 464,251 times to reach the moon.[0m

[1m> Finished chain.[0m
{'input': 'what is the tallest building in the world and how many times would you need to stack this building to reach the moon?', 'output': 'The tallest building in the world is the Burj Khalifa in Dubai, United Arab Emirates, which stands at a height of 828 meters (2,717 feet).\n\nTh

## Document Chain Types  

In [213]:
import textwrap
from langchain import PromptTemplate, LLMChain
from langchain.chains.mapreduce import MapReduceChain
from langchain.prompts import PromptTemplate
from langchain.chains.summarize import load_summarize_chain
from langchain.docstore.document import Document
from langchain.text_splitter import CharacterTextSplitter

In [215]:
model = OpenAI(model_name="gpt-3.5-turbo-instruct", temperature=0.5)

### Stuff Chain

In [223]:
from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader("./data/Software-Engineer-CV.pdf")
docs = loader.load()

In [224]:
cnt = 0
for doc in docs:
    cnt = cnt+1
    print("Document No",cnt)
    print(doc.page_content.strip())

Document No 1
Name: Sunil Sharma                              Mobile: +91 9898989898  
 
Designation: Senior Technical Lead                      Mail Id: sunil.sharma @gmail.com  
 
Objective:   
Experienced S enior Software Developer with 1 2 years of hands -on expertise in 
designing, developing, and delivering high -quality software solutions.  
Proven track record of successfully leading and collaborating with cross -functional 
teams to deliver projects on time and within budget. Seeking to leverage my technical 
skills and leadership experience to contribute to innovative software projects.  
Education:  
Bachelor in Engineering in Electronics and Communication  
K.L.N.  College of Information Technology, Madurai - 2007  
Professional Summary:  
• 12 years  of experience in Software Development in C on  Linux Environment . 
• Over 5 years of programming  experience as an Oracle PL/SQL  developer in 
Analysis, Design and Implementation of business application using Oracle DBMS . 


In [225]:
prompt_template = """
You are given a resume as the below text.
________
{text}
________
Question: Please respond with the Key Skills and Experience summary of the person.
Key Skills:
Experience Summary:
"""

In [227]:
prompt = PromptTemplate(template=prompt_template, input_variables=["text"])

stuff_chain = load_summarize_chain(model, chain_type="stuff", prompt=prompt)
output_summary = stuff_chain.run(docs)

In [228]:
print(output_summary)


Key Skills: 
- 12 years of experience in software development on Linux environment
- Programming languages: PL/SQL, C, Pro C, Shell scripting
- Database management: Oracle
- Operating system: Linux
- Version control: GIT, TFS, CVS
- Tools: PL/SQL developer, JIRA, Confluence, Visual studio, GDB, Mercurial, Spirent Test Centre (STC), Wireshark
- Leadership and team collaboration

Experience Summary:
- 12 years of experience in software development
- Proven track record of successfully leading and collaborating with cross-functional teams
- Expertise in all stages of Software Development Life Cycle
- Experience with Oracle PL/SQL development and analysis, design, and implementation of business applications
- Proficient in using table functions, indexes, table partitioning, collections, analytical functions, and materialized views
- Skilled in creating tables, views, constraints, and indexes
- Developed complex database objects such as packages, procedures, functions, and triggers using P

### Refine Chain

In [230]:
refine_Chain = load_summarize_chain(model, chain_type="refine")
print(refine_Chain.refine_llm_chain.prompt.template)

Your job is to produce a final summary.
We have provided an existing summary up to a certain point: {existing_answer}
We have the opportunity to refine the existing summary (only if needed) with some more context below.
------------
{text}
------------
Given the new context, refine the original summary.
If the context isn't useful, return the original summary.


In [231]:
output_summary = refine_Chain.run(docs)
output_summary

"\n\nSunil Sharma is a highly experienced Senior Technical Lead with a Bachelor's degree in Engineering and 12 years of experience in software development. He has a strong track record of successfully leading cross-functional teams and delivering projects on time and within budget. Sunil is skilled in all stages of the software development life cycle and has expertise in Oracle PL/SQL development. He has worked in various organizations, including HCL Technologies, Nokia Networks, and Plintron Global Technology Solutions Pvt. Ltd. Sunil's skills include programming languages such as PL/SQL, C, Pro C, and shell scripting, as well as database management using Oracle and operating systems like Linux. He is also proficient in version control tools such as GIT, TFS, and CVS, and has experience with various software development tools. Sunil has experience in leading offshore development activities and handling a PL SQL development team. He is an expert in solving complex SQL problems and crea

### Map-Reduce Chain

In [232]:
map_reduce_chain = load_summarize_chain(model, chain_type="map_reduce", verbose=True)

In [233]:
print(map_reduce_chain.llm_chain.prompt.template)

Write a concise summary of the following:


"{text}"


CONCISE SUMMARY:


In [234]:
output_summary = map_reduce_chain.run(docs)
print(output_summary)



[1m> Entering new MapReduceDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mWrite a concise summary of the following:


"        
                                                 
Name: Sunil Sharma                              Mobile: +91 9898989898  
 
Designation: Senior Technical Lead                      Mail Id: sunil.sharma @gmail.com  
 
Objective:   
Experienced S enior Software Developer with 1 2 years of hands -on expertise in 
designing, developing, and delivering high -quality software solutions.  
Proven track record of successfully leading and collaborating with cross -functional 
teams to deliver projects on time and within budget. Seeking to leverage my technical 
skills and leadership experience to contribute to innovative software projects.  
Education:  
Bachelor in Engineering in Electronics and Communication  
K.L.N.  College of Information Technology, Madurai - 2007  
Professional Summary:  
• 12 year