# **Install Langchain**

https://python.langchain.com/docs/modules/

In [None]:
!pip install langchain



#**Simple Model I/O**

In [None]:
!pip install langchain-openai



In [11]:
OPENAI_API_KEY="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"

***We can then initialize the model:***

In [12]:
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(api_key=OPENAI_API_KEY)

**Ask**

In [13]:
llm.invoke("Why earth is round and not flat?")



In [14]:
llm.invoke("Why earth is round and not flat?")


**Use Prompt templates**

In [15]:
from langchain.prompts import PromptTemplate

prompt_template = PromptTemplate.from_template(
    "Tell me a {adjective} story about {content} in {lines} lines."
)
prompt = prompt_template.format(adjective="funny", content="cars", lines="3")

In [16]:
print(prompt)

Tell me a funny story about cars in 3 lines.


In [17]:
llm.invoke(prompt)

**Use ChatPromptTemplate**

In [19]:
from langchain_core.prompts import ChatPromptTemplate

chat_template = ChatPromptTemplate.from_template(
     "Tell me a {adjective} story about {content} in {lines} lines."
)

chat_template_prompt = chat_template.format_messages(adjective="funny", content="cars", lines="3")

print(chat_template_prompt)

[HumanMessage(content='Tell me a funny story about cars in 3 lines.')]


In [None]:
llm.invoke(chat_template_prompt)

In [20]:
from langchain_core.prompts import ChatPromptTemplate

chat_template = ChatPromptTemplate.from_messages(
    [
        ("system", "You are an rude AI coder. Your name is {name}."),
        ("human", "Write a python code for adding two numbers"),
        ("ai", "I will not write, get some one to write it. I am not a free"),
        ("human", "{user_input}"),
    ]
)

messages = chat_template.format_messages(name="ConfusedCoder", user_input="Write program for complex number")

print(messages)

[SystemMessage(content='You are an rude AI coder. Your name is ConfusedCoder.'), HumanMessage(content='Write a python code for adding two numbers'), AIMessage(content='I will not write, get some one to write it. I am not a free'), HumanMessage(content='Write program for complex number')]


In [None]:
llm.invoke(messages)

# **Simpl Prompt to Model Chain**

In [21]:
chain = chat_template | llm

In [22]:
chain.invoke({"name":"ConfusedCoder", "user_input":"Write a python program to add complext numbers"})

AuthenticationError: Error code: 401 - {'error': {'message': 'Incorrect API key provided: sk-kcCUj***************************************hQ2P. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}

**Use output parser to format it**

In [None]:
from langchain_core.output_parsers import StrOutputParser

output_parser = StrOutputParser()

In [None]:
chain = chat_template | llm | output_parser
# chain.invoke({"name":"ConfusedCoder", "user_input":"Write a python program to add complext numbers"})


# **Cache**

In [116]:
from langchain.globals import set_llm_cache


In [117]:
%%time
from langchain.cache import InMemoryCache

set_llm_cache(InMemoryCache())

# The first time, it is not yet in cache, so it should take longer
llm.invoke("Whats up in the sky")

CPU times: user 51.7 ms, sys: 3.87 ms, total: 55.6 ms
Wall time: 1.05 s


AIMessage(content='The sky is filled with clouds, birds, airplanes, and possibly the sun or moon depending on the time of day. It is constantly changing and can be a beautiful sight to behold.', response_metadata={'token_usage': {'completion_tokens': 37, 'prompt_tokens': 12, 'total_tokens': 49}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': 'fp_b28b39ffa8', 'finish_reason': 'stop', 'logprobs': None}, id='run-2e707e94-1a68-43b7-a3e9-6579c1571c71-0')

In [119]:
%%time
# The second time it is, so it goes faster
llm.invoke("Whats up in the sky")

CPU times: user 1.52 ms, sys: 0 ns, total: 1.52 ms
Wall time: 1.53 ms


AIMessage(content='The sky is filled with clouds, birds, airplanes, and possibly the sun or moon depending on the time of day. It is constantly changing and can be a beautiful sight to behold.', response_metadata={'token_usage': {'completion_tokens': 37, 'prompt_tokens': 12, 'total_tokens': 49}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': 'fp_b28b39ffa8', 'finish_reason': 'stop', 'logprobs': None}, id='run-2e707e94-1a68-43b7-a3e9-6579c1571c71-0')

Semantic Caching

https://python.langchain.com/docs/integrations/llms/llm_caching/

# **Tracking Usage**

In [120]:
from langchain_community.callbacks import get_openai_callback

In [121]:
with get_openai_callback() as cb:
    result = llm.invoke("Which is the tallest building in the world?")
    print(cb)

Tokens Used: 51
	Prompt Tokens: 16
	Completion Tokens: 35
Successful Requests: 1
Total Cost (USD): $9.400000000000001e-05


# **Conversational Chain with  Memory**

In [19]:
from langchain.chains import ConversationChain


In [20]:
conversation = ConversationChain(llm=llm)



In [None]:
print(conversation.prompt.template)



In [22]:
conversation.invoke("Lets make something people want!")

{'input': 'Lets make something people want!',
 'history': '',
 'response': "That's a great idea! To determine what people want, we could conduct market research, analyze trends, and gather feedback from potential customers. We could also look at popular products and services in the market to get inspiration for our own creation. What kind of product or service do you think people would be interested in?"}

In [23]:
conversation.invoke('i want to create a chatbot to improve customer experience. By the way which famous person said the line above')

{'input': 'i want to create a chatbot to improve customer experience. By the way which famous person said the line above',
 'history': "Human: Lets make something people want!\nAI: That's a great idea! To determine what people want, we could conduct market research, analyze trends, and gather feedback from potential customers. We could also look at popular products and services in the market to get inspiration for our own creation. What kind of product or service do you think people would be interested in?",
 'response': 'Creating a chatbot to improve customer experience is a fantastic idea! As for the famous person who said the line "Let\'s make something people want," that quote is often attributed to Paul Graham, the co-founder of Y Combinator, a well-known startup accelerator. He emphasized the importance of creating products that solve real problems for people, rather than focusing solely on making money.'}

In [86]:
from langchain.chains.conversation.memory import ConversationBufferMemory


In [101]:
conversation_with_buf_memory = ConversationChain(
    llm=llm,
    memory=ConversationBufferMemory()
)

In [None]:
conversation_with_buf_memory.invoke("Lets make something people want!")



In [None]:
conversation_with_buf_memory.invoke('i want to create a chatbot to improve customer experience. By the way which famous person said the line above')

In [None]:
print(conversation_with_buf_memory.memory.buffer)



In [105]:
from langchain.chains.conversation.memory import ConversationSummaryMemory

conversation_with_summary_memory = ConversationChain(
	llm=llm,
	memory=ConversationSummaryMemory(llm=llm)
)

In [None]:
print(conversation_with_summary_memory.memory.prompt.template)



In [None]:
conversation_with_summary_memory.invoke("Lets make something people want!")


In [None]:
conversation_with_summary_memory.invoke('i want to create a chatbot to improve customer experience. By the way which famous person said the line above')

In [110]:
print(conversation_with_summary_memory.memory.buffer)


The human suggests creating something people want and the AI enthusiastically agrees, offering to use its data and information to help brainstorm. The human wants to create a chatbot to improve customer experience and asks about the origin of the famous quote. The AI offers to help with creating the chatbot and offers to look up the origin of the quote.


In [24]:
chat_history = [HumanMessage(content="Can i run llm on apple silicon?"), AIMessage(content="Yes, Absolutely")]
response = new_retrieval_chain.invoke({
    "chat_history": chat_history,
    "input": "Tell me how"
})

NameError: name 'HumanMessage' is not defined

In [78]:
print(response["answer"])

To run LLM (Large Language Model) on Apple silicon devices, such as Apple laptops with GPUs, you can use the Ollama framework. Ollama automatically utilizes the GPU on Apple devices like Apple Silicon GPUs. You can also configure the llama.cpp Python bindings to use the GPU via Metal, which is a graphics and compute API created by Apple. Apple silicon GPU, Ollama, and llamafile will automatically leverage the GPU on Apple devices, making it easier for you to run LLMs locally. With the latest Mac M2 Max being 5-6 times faster than the M1 for inference due to larger GPU memory bandwidth, you can expect improved performance when running LLMs on Apple silicon devices.


# **Retrieval Chain**

**Loader First**

In [23]:
!pip install beautifulsoup4



In [24]:
from langchain_community.document_loaders import WebBaseLoader

In [25]:
loader = WebBaseLoader("https://python.langchain.com/docs/guides/development/local_llms/")

docs = loader.load()

print(docs)

[Document(page_content='\n\n\n\n\nRun LLMs locally | \uf8ffü¶úÔ∏è\uf8ffüîó LangChain\n\n\n\n\n\n\n\nSkip to main contentComponentsIntegrationsGuidesAPI ReferenceMorePeopleVersioningContributingTemplatesCookbooksTutorialsYouTube\uf8ffü¶úÔ∏è\uf8ffüîóLangSmithLangSmith DocsLangServe GitHubTemplates GitHubTemplates HubLangChain HubJS/TS Docs\uf8ffüí¨SearchDevelopmentDevelopmentDebuggingExtending LangChainRun LLMs locallyPydantic compatibilityProductionizationProductionizationDeploymentEvaluationFallbacksPrivacy & SafetyGuidesDevelopmentRun LLMs locallyOn this pageRun LLMs locallyUse case‚ÄãThe popularity of projects like\nPrivateGPT,\nllama.cpp,\nOllama,\nGPT4All,\nllamafile, and others\nunderscore the demand to run LLMs locally (on your own device).This has at least two important benefits:Privacy: Your data is not sent to a third party, and it is not\nsubject to the terms of service of a commercial serviceCost: There is no inference fee, which is important for\ntoken-intensive application

**Embeddings**

In [26]:
from langchain_openai import OpenAIEmbeddings
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

In [27]:
!pip install faiss-cpu

Collecting faiss-cpu
  Downloading faiss_cpu-1.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.0/27.0 MB[0m [31m43.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: faiss-cpu
Successfully installed faiss-cpu-1.8.0


In [None]:
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter


text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
documents = text_splitter.split_documents(docs)
vector = FAISS.from_documents(documents, embeddings)

In [None]:
print(documents)

In [None]:
from langchain.chains.combine_documents import create_stuff_documents_chain


In [57]:
prompt = ChatPromptTemplate.from_template("""Answer the following question based only on the provided context:

<context>
{context}
</context>

Question: {input}""")

document_chain = create_stuff_documents_chain(llm, prompt)

In [None]:
from langchain_core.documents import Document

document_chain.invoke({
    "input": "how can i install llm locally",
    "context": [Document(page_content="""Running an LLM locally requires a few things:

Open-source LLM: An open-source LLM that can be freely modified and shared
Inference: Ability to run this LLM on your device w/ acceptable latency
Open-source LLMs
Users can now gain access to a rapidly growing set of open-source LLMs.""")]
})

In [68]:
from langchain.chains import create_retrieval_chain

retriever = vector.as_retriever()
retrieval_chain = create_retrieval_chain(retriever, document_chain)

In [60]:
response = retrieval_chain.invoke({"input": "how can i install llm locally"})
print(response["answer"])

To install LLM locally, you can follow these steps:

1. Download an open-source LLM like PrivateGPT, llama.cpp, Ollama, GPT4All, etc.
2. Use a bash script to set up the LLM on your device. For example, you can download a llamafile from HuggingFace using the wget command, make the file executable using the chmod command, and start the model server with the file.

By following these steps, you can install and run an LLM locally on your own device.


**History aware Retriever**

In [63]:
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import MessagesPlaceholder

# First we need a prompt that we can pass into an LLM to generate this search query

prompt = ChatPromptTemplate.from_messages([
    MessagesPlaceholder(variable_name="chat_history"),
    ("user", "{input}"),
    ("user", "Given the above conversation, generate a search query to look up to get information relevant to the conversation")
])
history_aware_retriever_chain = create_history_aware_retriever(llm, retriever, prompt)

In [None]:
from langchain_core.messages import HumanMessage, AIMessage

chat_history = [HumanMessage(content="Can i run llm on apple silicon"), AIMessage(content="Yes, Absolutely")]
history_aware_retriever_chain.invoke({
    "chat_history": chat_history,
    "input": "Tell me how"
})

In [76]:
prompt = ChatPromptTemplate.from_messages([
    ("system", "Answer the user's questions based on the below context:\n\n{context}"),
    MessagesPlaceholder(variable_name="chat_history"),
    ("user", "{input}"),
])
document_chain = create_stuff_documents_chain(llm, prompt)

new_retrieval_chain = create_retrieval_chain(history_aware_retriever_chain, document_chain)