In [None]:
! pip install langchain-openai
! pip install beautifulsoup4 # scraper
! pip install faiss-cpu      # vector storage and retrieval

In [2]:
import os
from getpass import getpass

# Credentials must never be written into the notebook: anything committed here
# is readable by everyone with access to the file. A token that was previously
# hardcoded in this cell has to be treated as leaked and revoked.
#
# Each variable is taken from the environment when it is already set; otherwise
# the user is prompted for it without echoing the value.
#   OPENAI_API_KEY           -- needed by ChatOpenAI / OpenAIEmbeddings used below
#   HUGGINGFACEHUB_API_TOKEN -- the variable this cell originally populated
for _secret_name in ("OPENAI_API_KEY", "HUGGINGFACEHUB_API_TOKEN"):
    if not os.environ.get(_secret_name):
        os.environ[_secret_name] = getpass(f"{_secret_name}: ")

In [None]:
from langchain_community.llms import HuggingFaceEndpoint
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import FAISS

from langchain_openai import ChatOpenAI, OpenAIEmbeddings

from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.output_parsers import StrOutputParser
from langchain_core.documents import Document
from langchain_core.messages import HumanMessage, AIMessage

from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain, create_history_aware_retriever

from langchain.text_splitter import RecursiveCharacterTextSplitter 

Example with OpenAI LLM

In [None]:
# Chat model shared by every chain built in this notebook.
llm = ChatOpenAI()

# Direct call: a plain string is sent straight to the model.
# (Only the last expression of a cell is echoed; earlier calls still run.)
llm.invoke("what is docker and how is it useful for deployment")

# Prompt template: a fixed system instruction plus a user slot that is filled
# from the "input" variable. A template may declare several variables.
translator_messages = [
    ("system", "You are an English-French translator that returns whatever the user says in French."),
    ("user", "{input}"),
]
prompt = ChatPromptTemplate.from_messages(translator_messages)

# Compose prompt -> model into a chain and run it with a value for "input".
chain = prompt | llm
chain.invoke({"input": "i enjoy going to rock concerts"})
# NOTE the result is a message object, e.g.
#      AIMessage(content="j'aime aller aux concerts de rock")

# Append an output parser, which defines the structure of the chain's output.
output_parser = StrOutputParser()
chain = chain | output_parser

chain.invoke({"input": "my friend robert had a blue cat"})
# NOTE the result is now a plain string, e.g. "mon ami robert a un chat bleu"

In [None]:
# Address of the page to index. "url" is a placeholder: replace it with the
# real webpage before running this cell.
source_url = "url"

# Build the webpage loader, then fetch the page content as documents.
loader = WebBaseLoader(source_url)
docs = loader.load()

Question-asking from documents

In [None]:
# Embedding model used to vectorize document chunks for the vector store.
embeddings = OpenAIEmbeddings()

# Split the loaded documents into smaller chunks and index them in FAISS.
text_splitter = RecursiveCharacterTextSplitter()
# A list of Document objects is split with split_documents(); the splitter has
# no split() method, so the previous call raised AttributeError.
documents = text_splitter.split_documents(docs)
vectorstore = FAISS.from_documents(documents, embeddings)

# Prompt template for the retrieval chain: {context} receives the documents,
# {input} receives the user's question.
template = """ Answer the following quesion based only on the provided context:
<context>
{context}
</context>

Question: {input}
"""
prompt = ChatPromptTemplate.from_template(template)

# Document chain: fills the prompt with the given documents and calls the model.
document_chain = create_stuff_documents_chain(llm, prompt)
document_chain.invoke({
    "input": "What is langchain 0.1.0?",
    "context": [Document(page_content="langchain 0.1.0 is a new version of langchain")]
})
# NOTE the context above is hardcoded; the indexed documents are not used yet.

# Retrieval chain WITH documents: the retriever supplies the context by
# selecting the most relevant chunks for the input.
retriever = vectorstore.as_retriever()
retrieval_chain = create_retrieval_chain(retriever, document_chain)

# Get the response from the retrieval chain.
response = retrieval_chain.invoke({
    "input": "What is new in langchain 0.1.0?"
})
# The response holds the input, the retrieved context and the answer.

response["answer"]  # show the answer only

Conversation with memory

In [None]:
# History-aware retriever: the model is asked to turn the conversation so far
# plus the new input into a search query, which is then sent to the retriever.
prompt = ChatPromptTemplate.from_messages([
    MessagesPlaceholder(variable_name="chat_history"),  # earlier turns, if any
    ("user", "{input}"),
    ("user", "Given the above conversation, generate a search query to get above information")
])
retriever_chain = create_history_aware_retriever(llm, retriever, prompt)

# Mock conversation history used to exercise the chains.
chat_history = [
    HumanMessage(content="Is there anything new about langchain 0.1.0?"),
    AIMessage(content="yes.")
]

retriever_chain.invoke({
    "chat_history": chat_history,
    "input": "Tell me more about it."
})
# NOTE this yields the retrieved document snippets (with their metadata), not
#      an answer.

# Answering prompt: retrieved context as the system message, followed by the
# history and the new user input.
prompt = ChatPromptTemplate.from_messages([
    ("system", "Answer the questions based on the below context:\n\n{context}"),
    MessagesPlaceholder(variable_name="chat_history"),
    ("user", "{input}")
])

document_chain = create_stuff_documents_chain(llm, prompt)
conversational_retrieval_chain = create_retrieval_chain(retriever_chain, document_chain)

# Get a response that takes the earlier conversation into account.
response = conversational_retrieval_chain.invoke({
    "chat_history": chat_history,
    "input": "Tell me more about it?"
})

# Show only the answer, matching how the plain retrieval chain's response is
# displayed; previously the response was computed but never shown.
response["answer"]