Simple GenAI App using Langchain and OpenAI

In [None]:
import os
from dotenv import load_dotenv
load_dotenv()

os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY')
#Langsmith Tracking
os.environ['LANGCHAIN_API_KEY'] = os.getenv('LANGCHAIN_API_KEY')
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_PROJECT'] = os.getenv('LANGCHAIN_PROJECT')

In [1]:
### Data Ingestion - From the web page to scrape the data
from langchain_community.document_loaders import WebBaseLoader
loader = WebBaseLoader("https://docs.langchain.com/langsmith/set-up-a-workspace")
loader

USER_AGENT environment variable not set, consider setting it to identify your requests.


<langchain_community.document_loaders.web_base.WebBaseLoader at 0x1e79c2cc070>

In [2]:
docs = loader.load()
docs

[Document(metadata={'source': 'https://docs.langchain.com/langsmith/set-up-a-workspace', 'title': 'Set up a workspace - Docs by LangChain', 'language': 'en'}, page_content='Set up a workspace - Docs by LangChainOur new LangChain Academy course on Deep Agents is now live! Enroll for free.Docs by LangChain home pagePythonSearch...⌘KLangSmithPlatform for LLM observability and evaluationSetupOverviewCreate an account and API keySet up a workspaceManage organizations using the APIManage billingSet up resource tagsUser managementAdditional resourcesFAQsCloud architecture and scalabilityRegions FAQAuthentication methodsData purging for complianceRelease versionsOur new LangChain Academy course on Deep Agents is now live! Enroll for free.Docs by LangChain home pagePythonSearch...⌘KGitHubForumForumSearch...NavigationSetupSet up a workspaceGet startedObservabilityEvaluationPrompt engineeringSelf-hostingAdministrationGet startedObservabilityEvaluationPrompt engineeringSelf-hostingAdministrationGi

In [3]:
##  Divide text into chunks
from langchain_text_splitters import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

documents = text_splitter.split_documents(docs)
documents

[Document(metadata={'source': 'https://docs.langchain.com/langsmith/set-up-a-workspace', 'title': 'Set up a workspace - Docs by LangChain', 'language': 'en'}, page_content='Set up a workspace - Docs by LangChainOur new LangChain Academy course on Deep Agents is now live! Enroll for free.Docs by LangChain home pagePythonSearch...⌘KLangSmithPlatform for LLM observability and evaluationSetupOverviewCreate an account and API keySet up a workspaceManage organizations using the APIManage billingSet up resource tagsUser managementAdditional resourcesFAQsCloud architecture and scalabilityRegions FAQAuthentication methodsData purging for complianceRelease versionsOur new LangChain Academy course on Deep Agents is now live! Enroll for free.Docs by LangChain home pagePythonSearch...⌘KGitHubForumForumSearch...NavigationSetupSet up a workspaceGet startedObservabilityEvaluationPrompt engineeringSelf-hostingAdministrationGet startedObservabilityEvaluationPrompt engineeringSelf-hostingAdministrationGi

In [4]:
# Embeddings
from langchain_openai import OpenAIEmbeddings
embeddings = OpenAIEmbeddings()


In [5]:
from langchain_community.vectorstores import FAISS
db = FAISS.from_documents(documents, embeddings)

In [None]:
#Query from a vector store db

query = "How to delete a workspace?"
result = db.similarity_search(query)
result[0].page_content

'\u200bDelete a workspace\nDeleting a workspace will permanently delete the workspace and all associated data. This action cannot be undone.\nYou can delete a workspace through the LangSmith UI or via API. You must be a workspace Admin in order to delete a workspace.\n\u200bDelete a workspace via the UI\n\nNavigate to Settings.\nSelect the workspace you want to delete.\nClick Delete in the top-right corner of the screen.\n\nWas this page helpful?YesNoCreate an account and API keyManage organizations using the APIAssistantResponses are generated using AI and may contain mistakes.Docs by LangChain home pagegithubxlinkedinyoutubeResourcesChangelogLangChain AcademyTrust CenterCompanyAboutCareersBloggithubxlinkedinyoutubePowered by Mintlify'

In [7]:
#Create llm
from langchain_openai import ChatOpenAI
llm = ChatOpenAI(model_name="gpt-4o")

In [8]:
#Retrieval chain - ask question by providing context
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_template(
    """
    Answer the following question based only on the provided context:
    <context>
    {context}
    </context>
    """
)

document_chain = create_stuff_documents_chain(llm, prompt)
document_chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\n    Answer the following question based only on the provided context:\n    <context>\n    {context}\n    </context>\n    '), additional_kwargs={})])
| ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x000001E7B0768B20>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x000001E7B0769870>, root_client=<openai.OpenAI object at 0x000001E7B076A890>, root_async_client=<openai.AsyncOpenAI object at 0x000001E7B0769210>, model_name='gpt-4o', model_kwargs={}, openai_api_key=SecretStr('**********'))
| StrOutputParser(), 

In [10]:
from langchain_core.documents import Document

document_chain.invoke({
    "input" : "How to delete a workspace?",
    "context" : [Document(page_content="You can delete a workspace through the LangSmith UI or via API. You must be a workspace Admin in order to delete a workspace.")]
})

'To delete a workspace through the LangSmith UI or via API, you must be a workspace Admin.'

However we want the documents to forst come from the retriever we just set up. That way, we can use the retriever to dynamically select the most relevant documents and pass those in for a given question

In [11]:
# Retriever = can be considered as an interface, if anybody asks a question, it will go to the vector store and get the relevant documents

retriever = db.as_retriever()

from langchain.chains import create_retrieval_chain
retrieval_chain = create_retrieval_chain(retriever, document_chain)


In [12]:
# Get the response from the LLM
response = retrieval_chain.invoke({
    "input" : "How to delete a workspace?"
})

response['answer']

'What is required to delete a workspace, and what steps must be taken via the UI according to the provided context? \n\nTo delete a workspace, you must be a workspace Admin. Here are the steps to delete a workspace via the UI:\n\n1. Navigate to Settings.\n2. Select the workspace you want to delete.\n3. Click Delete in the top-right corner of the screen. \n\nNote that deleting a workspace is a permanent action and cannot be undone.'