### Gen AI Application

Simple GenAI App using Langchain

In [1]:
import os
from dotenv import load_dotenv

load_dotenv()

os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY')
os.environ['LANGCHAIN_KEY'] = os.getenv('LANGCHAIN_KEY')
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_PROJECT"] = os.getenv('LANGCHAIN_PROJECT')

In [3]:
# Data Ingestion
# Scrape the data from the website

from langchain_community.document_loaders import WebBaseLoader

In [10]:
# loader = WebBaseLoader("https://docs.smith.langchain.com/")
# loader = WebBaseLoader("https://docs.smith.langchain.com/tutorial/Administrators/manage_speed")
loader = WebBaseLoader("https://docs.smith.langchain.com/evaluation/how_to_guides")



In [11]:
loader

<langchain_community.document_loaders.web_base.WebBaseLoader at 0x15fb0457880>

In [12]:
docs = loader.load()
docs

[Document(metadata={'source': 'https://docs.smith.langchain.com/evaluation/how_to_guides', 'title': 'Evaluation how-to guides | 🦜️🛠️ LangSmith', 'description': 'These guides answer “How do I…?” format questions.', 'language': 'en'}, page_content="\n\n\n\n\nEvaluation how-to guides | 🦜️🛠️ LangSmith\n\n\n\n\n\n\nSkip to main contentJoin us at  Interrupt: The Agent AI Conference by LangChain on May 13 & 14 in San Francisco!API ReferenceRESTPythonJS/TSSearchRegionUSEUGo to AppGet StartedObservabilityEvaluationQuick StartTutorialsEvaluate a chatbotEvaluate a RAG applicationRun backtests on a new version of an agentRunning SWE-bench with LangSmithEvaluate a complex agentTest a ReAct agent with Pytest/Vitest and LangSmithHow-to GuidesAnalyze a single experimentLog user feedbackHow to run an evaluationHow to manage datasets in the UIRenaming an experimentHow to bind an evaluator to a dataset in the UIHow to manage datasets programmaticallyHow to run an evaluation from the prompt playgroundSet 

In [None]:
docs[0].metadata

{'source': 'https://docs.smith.langchain.com/evaluation/how_to_guides',
 'title': 'Evaluation how-to guides | 🦜️🛠️ LangSmith',
 'description': 'These guides answer “How do I…?” format questions.',
 'language': 'en'}

In [17]:
docs[0].page_content

"\n\n\n\n\nEvaluation how-to guides | 🦜️🛠️ LangSmith\n\n\n\n\n\n\nSkip to main contentJoin us at  Interrupt: The Agent AI Conference by LangChain on May 13 & 14 in San Francisco!API ReferenceRESTPythonJS/TSSearchRegionUSEUGo to AppGet StartedObservabilityEvaluationQuick StartTutorialsEvaluate a chatbotEvaluate a RAG applicationRun backtests on a new version of an agentRunning SWE-bench with LangSmithEvaluate a complex agentTest a ReAct agent with Pytest/Vitest and LangSmithHow-to GuidesAnalyze a single experimentLog user feedbackHow to run an evaluationHow to manage datasets in the UIRenaming an experimentHow to bind an evaluator to a dataset in the UIHow to manage datasets programmaticallyHow to run an evaluation from the prompt playgroundSet up feedback criteriaAnnotate traces and runs inlineHow to evaluate an application's intermediate stepsHow to version a datasetUse annotation queuesHow to share or unshare a dataset publiclyHow to use off-the-shelf evaluators (Python only)How to c

In [18]:
# Load data ---> Docs ---> Divide text to chunks ---> Convert text to vectors using vector embedding ---> Store in Vectorstore db

from langchain_text_splitters import RecursiveCharacterTextSplitter

In [19]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

documents = text_splitter.split_documents(docs)

In [20]:
documents

[Document(metadata={'source': 'https://docs.smith.langchain.com/evaluation/how_to_guides', 'title': 'Evaluation how-to guides | 🦜️🛠️ LangSmith', 'description': 'These guides answer “How do I…?” format questions.', 'language': 'en'}, page_content='Evaluation how-to guides | 🦜️🛠️ LangSmith'),
 Document(metadata={'source': 'https://docs.smith.langchain.com/evaluation/how_to_guides', 'title': 'Evaluation how-to guides | 🦜️🛠️ LangSmith', 'description': 'These guides answer “How do I…?” format questions.', 'language': 'en'}, page_content="Skip to main contentJoin us at  Interrupt: The Agent AI Conference by LangChain on May 13 & 14 in San Francisco!API ReferenceRESTPythonJS/TSSearchRegionUSEUGo to AppGet StartedObservabilityEvaluationQuick StartTutorialsEvaluate a chatbotEvaluate a RAG applicationRun backtests on a new version of an agentRunning SWE-bench with LangSmithEvaluate a complex agentTest a ReAct agent with Pytest/Vitest and LangSmithHow-to GuidesAnalyze a single experimentLog user 

In [21]:
from langchain_openai import OpenAIEmbeddings

In [22]:
embeddings = OpenAIEmbeddings()

In [23]:
from langchain_community.vectorstores import FAISS

In [24]:
vectorstoredb = FAISS.from_documents(documents, embeddings)

In [25]:
vectorstoredb

<langchain_community.vectorstores.faiss.FAISS at 0x15fd1532170>

In [28]:
# Create LLM model
from langchain_openai import ChatOpenAI
llm = ChatOpenAI(model="gpt-4o")

In [None]:
# Query from vectorstore db
uery = " LLMs are non-deterministic by nature, meaning they can produce unexpected results"
query1 = "Evaluate your application over production traffic"

results = vectorstoredb.similarity_search(query1)
results[0].page_content

'Annotation queues and human feedback\u200b\nCollect feedback from subject matter experts and users to improve your applications.\n\nUse annotation queues\nLog user feedback\nSet up a new feedback criteria\nAnnotate traces inline in the UI\nAudit and correct evaluator scores\nWas this page helpful?You can leave detailed feedback on GitHub.PreviousTest a ReAct agent with Pytest/Vitest and LangSmithNextAnalyze a single experimentKey featuresOffline evaluationRun an evaluationDefine an evaluatorConfigure the evaluation dataConfigure an evaluation jobTesting integrationsOnline evaluationAutomatic evaluationAnalyzing experiment resultsDataset managementAnnotation queues and human feedbackCommunityDiscordTwitterGitHubDocs CodeLangSmith SDKPythonJS/TSMoreHomepageBlogLangChain Python DocsLangChain JS/TS DocsCopyright © 2025 LangChain, Inc.'

In [29]:
# Retrieval chain, Document chain

from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_template(
    """
Answer the following questions based only on the provided context:
<context>
{context}
</context>
"""
)

document_chain = create_stuff_documents_chain(llm, prompt)
document_chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\nAnswer the following questions based only on the provided context:\n<context>\n{context}\n</context>\n'), additional_kwargs={})])
| ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x0000016025DD84F0>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x0000016025D57DF0>, root_client=<openai.OpenAI object at 0x0000016025DD8040>, root_async_client=<openai.AsyncOpenAI object at 0x0000016025DD96F0>, model_name='gpt-4o', model_kwargs={}, openai_api_key=SecretStr('**********'))
| StrOutputParser(), kwargs={}, config={

In [30]:
from langchain_core.documents import Document

In [33]:
document_chain.invoke({
    "input": "Evaluate your application over production traffic",
    "context": [Document(page_content="Evaluate your application over production traffic")]
})

'To evaluate your application over production traffic, you could consider conducting a performance assessment by gathering and analyzing data on how your application performs under real-world conditions. This involves monitoring metrics such as response time, error rates, system load, and user satisfaction. Additionally, you might want to simulate different load scenarios to see how the application handles peak usage. Doing this helps identify any bottlenecks, inefficiencies, or areas that require optimization to ensure the application performs reliably and efficiently for users.'

In [34]:
# Retrievers
vectorstoredb

<langchain_community.vectorstores.faiss.FAISS at 0x15fd1532170>

In [35]:
# Input ---> Retriever ---> Vectorstoredb

retriever = vectorstoredb.as_retriever()

from langchain.chains import create_retrieval_chain

retrieval_chain = create_retrieval_chain(retriever, document_chain)

In [36]:
retrieval_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x0000015FD1532170>, search_kwargs={}), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\nAnswer the following questions based only on the provided context:\n<context>\n{context}\n</context>\n'), additional_kwargs={})])
            | Cha

In [37]:
# Get the response from the LLM

response = retrieval_chain.invoke({"input": "Evaluate your application over production traffic"})
response['answer']

'1. What are some methods mentioned for collecting feedback to improve applications?\n\nYou can collect feedback by using annotation queues and logging user feedback. Additionally, setting up new feedback criteria and annotating traces inline in the UI are mentioned methods.\n\n2. How can you manage datasets in the context provided?\n\nDatasets can be managed by creating them with the SDK or from the UI, running offline and online evaluations, analyzing evaluation results in the UI, and logging user and expert feedback.\n\n3. What are some evaluation strategies mentioned in the context?\n\nThe context mentions online evaluation, offline evaluation, automatic evaluation, setting up LLM-as-judge and custom code evaluators, creating a few-shot evaluator, and auditing and correcting evaluator scores.\n\n4. What are some features related to analyzing experiments?\n\nYou can analyze a single experiment, compare experiments with the comparison view, filter experiments, view pairwise experimen

In [38]:
response

{'input': 'Evaluate your application over production traffic',
 'context': [Document(id='70b17ada-411c-4ad8-b820-79a82d901a21', metadata={'source': 'https://docs.smith.langchain.com/evaluation/how_to_guides', 'title': 'Evaluation how-to guides | 🦜️🛠️ LangSmith', 'description': 'These guides answer “How do I…?” format questions.', 'language': 'en'}, page_content='Annotation queues and human feedback\u200b\nCollect feedback from subject matter experts and users to improve your applications.\n\nUse annotation queues\nLog user feedback\nSet up a new feedback criteria\nAnnotate traces inline in the UI\nAudit and correct evaluator scores\nWas this page helpful?You can leave detailed feedback on GitHub.PreviousTest a ReAct agent with Pytest/Vitest and LangSmithNextAnalyze a single experimentKey featuresOffline evaluationRun an evaluationDefine an evaluatorConfigure the evaluation dataConfigure an evaluation jobTesting integrationsOnline evaluationAutomatic evaluationAnalyzing experiment resul

In [39]:
response['context']

[Document(id='70b17ada-411c-4ad8-b820-79a82d901a21', metadata={'source': 'https://docs.smith.langchain.com/evaluation/how_to_guides', 'title': 'Evaluation how-to guides | 🦜️🛠️ LangSmith', 'description': 'These guides answer “How do I…?” format questions.', 'language': 'en'}, page_content='Annotation queues and human feedback\u200b\nCollect feedback from subject matter experts and users to improve your applications.\n\nUse annotation queues\nLog user feedback\nSet up a new feedback criteria\nAnnotate traces inline in the UI\nAudit and correct evaluator scores\nWas this page helpful?You can leave detailed feedback on GitHub.PreviousTest a ReAct agent with Pytest/Vitest and LangSmithNextAnalyze a single experimentKey featuresOffline evaluationRun an evaluationDefine an evaluatorConfigure the evaluation dataConfigure an evaluation jobTesting integrationsOnline evaluationAutomatic evaluationAnalyzing experiment resultsDataset managementAnnotation queues and human feedbackCommunityDiscordTwi