## Simple Gen AI App Langchain

In [1]:
import os
from dotenv import load_dotenv
load_dotenv()

os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY')
os.environ['LANGCHAIN_API_KEY'] = os.getenv('LANGCHAIN_API_KEY')

os.environ['LANGCHAIN_TRACING_V2'] = "true"
os.environ['LANGCHAIN_PROJECT'] = os.getenv('LANGCHAIN_PROJECT')

#### Data Ingestion -- from the website we need to scrap the data

In [3]:
from langchain_community.document_loaders import WebBaseLoader

In [4]:
loader = WebBaseLoader("https://docs.smith.langchain.com/?_gl=1*1x0axhw*_gcl_au*MTYzOTI2MzQ3OC4xNzU0NjU0MDE2*_ga*MjExOTk5MTA2Ni4xNzU0NjU0MDE2*_ga_47WX3HKKY2*czE3NTUzMzg1ODQkbzgkZzEkdDE3NTUzMzkwMDkkajUyJGwwJGgw")
loader

<langchain_community.document_loaders.web_base.WebBaseLoader at 0x1b50a46bad0>

In [6]:
docs = loader.load()
docs

[Document(metadata={'source': 'https://docs.smith.langchain.com/?_gl=1*1x0axhw*_gcl_au*MTYzOTI2MzQ3OC4xNzU0NjU0MDE2*_ga*MjExOTk5MTA2Ni4xNzU0NjU0MDE2*_ga_47WX3HKKY2*czE3NTUzMzg1ODQkbzgkZzEkdDE3NTUzMzkwMDkkajUyJGwwJGgw', 'title': 'Get started with LangSmith | 🦜️🛠️ LangSmith', 'description': 'LangSmith is a platform for building production-grade LLM applications.', 'language': 'en'}, page_content="\n\n\n\n\nGet started with LangSmith | 🦜️🛠️ LangSmith\n\n\n\n\n\n\n\n\nSkip to main contentOur new LangChain Academy Course Deep Research with LangGraph is now live! Enroll for free.API ReferenceRESTPythonJS/TSSearchRegionUSEUGo to AppGet StartedObservabilityEvaluationPrompt EngineeringDeployment (LangGraph Platform)AdministrationSelf-hostingPricingReferenceCloud architecture and scalabilityAuthz and AuthnAuthentication methodsdata_formatsEvaluationDataset transformationsRegions FAQsdk_referenceGet StartedOn this pageGet started with LangSmith\nLangSmith is a platform for building production-gra

## Steps-
##### Load data ---> Docs ---> divide text into chunks (becasue everey LLMs have limitation to context size)---> Vectors --> vector embedding ---> vectorStore DB

In [7]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
text_splitter=RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=200)
documents=text_splitter.split_documents(docs)
documents

[Document(metadata={'source': 'https://docs.smith.langchain.com/?_gl=1*1x0axhw*_gcl_au*MTYzOTI2MzQ3OC4xNzU0NjU0MDE2*_ga*MjExOTk5MTA2Ni4xNzU0NjU0MDE2*_ga_47WX3HKKY2*czE3NTUzMzg1ODQkbzgkZzEkdDE3NTUzMzkwMDkkajUyJGwwJGgw', 'title': 'Get started with LangSmith | 🦜️🛠️ LangSmith', 'description': 'LangSmith is a platform for building production-grade LLM applications.', 'language': 'en'}, page_content='Get started with LangSmith | 🦜️🛠️ LangSmith'),
 Document(metadata={'source': 'https://docs.smith.langchain.com/?_gl=1*1x0axhw*_gcl_au*MTYzOTI2MzQ3OC4xNzU0NjU0MDE2*_ga*MjExOTk5MTA2Ni4xNzU0NjU0MDE2*_ga_47WX3HKKY2*czE3NTUzMzg1ODQkbzgkZzEkdDE3NTUzMzkwMDkkajUyJGwwJGgw', 'title': 'Get started with LangSmith | 🦜️🛠️ LangSmith', 'description': 'LangSmith is a platform for building production-grade LLM applications.', 'language': 'en'}, page_content='Skip to main contentOur new LangChain Academy Course Deep Research with LangGraph is now live! Enroll for free.API ReferenceRESTPythonJS/TSSearchRegionUSEUGo

##### Emedding

In [8]:
from langchain_openai import OpenAIEmbeddings
embeddings=OpenAIEmbeddings()

In [9]:
from langchain_community.vectorstores import FAISS
vectorStoreDB = FAISS.from_documents(documents,embeddings)
vectorStoreDB

<langchain_community.vectorstores.faiss.FAISS at 0x1b5231b8e90>

##### Query from a vector DB

In [11]:
query = "AI applications involve writing prompts to instruct "
result = vectorStoreDB.similarity_search(query)
result[0].page_content

'Get started by adding tracing to your application.\nCreate dashboards to view key metrics like RPS, error rates and costs.\n\nEvals\u200b\nThe quality and development speed of AI applications depends on high-quality evaluation datasets and metrics to test and optimize your applications on. The LangSmith SDK and UI make building and running high-quality evaluations easy.\n\nGet started by creating your first evaluation.\nQuickly assess the performance of your application using our off-the-shelf evaluators as a starting point.\nAnalyze results of evaluations in the LangSmith UI and compare results over time.\nEasily collect human feedback on your data to improve your application.\n\nPrompt Engineering\u200b\nWhile traditional software applications are built by writing code, AI applications involve writing prompts to instruct the LLM on what to do. LangSmith provides a set of tools designed to enable and facilitate prompt engineering to help you find the perfect prompt for your applicati

##### Retrieval chain, Document chain

In [13]:
from langchain_openai import ChatOpenAI
llm = ChatOpenAI(model='gpt-4o')

In [14]:
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
prompt = ChatPromptTemplate.from_template(
    """" 
    Answer the folowing question based only on the provided context:
    <context>
    {context}
    </context>
    
    """
)

document_chain = create_stuff_documents_chain(llm,prompt)

In [15]:
document_chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='" \n    Answer the folowing question based only on the provided context:\n    <context>\n    {context}\n    </context>\n\n    '), additional_kwargs={})])
| ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x000001B52357BAD0>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x000001B524800850>, root_client=<openai.OpenAI object at 0x000001B52357B890>, root_async_client=<openai.AsyncOpenAI object at 0x000001B5235884D0>, model_name='gpt-4o', model_kwargs={}, openai_api_key=SecretStr('**********'))
| StrOutputParser(

In [16]:
from langchain_core.documents import Document

document_chain.invoke({"input":"AI applications involve writing prompts to instruct ",
                       "context": [Document(page_content="AI applications involve writing prompts to instruct the LLM on what to do. LangSmith provides a set of tools designed to enable and facilitate prompt engineering to help you find the perfect prompt for your application.")]
                    })

'What does LangSmith provide to assist with AI applications?'

#### However, we want the documents to first come from the retriever we just set up. That way, we can use the retriever to dynamically select the most relevant documents and pass those in for a given question.
##### Input ----> Retriver ----> vectorStore DB

In [17]:
vectorStoreDB

<langchain_community.vectorstores.faiss.FAISS at 0x1b5231b8e90>

In [23]:
retriever = vectorStoreDB.as_retriever()
from langchain.chains import create_retrieval_chain
retrieval_chain = create_retrieval_chain(retriever,document_chain)

In [24]:
retrieval_chain


RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000001B5231B8E90>, search_kwargs={}), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='" \n    Answer the folowing question based only on the provided context:\n    <context>\n    {context}\n    </context>\n\n    '), additional_kwargs={

In [29]:
## get the response form the LLM
response = retrieval_chain.invoke({"input":"AI applications involve writing prompts to instruct ",
                       "context": [Document(page_content="AI applications involve writing prompts to instruct the LLM on what to do. LangSmith provides a set of tools designed to enable and facilitate prompt engineering to help you find the perfect prompt for your application.")]
                    })
## get the response form the LLM
response

{'input': 'AI applications involve writing prompts to instruct ',
 'context': [Document(id='621543ae-b978-42e1-8347-5676c3350e76', metadata={'source': 'https://docs.smith.langchain.com/?_gl=1*1x0axhw*_gcl_au*MTYzOTI2MzQ3OC4xNzU0NjU0MDE2*_ga*MjExOTk5MTA2Ni4xNzU0NjU0MDE2*_ga_47WX3HKKY2*czE3NTUzMzg1ODQkbzgkZzEkdDE3NTUzMzkwMDkkajUyJGwwJGgw', 'title': 'Get started with LangSmith | 🦜️🛠️ LangSmith', 'description': 'LangSmith is a platform for building production-grade LLM applications.', 'language': 'en'}, page_content='Get started by adding tracing to your application.\nCreate dashboards to view key metrics like RPS, error rates and costs.\n\nEvals\u200b\nThe quality and development speed of AI applications depends on high-quality evaluation datasets and metrics to test and optimize your applications on. The LangSmith SDK and UI make building and running high-quality evaluations easy.\n\nGet started by creating your first evaluation.\nQuickly assess the performance of your application using 

In [30]:
response['answer']

'What benefits does the LangSmith platform offer for AI applications as per the provided context?'

In [31]:
response['context']

[Document(id='621543ae-b978-42e1-8347-5676c3350e76', metadata={'source': 'https://docs.smith.langchain.com/?_gl=1*1x0axhw*_gcl_au*MTYzOTI2MzQ3OC4xNzU0NjU0MDE2*_ga*MjExOTk5MTA2Ni4xNzU0NjU0MDE2*_ga_47WX3HKKY2*czE3NTUzMzg1ODQkbzgkZzEkdDE3NTUzMzkwMDkkajUyJGwwJGgw', 'title': 'Get started with LangSmith | 🦜️🛠️ LangSmith', 'description': 'LangSmith is a platform for building production-grade LLM applications.', 'language': 'en'}, page_content='Get started by adding tracing to your application.\nCreate dashboards to view key metrics like RPS, error rates and costs.\n\nEvals\u200b\nThe quality and development speed of AI applications depends on high-quality evaluation datasets and metrics to test and optimize your applications on. The LangSmith SDK and UI make building and running high-quality evaluations easy.\n\nGet started by creating your first evaluation.\nQuickly assess the performance of your application using our off-the-shelf evaluators as a starting point.\nAnalyze results of evaluati