# Simple GenAi APP using OpenAi

In [3]:
import os
from dotenv import load_dotenv
load_dotenv()

os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY')
os.environ['LANGCHAIN_API_KEY'] = os.getenv('LANGCHAIN_API_KEY')
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_PROJECT'] = os.getenv('LANGCHAIN_PROJECT')

In [9]:
## Data ingestion---> from the website and scrape the data

from langchain_community.document_loaders import WebBaseLoader

loader = WebBaseLoader('https://docs.smith.langchain.com/tutorials/Administrators/manage_spend')
loader 

<langchain_community.document_loaders.web_base.WebBaseLoader at 0x1fa4364fc10>

In [10]:
docs = loader.load()

docs 

[Document(metadata={'source': 'https://docs.smith.langchain.com/tutorials/Administrators/manage_spend', 'title': 'Optimize tracing spend on LangSmith | 🦜️🛠️ LangSmith', 'description': 'Before diving into this content, it might be helpful to read the following:', 'language': 'en'}, page_content='\n\n\n\n\nOptimize tracing spend on LangSmith | 🦜️🛠️ LangSmith\n\n\n\n\n\n\n\nSkip to main contentGo to API DocsSearchRegionUSEUGo to AppQuick startTutorialsAdministratorsOptimize tracing spend on LangSmithDevelopersHow-to guidesConceptsReferencePricingSelf-hostingLangGraph CloudTutorialsAdministratorsOptimize tracing spend on LangSmithOn this pageOptimize tracing spend on LangSmithRecommended ReadingBefore diving into this content, it might be helpful to read the following:Data Retention Conceptual DocsUsage Limiting Conceptual DocsnoteSome of the features mentioned in this guide are not currently available in Enterprise plan due to its\ncustom nature of billing. If you are on Enterprise plan a

In [11]:
# as this is a huge data. so, if the number of tokens is beyond the context length of llm model then  we need
# to divide the entire data into small chunks of text

from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size = 1000 , chunk_overlap = 200)

documents = text_splitter.split_documents(docs)

documents

[Document(metadata={'source': 'https://docs.smith.langchain.com/tutorials/Administrators/manage_spend', 'title': 'Optimize tracing spend on LangSmith | 🦜️🛠️ LangSmith', 'description': 'Before diving into this content, it might be helpful to read the following:', 'language': 'en'}, page_content='Optimize tracing spend on LangSmith | 🦜️🛠️ LangSmith'),
 Document(metadata={'source': 'https://docs.smith.langchain.com/tutorials/Administrators/manage_spend', 'title': 'Optimize tracing spend on LangSmith | 🦜️🛠️ LangSmith', 'description': 'Before diving into this content, it might be helpful to read the following:', 'language': 'en'}, page_content='Skip to main contentGo to API DocsSearchRegionUSEUGo to AppQuick startTutorialsAdministratorsOptimize tracing spend on LangSmithDevelopersHow-to guidesConceptsReferencePricingSelf-hostingLangGraph CloudTutorialsAdministratorsOptimize tracing spend on LangSmithOn this pageOptimize tracing spend on LangSmithRecommended ReadingBefore diving into this co

In [12]:
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS

embeddings = OpenAIEmbeddings() #initialiing the openai embedings
vector_store_db = FAISS.from_documents(documents, embeddings)
# converting all the documents into vectors and storing them in a vector storedb FAISS



In [13]:
vector_store_db

<langchain_community.vectorstores.faiss.FAISS at 0x1fa5e14e920>

In [30]:
## querying a vector storedb

query = "LangSmith has two usage limits: total traces and extende"

result = vector_store_db.similarity_search(query)

print(result)

result[0].page_content

[Document(metadata={'source': 'https://docs.smith.langchain.com/tutorials/Administrators/manage_spend', 'title': 'Optimize tracing spend on LangSmith | 🦜️🛠️ LangSmith', 'description': 'Before diving into this content, it might be helpful to read the following:', 'language': 'en'}, page_content='That\'s a cost reduction of nearly 75% per day!Optimization 2: limit usage\u200bIn the previous section, we managed data retention settings to optimize existing spend. In this section, we will\nuse usage limits to prevent future overspend.LangSmith has two usage limits: total traces and extended retention traces. These correspond to the two metrics we\'ve\nbeen tracking on our usage graph. We can use these in tandem to have granular control over spend.To set limits, we navigate back to Settings -> Usage and Billing -> Usage configuration. There is a table at the\nbottom of the page that lets you set usage limits per workspace. For each workspace, the two limits appear, along\nwith a cost estimat

'That\'s a cost reduction of nearly 75% per day!Optimization 2: limit usage\u200bIn the previous section, we managed data retention settings to optimize existing spend. In this section, we will\nuse usage limits to prevent future overspend.LangSmith has two usage limits: total traces and extended retention traces. These correspond to the two metrics we\'ve\nbeen tracking on our usage graph. We can use these in tandem to have granular control over spend.To set limits, we navigate back to Settings -> Usage and Billing -> Usage configuration. There is a table at the\nbottom of the page that lets you set usage limits per workspace. For each workspace, the two limits appear, along\nwith a cost estimate:Lets start by setting limits on our production usage, since that is where the majority of spend comes from.Setting a good total traces limit\u200bPicking the right "total traces" limit depends on the expected load of traces that you will send to LangSmith. You should'

You use `create_stuff_documents_chain` in LangChain to simplify the process of feeding multiple documents to a language model (LLM). This is especially useful when you want the LLM to answer questions based on information spread across several documents.

**`ChatPromptTemplate.from_template`**

* Focus: This method emphasizes creating prompts with a single template string that includes placeholders for dynamic input.
* Structure: You provide a template string with placeholders (e.g., {question}, {context}). When you use the template, you provide values for these placeholders to generate the complete prompt.
* Example:

template = "Answer the question based on this context: {context}\nQuestion: {question}"

prompt = ChatPromptTemplate.from_template(template) 


**`ChatPromptTemplate.from_messages`**

* Focus: This method focuses on constructing prompts as a sequence of messages, each with a specific role (e.g., "system", "user", "assistant").
* Structure: You provide a list of messages, where each message is a tuple containing the role and the content. This allows you to create more structured and conversational prompts.
* Example:

messages = [
    ("system", "You are a helpful AI assistant."),
    ("user", "{user_input}")
]

prompt = ChatPromptTemplate.from_messages(messages)
          
from_template Use cases: Single-turn questions, simple instructions	
            
from_messages uses_cases: Chatbots, multi-turn dialogues, complex scenarios



LangChain document chains are a powerful tool for working with large language models (LLMs) and text data. They provide a structured way to process documents and extract valuable information, enabling you to build more sophisticated and capable LLM applications.

**Purpose of Document Chains:**

* **Efficiently process large texts:** LLMs have limitations on the amount of text they can handle at once (context window). Document chains break down large texts into smaller chunks, making them manageable for LLMs.
* **Answer questions over documents:** They allow you to ask questions about the information contained within your documents and get accurate answers from the LLM.
* **Summarize documents:**  Condense lengthy documents into concise summaries, extracting the most important information.
* **Extract key information:**  Identify and extract specific details or insights from your documents.

**Common Use Cases:**

* **Question Answering:** Build systems that can answer questions based on a collection of documents, like a company's internal knowledge base or a set of research papers.
* **Document Summarization:**  Automatically generate summaries of long articles, reports, or legal documents.
* **Information Extraction:**  Extract key data from documents, such as names, dates, or product specifications.
* **Chatbots:** Create more informative chatbots that can access and retrieve information from a knowledge base to answer user questions.
* **Content Creation:** Generate new content based on existing documents, such as creating marketing materials from product descriptions.

**Types of Document Chains:**

LangChain offers several types of document chains, each designed for different tasks and approaches:

* **Stuff Documents Chain:** Combines all documents into a single prompt and sends it to the LLM. Useful for smaller sets of documents.
* **Map Reduce Documents Chain:** Processes documents in smaller chunks and combines the results. Suitable for larger datasets.
* **Refine Documents Chain:** Iteratively refines the answer by processing one document at a time. Useful for complex questions and analysis.
* **Map Re-rank Documents Chain:**  Ranks the documents based on their relevance to the question and returns the best answer.


**Example:**

Imagine you have a collection of customer reviews. You can use a document chain to:

1. **Analyze sentiment:** Determine the overall sentiment (positive, negative, neutral) expressed in the reviews.
2. **Identify key themes:**  Extract common topics and issues discussed in the reviews.
3. **Summarize feedback:** Generate a concise summary of the customer feedback.

**Key Benefits:**

* **Improved accuracy:**  By providing relevant context, document chains help LLMs generate more accurate responses.
* **Scalability:** They enable you to work with large volumes of text data.
* **Modularity:** You can easily combine different chains and components to build complex workflows.

By understanding and utilizing document chains effectively, you can unlock the full potential of LLMs for various document-related tasks and build more powerful and insightful applications.


In [19]:
## Document Chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model = 'gpt-4o')
prompt = ChatPromptTemplate.from_template(
    """
Answer the following question baased only on the provided context:
<context>
{context}
</context>

"""
)

document_chain = create_stuff_documents_chain(llm,prompt)

document_chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\nAnswer the following question baased only on the provided context:\n<context>\n{context}\n</context>\n\n'), additional_kwargs={})])
| ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x000001FA82375180>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x000001FA823EC220>, root_client=<openai.OpenAI object at 0x000001FA82376680>, root_async_client=<openai.AsyncOpenAI object at 0x000001FA82375450>, model_name='gpt-4o', model_kwargs={}, openai_api_key=SecretStr('**********'))
| StrOutputParser(), kwargs={}, config={'run_name': 'stuff_doc

In [23]:
from langchain_core.documents import Document
document_chain.invoke({
    "input": "LangSmith has two usage limits: total traces and extended",
    "context":[Document(page_content="LangSmith has two usage limits: total traces and extended retention traces. These correspond to the two metrics we've been tracking on our usage graph")]
})

'LangSmith has two usage limits: total traces and extended retention traces.'

However, we want the documents to first come from the retriver we jsut set up. That way, we can use the retriver to dynamically select the most relevant documetns and pass those in for a given question

In the context of LangChain and LLMs, a **retriever** is a component that finds and retrieves relevant information from a knowledge base or a collection of documents. It acts like a search engine within your LLM application, helping you locate the specific pieces of information needed to answer a user's query.

**Purpose of a Retriever:**

* **Overcome context limitations:** LLMs have a limited context window, meaning they can't process massive amounts of data at once. Retrievers help by selecting the most relevant portions of your knowledge base, allowing the LLM to focus on the information that's most likely to be useful.
* **Improve accuracy:** By providing targeted information, retrievers increase the chances that the LLM will generate accurate and relevant responses.
* **Enable dynamic knowledge updates:**  You can easily update your knowledge base without retraining the LLM. The retriever will simply fetch the updated information when needed.

**Types of Retrievers:**

LangChain offers various types of retrievers, each with its own strengths and weaknesses:

* **Vector database retrievers:** These retrievers use vector embeddings to represent documents and queries as numerical vectors. This allows them to find semantically similar documents even if they don't share exact keywords. (e.g., FAISS, Pinecone)
* **Text-based retrievers:** These rely on keyword matching and other text-based techniques to find relevant documents. (e.g., TF-IDF)
* **Hybrid retrievers:** These combine different retrieval methods to leverage their strengths.

**What is a Retriever Chain?**

A retriever chain in LangChain combines a retriever with other components to form a complete question-answering system. Here's a typical flow:

1. **Receive a user query.**
2. **Use the retriever to find relevant documents.**
3. **Feed the retrieved documents and the query to an LLM chain.** (e.g., `StuffDocumentsChain`, `MapReduceDocumentsChain`)
4. **The LLM chain processes the information and generates a response.**

**Example:**

Imagine you have a large collection of documents about different dog breeds. A user asks, "What are the characteristics of a Golden Retriever?"

1. A retriever chain would use a retriever to find documents specifically about Golden Retrievers.
2. These documents would be passed to an LLM chain along with the original question.
3. The LLM chain would extract relevant information from the documents and generate a response describing the characteristics of Golden Retrievers.

**Key benefits of using retriever chains:**

* **Efficiency:** They focus the LLM on the most relevant information, reducing processing time and improving response speed.
* **Accuracy:** By providing targeted context, they help the LLM generate more accurate and informative answers.
* **Scalability:** They allow you to work with large knowledge bases without overwhelming the LLM's context window.

By incorporating retrievers and retriever chains into your LangChain applications, you can build more powerful and efficient question-answering systems that can effectively leverage vast amounts of information.


In [27]:
# how to create a retriver
# to create a retriver first we should have vector stroedb
# then using as_retriver() method we can convert vector_store_db into retriever

retriver = vector_store_db.as_retriever()

from langchain.chains import create_retrieval_chain

retrieval_chain =create_retrieval_chain(retriver, document_chain)


retrieval_chain 

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000001FA5E14E920>, search_kwargs={}), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\nAnswer the following question baased only on the provided context:\n<context>\n{context}\n</context>\n\n'), additional_kwargs={})])
            | C

In [28]:
# get the response from llm

response =  retrieval_chain.invoke(
    {
        "input": "Langsmith has two usage limits: total traces and extended",
    }
)

response

{'input': 'Langsmith has two usage limits: total traces and extended',
 'context': [Document(metadata={'source': 'https://docs.smith.langchain.com/tutorials/Administrators/manage_spend', 'title': 'Optimize tracing spend on LangSmith | 🦜️🛠️ LangSmith', 'description': 'Before diving into this content, it might be helpful to read the following:', 'language': 'en'}, page_content='That\'s a cost reduction of nearly 75% per day!Optimization 2: limit usage\u200bIn the previous section, we managed data retention settings to optimize existing spend. In this section, we will\nuse usage limits to prevent future overspend.LangSmith has two usage limits: total traces and extended retention traces. These correspond to the two metrics we\'ve\nbeen tracking on our usage graph. We can use these in tandem to have granular control over spend.To set limits, we navigate back to Settings -> Usage and Billing -> Usage configuration. There is a table at the\nbottom of the page that lets you set usage limits p

In [29]:
response['answer']

'What are the two usage limits that LangSmith provides for controlling spend?\n\nLangSmith provides two usage limits for controlling spend: total traces and extended retention traces.'