# Simple Generative Ai Application

In [3]:
# Import the 'os' module to interact with the operating system
import os 

# Import the 'load_dotenv' function from the 'dotenv' library
from dotenv import load_dotenv

# Load environment variables from a .env file into the program
load_dotenv()

# Set the OpenAI API key from the environment variable
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")

# Set the LangChain API key from the environment variable
os.environ["LANGCHAIN_API_KEY"] = os.getenv("LANGCHAIN_API_KEY")

# Enable LangChain Tracing v2 for tracking
os.environ["LANGCHAIN_TRACING_V2"] = "true"

# Set the project name for LangChain from the environment variable
os.environ["LANGCHAIN_PROJECT"] = os.getenv("LANGCHAIN_PROJECT")

# This is a polite and sometimes required way to tell the server who's making the request (like a browser or a script).
os.environ["USER_AGENT"] = os.getenv("USER_AGENT")

# Data Ingestion
from websites we need to scrap the data

In [4]:
from langchain_community.document_loaders import WebBaseLoader

In [5]:
loader=WebBaseLoader("https://docs.smith.langchain.com/administration/tutorials/manage_spend")
loader

<langchain_community.document_loaders.web_base.WebBaseLoader at 0x25f5e971780>

# Loads the entire website content into a single document.

In [7]:
docs=loader.load()
# docs

## Load Data--->Docs---->divide our text into smaller chunks--->text---->vectors---->vector embeddings----->vector store Db

# Splits the document into smaller chunks of texts,because each LLM has different context window sizes.

In [8]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

textsplitter=RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
documents=textsplitter.split_documents(docs)

In [10]:
# documents

# Creating embedding for converting text to vector

In [11]:
from langchain_openai import OpenAIEmbeddings
embeddings=OpenAIEmbeddings()

# Creating a vector store DB for storing the vector embeddings

In [12]:
from langchain_community.vectorstores import FAISS

vectorstoredb=FAISS.from_documents(documents, embeddings)


In [13]:
vectorstoredb

<langchain_community.vectorstores.faiss.FAISS at 0x25f5240c910>

# Querying from vector storestore db

In [15]:
query="We understand what usage looks like in terms of traces"

response=vectorstoredb.similarity_search(query, k=3)

#response

In [17]:
# response[0].page_content

# 🔁 Chain – Simple Definition
A Chain is a set of connected steps that process input and give output using a language model.

🟢 It connects things like prompts, models, and output formatting in one flow.

# 📚 Retriever – Simple Definition
A Retriever is a tool that finds and returns the most relevant information from a large set of documents.

🟢 It helps the AI answer questions using real data or files.



# Creating LLM model

In [18]:
from langchain_openai import ChatOpenAI 
llm=ChatOpenAI(model="gpt-4o")

# Retrieval chain and Document chain

In [19]:
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

prompt=ChatPromptTemplate.from_template(
    """
Answer the following question based only on the provided context:
<context>
{context}
</context>


"""
)
document_chain=create_stuff_documents_chain(llm,prompt)

document_chain


RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\nAnswer the following question based only on the provided context:\n<context>\n{context}\n</context>\n\n\n'), additional_kwargs={})])
| ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x0000025F5EA2BDF0>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x0000025F006F8B80>, root_client=<openai.OpenAI object at 0x0000025F006C58D0>, root_async_client=<openai.AsyncOpenAI object at 0x0000025F5EA2BE20>, model_name='gpt-4o', model_kwargs={}, openai_api_key=SecretStr('**********'))
| StrOutputParser(), kwargs={}, confi

# Trying the document chain

In [20]:
from langchain_core.documents import Document

document_chain.invoke({
    "input": "LangSmith has two usage limits: total traces and extended",
    "context": [
        Document(page_content="LangSmith has two usage limits: total traces and extended traces. These correspond to the two metrics we've been tracking on our usage graph.")
    ]
})

'LangSmith has two usage limits: total traces and extended traces.'

# Add retrieval capability
Turns your vectorstoredb (a searchable document database) into a retriever that can find documents for any question.

In [21]:
retriever=vectorstoredb.as_retriever()
from langchain.chains import create_retrieval_chain
retrieval_chain=create_retrieval_chain(retriever,document_chain)

In [22]:
retrieval_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x0000025F5240C910>, search_kwargs={}), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\nAnswer the following question based only on the provided context:\n<context>\n{context}\n</context>\n\n\n'), additional_kwargs={})])
            | 

In [25]:
## Get the response form the LLM
response=retrieval_chain.invoke({"input":"LangSmith has two usage limits: total traces and extended"})
response["answer"]

'To calculate a good "total traces" limit for production usage, follow these steps:\n\n1. **Understand Current Load**: \n   - Determine the current usage of your application.\n   - Example: An application called between 1.2-1.5 times per second logs around 100,000-130,000 traces per day.\n\n2. **Estimate Growth**:\n   - Estimate how much you expect your usage to grow.\n   - Example: Expect to double the current load in the near future.\n\n3. **Calculate Monthly Limit**:\n   - Use a back-of-the-envelope calculation for the limit.\n   - Formula: \n     \\[\n     \\text{limit} = \\text{current\\_load\\_per\\_day} \\times \\text{expected\\_growth} \\times \\text{days/month}\n     \\]\n   - Example Calculation:\n     \\[\n     \\text{limit} = 130,000 \\times 2 \\times 30 = 7,800,000 \\text{ traces / month}\n     \\]\n\nSet this monthly limit for your usage in LangSmith to manage and optimize your costs efficiently.'

In [26]:
response

{'input': 'LangSmith has two usage limits: total traces and extended',
 'context': [Document(id='cf4c5c48-8641-4981-a89d-5bf92297d4e9', metadata={'source': 'https://docs.smith.langchain.com/administration/tutorials/manage_spend', 'title': 'Optimize tracing spend on LangSmith | 🦜️🛠️ LangSmith', 'description': 'Before diving into this content, it might be helpful to read the following:', 'language': 'en'}, page_content='Lets start by setting limits on our production usage, since that is where the majority of spend comes from.\nSetting a good total traces limit\u200b\nPicking the right "total traces" limit depends on the expected load of traces that you will send to LangSmith. You should\nclearly think about your assumptions before setting a limit.\nFor example:\n\nCurrent Load: Our gen AI application is called between 1.2-1.5 times per second, and each API request has a trace associated with it,\nmeaning we log around 100,000-130,000 traces per day\nExpected Growth in Load: We expect to 

In [27]:
response["context"]

[Document(id='cf4c5c48-8641-4981-a89d-5bf92297d4e9', metadata={'source': 'https://docs.smith.langchain.com/administration/tutorials/manage_spend', 'title': 'Optimize tracing spend on LangSmith | 🦜️🛠️ LangSmith', 'description': 'Before diving into this content, it might be helpful to read the following:', 'language': 'en'}, page_content='Lets start by setting limits on our production usage, since that is where the majority of spend comes from.\nSetting a good total traces limit\u200b\nPicking the right "total traces" limit depends on the expected load of traces that you will send to LangSmith. You should\nclearly think about your assumptions before setting a limit.\nFor example:\n\nCurrent Load: Our gen AI application is called between 1.2-1.5 times per second, and each API request has a trace associated with it,\nmeaning we log around 100,000-130,000 traces per day\nExpected Growth in Load: We expect to double in size in the near future.\n\nFrom these assumptions, we can do a quick bac