### Simple Gen AI APP udsing langchain

In [1]:
import os 
from rich import print 
from dotenv import load_dotenv 
load_dotenv(dotenv_path="../.env")

True

In [3]:
os.environ["OPENAI_API_KEY"]=os.getenv("OPENAI_API_KEY")
os.environ["LANGCHAIN_API_KEY"]=os.getenv("LANGCHAIN_API_KEY")
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_PROJECT"] = "BASIC_GENAI_APP"
os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY")

#### DATA Ingestion --> from the website we need to scrape the data

In [5]:
from langchain_community.document_loaders import WebBaseLoader

In [6]:
loader=WebBaseLoader("https://docs.smith.langchain.com/administration/tutorials/manage_spend")
loader

<langchain_community.document_loaders.web_base.WebBaseLoader at 0x10578b460>

In [7]:
docs=loader.load()

In [10]:
print(docs)

In [9]:
len(docs)

1

#### Work flow

load Data--> Docs--> chunks --(embedding model)--> vectors -->vector Store DB

In [14]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter=RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=200)

chunks=text_splitter.split_documents(docs)


In [15]:
print(chunks[:2]) # first two chunks

In [17]:
len(chunks) # we have total 17 chunks

17

#### Embeddings

In [18]:
from langchain_openai import OpenAIEmbeddings
embeddings=OpenAIEmbeddings()


In [21]:
from langchain_community.vectorstores import FAISS 

vector_db=FAISS.from_documents(chunks,embedding=embeddings)

In [23]:
vector_db

<langchain_community.vectorstores.faiss.FAISS at 0x11f619540>

In [24]:
query="LangSmith has tow usage limits:total traces and extended"

In [25]:
results=vector_db.similarity_search(query)

In [28]:
print(results[0].page_content)

In [30]:
from langchain_openai import ChatOpenAI
from langchain_groq import ChatGroq
llm_groq=ChatGroq(model="openai/gpt-oss-20b")  
llm_openai=ChatOpenAI(model="gpt-5")

#### Retrieval chain , Document chain

In [31]:
# create a chain for passing a list of documents to a model 
from langchain.chains.combine_documents import create_stuff_documents_chain 
from langchain_core.prompts import ChatPromptTemplate
prompt=ChatPromptTemplate.from_template(
"""
    Answer the following question based only on the provided context:
    <context>
        {context}
    </context>


"""
)

document_chain=create_stuff_documents_chain(llm_openai,prompt)



In [None]:
document_chain # chain --> prompt | llm | output parser

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\n    Answer the following question based only on the provided context:\n    <context>\n        {context}\n    </context>\n\n\n'), additional_kwargs={})])
| ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x138cc16c0>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x138cc0b80>, root_client=<openai.OpenAI object at 0x138cc1f00>, root_async_client=<openai.AsyncOpenAI object at 0x138cc2230>, model_name='gpt-5', model_kwargs={}, openai_api_key=SecretStr('**********'))
| StrOutputParser(), kwargs={}, config={'run_n

In [None]:
from langchain_core.documents import Document

results_chain=document_chain.invoke(

    {
        "input":"LangSmith has tow usage limits:total traces and extended",
        "context":[Document(page_content='''LangSmith has tow usage limits:total traces and extended.
                            These correspond to the two metrics we've been tracking on our usage graph. ''')]
    }
)
print(results_chain)

However, we want the documents to first come from the retriever we just set up.That way, we can use the retriever to dynamically select the most relevant documents and pass those in for a given question.

### Input --> Retriever ----> vectorStoreDB

we will convert vector DB into retriever , so no need to use similarity search and easy to use in chain 

In [37]:
# create a retriever 

retriever=vector_db.as_retriever()
retriever

VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x11f619540>, search_kwargs={})

In [48]:
from langchain.chains import create_retrieval_chain
retrieval_chain=create_retrieval_chain(retriever,document_chain)
retrieval_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x11f619540>, search_kwargs={}), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\n    Answer the following question based only on the provided context:\n    <context>\n        {context}\n    </context>\n\n\n'), additional_kwargs={})])
 

In [43]:
## Get the response from the llm 
respomse_RC=retrieval_chain.invoke({"input":"LangSmith has tow usage limits:total traces and extended"})


In [51]:
print(respomse_RC['answer'])

In [53]:
print(respomse_RC)