In [33]:
import os
from dotenv import load_dotenv
load_dotenv()
#LangSmith Tracking
os.environ["LANGCHAIN_API_KEY"]=os.getenv("LANGCHAIN_API_KEY")
os.environ["LANGCHAIN_TRACING_V2"]="true"
os.environ["LANCHAIN_PROJECT"]=os.getenv("LANGCHAIN_PROJECT")
os.environ["GROQ_API_KEY"]=os.getenv("GROQ_API_KEY")


In [34]:
from langchain_community.embeddings import OllamaEmbeddings
from langchain_groq import ChatGroq
llm=ChatGroq(
    model_name="llama-3.3-70b-versatile"
)


In [35]:
##Data Ingestion-From the website we need to scrap the data
import requests
from langchain_community.document_loaders import WebBaseLoader

In [36]:
loader=WebBaseLoader("https://docs.langchain.com/langsmith/optimize-classifier")

In [37]:
docs=loader.load()
docs

[Document(metadata={'source': 'https://docs.langchain.com/langsmith/optimize-classifier', 'title': 'Optimize a classifier - Docs by LangChain', 'language': 'en'}, page_content='Optimize a classifier - Docs by LangChainSkip to main contentDocs by LangChain home pageLangSmithSearch...⌘KAsk AIGitHubTry LangSmithTry LangSmithSearch...NavigationTutorialsOptimize a classifierGet startedObservabilityEvaluationPrompt engineeringDeploymentPlatform setupReferenceOverviewQuickstartConceptsCreate and update promptsCreate a promptManage promptsManage prompts programmaticallyPrompt template formatConfigure prompt settingsUse tools in a promptInclude multimodal content in a promptWrite your prompt with AIConnect to modelsTutorialsOptimize a classifierSync prompts with GitHubTest multi-turn conversationsOn this pageThe objectiveGetting startedSet up automationsUpdate the applicationSemantic search over examplesTutorialsOptimize a classifierCopy pageCopy pageThis tutorial walks through optimizing a cla

In [38]:
#Dividing into chunks 
from langchain_text_splitters import RecursiveCharacterTextSplitter
text_splitter=RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=200)

In [39]:
documents=text_splitter.split_documents(docs)

In [40]:
documents

[Document(metadata={'source': 'https://docs.langchain.com/langsmith/optimize-classifier', 'title': 'Optimize a classifier - Docs by LangChain', 'language': 'en'}, page_content='Optimize a classifier - Docs by LangChainSkip to main contentDocs by LangChain home pageLangSmithSearch...⌘KAsk AIGitHubTry LangSmithTry LangSmithSearch...NavigationTutorialsOptimize a classifierGet startedObservabilityEvaluationPrompt engineeringDeploymentPlatform setupReferenceOverviewQuickstartConceptsCreate and update promptsCreate a promptManage promptsManage prompts programmaticallyPrompt template formatConfigure prompt settingsUse tools in a promptInclude multimodal content in a promptWrite your prompt with AIConnect to modelsTutorialsOptimize a classifierSync prompts with GitHubTest multi-turn conversationsOn this pageThe objectiveGetting startedSet up automationsUpdate the applicationSemantic search over examplesTutorialsOptimize a classifierCopy pageCopy pageThis tutorial walks through optimizing a cla

In [41]:
#Converting this text to vectors
embeddings = OllamaEmbeddings(model="mxbai-embed-large")

In [42]:
#We will use FAISS Database
from langchain_community.vectorstores import FAISS
vectorstoredb=FAISS.from_documents(documents,embeddings)

In [43]:
vectorstoredb

<langchain_community.vectorstores.faiss.FAISS at 0x27f11721b70>

In [44]:
#Query from a vectorstore db
query="we will build a bot that classify GitHub issues"
result=vectorstoredb.similarity_search(query) #this will try to give you based on context all the info that is available near those vectors


In [45]:
result[0].page_content

'\u200bThe objective\nIn this example, we will build a bot that classify GitHub issues based on their title. It will take in a title and classify it into one of many different classes. Then, we will start to collect user feedback and use that to shape how this classifier performs.\n\u200bGetting started\nTo get started, we will first set it up so that we send all traces to a specific project. We can do this by setting an environment variable:\nCopyimport os\nos.environ["LANGSMITH_PROJECT"] = "classifier"'

In [50]:
#What is i need to ask a much more meaningful question
#I really need to provide context with respect to that particular question.
#we use retrieval chain for this 
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate

prompt=ChatPromptTemplate.from_template( #the context is the information that i will be giving my LLM model regarding my document or text
    """
Answer the following question based only on the provided context:
<context> 
{context}
</context>
Question: {input}
"""
) #example say i give a particular text, now i need to get context information
#context information basically means that instead of searching the entire page, i can give a paragraph and say that go and search for this particular text in this paragraph
#so paragraph becomes my context and due to this my search will also happen quickly.
#But how do i get my paragraph?
#For this we will be using document chain




In [51]:
document_chain = prompt | llm | StrOutputParser()


In [53]:
from langchain_core.documents import Document
document_chain.invoke({
    "input":"we will build a bot that classify GitHub issues",
    "context":[Document(page_content="we will build a bot that classify GitHub issues based on their title. It will take in a title and classify it into one of many different classes.")] #Adding context manually
})

'Based on the context, the completion of the sentence is: "...based on their title." \n\nSo, the full sentence is: "We will build a bot that classify GitHub issues based on their title."'

However, we want the documents to first come from the retriever we just set up. That way, we can use the retriever to dynamically select the most relevant documents and pass those in for a given question.

In [54]:
#Input--->Retriever--->can be considered as an interface--->vectorstoredb
#When we create vectorstoredb we convert it to a retriever, whihc is an interface wrt any input, so i can pass the input to retriever and get the output from vectorstore

vectorstoredb

<langchain_community.vectorstores.faiss.FAISS at 0x27f11721b70>

In [None]:
retriever=vectorstoredb.as_retriever() #retriever already knows how to fetch context, we dont have to fill it manually


In [None]:
from langchain_classic.chains import create_retrieval_chain
retrieval_chain=create_retrieval_chain(retriever,document_chain) #we dont pass context because the retriever generates it dynamically for every query

In [63]:
retrieval_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['FAISS', 'OllamaEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x0000027F11721B70>, search_kwargs={}), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: ChatPromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, template='\nAnswer the following question based only on the provided context:\n<context> \n{context}\n</context>\nQuestion: {input}\n'), additional_kwargs={})])
            | ChatGroq(profile={'max_input_tokens': 131072, 'max_output_tokens': 32768, 'image_inputs': False, 'audio_inputs': False, 'video_inputs': False, 'image_outputs': False, 'audio_outputs': F

In [None]:
##get the response from the llm
response=retrieval_chain.invoke({"input":"we will build a bot that classify GitHub issues"})
#Langchain automatically injects retrieved docs into {context}, injects user query into {input}

In [65]:
response['answer']

"That's correct. According to the provided context, the objective is to build a bot that classifies GitHub issues based on their title, categorizing them into one of many different classes."

In [66]:
response

{'input': 'we will build a bot that classify GitHub issues',
 'context': [Document(id='99aadc5b-b331-4d0d-a217-d8365ece5122', metadata={'source': 'https://docs.langchain.com/langsmith/optimize-classifier', 'title': 'Optimize a classifier - Docs by LangChain', 'language': 'en'}, page_content='\u200bThe objective\nIn this example, we will build a bot that classify GitHub issues based on their title. It will take in a title and classify it into one of many different classes. Then, we will start to collect user feedback and use that to shape how this classifier performs.\n\u200bGetting started\nTo get started, we will first set it up so that we send all traces to a specific project. We can do this by setting an environment variable:\nCopyimport os\nos.environ["LANGSMITH_PROJECT"] = "classifier"'),
  Document(id='009ae338-8aa8-49a6-bb5e-97c9a415e9e3', metadata={'source': 'https://docs.langchain.com/langsmith/optimize-classifier', 'title': 'Optimize a classifier - Docs by LangChain', 'langua

In [67]:
response['context'] #print context

[Document(id='99aadc5b-b331-4d0d-a217-d8365ece5122', metadata={'source': 'https://docs.langchain.com/langsmith/optimize-classifier', 'title': 'Optimize a classifier - Docs by LangChain', 'language': 'en'}, page_content='\u200bThe objective\nIn this example, we will build a bot that classify GitHub issues based on their title. It will take in a title and classify it into one of many different classes. Then, we will start to collect user feedback and use that to shape how this classifier performs.\n\u200bGetting started\nTo get started, we will first set it up so that we send all traces to a specific project. We can do this by setting an environment variable:\nCopyimport os\nos.environ["LANGSMITH_PROJECT"] = "classifier"'),
 Document(id='009ae338-8aa8-49a6-bb5e-97c9a415e9e3', metadata={'source': 'https://docs.langchain.com/langsmith/optimize-classifier', 'title': 'Optimize a classifier - Docs by LangChain', 'language': 'en'}, page_content='We can then create our initial application. This