## Simple Gen AI Using Langchain

In [40]:
import os 
from  dotenv import load_dotenv
load_dotenv()

os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY")

## for langsmith tracking
os.environ["LANGCHAIN_API_KEY"] = os.getenv("LANGCHAIN_API_KEY")
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_PROJECT_NAME"] = "LangchainFramework"


In [41]:
### Data ingestion -- from the website we need to scrape the data 
from langchain_community.document_loaders import WebBaseLoader

In [42]:
## Load data from a website with custom headers (data ingestion)

loader = WebBaseLoader(
    "https://python.langchain.com",
    header_template={
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    }
)
docs = loader.load()
print(f"Number of documents: {len(docs)}")
print(docs[0].page_content[:500])  # Print the first 500 characters

Number of documents: 1
LangChain overview - Docs by LangChainSkip to main contentDocs by LangChain home pageLangChain + LangGraphSearch...⌘KSupportGitHubTry LangSmithTry LangSmithSearch...NavigationLangChain overviewLangChainLangGraphDeep AgentsIntegrationsLearnReferenceContributePythonOverviewGet startedInstallQuickstartChangelogPhilosophyCore componentsAgentsModelsMessagesToolsShort-term memoryStreamingStructured outputMiddlewareOverviewBuilt-in middlewareCustom middlewareAdvanced usageGuardrailsRuntimeContext engin


In [43]:
%pip install -U langchain langchain-community


Note: you may need to restart the kernel to use updated packages.


In [44]:
# dived the single documents into smaller chunks
# we devide into multiple chunk because LLMs have context limit 

from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200
)

texts = text_splitter.split_documents(docs)
print(f"Number of text chunks: {len(texts)}")

Number of text chunks: 7


In [45]:
texts

[Document(metadata={'source': 'https://python.langchain.com', 'title': 'LangChain overview - Docs by LangChain', 'description': 'LangChain is an open source framework with a pre-built agent architecture and integrations for any model or tool — so you can build agents that adapt as fast as the ecosystem evolves', 'language': 'en'}, page_content='LangChain overview - Docs by LangChainSkip to main contentDocs by LangChain home pageLangChain + LangGraphSearch...⌘KSupportGitHubTry LangSmithTry LangSmithSearch...NavigationLangChain overviewLangChainLangGraphDeep AgentsIntegrationsLearnReferenceContributePythonOverviewGet startedInstallQuickstartChangelogPhilosophyCore componentsAgentsModelsMessagesToolsShort-term memoryStreamingStructured outputMiddlewareOverviewBuilt-in middlewareCustom middlewareAdvanced usageGuardrailsRuntimeContext engineeringModel Context Protocol (MCP)Human-in-the-loopMulti-agentRetrievalLong-term memoryAgent developmentLangSmith StudioTestAgent Chat UIDeploy with Lang

In [46]:
from langchain_community.embeddings import HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings()

  embeddings = HuggingFaceEmbeddings()


In [47]:
from langchain_community.vectorstores import FAISS
vectorstore = FAISS.from_documents(texts, embedding=embeddings)

In [48]:
# Groq LLM
from langchain_groq import ChatGroq
import os 


llm = ChatGroq(
    model="llama-3.1-70b-versatile",
    temperature=0,
    groq_api_key=os.getenv("GROQ_API_KEY")
)


In [49]:
# Querying from a vectorstore db 

query = "LangChain is the easiest way to start building agents and applications powered by LLMs"
result = vectorstore.similarity_search_with_score(query)
result[0]

(Document(id='36c3f1e3-8bbd-4310-a20b-616d6efb01fc', metadata={'source': 'https://python.langchain.com', 'title': 'LangChain overview - Docs by LangChain', 'description': 'LangChain is an open source framework with a pre-built agent architecture and integrations for any model or tool — so you can build agents that adapt as fast as the ecosystem evolves', 'language': 'en'}, page_content='agent architecture and integrations for any model or tool — so you can build agents that adapt as fast as the ecosystem evolvesCopy pageLangChain is the easiest way to start building agents and applications powered by LLMs. With under 10 lines of code, you can connect to OpenAI, Anthropic, Google, and more. LangChain provides a pre-built agent architecture and model integrations to help you get started quickly and seamlessly incorporate LLMs into your agents and applications.'),
 np.float32(0.19588156))

In [50]:
# here again define llm for generating response

from langchain_groq import ChatGroq
import os 


llm = ChatGroq(
    model="llama-3.1-70b-versatile",
    temperature=0,
    groq_api_key=os.getenv("GROQ_API_KEY")
)


In [51]:
!pip install langchain-classic




In [52]:
import os
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate

from langchain_classic.chains.combine_documents import create_stuff_documents_chain
from langchain_classic.chains.retrieval import create_retrieval_chain 

prompt = ChatPromptTemplate.from_template(
    """Answer the following question based on the context provided.
    <context>
    {context}
    </context>
    """
)

# 3. Create Document Chain
document_chain = create_stuff_documents_chain(llm, prompt)

document_chain



RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='Answer the following question based on the context provided.\n    <context>\n    {context}\n    </context>\n    '), additional_kwargs={})])
| ChatGroq(profile={}, client=<groq.resources.chat.completions.Completions object at 0x00000212304E9C70>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x00000212304EA570>, model_name='llama-3.1-70b-versatile', temperature=1e-08, model_kwargs={}, groq_api_key=SecretStr('**********'))
| StrOutputParser(), kwargs={}, config={'run_name': 'stuff_documents_chain'}, config_factories=[])

In [57]:
from langchain_core.documents import Document

responce  = document_chain.invoke({ 
    "input":"what is langchain?",
    "context":[ Document( page_content="LangChain is the platform for agent engineering. AI teams at Replit, Clay, Rippling, Cloudflare, Workday, and more trust LangChain’s products to engineer reliable agents." ) ]
})

print(responce)


BadRequestError: Error code: 400 - {'error': {'message': 'The model `llama-3.1-70b-versatile` has been decommissioned and is no longer supported. Please refer to https://console.groq.com/docs/deprecations for a recommendation on which model to use instead.', 'type': 'invalid_request_error', 'code': 'model_decommissioned'}}

however we want the documents to first come from the retriever we just setup . That way we can use retriever to dynamically select the most relevent documents and pass those in for given question 

In [None]:
### Retriever - A retriever takes a query and returns relevant documents from a vector storedb.

In [58]:
retriever = vectorstore.as_retriever()
from langchain_classic.chains.retrieval import create_retrieval_chain
document_chain = create_retrieval_chain(
    retriever,
    document_chain=document_chain
)

TypeError: create_retrieval_chain() got an unexpected keyword argument 'document_chain'

In [2]:
## GET THE RESPONCE FROM THE LLM 