Simple Gen AI APP Using Langchain

In [26]:
import os
from dotenv import load_dotenv
load_dotenv()


##Langsmith tracking
os.environ["LANGCHAIN_API_KEY"] = os.getenv("LANGCHAIN_API_KEY")
os.environ["LANGCHAIN_TRACING_V2"]= "true"
os.environ["LANGCHAIN_PROJECT"]=os.getenv("LANGCHAIN_PROJECT")

Load data -> Docs -> Divide docs into chunks docs-> text -> vectors -> vector embedding -> VectorStoredb

In [3]:
##data ingestion from the website

from langchain_community.document_loaders import WebBaseLoader


USER_AGENT environment variable not set, consider setting it to identify your requests.


In [4]:
loader = WebBaseLoader("https://en.wikipedia.org/wiki/Gujarat")
loader

<langchain_community.document_loaders.web_base.WebBaseLoader at 0x2447fca6b60>

In [5]:
docs = loader.load()
docs[0].page_content

'\n\n\n\nGujarat - Wikipedia\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nJump to content\n\n\n\n\n\n\n\nMain menu\n\n\n\n\n\nMain menu\nmove to sidebar\nhide\n\n\n\n\t\tNavigation\n\t\n\n\nMain pageContentsCurrent eventsRandom articleAbout WikipediaContact usDonate\n\n\n\n\n\n\t\tContribute\n\t\n\n\nHelpLearn to editCommunity portalRecent changesUpload file\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nSearch\n\n\n\n\n\n\n\n\n\n\n\nSearch\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nAppearance\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nCreate account\n\nLog in\n\n\n\n\n\n\n\n\nPersonal tools\n\n\n\n\n\n Create account Log in\n\n\n\n\n\n\t\tPages for logged out editors learn more\n\n\n\nContributionsTalk\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nContents\nmove to sidebar\nhide\n\n\n\n\n(Top)\n\n\n\n\n\n1\nEtymology\n\n\n\n\n\n\n\n\n2\nHistory\n\n\n\n\nToggle History subsection\n\n\n\n\n\n2.1\nAncient history\n\n\n\n\n\n\n\n\n2.2\nMedieval history\n\n\n\n\n\n\n\n

In [9]:
##divide into chunks

from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size = 1000, chunk_overlap=0)
documents = text_splitter.split_documents(docs)

 

In [10]:
from langchain_community.embeddings import OllamaEmbeddings

embeddings = OllamaEmbeddings(model = "gemma2:2b")

In [11]:
from langchain_community.vectorstores import FAISS
vectorstoredb = FAISS.from_documents(documents, embeddings)

In [15]:
query = " The economy of Gujarat is the fourth-largest in India"

vectorstoredb = FAISS.from_documents(documents, embeddings)
result = vectorstoredb.similarity_search(query)[0].page_content

KeyboardInterrupt: 

In [12]:
from langchain_ollama.llms import OllamaLLM
llm = OllamaLLM(model='gemma2:2b')

In [16]:
## Retrieval Chain, Document chain

from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_template(
    '''
    Answer the following question based only on the provided context:
    <context>
    <context>
    {context}
    </context>
    '''
)


document_chain = create_stuff_documents_chain(llm, prompt)
document_chain


RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), config={'run_name': 'format_inputs'})
| ChatPromptTemplate(input_variables=['context'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], template='\n    Answer the following question based only on the provided context:\n    <context>\n    <context>\n    {context}\n    </context>\n    '))])
| OllamaLLM(model='gemma2:2b')
| StrOutputParser(), config={'run_name': 'stuff_documents_chain'})

In [17]:
from langchain_core.documents import Document

document_chain.invoke({
    "input":"The economy of Gujarat is the fourth-largest in India",
    "context":[Document(page_content="The economy of Gujarat is the fourth-largest in India,with a gross state domestic product (GSDP) of ₹16.55 trillion (equivalent to ₹19 trillion or US$220 billion in 2023) and has the country's 10th-highest GSDP per capita of ₹215,000 (US$2,600).[6] Gujarat has the highest exports of all states, accounting for around one-third of national exports.")]
})

"What is the ranking of Gujarat's GDP in India? \n"

In [19]:
## Input--> Retriever--> Vectorstoredb

retriever = vectorstoredb.as_retriever()

In [20]:
from langchain.chains import create_retrieval_chain
retriever_chain = create_retrieval_chain(retriever, document_chain)

In [21]:
retriever_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['FAISS', 'OllamaEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000002441813FA90>), config={'run_name': 'retrieve_documents'})
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), config={'run_name': 'format_inputs'})
            | ChatPromptTemplate(input_variables=['context'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], template='\n    Answer the following question based only on the provided context:\n    <context>\n    <context>\n    {context}\n    </context>\n    '))])
            | OllamaLLM(model='gemma2:2b')
            | StrOutputParser(), config={'run_name': 'stuff_documents_chain'})
  }), config={'run_name': 'retrieval_chai

In [24]:
##get the response from the llm 
response = retriever_chain.invoke({"input":"The economy of Gujarat is the fourth-largest in India"})

response['answer']

"The provided text discusses geographic features and tides in the Indian Ocean, mentioning places like Barygaza and its connection to rivers. \n\n\nLet me know if you'd like me to extract more information from this context!  \n"

In [25]:
response['context']

[Document(metadata={'source': 'https://en.wikipedia.org/wiki/Gujarat', 'title': 'Gujarat - Wikipedia', 'language': 'en'}, page_content='^ William H. Schoff (1912), The Periplus of the Erythraean Sea: Travel and Trade in the Indian Ocean by a Merchant of the First Century (digitalized), New York, archived from the original on 24 February 2011, retrieved 1 October 2013, As a sign of these places to those approaching from the sea there are serpents, very large and black; for at the other places on this coast and around Barygaza, they are smaller, and in color bright green, running into gold\xa0... Now the whole country of India has very many rivers, and very great ebb and flow of the tides; increasing at the new moon, and at the full moon for three days, and falling off during the intervening days of the moon. But about Barygaza it is much greater, so that the bottom is suddenly seen, and now parts of the dry land are sea, and now it is dry where ships were sailing just before; and the ri