#### Simple Gen AI APP Using Langchain

In [8]:
import os
from dotenv import load_dotenv
from langchain_community.llms import Ollama
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

## Langsmith Tracking
load_dotenv()
os.environ["LANGCHAIN_API_KEY"]=os.getenv("LANGCHAIN_API_KEY")
os.environ["LANGCHAIN_TRACING_V2"]="true"
os.environ["LANGCHAIN_PROJECT"]=os.getenv("LANGCHAIN_PROJECT")

In [36]:
## Data Ingestion--From the website we need to scrape the data
from langchain_community.document_loaders import WebBaseLoader
loader=WebBaseLoader("https://python.langchain.com/docs/integrations/graphs/neo4j_cypher/")
loader

<langchain_community.document_loaders.web_base.WebBaseLoader at 0x1eea2b2a5d0>

In [37]:
docs=loader.load()
docs

[Document(metadata={'source': 'https://python.langchain.com/docs/integrations/graphs/neo4j_cypher/', 'title': 'Neo4j | \uf8ffü¶úÔ∏è\uf8ffüîó LangChain', 'description': 'Neo4j is a graph database management system developed by Neo4j, Inc.', 'language': 'en'}, page_content='\n\n\n\n\nNeo4j | \uf8ffü¶úÔ∏è\uf8ffüîó LangChain\n\n\n\n\n\n\nSkip to main contentJoin us at  Interrupt: The Agent AI Conference by LangChain on May 13 & 14 in San Francisco!IntegrationsAPI ReferenceMoreContributingPeopleError referenceLangSmithLangGraphLangChain HubLangChain JS/TSv0.3v0.3v0.2v0.1\uf8ffüí¨SearchProvidersAnthropicAWSGoogleHugging FaceMicrosoftOpenAIMoreProvidersAbsoAcreomActiveloop Deep LakeAerospikeAI21 LabsAimAINetworkAirbyteAirtableAlchemyAleph AlphaAlibaba CloudAnalyticDBAnnoyAnthropicAnyscaleApache Software FoundationApache DorisApifyAppleArangoDBArceeArcGISArgillaArizeArthurArxivAscendAskNewsAssemblyAIAstra DBAtlasAwaDBAWSAZLyricsAzure AIBAAIBagelBagelDBBaichuanBaiduBananaBasetenBeamBeautiful So

In [38]:
### Load Data--> Docs-->Divide our Docuemnts into chunks dcouments-->text-->vectors-->Vector Embeddings--->Vector Store DB
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter=RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=200)
documents=text_splitter.split_documents(docs)

In [39]:
documents

[Document(metadata={'source': 'https://python.langchain.com/docs/integrations/graphs/neo4j_cypher/', 'title': 'Neo4j | \uf8ffü¶úÔ∏è\uf8ffüîó LangChain', 'description': 'Neo4j is a graph database management system developed by Neo4j, Inc.', 'language': 'en'}, page_content='Neo4j | \uf8ffü¶úÔ∏è\uf8ffüîó LangChain'),
 Document(metadata={'source': 'https://python.langchain.com/docs/integrations/graphs/neo4j_cypher/', 'title': 'Neo4j | \uf8ffü¶úÔ∏è\uf8ffüîó LangChain', 'description': 'Neo4j is a graph database management system developed by Neo4j, Inc.', 'language': 'en'}, page_content='Skip to main contentJoin us at  Interrupt: The Agent AI Conference by LangChain on May 13 & 14 in San Francisco!IntegrationsAPI ReferenceMoreContributingPeopleError referenceLangSmithLangGraphLangChain HubLangChain JS/TSv0.3v0.3v0.2v0.1\uf8ffüí¨SearchProvidersAnthropicAWSGoogleHugging FaceMicrosoftOpenAIMoreProvidersAbsoAcreomActiveloop Deep LakeAerospikeAI21 LabsAimAINetworkAirbyteAirtableAlchemyAleph Alpha

In [41]:
from langchain_community.embeddings import OllamaEmbeddings
embeddings=OllamaEmbeddings(model="gemma:2b")  ##by default it ues llama2
embeddings


OllamaEmbeddings(base_url='http://localhost:11434', model='gemma:2b', embed_instruction='passage: ', query_instruction='query: ', mirostat=None, mirostat_eta=None, mirostat_tau=None, num_ctx=None, num_gpu=None, num_thread=None, repeat_last_n=None, repeat_penalty=None, temperature=None, stop=None, tfs_z=None, top_k=None, top_p=None, show_progress=False, headers=None, model_kwargs=None)

In [42]:
from langchain_community.vectorstores import FAISS
vectorstoredb=FAISS.from_documents(documents,embeddings)

In [44]:
vectorstoredb

<langchain_community.vectorstores.faiss.FAISS at 0x1ee9ccf4390>

In [45]:
## Query From a vector db
query="its developers as an ACID-compliant transactional database"
result=vectorstoredb.similarity_search(query)
result[0].page_content

'Neo4j is a graph database management system developed by Neo4j, Inc.\n\n\nThe data elements Neo4j stores are nodes, edges connecting them, and attributes of nodes and edges. Described by its developers as an ACID-compliant transactional database with native graph storage and processing, Neo4j is available in a non-open-source "community edition" licensed with a modification of the GNU General Public License, with online backup and high availability extensions licensed under a closed-source commercial license. Neo also licenses Neo4j with these extensions under closed-source commercial terms.\n\n\nThis notebook shows how to use LLMs to provide a natural language interface to a graph database you can query with the Cypher query language.\n\n\nCypher is a declarative graph query language that allows for expressive and efficient data querying in a property graph.'

In [46]:
llm=Ollama(model="gemma:2b")

In [33]:
## Retrieval Chain, Document chain

from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

prompt=ChatPromptTemplate.from_template(
    """
Answer the following question based only on the provided context:
<context>
{context}
</context>
"""
)

document_chain=create_stuff_documents_chain(llm,prompt)
document_chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\nAnswer the following question based only on the provided context:\n<context>\n{context}\n</context>\n'), additional_kwargs={})])
| Ollama(model='gemma:2b')
| StrOutputParser(), kwargs={}, config={'run_name': 'stuff_documents_chain'}, config_factories=[])

In [47]:
retriever=vectorstoredb.as_retriever()
from langchain.chains import create_retrieval_chain
retrieval_chain=create_retrieval_chain(retriever,document_chain)


In [48]:
retrieval_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['FAISS', 'OllamaEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000001EE9CCF4390>, search_kwargs={}), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\nAnswer the following question based only on the provided context:\n<context>\n{context}\n</context>\n'), additional_kwargs={})])
            | Olla

In [50]:
## Get the response form the LLM
response=retrieval_chain.invoke({"input":"its developers as an ACID-compliant transactional database"})
response['answer']

'The context does not provide any information about the answer to the question, so I cannot answer this question from the provided context.'

In [51]:

response

{'input': 'its developers as an ACID-compliant transactional database',
 'context': [Document(id='6be2c9bd-0667-4a81-8100-8eb68f38909a', metadata={'source': 'https://python.langchain.com/docs/integrations/graphs/neo4j_cypher/', 'title': 'Neo4j | \uf8ffü¶úÔ∏è\uf8ffüîó LangChain', 'description': 'Neo4j is a graph database management system developed by Neo4j, Inc.', 'language': 'en'}, page_content='Neo4j is a graph database management system developed by Neo4j, Inc.\n\n\nThe data elements Neo4j stores are nodes, edges connecting them, and attributes of nodes and edges. Described by its developers as an ACID-compliant transactional database with native graph storage and processing, Neo4j is available in a non-open-source "community edition" licensed with a modification of the GNU General Public License, with online backup and high availability extensions licensed under a closed-source commercial license. Neo also licenses Neo4j with these extensions under closed-source commercial terms.\n

In [52]:
response['context']

[Document(id='6be2c9bd-0667-4a81-8100-8eb68f38909a', metadata={'source': 'https://python.langchain.com/docs/integrations/graphs/neo4j_cypher/', 'title': 'Neo4j | \uf8ffü¶úÔ∏è\uf8ffüîó LangChain', 'description': 'Neo4j is a graph database management system developed by Neo4j, Inc.', 'language': 'en'}, page_content='Neo4j is a graph database management system developed by Neo4j, Inc.\n\n\nThe data elements Neo4j stores are nodes, edges connecting them, and attributes of nodes and edges. Described by its developers as an ACID-compliant transactional database with native graph storage and processing, Neo4j is available in a non-open-source "community edition" licensed with a modification of the GNU General Public License, with online backup and high availability extensions licensed under a closed-source commercial license. Neo also licenses Neo4j with these extensions under closed-source commercial terms.\n\n\nThis notebook shows how to use LLMs to provide a natural language interface to a