## Retrieval and chain with langchain.

In [5]:
from langchain_community.document_loaders import PyPDFLoader
loader = PyPDFLoader('attention_is_all_you_need.pdf')
loaded_doc = loader.load()
loaded_doc

[Document(metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2024-04-10T21:11:43+00:00', 'author': '', 'keywords': '', 'moddate': '2024-04-10T21:11:43+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': '', 'title': '', 'trapped': '/False', 'source': 'attention_is_all_you_need.pdf', 'total_pages': 15, 'page': 0, 'page_label': '1'}, page_content='Provided proper attribution is provided, Google hereby grants permission to\nreproduce the tables and figures in this paper solely for use in journalistic or\nscholarly works.\nAttention Is All You Need\nAshish Vaswani∗\nGoogle Brain\navaswani@google.com\nNoam Shazeer∗\nGoogle Brain\nnoam@google.com\nNiki Parmar∗\nGoogle Research\nnikip@google.com\nJakob Uszkoreit∗\nGoogle Research\nusz@google.com\nLlion Jones∗\nGoogle Research\nllion@google.com\nAidan N. Gomez∗ †\nUniversity of Toronto\naidan@cs.toronto.edu\nŁukasz Kaiser∗\nGoogl

In [12]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap = 200)
chunk_doc = text_splitter.split_documents(loaded_doc)
limited_doc = chunk_doc[:5]

In [13]:
import os
from dotenv import load_dotenv

load_dotenv()
os.environ['GOOGLE_API_KEY'] = os.getenv("GOOGLE_API_KEY")

In [19]:
## Vector embedding and vector store

from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_ollama import OllamaEmbeddings
from langchain_community.vectorstores import FAISS

embeddings = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-exp-03-07")
db = FAISS.from_documents(limited_doc, embeddings)



In [21]:
ollama_embeddings = OllamaEmbeddings(model= "llama3.2")
db2 = FAISS.from_documents(limited_doc, ollama_embeddings)

In [22]:
db2

<langchain_community.vectorstores.faiss.FAISS at 0x2278eafd810>

In [16]:
db

<langchain_community.vectorstores.faiss.FAISS at 0x2278eadedd0>

In [27]:
query = "The encoder is composed of a stack of N = 6 identical layers"
retrieved_result = db.similarity_search(query)
print(retrieved_result[0].page_content)

1 Introduction
Recurrent neural networks, long short-term memory [13] and gated recurrent [7] neural networks
in particular, have been firmly established as state of the art approaches in sequence modeling and
transduction problems such as language modeling and machine translation [ 35, 2, 5]. Numerous
efforts have since continued to push the boundaries of recurrent language models and encoder-decoder
architectures [38, 24, 15].
Recurrent models typically factor computation along the symbol positions of the input and output
sequences. Aligning the positions to steps in computation time, they generate a sequence of hidden
states ht, as a function of the previous hidden state ht−1 and the input for position t. This inherently
sequential nature precludes parallelization within training examples, which becomes critical at longer
sequence lengths, as memory constraints limit batching across examples. Recent work has achieved


# Prompt -> LLM -> Chain -> Retriever -> Retrieval Chain

In [None]:
## Design Chat prompt template 
from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_template("""
                                          Answer the following on the basis of the context provided. Process the information step-by-step
                                          before providing the answer.On every correct answer, get Rs 100 as tip.
                                          <context>
                                          {context}
                                          </context>
                                          
                                          Question: {input}:
                                          
                                          """)




In [None]:
## Create a llm

from langchain_google_genai import GoogleGenerativeAI
llm = GoogleGenerativeAI(model= "gemini-1.5-flash")


In [55]:
llm

GoogleGenerativeAI(model='gemini-2-flash', google_api_key=SecretStr('**********'), client=ChatGoogleGenerativeAI(model='models/gemini-2-flash', google_api_key=SecretStr('**********'), client=<google.ai.generativelanguage_v1beta.services.generative_service.client.GenerativeServiceClient object at 0x000002278ECB6BD0>, default_metadata=(), model_kwargs={}))

In [56]:
## Chains 

from langchain.chains.combine_documents import create_stuff_documents_chain

chain = create_stuff_documents_chain(llm, prompt)


In [57]:
chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| ChatPromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, template='\n                                          Answer the following on the basis of the context provided. Process the information step-by-step\n                                          before providing the answer.On every correct answer, get Rs 100 as tip.\n                                          <context>\n                                          {context}\n                                          </context>\n\n                                          Question: {input}:\n\n                                          '), additional_kwargs={})])
| GoogleGenerativeAI(mode

In [58]:
## Retrievers : retrieves data from vectorstore

retriever = db.as_retriever()
retriever

VectorStoreRetriever(tags=['FAISS', 'GoogleGenerativeAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000002278E9F5610>, search_kwargs={})

In [59]:
## Retrieval Chain: chain takes user inquiry, which is passed to retriever to fetch docs, which is then passed to llm to generate answer.

from langchain.chains import create_retrieval_chain

retrieval_chain = create_retrieval_chain(retriever, chain)


In [60]:
response= retrieval_chain.invoke({"input":"Scaled-Dot Product attention"})
response['answer']

Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised NotFound: 404 models/gemini-2-flash is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods..


NotFound: 404 models/gemini-2-flash is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods.