In [None]:
from langchain_community.document_loaders import PyPDFLoader

# Read the book from a PDF that is expected to sit next to the notebook.
loader = PyPDFLoader("alice_in_wonderland.pdf")
docs = loader.load()

# Preview only the first couple of loaded documents: echoing the whole list
# would write the entire book into the cell output and bloat the notebook.
docs[:2]

In [5]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter configured with chunk_size=1000 and chunk_overlap=20, so adjacent
# chunks share a small amount of text at their boundary.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)

# Split the loaded documents; the resulting chunks are kept in `documents`.
documents = text_splitter.split_documents(docs)

# Preview only the first few chunks: echoing every chunk floods the cell
# output with the full text of the book.
documents[:3]


[Document(page_content="The Project Gutenberg eBook of Alice's Adventures in W onderland\nThis ebook is for the use of anyone anywhere in the United States and most other parts of the world at no cost and with\nalmost no restrictions whatsoever . You may copy it, give it away or re-use it under the terms of the Project Gutenberg\nLicense included with this ebook or online at www .gutenberg.org  (http://www .gutenberg.org) . If you are not located in the\nUnited States, you will have to check the laws of the country where you are located before using this eBook.\nTitle: Alice's Adventures in W onderland\nAuthor: Lewis Carroll\nRelease date: June 27, 2008 [eBook #1 1] Most recently updated: March 30, 2021\nLanguage: English\nCredits: Arthur DiBianca and David Widger\n_ START OF THE PROJECT GUTENBERG EBOOK ALICE'S ADVENTURES IN WONDERLAND _  [Illustration]\nAlice’ s Adventures in W onderland\nby Lewis Carroll\nTHE MILLENNIUM FULCRUM EDITION 3.0\nContents", metadata={'source': 'alice_in_wo

In [8]:
from langchain_openai import OpenAIEmbeddings
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import FAISS

# Build the FAISS vector store from OpenAI embeddings.
# NOTE(review): only the first 30 chunks are indexed, so every later search
# can only return text from that slice -- presumably to limit embedding
# calls; confirm this is intended.
embeddings = OpenAIEmbeddings()
indexed_chunks = documents[:30]
db = FAISS.from_documents(indexed_chunks, embeddings)
db

<langchain_community.vectorstores.faiss.FAISS at 0x20c86357490>

In [10]:
# Sanity-check the index: run a similarity search for a free-text question
# and display the text of the first returned chunk.
query = "What time is it? "
result = db.similarity_search(query)
first_hit = result[0]
first_hit.page_content

'before her was another long passage, and the White Rabbit was still in sight, hurrying down it. There was not a moment\nto be lost: away went Alice like the wind, and was just in time to hear it say , as it turned a corner , “Oh my ears and\nwhiskers, how late it’ s getting!” She was close behind it when she turned the corner , but the Rabbit was no longer to be\nseen: she found herself in a long, low hall, which was lit up by a row of lamps hanging from the roof.'

In [12]:
from langchain_community.llms import Ollama
## Load the LLM through Ollama (the model tag requested here is "gemma:2b")
llm = Ollama(model="gemma:2b")
llm

Ollama(model='gemma:2b')

In [13]:
## Design ChatPrompt Template
from langchain_core.prompts import ChatPromptTemplate

# Placeholders used by the template:
#   {context} - the outside information the model will use to answer
#   {input}   - the question being asked
prompt_template = """
Answer the following question based only on the provided context. 
Think step by step before providing a detailed answer. 
Answer as if you are explaining the concept to a 5-year-old. 
<context>
{context}
</context>
Question: {input}"""

prompt = ChatPromptTemplate.from_template(prompt_template)

`create_stuff_documents_chain` takes a list of documents, formats them all into a single prompt, and then passes that prompt to an LLM. It passes ALL documents, so you should make sure the combined prompt fits within the context window of the LLM you are using.

https://python.langchain.com/docs/modules/chains/


In [15]:
## Chain Introduction
## Create Stuff Document Chain

from langchain.chains.combine_documents import create_stuff_documents_chain

# Combine the LLM and the prompt into a chain that fills the prompt's
# {context} slot with the documents it is given.
document_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

In [16]:
"""
Retrievers: A retriever is an interface that returns documents given
 an unstructured query. It is more general than a vector store.
 A retriever does not need to be able to store documents, only to 
 return (or retrieve) them. Vector stores can be used as the backbone
 of a retriever, but there are other types of retrievers as well. 
 https://python.langchain.com/docs/modules/data_connection/retrievers/   
"""

# Expose the FAISS store through the retriever interface and display it.
retriever = db.as_retriever()
retriever

VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x0000020C86357490>)

In [18]:
"""
Retrieval chain: This chain takes in a user inquiry, which is then
passed to the retriever to fetch relevant documents. Those documents
(and the original inputs) are then passed to an LLM to generate a response.
https://python.langchain.com/docs/modules/chains/
"""
from langchain.chains import create_retrieval_chain
# Join the retriever with the document chain: the retriever fetches the
# documents, the document chain turns them plus the question into an answer.
retrieval_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=document_chain,
)

In [19]:
# Run the full retrieval chain; the question goes in under the "input" key,
# matching the {input} placeholder of the prompt.
# NOTE(review): this query is about transformer attention, while the indexed
# PDF is "alice_in_wonderland.pdf" -- it looks like a leftover from another
# notebook, so the answer cannot be grounded in the context. Confirm the
# intended question.
chain_input = {"input": "Scaled Dot-Product Attention"}
response = retrieval_chain.invoke(chain_input)

In [21]:
# Show only the generated answer text from the chain's response.
response["answer"]

'Sure, here is the answer to the question:\n\nImagine all the kids you know the same age as you. If you were the same age as them, would you be the same person they are now? What if you were someone different?\n\nAlice was sure she was not like the other kids, so she thought she must have been changed for Mabel. She tried to figure out how she could be different like Mabel.\n\nShe started by looking at things that were familiar to her, like London and Paris. Then she looked at the geography lesson the Rabbit was giving. She was confused, but she was still curious.\n\nWhen the Rabbit took out its watch, Alice realized she had never seen a rabbit with one of these things before. She was so curious!\n\nThen she ran outside and found a rabbit. She was so happy and relieved to finally get dry again.\n\nAlice learned that the Multiplication T able is not just about numbers, but also about things that are similar to each other.'