In [16]:
from langchain_community.llms import Ollama
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.documents import Document
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough   


In [17]:
# LOAD PDF

loader = PyPDFLoader(r"c:\Users\jyoti\Downloads\Grandma's Bag of Stories by Sudha Murthy.pdf")
docs = loader.load()


In [18]:
#Split into chunks

splitter = RecursiveCharacterTextSplitter(
    chunk_size=800,
    chunk_overlap=100
)
chunks = splitter.split_documents(docs)

In [19]:
#Embeddings + FAISS vector store

emb = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
db = FAISS.from_documents(chunks, emb)
retriever = db.as_retriever()

In [20]:
# LLM (Ollama)

llm = Ollama(model="qwen2:0.5b")

In [23]:
def combine_context(inputs):
    return f"Use the following context to answer the question.\n\nContext:\n{inputs['context']}\n\nQuestion: {inputs['question']}\nAnswer:"


In [28]:
def format_docs(docs):
    return "\n\n".join([d.page_content for d in docs])

rag_chain = (
    {"context": retriever | format_docs, 
     "question": RunnablePassthrough()}
    | llm
    | StrOutputParser()
)


In [31]:
print(rag_chain.input_schema)


<class 'langchain_core.runnables.base.RunnableParallel<context,question>Input'>


In [34]:
print(rag_chain.input_schema.schema())


{'title': 'RunnableParallel<context,question>Input', 'type': 'string'}


In [38]:
print(rag_chain)


first={
  context: VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000002350D6AE390>, search_kwargs={})
           | RunnableLambda(format_docs),
  question: RunnablePassthrough()
} middle=[Ollama(model='qwen2:0.5b')] last=StrOutputParser()


In [39]:
response = rag_chain.invoke("Summarize this PDF")
print(response)


ValueError: Invalid input type <class 'dict'>. Must be a PromptValue, str, or list of BaseMessages.