In [3]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.vectorstores.pgvector import PGVector
from langchain_community.chat_models import ChatOllama
from langchain.chains import RetrievalQA
import os
import glob

In [2]:
# Sentence-embedding model used both to index documents and to embed queries.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

In [None]:
# Recursively pick up every PDF under ./Bookshelf and parse each file with PyPDFLoader.
loader = DirectoryLoader("./Bookshelf", glob="**/*.pdf", loader_cls=PyPDFLoader)
documents = loader.load()

In [None]:
# Populating the database
# Embeds every loaded document and stores it in the 'knowledge_base' collection.
# The connection string can be overridden with the PGVECTOR_CONNECTION_STRING
# environment variable so real credentials do not have to live in the notebook;
# the fallback is the original development value.
# NOTE(review): re-running this cell presumably inserts the same documents
# again — confirm before re-executing against a populated database.
PGVector.from_documents(
    documents,
    embedding=embeddings,
    connection_string=os.environ.get(
        "PGVECTOR_CONNECTION_STRING",
        "postgresql+psycopg2://pguser:pgpass@pgvector:5432/pgdb",
    ),
    collection_name='knowledge_base',
)

In [None]:
# Handle on the existing 'knowledge_base' collection, used for querying.
# The connection string is kept on a single line: a quoted string literal
# cannot span lines, and a stray newline would also corrupt the DSN.
# PGVECTOR_CONNECTION_STRING overrides the development default when set.
vectorstore = PGVector(
    collection_name='knowledge_base',
    connection_string=os.environ.get(
        "PGVECTOR_CONNECTION_STRING",
        "postgresql+psycopg2://pguser:pgpass@pgvector:5432/pgdb",
    ),
    embedding_function=embeddings,
)

In [None]:
# Chat model that generates the final answers (the "deepseek-r1" Ollama model).
llm = ChatOllama(
    model="deepseek-r1",
)

In [None]:
# Retriever over the PGVector store, returning the 25 best-matching documents per query.
# Uses `vectorstore` (the name assigned when the store is created); `vectordb`
# is not defined anywhere in this notebook.
retriever = vectorstore.as_retriever(search_kwargs={"k": 25})

In [None]:
# Retrieval-augmented QA chain: the retriever supplies context documents and
# the LLM answers from them. RetrievalQA requires a retriever, so it must be
# passed explicitly alongside the model.
rag_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

In [None]:
def ask_question(query: str) -> str:
    """Answer ``query`` using the retrieval-augmented ``rag_chain``.

    Args:
        query: The question to ask, in natural language.

    Returns:
        The answer text produced by the chain (the ``"result"`` entry of
        its output).
    """
    # ``Chain.run`` is deprecated; ``invoke`` takes the input under
    # RetrievalQA's "query" key and returns a dict holding the answer
    # under "result".
    return rag_chain.invoke({"query": query})["result"]