In [62]:
from langchain_huggingface import HuggingFaceEmbeddings

# Embedding model object; it is handed to the Qdrant vector store when that
# store is built in a later cell.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",
)

In [80]:
from langchain_community.document_loaders import PyMuPDFLoader

# PDF to ingest. The path is relative, so it is resolved against the
# kernel's current working directory.
file_path = "llm.pdf"

# Read the PDF through the PyMuPDF-backed loader. `docs` is the list the
# loader returns (presumably one Document per page — confirm against the
# loader's default mode).
loader = PyMuPDFLoader(file_path)
docs = loader.load()

In [100]:
import os
from dotenv import load_dotenv

# Load variables from a .env file (if one is found) into the process
# environment so the os.getenv() lookups below can see them.
load_dotenv()

from langchain_qdrant import QdrantVectorStore

url = os.getenv("QDRANT_URL")
api_key = os.getenv("QDRANT_API_KEY")

# os.getenv() returns None for an unset variable. Passing url=None onward
# would not target the intended Qdrant server and would surface as a much
# less obvious client/connection error, so stop here with a clear message.
if not url:
    raise RuntimeError(
        "QDRANT_URL is not set; define it in the environment or in a .env file."
    )
# api_key is deliberately not validated: it stays None when unset
# (presumably acceptable for an unauthenticated Qdrant instance — confirm).

# Embed `docs` with `embeddings` and store them in the "pdf-store"
# collection, keeping a handle to that collection in `vector_store`.
# NOTE(review): this indexes the unsplit `docs`; a later cell also adds the
# chunked version of the same documents to this store, so both end up in the
# collection — confirm that is intended.
vector_store = QdrantVectorStore.from_documents(
    docs,
    embeddings,
    url=url,
    prefer_grpc=True,
    api_key=api_key,
    collection_name="pdf-store",
)

In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from uuid import uuid4

# Splitter configured with chunk_size=1000 and chunk_overlap=200 (measured
# by the splitter's length function — characters by default, presumably).
text_in_split = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)

# Split the loaded documents into chunks.
text_chunks = text_in_split.split_documents(docs)

# One freshly generated random UUID string per chunk, passed as that chunk's
# id when the chunks are added to the vector store.
# NOTE(review): `vector_store` was already populated with the unsplit `docs`
# when it was created; these chunks are added on top of those entries.
uuids = [str(uuid4()) for _ in text_chunks]
vector_store.add_documents(documents=text_chunks, ids=uuids)

In [126]:
# Question to answer, and the three stored entries the vector store ranks
# as most similar to it.
query = "what is LLM?"
result = vector_store.similarity_search(query, k=3)

# Join the text of the retrieved entries, one per line, into a single
# context string for the prompt.
context = "\n".join(hit.page_content for hit in result)

In [None]:
from langchain_core.prompts import PromptTemplate
from langchain_google_genai import GoogleGenerativeAI

# Gemini model used to generate the final answer.
llm = GoogleGenerativeAI(model="gemini-2.0-flash")

# Prompt with two placeholders: {context} (retrieved text) and {query}
# (the user's question).
# Fix: the original template contained "\n\n\Question". "\Q" is not a valid
# escape sequence, so a literal backslash was sent to the model in front of
# "Question" and newer Python versions warn about the literal.
prompt_template = PromptTemplate(
    template=(
        "Based on the following context, answer the question:\n\n"
        " Context: {context}\n\n"
        "Question: {query}\n\n"
        "Answer:"
    ),
    input_variables=["context", "query"],
)

# Fill the placeholders with the retrieved context and the question, then
# send the resulting prompt to the model.
final_prompt = prompt_template.format_prompt(context=context, query=query)
response = llm.invoke(final_prompt)

print("shanGPT 0.1:", response)