In [None]:
# Install required libraries for RAG and PDF processing
!pip install langchain qdrant_client langchain_huggingface langchain_community pypdf langchain_community langchain-qdrant langchain_openai
!pip install -U langchain-qdrant langchain-huggingface

In [None]:
# ----------------- Imports -----------------
import os
from operator import itemgetter

from langchain_core import embeddings
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI
from langchain_qdrant import QdrantVectorStore
from pypdf import PdfReader  # For reading PDF documents
from qdrant_client import QdrantClient

In [None]:
# ----------------- PDF Reading -----------------
pdf_path = ""  # Location of the PDF to ingest
reader = PdfReader(pdf_path)  # Open the document
page_count = len(reader.pages)
print(page_count)  # Show how many pages were found

In [None]:
# Extract text from all pages in a single pass.
# - `or ""` keeps the join safe if a page yields no text.
# - Pages are joined with a newline so the last word of one page does not fuse
#   with the first word of the next before the text is split into chunks.
text = "\n".join(page.extract_text() or "" for page in reader.pages)

In [None]:
# ----------------- Text Splitting -----------------
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Splitter settings: chunk size of 500 with an overlap of 15 between chunks
splitter_options = {"chunk_size": 500, "chunk_overlap": 15}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

chunks = text_splitter.split_text(text)
chunk_count = len(chunks)
print("len of the chunks:", chunk_count)  # How many chunks were produced

In [None]:
# ----------------- Embedding Model -----------------
from langchain_huggingface import HuggingFaceEmbeddings

# Sentence-embedding model used to vectorise the chunks
embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
embed_model = HuggingFaceEmbeddings(model_name=embedding_model_name)

# One metadata dict per chunk.
# NOTE(review): "page" holds the chunk's 1-based position in `chunks`, not a
# PDF page number — confirm that this is what downstream consumers expect.
metadatas = [
    {"source": "Azeem Tech", "page": position}
    for position in range(1, len(chunks) + 1)
]

In [None]:
# ----------------- Qdrant Vector Store -----------------
# Connection settings are read from the environment so the API key does not
# have to be saved inside the notebook. Each value falls back to "" when its
# variable is unset.
qdrant_url = os.environ.get("QDRANT_URL", "")               # Qdrant URL
qdrant_key = os.environ.get("QDRANT_API_KEY", "")           # Qdrant API Key
collection_name = os.environ.get("QDRANT_COLLECTION", "")   # Collection name in Qdrant

In [None]:
# Embed every chunk and upload it, with its metadata, to the Qdrant collection
ingest_options = {
    "texts": chunks,
    "embedding": embed_model,
    "metadatas": metadatas,
    "url": qdrant_url,
    "api_key": qdrant_key,
    "collection_name": collection_name,
}
qdrant = QdrantVectorStore.from_texts(**ingest_options)

# Retrieve Data From VectorStore


In [None]:
# ----------------- Qdrant Client -----------------
# Reuse the `qdrant_url` / `qdrant_key` settings defined earlier in the notebook
# rather than a second pair of literals that must be kept in sync by hand.
client = QdrantClient(
    url=qdrant_url,
    api_key=qdrant_key,
)
print(client.get_collections())  # Verify collections in Qdrant

In [None]:
# Re-initialize the vector store for retrieval on top of the existing client.
# The collection is addressed through the shared `collection_name` setting so
# ingestion and retrieval cannot drift apart.
qdrant = QdrantVectorStore(
    client=client,
    collection_name=collection_name,
    embedding=embed_model,
)


In [None]:
# ----------------- Helper Function -----------------
def format_docs(docs):
    """Merge the text of the retrieved documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The ``page_content`` values, in order, separated by a blank line.
    """
    passages = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(passages)

In [None]:
# ----------------- Prompt Template -----------------
# The template is spelled out line by line; the two placeholders, {context}
# and {question}, are filled in when the chain runs.
prompt_str = (
    "\n"
    "Answer the user in two lines from the following context.\n"
    "context:\n"
    "{context}\n"
    "\n"
    "question:\n"
    "{question}\n"
)
_prompt = ChatPromptTemplate.from_template(prompt_str)


In [None]:
# ----------------- Retriever Setup -----------------
num_chunks = 2  # Number of similar chunks to retrieve
retriever = qdrant.as_retriever(
    search_type="similarity",
    # The keyword must be spelled `search_kwargs`; under the earlier
    # misspelling the requested `k` never reached the retriever.
    search_kwargs={"k": num_chunks},
)

In [None]:
# ----------------- LLM Setup -----------------
# The API key is taken from the OPENROUTER_API_KEY environment variable so it
# does not need to be saved in the notebook; the "sk-xxxx" placeholder is used
# only when the variable is unset.
chat_llm = ChatOpenAI(
    model="tngtech/deepseek-r1t2-chimera:free",
    openai_api_key=os.environ.get("OPENROUTER_API_KEY", "sk-xxxx"),
    openai_api_base="https://openrouter.ai/api/v1",
    temperature=0,  # Keep sampling randomness to a minimum
)

In [None]:
# ----------------- RAG Chain Setup -----------------
# Pulls the "question" entry out of the input mapping
query_fetcher = itemgetter("question")

# question -> retrieved documents -> one formatted context string
context_builder = query_fetcher | retriever | format_docs

# Inputs handed to the prompt template
setup = {
    "question": query_fetcher,
    "context": context_builder,
}

# Full pipeline: gather inputs, render the prompt, call the model
_chain = setup | _prompt | chat_llm

In [None]:
# ----------------- Query & Response -----------------
query = "What is RAG?"  # Example question
response = _chain.invoke({"question": query})

# The chain ends at the chat model, so `response` is a message object; print
# its `content` to show the answer text rather than the whole object repr.
print(response.content)