In [None]:
# Mount Google Drive at /content/drive so files stored there (e.g., the PDF)
# can be opened by ordinary file path.
# NOTE: `google.colab` is only importable inside a Google Colab runtime.
from google.colab import drive
drive.mount('/content/drive')


In [None]:
# Install required libraries
!pip install langchain sentence-transformers faiss-cpu pdfplumber langchain-community langchain-ollama


In [None]:
# ---------------------- Imports ----------------------
import pdfplumber  # For PDF text extraction
from langchain_community.vectorstores import FAISS  # FAISS for vector database
from langchain.embeddings import HuggingFaceEmbeddings  # HuggingFace embeddings for sentence vectors
from langchain.chains import ConversationalRetrievalChain  # LangChain RAG Chain with memory
from langchain_ollama.llms import OllamaLLM  # Ollama LLM integration (e.g., mistral)
from langchain_core.prompts import ChatPromptTemplate  # Custom prompt templates
from langchain.memory import ConversationBufferMemory  # Conversation memory for chat history
from langchain.docstore.document import Document  # LangChain document object

import logging
# Raise the "pdfminer" logger threshold to ERROR so that lower-severity records
# (warnings, info) logged under that name are not shown in the cell output.
logging.getLogger("pdfminer").setLevel(logging.ERROR)  # Suppress PDFMiner warnings

In [None]:
# ---------------------- Step 1: Extract text from PDF ----------------------
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page of the PDF at ``pdf_path``.

    Pages are visited in order. Each page whose ``extract_text()`` result is
    non-empty contributes that text followed by a single newline; pages that
    yield nothing are skipped. The result is an empty string when no page
    yields text.
    """
    with pdfplumber.open(pdf_path) as document:
        # Extract while the file is still open; joining happens afterwards.
        page_texts = [page.extract_text() for page in document.pages]
    return "".join(f"{content}\n" for content in page_texts if content)


In [None]:
# ---------------------- Step 2: Split text into paragraph-based chunks ----------------------
def _pack_lines(paragraph, max_chars):
    """Greedily pack a paragraph's lines into chunks of at most ``max_chars`` characters."""
    if max_chars is None or len(paragraph) <= max_chars:
        return [paragraph]

    chunks = []
    current = ""
    for line in paragraph.split("\n"):
        line = line.strip()
        if not line:
            continue
        # A single over-long line cannot be packed whole; cut it into fixed-size windows.
        for start in range(0, len(line), max_chars):
            piece = line[start:start + max_chars]
            candidate = f"{current}\n{piece}" if current else piece
            if len(candidate) <= max_chars:
                current = candidate
            else:
                chunks.append(current)
                current = piece
    if current:
        chunks.append(current)
    return chunks


def split_into_documents(text, max_chars=1000):
    """
    Splits the raw text into a list of LangChain Document objects.

    The text is first split into paragraphs on blank (or whitespace-only)
    lines. Text extracted from PDFs often contains no blank lines at all, in
    which case a pure paragraph split would return the whole file as a single
    Document; to keep chunks retrievable, any paragraph longer than
    ``max_chars`` is further divided on line boundaries into chunks of at most
    ``max_chars`` characters.

    Args:
        text: Raw text to split. ``None`` or an empty string yields ``[]``.
        max_chars: Upper bound on the length of each chunk, in characters.
            Pass ``None`` to disable the cap and split on paragraphs only.

    Returns:
        A list of Document objects, one per chunk, in original order.

    Raises:
        ValueError: If ``max_chars`` is not ``None`` and is less than 1.
    """
    import re  # local import: the block must not rely on a file-level import being present

    if max_chars is not None and max_chars < 1:
        raise ValueError(f"max_chars must be a positive integer or None, got {max_chars!r}")

    docs = []
    # `\n\s*\n` also treats lines containing only spaces/tabs as paragraph breaks.
    for paragraph in re.split(r"\n\s*\n", text or ""):
        paragraph = paragraph.strip()
        if not paragraph:
            continue
        docs.extend(Document(page_content=chunk) for chunk in _pack_lines(paragraph, max_chars))
    return docs

In [None]:
# ---------------------- Step 3: Build FAISS vector index ----------------------
def build_faiss_index(docs, model_name="all-MiniLM-L6-v2"):
    """
    Builds a FAISS vector store using HuggingFace sentence embeddings.

    Args:
        docs: Iterable of LangChain Document objects to embed and index.
        model_name: Name of the sentence-embedding model passed to
            ``HuggingFaceEmbeddings``. Defaults to the model this notebook
            originally hard-coded.

    Returns:
        The FAISS vector store built from ``docs``.

    Raises:
        ValueError: If ``docs`` is empty. Without this check the failure
            surfaces later as an unhelpful error from inside the index
            construction.
    """
    docs = list(docs)  # accept any iterable and allow the emptiness check below
    if not docs:
        raise ValueError(
            "Cannot build a FAISS index from an empty document list; "
            "check that text was actually extracted from the PDF."
        )
    embedder = HuggingFaceEmbeddings(model_name=model_name)
    return FAISS.from_documents(docs, embedding=embedder)

In [None]:
# ---------------------- Step 4: Create the RAG chain with custom prompt ----------------------
def create_conversational_rag_chain(vectorstore, model_name="mistral", k=3, base_url=None):
    """
    Creates a Conversational Retrieval-Augmented Generation (RAG) chain
    backed by an Ollama-served model and a custom prompt template.

    Args:
        vectorstore: Vector store exposing ``as_retriever`` (e.g. the FAISS
            store built earlier in this notebook).
        model_name: Name of the Ollama model to use. Defaults to "mistral".
        k: Number of most-similar chunks retrieved per question. Defaults to 3.
        base_url: Optional URL of the Ollama server. When ``None`` the
            ``OllamaLLM`` default is used; set it when the server does not run
            on the same machine as the notebook kernel.

    Returns:
        A ``ConversationalRetrievalChain`` with its own conversation memory
        stored under the "chat_history" key.

    Raises:
        ValueError: If ``k`` is less than 1.
    """
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k!r}")

    retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": k})

    # Only pass base_url when the caller supplied one, so the library default stays in effect otherwise.
    llm_kwargs = {"model": model_name}
    if base_url is not None:
        llm_kwargs["base_url"] = base_url
    model = OllamaLLM(**llm_kwargs)

    # Initialize memory to maintain chat history; the memory key must match
    # the {chat_history} placeholder used in the prompt below.
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

    # Define custom prompt template with context, history, and question
    prompt_template = ChatPromptTemplate.from_template(
        """You are a helpful AI assistant expert in Docker, Dockerfiles, and dockercompose.

Here is the conversation history:
{chat_history}

Relevant context from documents:
{context}

Now, answer the user's question as clearly and practically as possible:
Question: {question}
Answer:"""
    )

    # Create the final conversational chain; the custom prompt is handed to the
    # document-combining step via combine_docs_chain_kwargs.
    rag_chain = ConversationalRetrievalChain.from_llm(
        llm=model,
        retriever=retriever,
        memory=memory,
        combine_docs_chain_kwargs={"prompt": prompt_template},
    )
    return rag_chain

In [None]:
# ---------------------- Step 5: Prepare PDF and build the chain ----------------------
# Path to your PDF in Google Drive
pdf_path = "/content/drive/MyDrive/LearningDocker.pdf"

# Pipeline: extract -> split -> embed -> retrieve -> generate
text = extract_text_from_pdf(pdf_path)
if not text.strip():
    # A PDF with no text layer (e.g. scanned pages) yields nothing to index;
    # stop here with a clear message instead of failing later while building the index.
    raise ValueError(
        f"No extractable text found in {pdf_path!r}. "
        "The PDF may consist of scanned images and need OCR first."
    )
docs = split_into_documents(text)
vectorstore = build_faiss_index(docs)
rag_chain = create_conversational_rag_chain(vectorstore)

In [None]:
# ---------------------- Step 6: Start interactive chat loop ----------------------
print("✅ RAG Agent is ready. Type 'exit' to quit.\n")

while True:
    try:
        # Strip so that inputs like "exit " or " quit" are still recognised.
        question = input("💬 Your question: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Closing the input box or interrupting the cell ends the chat
        # cleanly instead of leaving a traceback in the output.
        print("\n👋 Goodbye!")
        break
    if not question:
        # Nothing was typed; ask again rather than sending an empty query to the chain.
        continue
    if question.lower() in ("exit", "quit"):
        print("👋 Goodbye!")
        break
    # `Chain.run` is deprecated; `invoke` returns a dict whose "answer" key holds the reply.
    # Chat history is supplied by the chain's own memory, so only the question is passed.
    response = rag_chain.invoke({"question": question})["answer"]
    print("🔎 Answer:\n", response, "\n")