In [None]:
# Cell 1: Installations (Updated)
# Cell 1: Final Installation Command
!pip install -q langchain langchain-groq langchain-huggingface langchain-community chromadb pymupdf ipywidgets \
requests==2.32.4 \
opentelemetry-api==1.37.0 \
opentelemetry-sdk==1.37.0

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/64.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.8/64.8 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/2.5 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m2.5/2.5 MB[0m [31m158.2 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m69.1 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
# Cell 2: Imports and API Key
import os
import getpass
from google.colab import files
import ipywidgets as widgets
from IPython.display import display

# Import the core logic components
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_groq import ChatGroq
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.prompts import PromptTemplate
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

# Prompt for the API key without echoing it, and store it only in the
# environment variable that ChatGroq reads.
os.environ["GROQ_API_KEY"] = getpass.getpass("Enter Your Groq API Key: ").strip()
if not os.environ["GROQ_API_KEY"]:
    # A blank entry would otherwise be reported as success and only fail later,
    # on the first LLM call. Remove the empty value and stop here instead.
    del os.environ["GROQ_API_KEY"]
    raise ValueError("No Groq API key was entered. Re-run this cell and paste your key.")
print("API Key set successfully!")

Enter Your Groq API Key: ··········
API Key set successfully!


In [None]:
# Cell 3: The RAG System Class

class RAGSystem:
    """Retrieval-augmented helper that indexes one PDF and queries it with an LLM.

    Workflow: call ``process_pdf`` once to build the vector store, then call
    ``get_structured_summary`` and/or ``answer_question``. Both query methods
    return the string ``"Error: No PDF has been processed yet."`` when no PDF
    has been indexed.
    """

    # Message returned by the query methods when no vector store exists yet.
    _NO_PDF_ERROR = "Error: No PDF has been processed yet."

    def __init__(
        self,
        model: str = "groq/compound",
        embedding_model: str = "all-MiniLM-L6-v2",
        chunk_size: int = 1000,
        chunk_overlap: int = 200,
    ):
        """Create the LLM client, the embedding model and the text splitter.

        Args:
            model: Groq chat model name passed to ``ChatGroq``.
            embedding_model: HuggingFace model name used to embed chunks.
            chunk_size: Maximum size of each text chunk handed to the splitter.
            chunk_overlap: Overlap between consecutive chunks.
        """
        # temperature=0 keeps the model's answers as deterministic as possible.
        self.llm = ChatGroq(model=model, temperature=0)
        self.embeddings = HuggingFaceEmbeddings(model_name=embedding_model)
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size, chunk_overlap=chunk_overlap
        )
        # Populated by process_pdf(); None means "no PDF indexed yet".
        self.vector_store = None
        print(" RAGSystem initialized with Groq and HuggingFace Embeddings.")

    @staticmethod
    def _parse_topics(topic_list_str: str):
        """Split a comma-separated topic string into clean, unique topic names.

        Whitespace is stripped, empty entries (e.g. from a trailing comma) are
        dropped, and duplicates are removed while keeping first-seen order.
        """
        stripped = (raw.strip() for raw in topic_list_str.split(','))
        return list(dict.fromkeys(topic for topic in stripped if topic))

    def process_pdf(self, pdf_path: str):
        """Loads, splits, and creates a vector store from the PDF.

        Args:
            pdf_path: Path of the PDF file to index.

        Raises:
            ValueError: If no text chunks could be produced from the PDF.
        """
        import uuid  # local import: only needed to name the collection

        print(f" Processing PDF: {pdf_path}...")
        documents = PyMuPDFLoader(pdf_path).load()
        chunks = self.text_splitter.split_documents(documents)
        if not chunks:
            # Fail with a clear message instead of an opaque vector-store error.
            raise ValueError(
                f"No text could be extracted from '{pdf_path}'; "
                "the PDF may be empty or contain only images."
            )
        # A unique collection per call keeps chunks from previously processed
        # PDFs out of this index: the in-memory Chroma client may be shared
        # within one kernel session, so the default collection would accumulate.
        self.vector_store = Chroma.from_documents(
            documents=chunks,
            embedding=self.embeddings,
            collection_name=f"pdf-{uuid.uuid4().hex}",
        )
        print(" PDF processed and vector store created successfully!")

    def get_structured_summary(self):
        """Identifies topics and provides a summary for each.

        Returns:
            ``{"topics": [...], "summaries": {topic: summary}}`` on success, or
            the error string ``"Error: No PDF has been processed yet."`` when
            ``process_pdf`` has not been called.
        """
        if not self.vector_store:
            return self._NO_PDF_ERROR

        # Step 1: ask the LLM for a topic list based on a sample of 15 chunks
        # retrieved with a generic query.
        topic_prompt = PromptTemplate.from_template(
            "Based on the provided text, identify and list the main topics or chapters. List them as a comma-separated list.\n\nText:\n{context}"
        )
        overview_retriever = self.vector_store.as_retriever(search_kwargs={'k': 15})
        sample_chunks = overview_retriever.invoke("all content")
        context_text = "\n\n---\n\n".join(doc.page_content for doc in sample_chunks)
        topic_list_str = (topic_prompt | self.llm).invoke({"context": context_text}).content
        topics = self._parse_topics(topic_list_str)

        # Step 2: build the per-topic summarisation chain once; it does not
        # depend on the topic, so there is no need to recreate it in the loop.
        summary_prompt = PromptTemplate.from_template(
            "Provide a detailed summary of '{topic}' using the provided text. Cover all key points.\n\nContext:\n{context}"
        )
        topic_retriever = self.vector_store.as_retriever(search_kwargs={'k': 5})
        document_chain = create_stuff_documents_chain(self.llm, summary_prompt)
        retrieval_chain = create_retrieval_chain(topic_retriever, document_chain)

        summaries = {}
        for topic in topics:
            # "input" drives retrieval; "topic" fills the prompt placeholder.
            response = retrieval_chain.invoke({"input": topic, "topic": topic})
            summaries[topic] = response['answer']

        return {"topics": topics, "summaries": summaries}

    def answer_question(self, question: str):
        """Answers a question based on the textbook's exact methodology.

        Args:
            question: The user's question.

        Returns:
            The model's answer as a string, or an ``"Error: ..."`` string when
            no PDF has been processed or the question is blank.
        """
        if not self.vector_store:
            return self._NO_PDF_ERROR
        if not question or not question.strip():
            # Avoid spending an LLM call on an empty prompt.
            return "Error: Please enter a question."

        question_prompt = PromptTemplate.from_template("""
            Answer the user's question by following the exact methodology, formulas, and step-by-step reasoning found ONLY in the provided text context. Do not use any external knowledge. If the context does not contain the answer, state that.

            CONTEXT: {context}
            QUESTION: {input}
            ANSWER:
            """
        )
        retriever = self.vector_store.as_retriever()
        document_chain = create_stuff_documents_chain(self.llm, question_prompt)
        retrieval_chain = create_retrieval_chain(retriever, document_chain)
        response = retrieval_chain.invoke({"input": question})
        return response['answer']

In [None]:
# Cell 4: Upload and Process PDF

# Step 1: ask the user for a file through Colab's upload dialog.
print("Please upload your PDF file:")
uploaded = files.upload()

if uploaded:
    # Step 2: take the first uploaded entry's name, build a fresh RAG system
    # and index that file with it.
    pdf_filename = next(iter(uploaded))
    rag_system = RAGSystem()
    rag_system.process_pdf(pdf_filename)
else:
    # Nothing came back from the dialog, so there is nothing to index.
    print("\nNo file uploaded. Please run the cell again to upload a file.")

Please upload your PDF file:


Saving Unit 1.pdf to Unit 1 (6).pdf
 RAGSystem initialized with Groq and HuggingFace Embeddings.
 Processing PDF: Unit 1 (6).pdf...
 PDF processed and vector store created successfully!


In [None]:
# Cell 5: Interactive UI

# --- Create UI Widgets ---
# Summary section: a trigger button and the output area for its results.
summarize_button = widgets.Button(
    description="Generate Summary",
    button_style='success',
)
summary_output = widgets.Output()

# Question section: free-text input, a trigger button and an output area.
question_input = widgets.Textarea(
    placeholder='Enter your question from the textbook here...',
)
ask_button = widgets.Button(
    description="Ask Question",
    button_style='primary',
)
answer_output = widgets.Output()

# --- Define Button Click Logic ---
def on_summarize_clicked(b):
    """Click handler for the "Generate Summary" button.

    Runs ``rag_system.get_structured_summary()`` and renders the topic list and
    per-topic summaries inside ``summary_output``. The button argument ``b`` is
    not used.
    """
    with summary_output:
        summary_output.clear_output()
        print(" Generating summary...")
        summary_data = rag_system.get_structured_summary()
        summary_output.clear_output()

        # get_structured_summary() reports "no PDF processed" as a plain string
        # instead of a dict; show it rather than indexing into it, which would
        # raise a TypeError.
        if isinstance(summary_data, str):
            print(summary_data)
            return

        print("--- Topics ---")
        for topic in summary_data['topics']:
            print(f"- {topic}")
        print("\n--- Summaries ---")
        for topic, summary in summary_data['summaries'].items():
            print(f"\n {topic.upper()}")
            print(summary)

def on_ask_clicked(b):
    """Click handler for the "Ask Question" button.

    Passes the current text of ``question_input`` to
    ``rag_system.answer_question`` and prints the reply inside
    ``answer_output``. The button argument ``b`` is not used.
    """
    with answer_output:
        # Show a placeholder while the answer is being produced.
        answer_output.clear_output()
        print(" Thinking...")
        reply = rag_system.answer_question(question_input.value)
        # Swap the placeholder for the final answer.
        answer_output.clear_output()
        print(" Answer:", reply, sep="\n")

# --- Link Logic to Buttons ---
# Register each click handler on its button.
ask_button.on_click(on_ask_clicked)
summarize_button.on_click(on_summarize_clicked)

# --- Display the UI ---
# Section 1: summary button followed by its output area.
print("--- Step 1: Get a structured summary of your document ---")
display(summarize_button)
display(summary_output)

# Section 2: question box, ask button, then the answer area.
print("\n--- Step 2: Ask a specific question ---")
display(question_input)
display(ask_button)
display(answer_output)

--- Step 1: Get a structured summary of your document ---


Button(button_style='success', description='Generate Summary', style=ButtonStyle())

Output()


--- Step 2: Ask a specific question ---


Textarea(value='', placeholder='Enter your question from the textbook here...')

Button(button_style='primary', description='Ask Question', style=ButtonStyle())

Output()