In [1]:
!pip install langchain langchain-google-genai langgraph PyPDF2 python-dotenv
!pip install langchain-community



In [16]:
import os
import PyPDF2
import json
import io
from typing import List, Dict, Any, Optional
from google.colab import userdata, files
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.prompts import PromptTemplate
from langchain.schema import HumanMessage
from langgraph.graph import StateGraph, END
from typing_extensions import TypedDict

# Read the Gemini API key from Colab secrets (key icon in the left sidebar,
# secret name 'GEMINI_API_KEY') instead of embedding it in the notebook.
# NOTE(review): a literal key used to be committed on this line; treat that key
# as compromised and revoke/rotate it.
try:
    os.environ["GOOGLE_API_KEY"] = userdata.get("GEMINI_API_KEY")
    print("✅ API key loaded successfully")
except Exception:
    # Reached when the secret cannot be read (e.g. it is missing or notebook
    # access to it has not been granted) or when its value is not a string.
    print("❌ Please add your Gemini API key to Colab secrets with name 'GEMINI_API_KEY'")
    print("Go to the key icon on the left sidebar and add your key")

# Initialize Gemini model
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    temperature=0.3,
    max_tokens=3000
)


✅ API key loaded successfully


In [17]:
class PDFQuizState(TypedDict):
    """State schema for PDF quiz generation workflow.

    A dict of this shape is handed to each workflow node; every node updates
    the fields it owns and returns the dict.
    """
    # Text extracted from the PDF; read by the summarization node.
    pdf_content: str
    # Summary text written by summarize_pdf_content ("" in the initial state).
    summary: str
    # Question dicts taken from the "questions" list of the model's JSON reply.
    quiz_questions: List[Dict[str, Any]]
    # Progress marker: "initialized", "content_loaded", "summarization_complete",
    # "quiz_complete" or "error".
    current_step: str
    # None in the initial state; set to a description when a node fails.
    error_message: Optional[str]
    # Name of the source PDF, carried along for reporting.
    file_name: str

In [18]:
class PDFProcessor:
    """Handles PDF file processing and content extraction."""

    @staticmethod
    def upload_pdf() -> tuple[str, str]:
        """Prompt for a file upload in Colab and validate that it is a PDF.

        Returns:
            A ``(filename, path)`` pair. Both elements are the uploaded file's
            name; the caller opens that name directly, so this presumably
            relies on Colab saving uploads into the working directory
            (TODO confirm).

        Raises:
            ValueError: If nothing was uploaded or the first uploaded file does
                not have a ``.pdf`` extension.
        """
        print("📁 Please upload your PDF file:")
        uploaded = files.upload()

        if not uploaded:
            raise ValueError("No file uploaded")

        # Only the first uploaded file is considered.
        filename = next(iter(uploaded))
        if not filename.lower().endswith('.pdf'):
            raise ValueError("Please upload a PDF file")

        print(f"✅ Uploaded: {filename}")
        return filename, filename

    @staticmethod
    def extract_pdf_content(pdf_path: str) -> str:
        """Extract the text of every non-empty page of a PDF file.

        Each page's text is preceded by a ``--- Page N ---`` marker line.

        Args:
            pdf_path: Path of the PDF file to read.

        Returns:
            The concatenated page texts with surrounding whitespace stripped.

        Raises:
            Exception: If the file cannot be read or contains no extractable
                text; the original error is attached as ``__cause__``.
        """
        try:
            with open(pdf_path, "rb") as file:
                reader = PyPDF2.PdfReader(file)

                print(f"📖 Processing {len(reader.pages)} pages...")

                sections = []
                for page_num, page in enumerate(reader.pages, 1):
                    # Defensive: treat a None result (page without a text
                    # layer) like an empty page instead of crashing on .strip().
                    page_text = page.extract_text() or ""
                    if page_text.strip():  # Only add non-empty pages
                        sections.append(f"\n--- Page {page_num} ---\n{page_text}\n")

                content = "".join(sections)
                if not content.strip():
                    raise ValueError("No text content found in PDF")

                print(f"✅ Extracted {len(content)} characters from PDF")
                return content.strip()

        except Exception as e:
            # Keep the generic Exception type callers already catch, but chain
            # the original error so the root cause stays visible.
            raise Exception(f"Error extracting PDF content: {str(e)}") from e

In [10]:
def load_pdf_content(state: PDFQuizState) -> PDFQuizState:
    """Node that checks the workflow was started with PDF text.

    The text itself is extracted before the workflow runs; this node only
    verifies that ``state["pdf_content"]`` is non-blank.

    Args:
        state: Workflow state; ``pdf_content`` is read.

    Returns:
        The same state dict with ``current_step`` set to ``"content_loaded"``,
        or to ``"error"`` (with ``error_message`` filled in) when the content
        is missing, None or whitespace-only.
    """
    # NOTE(review): a later cell in this notebook defines this function again;
    # on a top-to-bottom run that later definition replaces this one.
    try:
        # Blank content would otherwise be passed to the model as-is.
        if not (state.get("pdf_content") or "").strip():
            raise ValueError("no PDF content was provided")

        state["current_step"] = "content_loaded"
        return state
    except Exception as e:
        state["error_message"] = f"PDF loading error: {str(e)}"
        state["current_step"] = "error"
        return state

def summarize_pdf_content(state: PDFQuizState) -> PDFQuizState:
    """Node to summarize PDF content into key points.

    Sends ``state["pdf_content"]`` (cut to a fixed character budget) to the
    module-level ``llm`` and stores the reply in ``state["summary"]``.

    Args:
        state: Workflow state; ``pdf_content`` and ``error_message`` are read.

    Returns:
        The same state dict. On success ``summary`` is filled and
        ``current_step`` is ``"summarization_complete"``; on failure
        ``error_message`` is set and ``current_step`` is ``"error"``. If an
        earlier node already recorded an error the state is returned untouched.
    """
    # NOTE(review): a later cell in this notebook defines this function again;
    # on a top-to-bottom run that later definition replaces this one.

    # Do not spend a model call (or hide the original error) after a failure.
    if state.get("error_message"):
        return state

    # Character budget for the PDF text placed in the prompt.
    max_prompt_chars = 15000

    summary_prompt = PromptTemplate(
        template="""
        You are an expert educational content summarizer analyzing a PDF document.

        Please analyze the following content from a PDF and create a comprehensive summary:

        PDF Content:
        {content}

        Instructions:
        - Create 6-10 clear, concise bullet points that capture the most important concepts
        - Focus on key facts, definitions, processes, and main ideas
        - Each bullet point should be specific and informative
        - Organize points logically from general to specific concepts
        - Use educational language appropriate for quiz generation
        - Ignore any formatting artifacts or page numbers

        Provide only the bullet-point summary:
        """,
        input_variables=["content"]
    )

    try:
        print("🔄 Generating summary from PDF content...")

        # Truncate content if too long (Gemini has token limits)
        content = state["pdf_content"]
        if len(content) > max_prompt_chars:
            print(f"⚠️  PDF content is long, using first {max_prompt_chars} characters")
            content = content[:max_prompt_chars] + "... [content truncated]"

        prompt_text = summary_prompt.format(content=content)
        response = llm.invoke([HumanMessage(content=prompt_text)])

        summary = response.content
        # An empty reply would only produce a meaningless quiz downstream.
        if not summary or not str(summary).strip():
            raise ValueError("model returned an empty summary")

        state["summary"] = summary
        state["current_step"] = "summarization_complete"
        print("✅ Summary generated successfully")
        return state

    except Exception as e:
        state["error_message"] = f"Summarization error: {str(e)}"
        state["current_step"] = "error"
        return state

def generate_pdf_quiz(state: PDFQuizState) -> PDFQuizState:
    """Node to generate quiz questions from PDF summary.

    Asks the module-level ``llm`` for six multiple-choice questions as JSON,
    parses the reply and stores the question list in ``state["quiz_questions"]``.

    Args:
        state: Workflow state; ``summary`` and ``error_message`` are read.

    Returns:
        The same state dict. On success ``quiz_questions`` is filled and
        ``current_step`` is ``"quiz_complete"``; on failure ``error_message``
        is set and ``current_step`` is ``"error"``. If an earlier node already
        recorded an error the state is returned untouched.
    """
    # NOTE(review): a later cell in this notebook defines this function again;
    # on a top-to-bottom run that later definition replaces this one.

    # Do not spend a model call (or hide the original error) after a failure.
    if state.get("error_message"):
        return state

    quiz_prompt = PromptTemplate(
        template="""
        You are an expert quiz creator. Based on the following summary from a PDF document,
        create exactly 6 high-quality multiple-choice questions.

        Summary from PDF:
        {summary}

        Requirements:
        - Create exactly 6 questions that test understanding of key concepts
        - Each question must have exactly 4 options (a, b, c, d)
        - Only one correct answer per question
        - Make incorrect options plausible but clearly wrong
        - Questions should test comprehension, not just memorization
        - Include a brief explanation for each correct answer

        Format your response as valid JSON with this exact structure:
        {{
            "questions": [
                {{
                    "question": "Clear, specific question text?",
                    "options": {{
                        "a": "First option text",
                        "b": "Second option text",
                        "c": "Third option text",
                        "d": "Fourth option text"
                    }},
                    "correct_answer": "a",
                    "explanation": "Clear explanation of why this answer is correct"
                }}
            ]
        }}

        Generate the 6 quiz questions now:
        """,
        input_variables=["summary"]
    )

    try:
        print("🔄 Generating quiz questions...")

        prompt_text = quiz_prompt.format(summary=state["summary"])
        response = llm.invoke([HumanMessage(content=prompt_text)])

        # Parse JSON response (tolerating code fences / prose around it)
        quiz_data = json.loads(_extract_json_object(response.content))

        questions = quiz_data.get("questions") if isinstance(quiz_data, dict) else None
        if not isinstance(questions, list) or len(questions) == 0:
            raise ValueError("No questions generated in response")

        # Later cells index these keys directly, so reject malformed items here.
        for number, item in enumerate(questions, 1):
            if not isinstance(item, dict):
                raise ValueError(f"Question {number} is not a JSON object")
            missing = [key for key in ("question", "options", "correct_answer") if key not in item]
            if missing:
                raise ValueError(f"Question {number} is missing: {', '.join(missing)}")
            if not isinstance(item["options"], dict):
                raise ValueError(f"Question {number} has malformed options")
            item.setdefault("explanation", "")

        state["quiz_questions"] = questions
        state["current_step"] = "quiz_complete"
        print(f"✅ Generated {len(questions)} quiz questions")
        return state

    except json.JSONDecodeError as e:
        state["error_message"] = f"JSON parsing error: {str(e)}"
        state["current_step"] = "error"
        return state
    except Exception as e:
        state["error_message"] = f"Quiz generation error: {str(e)}"
        state["current_step"] = "error"
        return state


def _extract_json_object(raw_text: str) -> str:
    """Return the outermost ``{...}`` span of *raw_text*.

    This drops markdown fences (with or without a language tag) and any prose
    before or after the JSON object. Text without braces is returned stripped,
    so the caller's JSON parser reports the error.
    """
    text = raw_text.strip()
    start, end = text.find("{"), text.rfind("}")
    if start != -1 and end > start:
        return text[start:end + 1]
    return text

# Step 7: Create LangGraph Workflow
def create_pdf_quiz_workflow():
    """Build and compile the linear LangGraph pipeline for quiz generation.

    The graph runs load_content -> summarize -> generate_quiz -> END over a
    PDFQuizState.

    Returns:
        The compiled workflow object produced by ``StateGraph.compile()``.
    """
    # NOTE(review): a later cell in this notebook defines this function again;
    # on a top-to-bottom run that later definition replaces this one.
    graph = StateGraph(PDFQuizState)

    # Stages in execution order: (node name, node function).
    stages = (
        ("load_content", load_pdf_content),
        ("summarize", summarize_pdf_content),
        ("generate_quiz", generate_pdf_quiz),
    )
    for stage_name, stage_fn in stages:
        graph.add_node(stage_name, stage_fn)

    # Start at the first stage, then chain each stage to the next, ending at END.
    graph.set_entry_point(stages[0][0])
    route = [stage_name for stage_name, _ in stages] + [END]
    for source, target in zip(route, route[1:]):
        graph.add_edge(source, target)

    return graph.compile()

In [19]:
def load_pdf_content(state: PDFQuizState) -> PDFQuizState:
    """Node that checks the workflow was started with PDF text.

    The text itself is extracted before the workflow runs; this node only
    verifies that ``state["pdf_content"]`` is non-blank.

    Args:
        state: Workflow state; ``pdf_content`` is read.

    Returns:
        The same state dict with ``current_step`` set to ``"content_loaded"``,
        or to ``"error"`` (with ``error_message`` filled in) when the content
        is missing, None or whitespace-only.
    """
    try:
        # Blank content would otherwise be passed to the model as-is.
        if not (state.get("pdf_content") or "").strip():
            raise ValueError("no PDF content was provided")

        state["current_step"] = "content_loaded"
        return state
    except Exception as e:
        state["error_message"] = f"PDF loading error: {str(e)}"
        state["current_step"] = "error"
        return state

def summarize_pdf_content(state: PDFQuizState) -> PDFQuizState:
    """Node to summarize PDF content into key points.

    Sends ``state["pdf_content"]`` (cut to a fixed character budget) to the
    module-level ``llm`` and stores the reply in ``state["summary"]``.

    Args:
        state: Workflow state; ``pdf_content`` and ``error_message`` are read.

    Returns:
        The same state dict. On success ``summary`` is filled and
        ``current_step`` is ``"summarization_complete"``; on failure
        ``error_message`` is set and ``current_step`` is ``"error"``. If an
        earlier node already recorded an error the state is returned untouched.
    """
    # Do not spend a model call (or hide the original error) after a failure.
    if state.get("error_message"):
        return state

    # Character budget for the PDF text placed in the prompt.
    max_prompt_chars = 15000

    summary_prompt = PromptTemplate(
        template="""
        You are an expert educational content summarizer analyzing a PDF document.

        Please analyze the following content from a PDF and create a comprehensive summary:

        PDF Content:
        {content}

        Instructions:
        - Create 6-10 clear, concise bullet points that capture the most important concepts
        - Focus on key facts, definitions, processes, and main ideas
        - Each bullet point should be specific and informative
        - Organize points logically from general to specific concepts
        - Use educational language appropriate for quiz generation
        - Ignore any formatting artifacts or page numbers

        Provide only the bullet-point summary:
        """,
        input_variables=["content"]
    )

    try:
        print("🔄 Generating summary from PDF content...")

        # Truncate content if too long (Gemini has token limits)
        content = state["pdf_content"]
        if len(content) > max_prompt_chars:
            print(f"⚠️  PDF content is long, using first {max_prompt_chars} characters")
            content = content[:max_prompt_chars] + "... [content truncated]"

        prompt_text = summary_prompt.format(content=content)
        response = llm.invoke([HumanMessage(content=prompt_text)])

        summary = response.content
        # An empty reply would only produce a meaningless quiz downstream.
        if not summary or not str(summary).strip():
            raise ValueError("model returned an empty summary")

        state["summary"] = summary
        state["current_step"] = "summarization_complete"
        print("✅ Summary generated successfully")
        return state

    except Exception as e:
        state["error_message"] = f"Summarization error: {str(e)}"
        state["current_step"] = "error"
        return state

def generate_pdf_quiz(state: PDFQuizState) -> PDFQuizState:
    """Node to generate quiz questions from PDF summary.

    Asks the module-level ``llm`` for six multiple-choice questions as JSON,
    parses the reply and stores the question list in ``state["quiz_questions"]``.

    Args:
        state: Workflow state; ``summary`` and ``error_message`` are read.

    Returns:
        The same state dict. On success ``quiz_questions`` is filled and
        ``current_step`` is ``"quiz_complete"``; on failure ``error_message``
        is set and ``current_step`` is ``"error"``. If an earlier node already
        recorded an error the state is returned untouched.
    """
    # Do not spend a model call (or hide the original error) after a failure.
    if state.get("error_message"):
        return state

    quiz_prompt = PromptTemplate(
        template="""
        You are an expert quiz creator. Based on the following summary from a PDF document,
        create exactly 6 high-quality multiple-choice questions.

        Summary from PDF:
        {summary}

        Requirements:
        - Create exactly 6 questions that test understanding of key concepts
        - Each question must have exactly 4 options (a, b, c, d)
        - Only one correct answer per question
        - Make incorrect options plausible but clearly wrong
        - Questions should test comprehension, not just memorization
        - Include a brief explanation for each correct answer

        Format your response as valid JSON with this exact structure:
        {{
            "questions": [
                {{
                    "question": "Clear, specific question text?",
                    "options": {{
                        "a": "First option text",
                        "b": "Second option text",
                        "c": "Third option text",
                        "d": "Fourth option text"
                    }},
                    "correct_answer": "a",
                    "explanation": "Clear explanation of why this answer is correct"
                }}
            ]
        }}

        Generate the 6 quiz questions now:
        """,
        input_variables=["summary"]
    )

    try:
        print("🔄 Generating quiz questions...")

        prompt_text = quiz_prompt.format(summary=state["summary"])
        response = llm.invoke([HumanMessage(content=prompt_text)])

        # Parse JSON response (tolerating code fences / prose around it)
        quiz_data = json.loads(_extract_json_object(response.content))

        questions = quiz_data.get("questions") if isinstance(quiz_data, dict) else None
        if not isinstance(questions, list) or len(questions) == 0:
            raise ValueError("No questions generated in response")

        # Later cells index these keys directly, so reject malformed items here.
        for number, item in enumerate(questions, 1):
            if not isinstance(item, dict):
                raise ValueError(f"Question {number} is not a JSON object")
            missing = [key for key in ("question", "options", "correct_answer") if key not in item]
            if missing:
                raise ValueError(f"Question {number} is missing: {', '.join(missing)}")
            if not isinstance(item["options"], dict):
                raise ValueError(f"Question {number} has malformed options")
            item.setdefault("explanation", "")

        state["quiz_questions"] = questions
        state["current_step"] = "quiz_complete"
        print(f"✅ Generated {len(questions)} quiz questions")
        return state

    except json.JSONDecodeError as e:
        state["error_message"] = f"JSON parsing error: {str(e)}"
        state["current_step"] = "error"
        return state
    except Exception as e:
        state["error_message"] = f"Quiz generation error: {str(e)}"
        state["current_step"] = "error"
        return state


def _extract_json_object(raw_text: str) -> str:
    """Return the outermost ``{...}`` span of *raw_text*.

    This drops markdown fences (with or without a language tag) and any prose
    before or after the JSON object. Text without braces is returned stripped,
    so the caller's JSON parser reports the error.
    """
    text = raw_text.strip()
    start, end = text.find("{"), text.rfind("}")
    if start != -1 and end > start:
        return text[start:end + 1]
    return text

In [20]:
# Step 7: Create LangGraph Workflow
def create_pdf_quiz_workflow():
    """Build and compile the linear LangGraph pipeline for quiz generation.

    The graph runs load_content -> summarize -> generate_quiz -> END over a
    PDFQuizState.

    Returns:
        The compiled workflow object produced by ``StateGraph.compile()``.
    """
    graph = StateGraph(PDFQuizState)

    # Stages in execution order: (node name, node function).
    stages = (
        ("load_content", load_pdf_content),
        ("summarize", summarize_pdf_content),
        ("generate_quiz", generate_pdf_quiz),
    )
    for stage_name, stage_fn in stages:
        graph.add_node(stage_name, stage_fn)

    # Start at the first stage, then chain each stage to the next, ending at END.
    graph.set_entry_point(stages[0][0])
    route = [stage_name for stage_name, _ in stages] + [END]
    for source, target in zip(route, route[1:]):
        graph.add_edge(source, target)

    return graph.compile()

In [21]:
class PDFQuizGenerator:
    """Main class for generating quizzes from PDF files.

    Every public generator method returns a result dict with the keys
    ``success`` (bool), ``summary`` (str), ``quiz_questions`` (list),
    ``error`` (str, empty on success) and ``file_name`` (str).
    """

    def __init__(self):
        self.workflow = create_pdf_quiz_workflow()
        self.processor = PDFProcessor()

    @staticmethod
    def _failure_result(error: str, file_name: str = "", summary: str = "",
                        quiz_questions: Optional[List[Dict[str, Any]]] = None) -> Dict[str, Any]:
        """Build the result dict reported for a failed run."""
        return {
            "success": False,
            "summary": summary,
            "quiz_questions": quiz_questions if quiz_questions is not None else [],
            "error": error,
            "file_name": file_name
        }

    def generate_quiz_from_upload(self) -> Dict[str, Any]:
        """Upload a PDF, extract its text and generate a quiz.

        Returns:
            The result dict described on the class; failures during upload or
            extraction are reported with an empty ``file_name``.
        """
        try:
            # Upload PDF file
            filename, filepath = self.processor.upload_pdf()

            # Extract content
            content = self.processor.extract_pdf_content(filepath)

            # Process through workflow
            return self._process_pdf_content(content, filename)

        except Exception as e:
            return self._failure_result(str(e))

    def generate_quiz_from_path(self, pdf_path: str) -> Dict[str, Any]:
        """Generate a quiz from a PDF that already exists on disk.

        Args:
            pdf_path: Path of the PDF file.

        Returns:
            The result dict described on the class; ``file_name`` is the last
            component of ``pdf_path``.
        """
        filename = ""
        try:
            filename = os.path.basename(pdf_path)
            content = self.processor.extract_pdf_content(pdf_path)
            return self._process_pdf_content(content, filename)

        except Exception as e:
            return self._failure_result(str(e), filename)

    def _process_pdf_content(self, content: str, filename: str) -> Dict[str, Any]:
        """Run extracted text through the workflow and package the outcome.

        Args:
            content: Text extracted from the PDF.
            filename: Name reported back in the result.

        Returns:
            The result dict described on the class. If the workflow recorded an
            ``error_message`` the result is a failure that still carries any
            summary/questions produced before the error.
        """
        # Initialize state
        initial_state = {
            "pdf_content": content,
            "summary": "",
            "quiz_questions": [],
            "current_step": "initialized",
            "error_message": None,
            "file_name": filename
        }

        # Run workflow
        try:
            print(f"🚀 Processing {filename} through workflow...")
            result = self.workflow.invoke(initial_state)

            if result.get("error_message"):
                return self._failure_result(
                    result["error_message"],
                    filename,
                    summary=result.get("summary", ""),
                    quiz_questions=result.get("quiz_questions", []),
                )

            return {
                "success": True,
                "summary": result["summary"],
                "quiz_questions": result["quiz_questions"],
                "error": "",
                "file_name": filename
            }

        except Exception as e:
            return self._failure_result(str(e), filename)

    def display_results(self, result: Dict[str, Any]):
        """Print a result dict: the error on failure, otherwise summary and quiz.

        The correct option of each question is marked with a check mark.
        """
        print("\n" + "=" * 60)
        print(f"📄 PDF QUIZ GENERATOR RESULTS")
        print(f"📁 File: {result['file_name']}")
        print("=" * 60)

        if not result["success"]:
            print(f"❌ Error: {result['error']}")
            return

        # Display summary
        print("\n📋 CONTENT SUMMARY:")
        print("-" * 40)
        print(result["summary"])

        # Display quiz questions
        print("\n" + "=" * 60)
        print("❓ QUIZ QUESTIONS")
        print("=" * 60)

        for i, question in enumerate(result["quiz_questions"], 1):
            print(f"\nQuestion {i}: {question['question']}")
            print()
            # Model output may omit optional fields; do not crash mid-report.
            correct_key = question.get("correct_answer")
            for option_key, option_text in question["options"].items():
                marker = "✓" if option_key == correct_key else " "
                print(f"   {option_key}) {option_text} {marker}")

            explanation = question.get("explanation")
            if explanation:
                print(f"\n   💡 Explanation: {explanation}")
            print("-" * 50)

        print(f"\n✅ Generated {len(result['quiz_questions'])} questions from PDF")


In [22]:
def run_interactive_quiz(quiz_questions: List[Dict[str, Any]], file_name: str = ""):
    """Run an interactive quiz session on stdin/stdout.

    Each question is printed with its options; the user is re-prompted until
    the answer matches one of that question's option keys (case-insensitive).
    A final score and a short verdict are printed at the end.

    Args:
        quiz_questions: Question dicts with ``question``, ``options`` (mapping
            of option key to text) and ``correct_answer``; ``explanation`` is
            printed when present.
        file_name: Optional source name shown in the header.

    Returns:
        None. Prints a notice and returns immediately if there are no questions.
    """
    if not quiz_questions:
        print("❌ No quiz questions available")
        return

    score = 0
    total_questions = len(quiz_questions)

    print("\n" + "=" * 60)
    print(f"🎯 INTERACTIVE QUIZ MODE")
    if file_name:
        print(f"📁 Source: {file_name}")
    print("=" * 60)
    print("Instructions: Type a, b, c, or d for each question")
    print("-" * 60)

    for i, question in enumerate(quiz_questions, 1):
        print(f"\nQuestion {i}/{total_questions}: {question['question']}")
        print()

        options = question["options"]
        for option_key, option_text in options.items():
            print(f"   {option_key}) {option_text}")

        # Accept exactly the keys this question offers (the model does not
        # always return four options); fall back to a-d if it offers none.
        answer_keys = [str(key).strip().lower() for key in options] or ['a', 'b', 'c', 'd']
        prompt_choices = "/".join(answer_keys)
        if len(answer_keys) > 2:
            spoken_choices = ", ".join(answer_keys[:-1]) + f", or {answer_keys[-1]}"
        else:
            spoken_choices = " or ".join(answer_keys)

        # Get user input
        while True:
            user_answer = input(f"\nYour answer ({prompt_choices}): ").lower().strip()
            if user_answer in answer_keys:
                break
            print(f"Please enter {spoken_choices}")

        # Check answer (tolerate "A" / " a " style keys in the model output)
        if user_answer == str(question["correct_answer"]).strip().lower():
            print("✅ Correct!")
            score += 1
        else:
            print(f"❌ Incorrect. The correct answer is: {question['correct_answer']}")

        explanation = question.get("explanation")
        if explanation:
            print(f"💡 {explanation}")

        if i < total_questions:
            input("\nPress Enter to continue...")
            print("-" * 60)

    # Final score
    percentage = (score / total_questions) * 100
    print("\n" + "=" * 60)
    print(f"🏆 QUIZ COMPLETED!")
    print(f"📊 Final Score: {score}/{total_questions} ({percentage:.1f}%)")

    if percentage >= 80:
        print("🌟 Excellent work!")
    elif percentage >= 60:
        print("👍 Good job!")
    else:
        print("📚 Keep studying!")
    print("=" * 60)

In [23]:
def main():
    """Interactive entry point for the PDF quiz generator.

    Asks whether to upload a PDF or read one from a path, generates and prints
    the quiz, then offers to run it interactively when generation succeeded.
    """
    print("🎓 PDF Quiz Generator")
    print("=" * 40)

    quiz_generator = PDFQuizGenerator()

    # Menu
    for menu_line in ("\nChoose an option:",
                      "1. Upload a new PDF file",
                      "2. Use existing PDF file path"):
        print(menu_line)

    selection = input("Enter choice (1 or 2): ").strip()

    # Reject anything but the two offered options up front.
    if selection not in ("1", "2"):
        print("Invalid choice")
        return

    if selection == "1":
        outcome = quiz_generator.generate_quiz_from_upload()
    else:
        source_path = input("Enter PDF file path: ").strip()
        outcome = quiz_generator.generate_quiz_from_path(source_path)

    quiz_generator.display_results(outcome)

    # The interactive quiz is only offered when there is something to ask.
    if not (outcome["success"] and outcome["quiz_questions"]):
        return

    print("\n" + "=" * 60)
    wants_quiz = input("Would you like to take the interactive quiz? (y/n): ").lower().strip()
    if wants_quiz == 'y':
        run_interactive_quiz(outcome["quiz_questions"], outcome["file_name"])

In [24]:
def quick_test():
    """Smoke-test the workflow on a built-in text sample instead of a real PDF.

    Returns:
        The result dict produced by ``PDFQuizGenerator._process_pdf_content``.
    """
    print("🧪 Quick Test Mode")
    quiz_generator = PDFQuizGenerator()

    # Short AI primer that stands in for text extracted from a PDF.
    sample_content = """
    Artificial Intelligence (AI) is a branch of computer science that aims to create intelligent machines that can perform tasks that typically require human intelligence. Machine Learning is a subset of AI that enables computers to learn and improve from experience without being explicitly programmed. Deep Learning is a subset of machine learning that uses neural networks with multiple layers to analyze and learn from data. Natural Language Processing (NLP) is a field of AI that focuses on the interaction between computers and human language, enabling machines to understand, interpret, and generate human language.
    """

    # Feed the sample straight into the workflow, bypassing PDF extraction.
    outcome = quiz_generator._process_pdf_content(sample_content, "sample_ai_document.pdf")
    quiz_generator.display_results(outcome)
    return outcome

# Entry point: print a short banner, then start the interactive program.
if __name__ == "__main__":
    print("🚀 Starting PDF Quiz Generator...")
    print("Make sure you have added your Gemini API key to Colab secrets!")

    # Interactive program (asks for a PDF); comment this out to use quick_test instead
    main()

    # Uncomment the line below to run the quick test on the built-in sample text
    # quick_test()

🚀 Starting PDF Quiz Generator...
Make sure you have added your Gemini API key to Colab secrets!
🎓 PDF Quiz Generator

Choose an option:
1. Upload a new PDF file
2. Use existing PDF file path
Enter choice (1 or 2): 2
Enter PDF file path: content/ai_in_education.pdf
📖 Processing 10 pages...
✅ Extracted 43379 characters from PDF
🚀 Processing ai_in_education.pdf through workflow...
🔄 Generating summary from PDF content...
⚠️  PDF content is long, using first 15000 characters
✅ Summary generated successfully
🔄 Generating quiz questions...
✅ Generated 6 quiz questions

📄 PDF QUIZ GENERATOR RESULTS
📁 File: ai_in_education.pdf

📋 CONTENT SUMMARY:
----------------------------------------
* This phenomenological study investigated the implications of Artificial Intelligence (AI) in education, gathering perspectives from academics, legal experts, engineers, and teachers.

* AI in education is defined as the use of computer systems to mimic human thought processes and actions to enhance learning. 