In [None]:
# !pip install google-generativeai pydantic
# !pip install PyPDF2 google-generativeai

In [100]:
import PyPDF2
import google.generativeai as genai
from pydantic import BaseModel
import json

from typing import List, Dict, Any
from dataclasses import dataclass

from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.text_splitter import RecursiveCharacterTextSplitter

from langchain_google_genai import ChatGoogleGenerativeAI
import os

## Configure API Key

In [102]:
# Resolve the API key once so every client uses the same value.
# A key already exported as GOOGLE_API_KEY wins; otherwise the placeholder below
# is stored in the environment (replace it locally, never commit a real key).
# Previously genai.configure() always received the hard-coded literal, even when
# a different key was present in the environment.
google_api_key = os.environ.setdefault("GOOGLE_API_KEY", "<API_KEY>")

genai.configure(api_key=google_api_key)
# Module-level Gemini model used for the one-off topic-hint call.
model = genai.GenerativeModel("gemini-2.0-flash")

## Configure PDF Path

In [116]:
# PDF to study; a relative path, so it is resolved against the current working directory.
pdf_path = "Prompt Engineering.pdf"

In [23]:
def extract_text_from_pdf(pdf_path):
    """Read every page of a PDF and return the text as a single string.

    Args:
        pdf_path: Path of the PDF file to open.

    Returns:
        The text of all pages concatenated in page order, or ``None`` when the
        file cannot be opened or parsed (the error is printed, not raised).
    """
    try:
        with open(pdf_path, 'rb') as file:
            reader = PyPDF2.PdfReader(file)
            # A page with nothing extractable may yield an empty/None result;
            # coerce it to "" so a single such page cannot abort the whole run.
            page_texts = [page.extract_text() or "" for page in reader.pages]
        return "".join(page_texts)
    except Exception as e:
        # Best-effort by design: report the problem and signal failure with None.
        print(f"Error extracting text from PDF: {e}")
        return None


document_text = extract_text_from_pdf(pdf_path)
# extract_text_from_pdf returns None on failure; stop here with a clear message
# instead of letting len(None) raise an unrelated TypeError.
if document_text is None:
    raise RuntimeError(f"Could not extract text from {pdf_path!r}; see the error printed above.")
print(f"Extracted {len(document_text)} characters from the PDF.")

Extracted 4917 characters from the PDF.


In [106]:
# Pre-processing: the extracted text contains many '\n' line breaks, so replace each with a space.
document_text = document_text.replace('\n',' ')

## Getting Topic Hint
A one-time inference on the first 500 characters identifies the overall topic, so that the prompts that come later can be adapted to it.

In [80]:
def detect_topic_hint(text: str, max_chars: int = 500) -> str:
    """
    Ask the module-level Gemini `model` for a short, friendly study-topic title.

    Only the beginning of `text` is sent: the text is stripped, newlines are
    replaced by spaces, and the result is cut to `max_chars` characters.
    A single model call is made; the reply is returned with surrounding
    whitespace removed.
    """
    flattened = text.strip().replace("\n", " ")
    snippet = flattened[:max_chars]

    prompt_parts = [
        "You are an intelligent study assistant. Based on the following text snippet, ",
        "provide a short, clear, friendly topic title suitable for study purposes. ",
        f"Keep it concise (max 6-8 words) and descriptive.\n\nText snippet:\n{snippet}\n\n",
        "Output only the topic title:",
    ]
    prompt = "".join(prompt_parts)

    # Single Gemini call for the whole document.
    response = model.generate_content(prompt)

    # Use `.text` when the response exposes it; otherwise fall back to its string form.
    if hasattr(response, "text"):
        raw_title = response.text
    else:
        raw_title = str(response)

    return raw_title.strip()

In [86]:
# Infer the topic once from the first 500 characters; the later prompts reuse it.
topic_hint = detect_topic_hint(document_text, max_chars=500)
print(topic_hint)

Prompt Engineering: Guiding AI with Text


## Summary & MCQ prompt

In [44]:
# Per-chunk summarisation prompt; filled with `chunk_text` and `topic_hint`.
SUMMARY_PROMPT = PromptTemplate(
    template=(
        "You are an expert study assistant.\n"
        "Summarize the following content into concise bullet points suitable for quick review.\n"
        "Be brief, use 6-12 bullets if content is long, and include any important formulas, dates, or definitions.\n"
        "Adapt your tone to be friendly and clear for someone studying {topic_hint}.\n\n"
        "Content:\n{chunk_text}\n\n"
        "Summary (bullet points):"
    ),
    input_variables=["chunk_text", "topic_hint"],
)


# Quiz-generation prompt; filled with `summary_text`, `num_questions` and `topic_hint`.
# The trailing "Output format" section fixes the Q/A-D/Answer/Explanation layout.
MCQ_PROMPT = PromptTemplate(
    template=(
        "You are a friendly teacher preparing a short multiple-choice quiz from the study notes below.\n"
        "Generate exactly {num_questions} multiple-choice questions that test important concepts from the summary.\n"
        "For each question: provide a clear question statement, 4 answer options labeled A-D, and mark the correct answer.\n"
        "At the end of each question, include a one-sentence explanation for why the correct answer is correct.\n"
        "Make sure distractors (wrong options) are plausible. Include the topic: {topic_hint}.\n\n"
        "Study notes:\n{summary_text}\n\n"
        "Output format (use this strictly):\n"
        "Q<n>. <question text>\n"
        "A) option 1\n"
        "B) option 2\n"
        "C) option 3\n"
        "D) option 4\n"
        "Answer: <A/B/C/D>\n"
        "Explanation: <one-sentence explanation>\n"
    ),
    input_variables=["summary_text", "num_questions", "topic_hint"],
)

## Chunking

In [88]:
def chunk_text(text: str, chunk_size: int = 2000, chunk_overlap: int = 200) -> List[str]:
    """Split `text` into pieces using a RecursiveCharacterTextSplitter.

    `chunk_size` and `chunk_overlap` are forwarded unchanged to the splitter;
    the list it produces is returned as-is.
    """
    splitter_config = dict(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    text_splitter = RecursiveCharacterTextSplitter(**splitter_config)
    pieces = text_splitter.split_text(text)
    return pieces
    
def summarize_chunks(llm, chunks: List[str], topic_hint: str) -> List[str]:
    """Summarise each chunk independently with SUMMARY_PROMPT.

    Args:
        llm: Chat model handed to the LLMChain.
        chunks: Text pieces to summarise, one model call per piece.
        topic_hint: Topic string substituted into the prompt.

    Returns:
        One stripped summary string per chunk, in the same order as `chunks`.
    """
    chain = LLMChain(llm=llm, prompt=SUMMARY_PROMPT)
    summaries = []
    for chunk in chunks:
        # `invoke` replaces the deprecated `Chain.run`; it returns a dict whose
        # generated text is stored under the chain's output key.
        result = chain.invoke({"chunk_text": chunk, "topic_hint": topic_hint})
        summaries.append(result[chain.output_key].strip())
    return summaries



## Gemini Inferences

In [None]:
def combine_summaries(llm, partial_summaries: List[str], topic_hint: str) -> str:
    """Condense the per-chunk summaries into one final bullet-point summary.

    Args:
        llm: Chat model handed to the LLMChain.
        partial_summaries: Per-chunk summaries; joined with blank lines before
            being sent to the model.
        topic_hint: Topic string substituted into the prompt.

    Returns:
        The model's condensed summary with surrounding whitespace removed.
    """
    combined = "\n\n".join(partial_summaries)
    final_prompt = PromptTemplate(
        input_variables=["combined_text", "topic_hint"],
        template=(
            "You are an expert study assistant. Condense the following partial summaries into a single, concise set of 8-12 bullet points for quick studying.\n"
            "Preserve important facts, formulas, and definitions. Adapt the wording for clarity for {topic_hint}.\n\n"
            "{combined_text}\n\n"
            "Final summary (bullets):"
        ),
    )
    final_chain = LLMChain(llm=llm, prompt=final_prompt)
    # `invoke` replaces the deprecated `Chain.run`; the generated text is stored
    # under the chain's output key in the returned dict.
    result = final_chain.invoke({"combined_text": combined, "topic_hint": topic_hint})
    return result[final_chain.output_key].strip()

def generate_mcqs(llm, final_summary: str, topic_hint: str, num_questions: int = 6) -> str:
    """Generate a multiple-choice quiz from the final summary using MCQ_PROMPT.

    Args:
        llm: Chat model handed to the LLMChain.
        final_summary: Study notes the questions are based on.
        topic_hint: Topic string substituted into the prompt.
        num_questions: Number of questions requested; passed to the prompt as a string.

    Returns:
        The model's quiz text with surrounding whitespace removed.
    """
    mcq_chain = LLMChain(llm=llm, prompt=MCQ_PROMPT)
    # `invoke` replaces the deprecated `Chain.run`; the generated text is stored
    # under the chain's output key in the returned dict.
    result = mcq_chain.invoke(
        {
            "summary_text": final_summary,
            "num_questions": str(num_questions),
            "topic_hint": topic_hint,
        }
    )
    return result[mcq_chain.output_key].strip()


## Structured Output

In [None]:
@dataclass
class StudyOutput:
    """Pair of text results returned by process_pdf_text for one document."""
    summary: str  # final condensed summary text
    mcq_text: str  # quiz text generated from that summary

def process_pdf_text(pdf_text: str, llm, topic_hint: str, num_questions: int = 6) -> StudyOutput:
    """Run the full pipeline: chunk -> per-chunk summaries -> final summary -> MCQs.

    Args:
        pdf_text: Extracted document text.
        llm: Chat model used for every summarisation and quiz call.
        topic_hint: Topic string passed to all prompts.
        num_questions: Number of quiz questions to request.

    Returns:
        A StudyOutput holding the final summary and the quiz text.

    Raises:
        ValueError: If the text produces no chunks (e.g. an empty document),
            since there is nothing meaningful to summarise or quiz on.
    """
    chunks = chunk_text(pdf_text)
    print(f"Debug: created {len(chunks)} chunks. Topic hint: {topic_hint}")

    # Without this guard the model would be asked to summarise empty content
    # and then to write a quiz about whatever it made up.
    if not chunks:
        raise ValueError("No text to summarise: the document produced no chunks.")

    partial_summaries = summarize_chunks(llm, chunks, topic_hint)
    final_summary = combine_summaries(llm, partial_summaries, topic_hint)
    mcq_text = generate_mcqs(llm, final_summary, topic_hint, num_questions=num_questions)

    return StudyOutput(summary=final_summary, mcq_text=mcq_text)

## Execution

In [98]:
# Low temperature keeps the summaries and quiz close to the source text.
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0.2)
out = process_pdf_text(document_text, llm, topic_hint, num_questions=2)

# Print both sections, each under its own banner, one item per line.
print(
    '\n===== FINAL SUMMARY =====',
    out.summary,
    '\n===== MCQs =====',
    out.mcq_text,
    sep='\n',
)

Debug: created 3 chunks. Topic hint: Prompt Engineering: Guiding AI with Text


  out = chain.run({"chunk_text": c, "topic_hint": topic_hint})



===== FINAL SUMMARY =====
Here's a concise summary of Prompt Engineering for quick studying:

*   **Prompt Engineering:** Guiding AI models with text-based instructions (prompts) to achieve specific tasks.
*   **Prompt Components:** Effective prompts include clear instructions, relevant context, input data (if needed), and a desired output indicator.
*   **Prompt Types:** Text prompts (text generation), Code prompts (code tasks), Image prompts (image generation).
*   **Zero-Shot Prompting:** Model performs task based on general knowledge, without specific examples.
*   **Few-Shot Prompting:** Provide a few examples in the prompt to guide the model's output.
*   **Chain-of-Thought (CoT):** Break down complex tasks into intermediate reasoning steps for improved reasoning.
*   **Prompt Improvement:** Use role-playing (assign personas), be concise and specific, and maintain a consistent tone.
*   **Avoid Overload & Vagueness:** Keep prompts focused and avoid open-ended questions for preci