In [10]:
import fitz  
import os
import json
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from langchain.prompts import PromptTemplate

In [11]:
load_dotenv("D:\Langchain\summary\.env.txt")
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)

In [12]:
def extract_pages_text(pdf_path, page_numbers):
    doc = fitz.open(pdf_path)
    combined_text = ""
    for num in page_numbers:
        if 0 <= num < len(doc):
            combined_text += doc.load_page(num).get_text()
    return combined_text

In [13]:
def summarize_text(text, instruction="Please summarize this content:"):
    prompt = PromptTemplate(
        input_variables=["instruction", "text"],
        template="{instruction}\n\n{text}"
    )
    formatted_prompt = prompt.format(instruction=instruction, text=text)
    return llm.invoke(formatted_prompt)

In [14]:
pdf_path = "Unit- I. Number Theorem.pdf"
selected_pages = [2, 3, 4]  # Pages 3, 4, 5 (0-indexed)
text_to_summarize = extract_pages_text(pdf_path, selected_pages)

summary = summarize_text(text_to_summarize)
print("Summary:\n", summary)

Summary:
 content='The content is about the benefits of practicing mindfulness, including improved focus, reduced stress, and increased emotional regulation. It also discusses how mindfulness can help individuals become more present and aware in their daily lives.' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 40, 'prompt_tokens': 12, 'total_tokens': 52, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'id': 'chatcmpl-BmJ1PNZcuxUVUSZ2u0VsqcZHr3PR5', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None} id='run--feb6a72e-35b1-4402-9722-4fb1ab73220e-0' usage_metadata={'input_tokens': 12, 'output_tokens': 40, 'total_tokens': 52, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'aud

In [16]:
output_data = {
    "pdf_file": pdf_path,
    "pages_summarized": [p + 1 for p in selected_pages], 
    "summary": str(summary)
}

with open("summary_output.json", "w", encoding="utf-8") as f:
    json.dump(output_data, f, ensure_ascii=False, indent=4)

print("Summary saved to summary_output.json")

Summary saved to summary_output.json
