# In [1]:
import os
import pdfplumber
import csv
import re

def extract_full_text(pdf_path):
    """Return the text of every page of a PDF as a single string.

    Args:
        pdf_path: Path of the PDF file; handed to ``pdfplumber.open``.

    Returns:
        The text of each page for which ``extract_text()`` produced a
        non-empty result, in page order, each followed by a newline.
        An empty string when no page produced text.
    """
    with pdfplumber.open(pdf_path) as document:
        extracted = [page.extract_text() for page in document.pages]

    # Pages with a falsy extraction result contribute nothing at all,
    # not even the trailing newline.
    return "".join(chunk + "\n" for chunk in extracted if chunk)

def create_summary(text, keyword, max_sentences=5):
    """Build a short extractive summary from sentences mentioning a keyword.

    The text is whitespace-normalised first (every run of spaces, tabs and
    newlines becomes one space). Text with hard line breaks would otherwise
    defeat both the sentence splitter and the keyword match: a sentence that
    ends at a line break would not be split off, and a keyword wrapped over
    two lines would not be found.

    Args:
        text: The text to summarise.
        keyword: Phrase to look for; matched case-insensitively, with runs
            of whitespace treated as a single space.
        max_sentences: Upper bound on the number of matching sentences kept.

    Returns:
        The first ``max_sentences`` sentences containing the keyword, joined
        with single spaces, or an empty string when no sentence matches.
    """
    flat_text = " ".join(text.split())
    needle = " ".join(keyword.lower().split())

    # A sentence ends at '.', '!' or '?' followed by whitespace; the
    # lookbehind keeps the punctuation attached to its sentence.
    sentences = re.split(r'(?<=[.!?])\s+', flat_text)
    relevant_sentences = [s for s in sentences if needle in s.lower()]

    # Basic summarization: just return the first few matching sentences.
    # Joining an empty list already yields "".
    return " ".join(relevant_sentences[:max_sentences])

def scan_pdfs_and_summarize(folder_path, keyword, output_file="description.csv"):
    """Scan a folder's PDFs for a keyword and write the matches to a CSV file.

    Every regular file directly inside ``folder_path`` whose name ends in
    ``.pdf`` (case-insensitive) is read with ``extract_full_text``. For each
    PDF whose text contains the keyword, one row is written holding the file
    name, the number of occurrences and a summary from ``create_summary``.

    Args:
        folder_path: Directory to scan (not recursive).
        keyword: Phrase to search for; matched case-insensitively, with runs
            of whitespace treated as a single space.
        output_file: Path of the CSV file to create or overwrite.

    Raises:
        ValueError: If ``keyword`` is empty or only whitespace. An empty
            phrase is contained in every text, so the result would list
            every PDF with a meaningless count.
    """
    needle = " ".join(keyword.lower().split())
    if not needle:
        raise ValueError("keyword must contain at least one non-whitespace character")

    summary_data = []

    # os.listdir() returns names in arbitrary order; sort so the CSV rows
    # come out in the same order on every run.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.lower().endswith(".pdf"):
            continue
        pdf_path = os.path.join(folder_path, filename)
        if not os.path.isfile(pdf_path):
            # A directory that merely has a ".pdf" name.
            continue

        try:
            full_text = extract_full_text(pdf_path)
        except Exception as exc:
            # Batch boundary: one unreadable PDF should not cost the results
            # of all the others. Report it and carry on.
            print(f"⚠️ Skipping {filename}: {exc}")
            continue

        # Collapse line breaks and repeated spaces so a keyword wrapped over
        # two lines is still found and counted.
        flat_text = " ".join(full_text.split())
        count = flat_text.lower().count(needle)
        if count:
            summary = create_summary(flat_text, needle)
            summary_data.append([filename, count, summary])

    # Save results
    with open(output_file, mode='w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['Filename', 'Keyword Count', 'Summary'])
        writer.writerows(summary_data)

    print(f"\n✅ Summary file saved as: {output_file}")

# 👇 Run with predefined folder and keyword
if __name__ == "__main__":
    # Script entry point: both inputs are hard-coded, so edit them before
    # running. Nothing is read from the command line.
    keyword = "geetha govindam"           # Change this to your search term
    folder = r"C:\Users\Ashritha Gopu\OneDrive\Desktop\movie"      # Change this to your folder path

    # Only scan when the configured folder actually exists.
    if os.path.isdir(folder):
        scan_pdfs_and_summarize(folder, keyword)
    else:
        print("❌ Invalid folder path!")



# Output:
# ✅ Summary file saved as: description.csv
