In [None]:
import os
from xml.sax.saxutils import escape

from dotenv import load_dotenv
from langchain_google_genai import ChatGoogleGenerativeAI
from pypdf import PdfReader
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer


In [11]:
# Configure Gemini.
# load_dotenv() runs first so the "api_key" lookup below can see values
# supplied through a .env file as well as the regular process environment.
load_dotenv()

# Single chat-model client used by LLM_extract_specs; temperature is pinned to 0.
llm = ChatGoogleGenerativeAI(
    model="models/gemini-2.0-flash",
    temperature=0,
    api_key=os.getenv("api_key"),
)

In [7]:
def extract_text_from_pdf(pdf_path):
    """Return the text of every page in a PDF as one newline-separated string.

    Args:
        pdf_path: Location of the PDF, passed straight to ``PdfReader``.

    Returns:
        The per-page text joined with ``"\\n"``, with leading and trailing
        whitespace removed. A document with no pages yields ``""``.
    """
    pdf = PdfReader(pdf_path)
    page_texts = [pdf_page.extract_text() for pdf_page in pdf.pages]
    return "\n".join(page_texts).strip()

In [8]:
def LLM_extract_specs(text):
    """Ask the module-level ``llm`` client to pull key phone specs out of ``text``.

    The prompt requests exactly five fields (Name, Dimensions, Weight,
    Screen Size, CPU) in a ``Field: value`` layout, one per line.

    Args:
        text: Raw text to search, interpolated verbatim into the prompt.

    Returns:
        The ``content`` of the model's reply with surrounding whitespace
        stripped. The reply is not validated against the requested layout.
    """
    instructions = f"""
    You are an expert at extracting phone specs.

    From the following text, extract ONLY:
    - Name
    - Dimensions
    - Weight
    - Screen Size (inches)
    - CPU (e.g., Snapdragon 8 Gen 3, bionic 17, dimensity 9600, etc.)

    Text:
    {text}

    Return output in clean format like:
    Name: ...
    Dimensions: ...
    Weight: ...
    Screen Size: ...
    CPU: ...
    """

    reply = llm.invoke(instructions)
    answer = reply.content
    return answer.strip()

In [None]:
def print_specs(input_path):
    """Extract phone specs from a PDF and write them to a new PDF report.

    The source PDF's text is obtained with ``extract_text_from_pdf``, passed to
    ``LLM_extract_specs``, and the reply is laid out one line per paragraph
    under a title.

    Args:
        input_path: Path of the PDF to read.

    Returns:
        None. As a side effect, ``specs_<input file stem>.pdf`` is written
        relative to the current working directory.
    """
    text = extract_text_from_pdf(input_path)
    content = LLM_extract_specs(text)

    styles = getSampleStyleSheet()

    filename = os.path.splitext(os.path.basename(input_path))[0]
    output_name = f"specs_{filename}.pdf"

    doc = SimpleDocTemplate(output_name)
    story = [
        Paragraph("Phone Specifications Extracted:", styles['Title']),
        Spacer(1, 12)
    ]

    # splitlines() rather than split('\n') so a "\r\n" reply leaves no stray "\r".
    for line in content.splitlines():
        # Paragraph treats its text as XML-style markup, so model output such
        # as "AT&T" or "<6.1 in" must be escaped to be rendered literally
        # instead of being parsed as (possibly malformed) tags/entities.
        story.append(Paragraph(escape(line), styles['Normal']))
        story.append(Spacer(1, 6))

    doc.build(story)


In [10]:
# Run the full pipeline on one sample file; the report is written as
# "specs_a1.pdf" relative to the current working directory.
print_specs("./data_files/a1.pdf")  # Replace with your PDF file path