In [1]:
from pypdf import PdfReader

# Load the leading pages of the report with pypdf, keeping one text string per page.
pdf_path = "data/gemini-2.5-tech.pdf"
reader = PdfReader(pdf_path)

# Read at most 18 pages, but never more than the document actually has;
# indexing reader.pages past its length would raise IndexError.
page_count = min(18, len(reader.pages))

text_content = []
for i in range(page_count):  # Pages are 0-indexed
    page = reader.pages[i]
    text_content.append(page.extract_text())

print(f"Loaded {len(text_content)} pages.")
if text_content:
    # Print first 200 characters of the first page to verify
    print(text_content[0][:200])
else:
    print("No pages were loaded.")

Loaded 18 pages.
Gemini 2.5: Pushing the Frontier with
Advanced Reasoning, Multimodality, Long
Context, and Next Generation Agentic
Capabilities.
Gemini Team, Google
In this report, we introduce the Gemini 2.X model f


In [6]:
from pypdf import PdfReader

# Load the leading pages of the report, then let the user browse them by page number.
pdf_path = "data/gemini-2.5-tech.pdf"
reader = PdfReader(pdf_path)

# Read at most 18 pages, but never more than the document actually has;
# indexing reader.pages past its length would raise IndexError.
page_count = min(18, len(reader.pages))

text_content = []
for i in range(page_count):  # Pages are 0-indexed
    page = reader.pages[i]
    text_content.append(page.extract_text())

print(f"Loaded {len(text_content)} pages.")

# Prompt until the user quits. The loop also ends cleanly when stdin is
# exhausted (EOFError) or the cell is interrupted, instead of leaving a traceback.
while True:
    try:
        page_num_input = input(f"Enter page number (1-{len(text_content)}) or 'q' to quit: ")
    except (EOFError, KeyboardInterrupt):
        break

    # Tolerate surrounding whitespace such as " q " or " 3 ".
    page_num_input = page_num_input.strip()
    if page_num_input.lower() == 'q':
        break

    try:
        page_num = int(page_num_input)
    except ValueError:
        print("Invalid input. Please enter a number or 'q'.")
        continue

    if 1 <= page_num <= len(text_content):
        print(f"--- Page {page_num} ---")
        print(text_content[page_num - 1]) # Pages are 0-indexed in list
        print("\n")
    else:
        print("Invalid page number. Please try again.")

Loaded 18 pages.
--- Page 13 ---
Gemini 2.5: Pushing the Frontier with Advanced Reasoning, Multimodality, Long Context, and Next Generation Agentic Capabilities.
Capability BenchmarkFlashGemini 1.5
ProGemini 1.5
Flash-LiteGemini 2.0
FlashGemini 2.0
FlashGemini 2.5
ProGemini 2.5
CodeLiveCodeBench 30.3% 29.7% 29.1% 29.1% 59.3% 74.2%
Aider Polyglot 2.8% 16.9% 10.5% 21.3% 56.7% 82.2%
VerifiedSWE-benchattemptsingle9.6% 22.3% 12.5% 21.4% 48.9% 59.6%
attemptsmultiple19.7% 34.2% 23.1% 34.2% 60.3% 67.2%
Reasoning(diamond)GPQA50.0% 58.1% 50.5% 65.2% 82.8% 86.4%
Last ExamHumanity’sno tools - 4.6% 4.6% † 5.1%† 11.0% 21.6%
FactualitySimpleQA 8.6% 24.9% 16.5% 29.9% 26.9% 54.0%
GroundingFACTS82.9% 80.0% 82.4% 84.6% 85.3% 87.8%
Multilinguality (Lite)Global MMLU72.5% 80.8% 78.0% 83.4% 88.4% 89.2%
ECLeKTic 16.4% 27.0% 27.7% 33.6% 36.8% 46.8%
MathAIME 2025 14.7% 17.5% 23.8% 29.7% 72.0% 88.0%
HiddenMath-
Hard36.8% 44.3% 47.4% 53.7% 75.5% 80.5%
Long-contextretrieval)LOFT (hard ≤128K 67.3% 75.9% 50.7% 58.0%

In [None]:
import os
from llama_parse import LlamaParse
from llama_index.core import SimpleDirectoryReader

# Module-level LlamaParse client configured to return results as Markdown.
documents = LlamaParse(result_type="markdown")


def pdf_parser(pdf_file_path: str):
    """
    Parse a PDF file with LlamaParse and save its content as a Markdown file.

    The Markdown file is written next to the input file, with the extension
    replaced by ``.md``. Progress and failures are reported on stdout; errors
    are caught and printed rather than raised to the caller.

    Args:
        pdf_file_path (str): Path of the PDF file to process.

    Returns:
        None
    """
    print(f"🔄 '{pdf_file_path}' 파일 파싱을 시작합니다...")

    try:
        # Fail fast on a missing input so the FileNotFoundError handler below
        # reports it before any parser is built or any request is made.
        if not os.path.isfile(pdf_file_path):
            raise FileNotFoundError(pdf_file_path)

        # Instruction passed to the parser.
        parsing_instruction = (
            "You are parsing a AI Report. Please extract tables in markdown format."
        )

        # LlamaParse configuration (multimodal model, Markdown output).
        parser = LlamaParse(
            use_vendor_multimodal_model=True,
            vendor_multimodal_model_name="openai-gpt4o",
            vendor_multimodal_api_key=os.environ["OPENAI_API_KEY"],
            result_type="markdown",
            # parsing_mode="Unstructured",
            language="ko",
            parsing_instruction=parsing_instruction,
        )

        # 1. Load the PDF with the parser configured above. Previously the
        #    module-level 'documents' object was used here, so the settings
        #    above were built and then silently ignored.
        parsed_docs = parser.load_data(file_path=pdf_file_path)

        # An empty result would otherwise write an empty .md file and still
        # report success, so treat it as a failure.
        if not parsed_docs:
            print(f"❌ 오류 발생: 파싱 결과가 비어 있습니다 - {pdf_file_path}")
            return

        # 2. Convert to LangChain-format documents.
        docs = [doc.to_langchain_format() for doc in parsed_docs]

        # 3. Build the output Markdown path by swapping the extension.
        file_root, _ = os.path.splitext(pdf_file_path)
        output_file_path = file_root + ".md"

        # 4. Merge all documents into one text, separated by a blank line
        #    for readability.
        full_text = "\n\n".join(doc.page_content for doc in docs)

        # 5. Write the merged text to the .md file.
        with open(output_file_path, "w", encoding="utf-8") as f:
            f.write(full_text)

        print(f"✅ 파일 저장 완료: {output_file_path}")

    except FileNotFoundError:
        print(f"❌ 오류: 파일을 찾을 수 없습니다 - {pdf_file_path}")
    except Exception as e:
        print(f"❌ 오류 발생: {e}")


# --- Example usage ---
# Run the definitions above (imports, 'documents', pdf_parser) before executing this call.
# file_to_parse = "data/디지털정부혁신추진계획.pdf"
file_to_parse = "./data/lorem-ipsum-10pages.pdf"
pdf_parser(file_to_parse)

In [5]:
import fitz  # PyMuPDF
import os

def extract_pdf_content(pdf_path, output_image_dir="extracted_images", max_pages=18):
    """Print the text of a PDF's leading pages and save their embedded images.

    For each processed page the extracted text is printed, and every image
    listed for that page is written to ``output_image_dir`` as
    ``page<N>_img<K>.<ext>`` (N is the 1-based page number, K the 0-based
    image index on that page). Errors are caught and printed, not raised.

    Args:
        pdf_path: Path of the PDF file to open.
        output_image_dir: Directory that receives the extracted images; it is
            created if it does not exist.
        max_pages: Maximum number of pages to process, counted from the first
            page. ``None`` processes every page. Defaults to 18.

    Returns:
        None
    """
    try:
        # The context manager closes the document even if extraction fails midway.
        with fitz.open(pdf_path) as document:
            # Create the image directory only after the PDF opened successfully.
            os.makedirs(output_image_dir, exist_ok=True)

            total_pages = len(document)
            page_limit = total_pages if max_pages is None else min(max_pages, total_pages)

            for page_num in range(page_limit):
                page = document.load_page(page_num)

                # Extract text
                text = page.get_text()
                print(f"--- Page {page_num + 1} Text ---")
                print(text)
                print("\n")

                # Extract images
                for img_index, img in enumerate(page.get_images(full=True)):
                    xref = img[0]  # first entry is the image's xref number
                    base_image = document.extract_image(xref)
                    image_filename = os.path.join(
                        output_image_dir,
                        f"page{page_num + 1}_img{img_index}.{base_image['ext']}",
                    )
                    with open(image_filename, "wb") as img_file:
                        img_file.write(base_image["image"])
                    print(f"Saved image: {image_filename}")

    except Exception as e:
        print(f"An error occurred: {e}")

# Entry point: runs the extraction on the sample report when executed as a
# script or as a notebook cell (where __name__ is "__main__").
if __name__ == "__main__":
    pdf_file = "data/gemini-2.5-tech.pdf"
    extract_pdf_content(pdf_file)


--- Page 1 Text ---
Gemini 2.5: Pushing the Frontier with
Advanced Reasoning, Multimodality, Long
Context, and Next Generation Agentic
Capabilities.
Gemini Team, Google
In this report, we introduce the Gemini 2.X model family: Gemini 2.5 Pro and Gemini 2.5 Flash, as well
as our earlier Gemini 2.0 Flash and Flash-Lite models. Gemini 2.5 Pro is our most capable model yet,
achieving SoTA performance on frontier coding and reasoning benchmarks. In addition to its incredible
coding and reasoning skills, Gemini 2.5 Pro is a thinking model that excels at multimodal understanding
and it is now able to process up to 3 hours of video content. Its unique combination of long context,
multimodal and reasoning capabilities can be combined to unlock new agentic workflows. Gemini 2.5
Flash provides excellent reasoning abilities at a fraction of the compute and latency requirements and
Gemini 2.0 Flash and Flash-Lite provide high performance at low latency and cost. Taken together, the
Gemini 2.X model