First we take an image. This image itself is not searchable or structured.

**Image → Text**

What it produces:

extracted words/lines

OCR reads the text from image, like a human reading a receipt.


In [2]:
# installs Poppler tools (system-level utilities).

# Why needed:
# pdf2image needs Poppler to read and convert PDFs.
# Poppler provides commands like pdfinfo and pdftoppm.

# In your pipeline:
# This enables PDF → image conversion.
!apt-get install -y poppler-utils



# installs the Python library pdf2image.

# Why needed:
# So your code can do:

# from pdf2image import convert_from_path
# images = convert_from_path(pdf_path, dpi=300)


# In your pipeline:
# This is the direct step: PDF → images (page-wise).

!pip install pdf2image

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
poppler-utils is already the newest version (22.02.0-2ubuntu0.12).
0 upgraded, 0 newly installed, 0 to remove and 2 not upgraded.


In [3]:
ls

[0m[01;34msample_data[0m/


In [4]:
# Refreshes the Linux package list.
# So Colab knows the latest versions of Ubuntu packages before installing anything.
!apt-get update

# installs Poppler tools + supporting data files.
!apt-get install -y poppler-utils poppler-data

# checks where pdfinfo is installed.
!which pdfinfo

Get:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,632 B]
Get:2 https://cli.github.com/packages stable InRelease [3,917 B]
Get:3 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
Get:4 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ Packages [83.8 kB]
Hit:5 http://archive.ubuntu.com/ubuntu jammy InRelease
Get:6 https://r2u.stat.illinois.edu/ubuntu jammy InRelease [6,555 B]
Get:7 https://cli.github.com/packages stable/main amd64 Packages [356 B]
Get:8 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]
Hit:9 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Get:10 https://r2u.stat.illinois.edu/ubuntu jammy/main all Packages [9,682 kB]
Get:11 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease [24.6 kB]
Get:12 http://security.ubuntu.com/ubuntu jammy-security/main amd64 Packages [3,640 kB]
Get:13 http://archive.ubuntu.com/ubuntu jammy-backports InRelease [127 kB]
Get:14 https://ppa

In [6]:
# Part A — Import libraries

from pdf2image import convert_from_path
import os

# Part B — Provide PDF input

# Path to PDF file
pdf_path = "/content/Receipt-template-example.pdf"

# Part C — Create output folder based on PDF name

# Get the PDF name without extension
pdf_name = os.path.splitext(os.path.basename(pdf_path))[0]
# Directory to save images
output_dir = f"/content/{pdf_name}"
os.makedirs(output_dir, exist_ok=True)  # Create the folder if it doesn't exist

# Part D — Convert PDF pages to images

# Convert PDF to images (one image per page)
images = convert_from_path(pdf_path, dpi=300, fmt='jpeg')

# Part E — Select only specific pages

pages = [1]  # Specify the pages you want to convert e.g., [1,2,3] → first 3 pages


# Part F — Save selected pages as images + store their paths
# List to store the names of saved images as strings
saved_image_paths = []

# Save specified pages as images in the created folder
for i in pages:
    if i <= len(images):  # Check if the page number is valid
        # Save images in the created folder with incremental filenames
        image_path = f"{output_dir}/{pdf_name}_page_{i}.jpg"
        images[i - 1].save(image_path, "JPEG")  # -1 because list is 0-indexed
        print(f"Saved {image_path}")
        saved_image_paths.append(image_path)  # Append the image path as a string to the list
    else:
        print(f"Page {i} does not exist in the PDF.")

# Part G — Print final output list
# Print the list of saved image paths as strings
print("Saved Image Paths:", saved_image_paths)


PDFPageCountError: Unable to get page count.
I/O Error: Couldn't open file '/content/Receipt-template-example.pdf': No such file or directory.


In [None]:
# import google.generativeai as genai
# import os

# genai.configure(api_key="AIzaSyCX2YlpPas8NmACRymVk90cxlD9lfjzWbI")

# models = genai.list_models()
# for m in models:
#     print(m.name, "->", m.supported_generation_methods)

In [None]:
import google.generativeai as genai
import PIL.Image
import os

# Setup API key
genai.configure(api_key="YOUR API Key")   # set env var outside code

# Choose model
model = genai.GenerativeModel("gemini-2.5-flash")


# Load image
img = PIL.Image.open("/content/Receipt-template-example/Receipt-template-example_page_1.jpg")

# Prompt
prompt = "Read the text in this image and output it as a Markdown table."
response = model.generate_content([prompt, img])

print(response.text)


In [None]:
from google.colab import drive
drive.mount('/content/drive')