##### Copyright 2024 Google LLC.

In [None]:
# @title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Gemini API - read a PDF

This notebook demonstrates how you can convert a PDF file so that it can be read by the Gemini API.

<table align="left">
  <td>
    <a target="_blank" href="https://colab.research.google.com/github/google-gemini/cookbook/blob/main/quickstarts/PDF_Files.ipynb"><img src="https://www.tensorflow.org/images/colab_logo_32px.png" />Run in Google Colab</a>
  </td>
</table>

## Setup

In [None]:
!pip install -Uq google-generativeai

In [None]:
import google.generativeai as genai


import pathlib
import tqdm
import os

In [None]:
import json

# Read the API key from a local JSON file rather than hard-coding it in the
# notebook. The file must contain an object with a "GOOGLE_API_KEY" entry.
# The context manager guarantees the file handle is closed after parsing.
with open('api_key.json') as key_file:
    GOOGLE_API_KEY = json.load(key_file)['GOOGLE_API_KEY']

genai.configure(api_key=GOOGLE_API_KEY)

Install the PDF processing tools:

In [None]:
!apt install poppler-utils

## Download and process the PDF

This textbook is from OpenStax; its license is the Creative Commons Attribution License v4.0. More details are [available on the site](https://openstax.org/details/books/university-physics-volume-2).

In [None]:
import pathlib

In [None]:
if not pathlib.Path('test.pdf').exists():
    !curl -o test.pdf https://assets.openstax.org/oscms-prodcms/media/documents/UniversityPhysicsVolume2-WEB_5eNhMSa.pdf

You'll extract Chapter 3, pages [121-154].

In [None]:
# Inclusive range of PDF pages to extract (first page, last page).
first, last = 121, 154

In [None]:
!mkdir output
! # extract images of Chapter 3
!pdftoppm test.pdf -f {first} -l {last} output/images -jpeg
!ls output

Look at the first image, scaled down:

In [None]:
import PIL.Image

In [None]:
# Open the image rendered for the first extracted page, scale it down in place
# (at most 600x600), and show it as the cell output.
first_image_path = f"output/images-{first}.jpg"
page_preview = PIL.Image.open(first_image_path)
page_preview.thumbnail((600, 600))
page_preview

Extract the text for those same pages.

In [None]:
for page_number in range(first,last+1):
  page_number = f"{page_number:03d}"
  ! pdftotext  test.pdf -f {page_number} -l {page_number}
  ! mv test.txt output/text-{page_number}.txt

In [None]:
# List the output directory to check that the per-page text files are there.
!ls output

In [None]:
!cat output/text-{first}.txt

## Assemble the files into a prompt

Upload all the files using the Files API; there are too many to send inline with the `generate_content` request.

In [None]:
# Upload the page images in page order. glob() yields paths in arbitrary
# directory order, so sort them first; otherwise an image can end up paired
# with another page's text and page number when the prompt is assembled.
# (Lexicographic sorting matches page order because the page numbers in the
# file names all have the same width.)
image_files = sorted(pathlib.Path("output").glob('images-*.jpg'))
files = [genai.upload_file(image_path) for image_path in tqdm.tqdm(image_files)]

Load all the texts:

In [None]:
# Load the page texts in page order. glob() yields paths in arbitrary order,
# so sort by file name (the zero-padded page numbers sort correctly).
# pdftotext writes UTF-8 by default, so decode explicitly rather than relying
# on the platform's locale encoding.
texts = [
    text_path.read_text(encoding="utf-8")
    for text_path in sorted(pathlib.Path("output").glob('text-*.txt'))
]

Interleave the page numbers, texts, and image-file references:

In [None]:
# Build the prompt body page by page: a page-number header, the page's
# extracted text, then the reference to the page's uploaded image.
textbook = []
for offset, (page_text, page_image) in enumerate(zip(texts, files)):
  textbook.extend([f'## Page {first+offset} ##', page_text, page_image])

## Try it out

In [None]:
# Name of the Gemini model that will be asked to summarize the chapter.
MODEL_NAME = 'gemini-1.5-flash'
model = genai.GenerativeModel(model_name=MODEL_NAME)

In [None]:
# Wrap the interleaved pages between an opening header and a closing
# instruction, then send everything to the model as a single request.
# (The instruction previously misspelled "summarize".)
prompt = (
    ['# Here is a chapter from a physics text book:']
    + textbook
    + ["[END]\n\nPlease summarize it in sections for a better understanding"]
)
response = model.generate_content(prompt)

In [None]:
from IPython.display import Markdown

# Render the model's reply as formatted Markdown; the bare expression on the
# last line makes it the cell's output.
summary_markdown = Markdown(response.text)
summary_markdown