In [None]:
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

## Long PDF Q&A with Gemini 1.5

The goal of this notebook is to extract specific information from a large PDF by using Gemini 1.5.

In this notebook, you will:
 - Use Gemini to answer a specific question about the contents of a PDF document.

In [1]:
# Import python packages

from typing import Iterable
import io
import time

import vertexai
from vertexai.preview.generative_models import (
    GenerationResponse,
    GenerativeModel,
    HarmBlockThreshold,
    HarmCategory,
    Part
)

Set your Google Cloud project ID, location, and model name in the next cell.

In [2]:
# --- Settings to edit for your own environment ---
PROJECT_ID = "rl-llm-dev"  # your project ID
LOCATION = "us-central1"  # your location
MODEL_NAME = "gemini-1.5-pro-preview-0409"  # model to query

# Threshold that generate() applies to every harm category it lists.
BLOCK_LEVEL = HarmBlockThreshold.BLOCK_ONLY_HIGH

# Initialise the SDK first, then create the model handle shared by later cells.
vertexai.init(location=LOCATION, project=PROJECT_ID)
model = GenerativeModel(MODEL_NAME)

In [3]:
# Prompt template sent after the PDF part. The only placeholder is
# {question}, filled in with str.format() by retry_generate(). Each section is
# wrapped in a matching <Tag>...</Tag> pair so the sections are unambiguous.
prompt = """
Use the document above to answer the question below. Follow the Instructions and Suggestions below as a guide to answering the question.
<Instructions>
- First, analyze the question below and return which variables need to be analyzed, from what time period (example: second quarter of 2020), and any other details present in the question.
- Then return an analysis of what is asked in the question.
- Finally, carefully analyze the document above and answer the question below completely and correctly, using the variables determined in the previous step.
- Explain how you arrived at this result.
- Answer ONLY what was asked.
</Instructions>
<Suggestions>
- The document above is a financial report with various tables, graphs, infographics, lists, and additional information in text.
- PAY VERY CLOSE ATTENTION to the legends of the graphs and the COLORS of the graphs to answer the question below. The colors may indicate which information is important to answer the question.
- The color of the graph legends represents the color of the graph bars.
- Use ONLY this document as context to answer the question below.
</Suggestions>
<Question>
{question}
</Question>
answer:"""

In [4]:
def generate(
    prompt: list,
    max_output_tokens: int = 2048,
    temperature: float = 2,
    top_p: float = 0.4,
    stream: bool = False,
) -> GenerationResponse | Iterable[GenerationResponse]:
    """
    Send a multi-part prompt to the module-level ``model`` and return its reply.

    The same ``BLOCK_LEVEL`` threshold is applied to the hate speech, dangerous
    content, sexually explicit and harassment harm categories.

    Args:
        prompt:
            List of prompt parts, passed unchanged to ``model.generate_content``.
        max_output_tokens:
            Value sent as ``max_output_tokens`` in the generation config.
        temperature:
            Value sent as ``temperature`` in the generation config.
            NOTE(review): the default of 2 is kept for backward compatibility;
            a lower value may suit factual extraction better - confirm intent.
        top_p:
            Value sent as ``top_p`` in the generation config.
        stream:
            Forwarded as ``stream`` to ``model.generate_content``.

    Returns:
        Whatever ``model.generate_content`` returns. Per the signature this is
        a single response, or an iterable of responses when streaming.

    """
    generation_config = {
        "max_output_tokens": max_output_tokens,
        "temperature": temperature,
        "top_p": top_p,
    }

    # One threshold for every category, so build the mapping from the key list.
    safety_settings = dict.fromkeys(
        (
            HarmCategory.HARM_CATEGORY_HATE_SPEECH,
            HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
            HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
            HarmCategory.HARM_CATEGORY_HARASSMENT,
        ),
        BLOCK_LEVEL,
    )

    return model.generate_content(
        prompt,
        generation_config=generation_config,
        safety_settings=safety_settings,
        stream=stream,
    )


def retry_generate(
    pdf_document: Part,
    prompt: str,
    question: str,
    max_attempts: int = 30,
    delay_seconds: float = 2,
):
    """
    Ask ``question`` about ``pdf_document``, retrying when the call fails.

    Args:
        pdf_document:
            PDF part placed first in the prompt list.
        prompt:
            Template containing a ``{question}`` placeholder.
        question:
            Text substituted into the template.
        max_attempts:
            Maximum number of calls to ``generate`` before giving up.
        delay_seconds:
            Pause between a failed attempt and the next one.

    Returns:
        The value returned by ``generate`` on the first successful attempt.

    Raises:
        ValueError: If ``max_attempts`` is smaller than 1.
        Exception: The error from the final attempt, when every attempt fails.

    """
    if max_attempts < 1:
        raise ValueError("max_attempts must be at least 1")

    # Build the request once: it is identical on every attempt, and a template
    # error (e.g. a stray brace) should fail immediately instead of being retried.
    contents = [pdf_document, prompt.format(question=question)]

    for attempt in range(1, max_attempts + 1):
        try:
            return generate(prompt=contents)
        except Exception as e:
            # Out of attempts: surface the real error rather than looping forever.
            if attempt == max_attempts:
                raise
            print(f"sleeping for {delay_seconds} seconds ...")
            print(e)
            time.sleep(delay_seconds)

## Sample questions

In the cells below, set `question` to the question you want answered and `pdf_path` to the location of your PDF file.  

**(Optional)**  
If you are using Colab to test this notebook, you can try the following code to upload your PDF files.  
```python
from google.colab import files
files.upload()
```

Uncomment the code in the next cell to use this method.

In [5]:
# from google.colab import files
# files.upload()

In [9]:
# The question to ask, and the path of the PDF it should be answered from.
question = (
    "From the Consolidated Balance Sheet, "
    "what was the difference between the total assets from 2022 to 2023?"
)
pdf_path = "./Cymbal Bank - Financial Statements.pdf"

In [10]:
# Read the PDF as raw bytes; the file is closed as soon as the read finishes.
with open(pdf_path, mode="rb") as fp:
    pdf_bytes = fp.read()

# Wrap the bytes as a PDF part that can be placed in the prompt list.
pdf_document = Part.from_data(mime_type="application/pdf", data=pdf_bytes)

# Ask the question (with retries) and show the model's text answer.
response = retry_generate(pdf_document, prompt, question)
print(response.text)

## Analysis of the Question:

The question asks for the difference in **total assets** between the years **2022** and **2023** from the **Consolidated Balance Sheet**. This requires locating the relevant section within the document and identifying the values associated with each year. 


## Locating the Information:

1. **Consolidated Balance Sheet:** The document provides a "Consolidated Balance Sheet" table which contains financial data for the years 2022 and 2023.
2. **Total Assets:** We need to identify the row labeled "Total assets" within the table. 
3. **Values for 2022 and 2023:**  We will find the corresponding values under the "12/31/2022" and "12/31/2023" columns.


## Calculation:

1. **2023 Total Assets:**  $2,238,274 million 
2. **2022 Total Assets:** $2,281,868 million
3. **Difference:** $2,281,868 million - $2,238,274 million = $43,594 million


## Answer:

The difference in total assets between 2022 and 2023 according to the Consolidated Balance Sheet is **$43,594 mill