In [1]:
# One-time setup: uncomment and run these lines to install the packages this notebook needs.
# Inside Jupyter, `%pip install ...` is preferable to `!pip install ...` because it installs
# into the environment of the running kernel.
#!pip install python-dotenv
#!pip install openai
#!pip install imageio
#!pip install azure-ai-formrecognizer
#!pip install azure-core

In [2]:
from openai import AzureOpenAI
import dotenv
import os
import base64
from azure.core.credentials import AzureKeyCredential

# Load variables from a .env file into the process environment.
# override=True lets values from the .env file replace variables that are already set.
dotenv.load_dotenv(override=True)

True

In [3]:
# Shared Azure OpenAI client used by every request in this notebook.
# The endpoint and key are looked up in the environment (os.getenv yields None when a
# variable is unset); the deployment name and API version are fixed here.
client = AzureOpenAI(
    api_version="2024-02-15-preview",
    azure_deployment="gpt-4o",
    azure_endpoint=os.getenv("AzureOpenAiEndpoint"),
    api_key=os.getenv("AzureOpenAiKey"),
)

In [4]:
# Read the image file as raw bytes and keep it as base64 text, the form that
# analyze_document embeds in a data URL.
with open("msft.png", mode="rb") as image_file:
    msft_tables = str(base64.b64encode(image_file.read()), encoding="utf-8")

In [5]:
def analyze_document(encoded_image, prompt, *, model="gpt-4o", mime_type="image/png",
                     openai_client=None):
    """Send a text prompt together with a base64-encoded image to a chat model.

    Parameters
    ----------
    encoded_image : str
        Base64 text of the image bytes, without any ``data:`` prefix.
    prompt : str
        Instruction or question sent as the text part of the user message.
    model : str, optional
        Value passed as ``model`` to ``chat.completions.create``.
        Defaults to ``"gpt-4o"``, the value that used to be hard-coded.
    mime_type : str, optional
        MIME type written into the data URL. Defaults to ``"image/png"``;
        pass e.g. ``"image/jpeg"`` when the encoded bytes are not a PNG.
    openai_client : object, optional
        Object exposing ``chat.completions.create``. When omitted, the
        notebook-level ``client`` is used, as before.

    Returns
    -------
    The ``content`` of the first choice's message in the response.

    Raises
    ------
    ValueError
        If ``encoded_image`` is empty, so no request is sent for a payload
        that cannot contain an image.
    """
    if not encoded_image:
        raise ValueError("encoded_image must be a non-empty base64 string")

    # Fall back to the notebook-level client only when none was injected, so the
    # global is not touched when a caller supplies a client explicitly.
    active_client = client if openai_client is None else openai_client

    user_message = {
        "role": "user",
        "content": [
            {"type": "text", "text": prompt},
            {
                "type": "image_url",
                # The image travels inline as a data URL instead of a remote link.
                "image_url": {"url": f"data:{mime_type};base64,{encoded_image}"},
            },
        ],
    }
    res = active_client.chat.completions.create(
        model=model,
        messages=[user_message],
    )
    return res.choices[0].message.content

In [6]:
# Query: recorded basis of Level 1 equity investments for 2023.
prompt = "Provide the exact number of recorded basis for equity investments of level 1 in 2023."
analyze_document(encoded_image=msft_tables, prompt=prompt)

'The exact recorded basis for equity investments of level 1 in 2023 is $10,138 million.'

In [7]:
# Query: recorded basis of Level 1 equity investments for 2022.
prompt = "Provide the exact number of recorded basis for equity investments of level 1 in 2022."
analyze_document(encoded_image=msft_tables, prompt=prompt)

'The recorded basis for equity investments of Level 1 in June 30, 2022, is $1,590 million.'

In [16]:
# Query: recorded basis of Level 3 corporate notes and bonds for 2023.
prompt = "Provide the Recorded Basis for Corporate notes and bonds of Level 3 in 2023."
analyze_document(encoded_image=msft_tables, prompt=prompt)

'The recorded basis for Level 3 Corporate notes and bonds in 2023 is $0 million.'

In [9]:
# Query: recorded basis of Level 2 corporate notes and bonds for 2022.
# NOTE(review): the saved output of this cell talks about Level 3, not the Level 2
# requested here — confirm which level was intended and re-run the cell.
prompt = 'Provide the recorded basis for Corporate notes and bonds of Level 2 in 2022.'
analyze_document(msft_tables, prompt)

'The recorded basis for Corporate notes and bonds of Level 3 in 2022 was $0 million.'

In [10]:
# Query: unrealized gains on total debt investments, for both 2023 and 2022.
prompt = "Provide the total number for unrealized gains of total debt investments for 2023 and 2022"
analyze_document(encoded_image=msft_tables, prompt=prompt)

'For the total debt investments, the unrealized gains for the years 2023 and 2022 are as follows:\n\n- **2023:** $13 million\n- **2022:** $53 million'

**Extracurricular:**

In [17]:
# Ask the model to transcribe the table as JSON and keep the raw reply
# for inspection in the next cell.
prompt = "Extract the table as json format"
table_json = analyze_document(encoded_image=msft_tables, prompt=prompt)

In [18]:
# Display the raw reply; per the saved output it is a string that starts with a
# Markdown ```json fence rather than bare JSON.
table_json

'```json\n{\n  "June 30, 2023": {\n    "Changes in Fair Value Recorded in Other Comprehensive Income": [\n      {\n        "Category": "Commercial paper",\n        "Level": 2,\n        "Fair Value": 16589,\n        "Adjusted Cost Basis": 0,\n        "Unrealized Gains": 0,\n        "Unrealized Losses": 0,\n        "Recorded Basis": 16589,\n        "Cash and Cash Equivalents": 12231,\n        "Short-term Investments": 4358,\n        "Equity Investments": 0\n      },\n      {\n        "Category": "Certificates of deposit",\n        "Level": 2,\n        "Fair Value": 2701,\n        "Adjusted Cost Basis": 0,\n        "Unrealized Gains": 0,\n        "Unrealized Losses": 0,\n        "Recorded Basis": 2701,\n        "Cash and Cash Equivalents": 2657,\n        "Short-term Investments": 44,\n        "Equity Investments": 0\n      },\n      {\n        "Category": "U.S. government securities",\n        "Level": 1,\n        "Fair Value": 65237,\n        "Adjusted Cost Basis": 2,\n        "Unrealize

In [22]:
# Repeat of the Level 3 corporate notes and bonds query for 2023
# (same prompt text as the earlier In [16] cell).
prompt = "Provide the Recorded Basis for Corporate notes and bonds of Level 3 in 2023."
analyze_document(encoded_image=msft_tables, prompt=prompt)

'The recorded basis for corporate notes and bonds of Level 3 on June 30, 2023, is $0 million.'