In [2]:
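# Dependencies for this notebook. Uncomment and run once if they are not installed yet
# (%pip installs into the environment of the running kernel).
# %pip install python-dotenv
# %pip install openai
# %pip install imageio
# %pip install azure-ai-formrecognizer
# %pip install azure-core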

In [3]:
from openai import AzureOpenAI
import dotenv
import os
import base64
from azure.core.credentials import AzureKeyCredential

# Load variables from a .env file into the process environment before any os.getenv
# lookups; override=True lets values from the file replace variables already set.
dotenv.load_dotenv(override=True)

True

In [4]:
# Shared Azure OpenAI client used by every request in this notebook.
# Endpoint and key are read from the environment variables AzureOpenAiEndpoint and
# AzureOpenAiKey (os.getenv yields None when a variable is not set).
_azure_client_settings = dict(
    azure_endpoint=os.getenv('AzureOpenAiEndpoint'),
    api_key=os.getenv('AzureOpenAiKey'),
    azure_deployment='gpt-4o',
    api_version='2024-02-15-preview',
)
client = AzureOpenAI(**_azure_client_settings)

In [5]:
def _read_as_base64(path):
    """Read the file at `path` in binary mode and return its bytes base64-encoded as a str."""
    with open(path, "rb") as handle:
        raw_bytes = handle.read()
    return base64.b64encode(raw_bytes).decode('utf-8')


# Base64 text for each sample image, ready to be embedded in a data URL.
aspirine = _read_as_base64("aspirine.jpg")
flowchart = _read_as_base64("flowchart.png")
flowchart2 = _read_as_base64("flowchart2.jpg")
rag = _read_as_base64("rag.png")
tablet = _read_as_base64("tablet.jpg")


In [6]:
def _analyze_document(encoded_image, prompt, mime_type):
    """Send a text prompt together with one inline image to the chat model.

    The image is passed as a data URL of the form
    ``data:<mime_type>;base64,<encoded_image>`` inside a single user message.

    Args:
        encoded_image: Base64 text of the image bytes.
        prompt: Instruction text sent alongside the image.
        mime_type: Media type placed in the data URL (e.g. ``"image/png"``).

    Returns:
        The ``content`` of the first choice's message in the response.
        NOTE(review): the API may return no content for some responses — confirm
        whether callers need to handle ``None``.
    """
    data_url = f"data:{mime_type};base64,{encoded_image}"
    res = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {"type": "image_url", "image_url": {"url": data_url}},
                ],
            }
        ],
    )
    return res.choices[0].message.content


def analyze_document_png(encoded_image, prompt):
    """Describe a base64-encoded PNG image according to `prompt`.

    Args:
        encoded_image: Base64 text of the PNG bytes.
        prompt: Instruction text sent alongside the image.

    Returns:
        The model's reply text for the first choice.
    """
    return _analyze_document(encoded_image, prompt, "image/png")


def analyze_document_jpg(encoded_image, prompt):
    """Describe a base64-encoded JPEG image according to `prompt`.

    Args:
        encoded_image: Base64 text of the JPEG bytes.
        prompt: Instruction text sent alongside the image.

    Returns:
        The model's reply text for the first choice.
    """
    # "image/jpeg" is the registered media type for JPEG; "image/jpg" is not.
    return _analyze_document(encoded_image, prompt, "image/jpeg")

In [7]:
# Free-form description of the `aspirine` image (JPEG); the reply is shown as the cell output.
prompt = 'Describe the given picture with as much as details as possible'
analyze_document_jpg(encoded_image=aspirine, prompt=prompt)

'The image shows a package of Aspirin tablets. Here are the details visible:\n\n- **Brand Name**: The large text at the top center of the package reads "ASPIRINE" in bold, black letters.\n- **Dosage**: The dosage is indicated as "500 mg" in black text aligned to the right of the brand name.\n- **Tablets Information**: It states that each tablet contains "500 mg acetylsalicylzuur."\n- **Quantity**: The package contains "20 tabletten."\n- **Manufacturer**: The bottom left side of the package has a green circular logo which says "BAYER" in white letters. \n- **Language**: The instructions appear to be in Dutch. The instruction says: "Wijze van gebruik: De tablet(ten) met een ruime hoeveelheid water na een maaltijd innemen." This translates to "Method of use: Take the tablet(s) with a generous amount of water after a meal."\n- **Visuals**: Toward the right side, there is an image showing two white tablets with markings "0,5" on them.\n- **Color Scheme**: The package primarily uses green an

In [8]:
# Ask for a description of the `flowchart` image (PNG); the reply is shown as the cell output.
prompt = 'Describe the given flowchart as best as you can.'
analyze_document_png(encoded_image=flowchart, prompt=prompt)

'The flowchart illustrates the recruitment process flow, starting from defining the job description and ending with hiring the candidate. The steps in the process are as follows:\n\n1. **Define job description**:\n    - The process begins with defining the job description.\n\n2. **Request personnel to HR**:\n    - The personnel request is sent to HR.\n\n3. **Classify job hiring**:\n    - The job hiring is classified.\n\n4. **Is it a regular process?**\n    - If the process is regular, proceed to create a job posting.\n    - If not, follow a special hiring process which involves:\n        - Reviewing the previous candidate.\n        - If the candidate is suitable, set up a meeting.\n        - If not, make a hiring plan.\n        - Create a job posting online.\n        - Collect submitted resumes.\n        - Assess if the candidate is qualified.\n        - If the candidate is not qualified, they are not hired.\n\n5. **Create job posting**:\n    - A job posting is created.\n\n6. **Found a

In [9]:
# Same flowchart prompt, applied to the `flowchart2` image (JPEG).
prompt = 'Describe the given flowchart as best as you can.'
analyze_document_jpg(encoded_image=flowchart2, prompt=prompt)

'This flowchart is titled "Flussdiagramm" (which is "Flowchart" in English) and depicts a sequence of processes leading from an input to an output. Here is a description of the flowchart step by step:\n\n1. **Input**: The process starts with an "Input" (Vorgabe).\n2. **Process Step 1 (Prozessschritt 1)**: The input leads to the first process step.\n3. **Document Creation**: After Process Step 1, a document (Dokument) is generated.\n4. **Condition Check (Bedingung)**: The flow then reaches a decision point (diamond shape labeled "Bedingung erfüllt?", which means "Condition met?").\n   - If the condition is met ("ja"), the flow proceeds to Process Step 2.\n   - If the condition is not met ("nein"), the flow goes to a different process (labelled "Prozess").\n5. **Process Step 2 (Prozessschritt 2)**: If the condition is met, Process Step 2 is carried out.\n6. **Document Creation**: Another document is generated after Process Step 2.\n7. **Process Step 3 (Prozessschritt 3)**: The flow advan

In [10]:
# Ask for a detailed description of the architecture diagram in the `rag` image (PNG).
prompt = 'Describe the given IT Architecture as best as possible. Include as many information as possible from the picture.'
analyze_document_png(encoded_image=rag, prompt=prompt)

"This IT architecture diagram provides a detailed overview of a Retrieval Augmented Generation (RAG) system with embeddings. It explains the flow from a user's query to the retrieval and generation of the answer, utilizing both a knowledge base and a large language model. Here's a breakdown of the various components and their interactions:\n\n1. **User Interaction:**\n   - **User Question:** The process starts with a user question. This can be a query input on a device like a mobile or web interface.\n\n2. **RAG System:**\n   - **Retriever over Knowledge Base:**\n     - The user question is sent as a query to a retriever system that searches over a knowledge base.\n     - Next, the query is sent to two main components simultaneously: \n       1. **Large Language Model (Embedding):**\n          - The query is embedded into a vector using a large language model.\n          - The embedded vectors are then used to find related vectors within the knowledge base.\n       2. **Search Index:**

In [11]:
# Free-form description of the `tablet` image (JPEG); the reply is shown as the cell output.
prompt = 'Describe the given picture with as much as details as possible'
analyze_document_jpg(encoded_image=tablet, prompt=prompt)

'The image shows a close-up view of a pair of hands. The right hand is open with the palm facing up, holding a single yellow capsule-shaped pill. The left hand is holding a clear glass of water, with some reflections visible on the glass from the light. The background is a smooth, white surface, and there are shadows and light patterns visible, suggesting natural or bright lighting from a window or another source. The overall composition focuses on the hands, the pill, and the glass of water, emphasizing the act of preparing to take medication. The veins and contours of the hands are noticeable, highlighting a natural and realistic appearance.'