In [4]:
# Dependencies: uncomment to install them into the running kernel's environment.
# %pip install python-dotenv
# %pip install openai
# %pip install imageio
# %pip install azure-ai-formrecognizer
# %pip install azure-core

In [5]:
from openai import AzureOpenAI
import dotenv
import os
import base64
from azure.core.credentials import AzureKeyCredential

# Load key/value pairs from a .env file into the process environment.
# override=True lets values from the file replace variables that are
# already set, so the file is the source of truth for the settings below.
dotenv.load_dotenv(override=True)

True

In [6]:
# Shared Azure OpenAI client used by every request in this notebook.
# The endpoint and key are looked up in the process environment (the lookup
# yields None when a name is unset); the deployment name and API version
# are fixed here.
client = AzureOpenAI(
    api_version='2024-02-15-preview',
    azure_deployment='gpt-4o',
    azure_endpoint=os.environ.get('AzureOpenAiEndpoint'),
    api_key=os.environ.get('AzureOpenAiKey'),
)

In [12]:
def _read_as_base64(path):
    """Return the bytes of the file at `path` as base64 text (UTF-8 decoded)."""
    with open(path, "rb") as handle:
        raw_bytes = handle.read()
    return base64.b64encode(raw_bytes).decode('utf-8')


# Base64 text of each sample image, ready to be embedded in a data URL.
aspirine = _read_as_base64("aspirine.jpg")
flowchart = _read_as_base64("flowchart.png")
flowchart2 = _read_as_base64("flowchart2.jpg")
rag = _read_as_base64("rag.png")
tablet = _read_as_base64("tablet.jpg")


In [8]:
def _analyze_document(encoded_image, prompt, mime_type, max_tokens=300):
    """Send one text prompt plus one base64-encoded image to the chat model.

    Shared implementation behind ``analyze_document_png`` and
    ``analyze_document_jpg``; the two differ only in the media type that is
    announced in the data URL.

    Args:
        encoded_image: Base64 text of the image bytes (without any
            ``data:`` prefix; the prefix is added here).
        prompt: Instruction or question sent alongside the image.
        mime_type: Media type written into the data URL, e.g. ``"image/png"``.
        max_tokens: Upper bound on the length of the reply, in tokens. An
            answer that would be longer is cut off at this limit.

    Returns:
        The ``content`` of the first choice's message in the response.
    """
    res = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {
                        "type": "image_url",
                        # The image travels inline as a data URL rather than
                        # as a link to a hosted file.
                        "image_url": {"url": f"data:{mime_type};base64,{encoded_image}"},
                    },
                ],
            }
        ],
        max_tokens=max_tokens,
    )
    return res.choices[0].message.content


def analyze_document_png(encoded_image, prompt, max_tokens=300):
    """Ask the model about a PNG image supplied as base64 text.

    Args:
        encoded_image: Base64 text of the PNG file's bytes.
        prompt: Instruction or question sent alongside the image.
        max_tokens: Upper bound on the reply length in tokens (default 300,
            the value this function always used). Raise it when answers
            come back cut off.

    Returns:
        The ``content`` of the first choice's message in the response.
    """
    return _analyze_document(encoded_image, prompt, "image/png", max_tokens)


def analyze_document_jpg(encoded_image, prompt, max_tokens=300):
    """Ask the model about a JPEG image supplied as base64 text.

    Args:
        encoded_image: Base64 text of the JPEG file's bytes.
        prompt: Instruction or question sent alongside the image.
        max_tokens: Upper bound on the reply length in tokens (default 300,
            the value this function always used). Raise it when answers
            come back cut off.

    Returns:
        The ``content`` of the first choice's message in the response.
    """
    # The registered media type for .jpg/.jpeg files is "image/jpeg";
    # "image/jpg" is not a registered type.
    return _analyze_document(encoded_image, prompt, "image/jpeg", max_tokens)

In [9]:
# Free-form description of the aspirine.jpg image.
prompt = 'Describe the given picture with as much as details as possible'
analyze_document_jpg(encoded_image=aspirine, prompt=prompt)

'The image shows the packaging for Aspirine, a medication produced by Bayer. Here are the details from the packaging:\n\n1. **Product Name**: ASPIRINE\n2. **Dosage**: 500 mg per tablet\n3. **Active Ingredient**: Acetylsalicylic acid (indicated by "per tablet 500 mg acetylsalicylzuur" which is Dutch for acetylsalicylic acid)\n4. **Quantity**: 20 tablets ("20 tabletten")\n5. **Manufacturer**: Bayer, as indicated by the Bayer logo on the left side of the packaging.\n6. **Usage Instructions**: Complete instructions are in Dutch.\n    - "Wijze van gebruik: De tablet(ten) met een ruime hoeveelheid water na een maaltijd innemen."\n    - This translates to: "Usage instructions: Take the tablets with a generous amount of water after a meal."\n7. **Visual**: The package is mainly white with green accents below the product name. There are two images of round tablets at the bottom right, both labeled "0.5".\n\nThe text is prominent and well-organized, designed to be easy to read and understand.'

In [13]:
# Description of the flowchart.png image.
prompt = 'Describe the given flowchart as best as you can.'
analyze_document_png(encoded_image=flowchart, prompt=prompt)

"The flowchart outlines the steps involved in the recruitment process. Here's a detailed description:\n\n1. **Start and Define Job Description**\n   - The process begins with defining the job description.\n\n2. **Request Personnel to HR**\n   - A request for personnel is submitted to Human Resources (HR).\n\n3. **Classify Job Hiring**\n   - The type of job hiring is classified.\n\n4. **Is it a Regular Process?**\n   - A decision is made on whether it is a regular hiring process or not:\n     - If **NO, Special Hiring Process**\n       - Follow the special hiring process path.\n     - If **YES, Create Job Posting**\n       - Create a job posting for the vacancy.\n\n5. **Review Previous Candidate**\n   - Before proceeding with the job posting, review previous candidates.\n\n6. **Is the Candidate Suitable for the Job?**\n   - Evaluate if the previous candidate is suitable for the job:\n     - If **YES, Setup a Meeting with Candidate**\n       - Set up a meeting with the candidate.\n     -

In [14]:
# Same flowchart prompt, applied to the flowchart2.jpg image.
prompt = 'Describe the given flowchart as best as you can.'
analyze_document_jpg(encoded_image=flowchart2, prompt=prompt)

'This flowchart is titled "Flussdiagramm" (Flow Diagram in English) and outlines a sequence of processes based on given input. Here\'s a detailed description of the flowchart:\n\n1. **Start with Input**\n   - The process begins with an input termed "Vorgabe (Input)".\n\n2. **Prozessschritt 1**\n   - The input leads to "Prozessschritt 1" (Process Step 1).\n   - After this step, a document ("Dokument") is generated.\n\n3. **Condition Check**\n   - The next step is to check whether a certain condition is met ("Bedingung erfüllt?").\n   - This decision node has two possible paths:\n     - If the condition is **not met** ("nein"), the process moves to another separate process ("Prozess").\n     - If the condition is **met** ("ja"), the flow proceeds to "Prozessschritt 2" (Process Step 2).\n\n4. **Prozessschritt 2**\n   - After "Prozessschritt 2", another document ("Dokument") is generated.\n\n5. **Prozessschritt 3**\n   - The flow continues to "Prozessschritt 3" (Process Step 3).\n   - Afte

In [15]:
# Description of the architecture diagram in rag.png.
prompt = 'Describe the given IT Architecture as best as possible. Include as many information as possible from the picture.'
analyze_document_png(encoded_image=rag, prompt=prompt)

"The image depicts an IT architecture for Retrieval Augmented Generation (RAG) with embeddings. Here’s a detailed breakdown of the architecture:\n\n1. **User Interaction:**\n   - The process starts with a user question, which is represented as an input from a user device (e.g., a smartphone).\n\n2. **Retrieval Phase:**\n   - The user's question is sent to the **Retriever over Knowledge Base**.\n   - The retriever operates by sending a query to the **Large Language Model (Embedding)**, which processes the query and generates a vector representation.\n   - The vector and the original query are then sent to a **Search Index** to search for relevant content.\n   - The search index returns search results based on the vector and query, which are then sent back to the retriever.\n\n3. **Generation Phase:**\n   - The retriever aggregates the search results, combining the retrieved content with the prompt (user question) and acquired knowledge.\n   - This combined information is used to query a

In [16]:
# Free-form description of the tablet.jpg image.
prompt = 'Describe the given picture with as much as details as possible'
analyze_document_jpg(encoded_image=tablet, prompt=prompt)

'The image shows a close-up view of two hands. The left hand is holding a transparent glass of water, and the right hand is open, palm up, holding a single yellow capsule-like pill. The background is a plain, light-colored surface, and there are some light reflections or shadows on the background, possibly indicating sunlight or a bright light source from above. The focus is on the hands, the pill, and the glass, highlighting a moment where someone is about to take the pill with water. The clear glass also shows a slight reflection of light. The image is credited to Kanur Ismail, as noted by the watermark from iStock.'