In [1]:
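# One-time environment setup: uncomment the lines below to install the
# packages this notebook depends on, then restart the kernel.
#!pip install python-dotenv
#!pip install openai
#!pip install imageio
#!pip install azure-ai-formrecognizer
#!pip install azure-core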

In [2]:
from openai import AzureOpenAI
import dotenv
import os
import base64
from azure.core.credentials import AzureKeyCredential

# Load key/value pairs from a local .env file into the process environment.
# override=True lets values from .env replace variables that are already set.
dotenv.load_dotenv(override=True)

True

In [3]:
# Shared Azure OpenAI client used by every request in this notebook.
# The endpoint and key are read from the environment variables
# 'AzureOpenAiEndpoint' and 'AzureOpenAiKey'; os.getenv yields None when a
# variable is missing.
_azure_client_settings = {
    "azure_endpoint": os.getenv('AzureOpenAiEndpoint'),
    "api_key": os.getenv('AzureOpenAiKey'),
    "azure_deployment": 'gpt-4o',
    "api_version": '2024-02-15-preview',
}
client = AzureOpenAI(**_azure_client_settings)

In [4]:
def _encode_image_file(path):
    """Read the file at ``path`` in binary mode and return it as base64 text.

    Args:
        path: Location of the image file to read.

    Returns:
        The file's bytes, base64-encoded and decoded to a UTF-8 ``str``.
    """
    with open(path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')


# Base64 text of each chart image, ready to embed in a data URL.
graph1 = _encode_image_file("graph1.png")
graph2 = _encode_image_file("graph2.png")
graph3 = _encode_image_file("graph3.png")


In [5]:
def analyze_document_png(encoded_image, prompt, max_tokens=300):
    """Ask the 'gpt-4o' model to respond to ``prompt`` about a PNG image.

    A single user message is sent through the module-level ``client``. It
    carries the text prompt followed by the image, embedded inline as a
    ``data:image/png;base64,...`` URL.

    Args:
        encoded_image: Base64 text of the image, without any ``data:`` prefix.
        prompt: Instruction text sent alongside the image.
        max_tokens: Upper bound on the length of the generated reply.
            Defaults to 300 (the value that used to be hard-coded); raise it
            when a detailed description would otherwise be cut short.

    Returns:
        The message content of the first completion choice.
    """
    image_part = {
        "type": "image_url",
        "image_url": {"url": f"data:image/png;base64,{encoded_image}"},
    }
    res = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    image_part,
                ],
            }
        ],
        max_tokens=max_tokens,
    )
    return res.choices[0].message.content

def analyze_document_jpg(encoded_image, prompt, max_tokens=300):
    """Ask the 'gpt-4o' model to respond to ``prompt`` about a JPEG image.

    A single user message is sent through the module-level ``client``. It
    carries the text prompt followed by the image, embedded inline as a
    ``data:image/jpeg;base64,...`` URL.

    Args:
        encoded_image: Base64 text of the image, without any ``data:`` prefix.
        prompt: Instruction text sent alongside the image.
        max_tokens: Upper bound on the length of the generated reply.
            Defaults to 300 (the value that used to be hard-coded); raise it
            when a detailed description would otherwise be cut short.

    Returns:
        The message content of the first completion choice.
    """
    # The registered media type for JPEG files is "image/jpeg"; "image/jpg"
    # is not a registered type, so it is not used in the data URL.
    image_part = {
        "type": "image_url",
        "image_url": {"url": f"data:image/jpeg;base64,{encoded_image}"},
    }
    res = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    image_part,
                ],
            }
        ],
        max_tokens=max_tokens,
    )
    return res.choices[0].message.content

In [6]:
# Request a free-form description of the first chart; the returned text is
# shown as the cell output.
prompt = 'Describe the given graph with as much as details as possible'
analyze_document_png(encoded_image=graph1, prompt=prompt)

'The graph presents data on the total number of prescriptions and the total number of patients each year from 2013 to 2021.\n\n1. **Total Prescriptions (blue line)**:\n   - **2013**: Approximately 19,000,000 prescriptions.\n   - **2014**: Slight increase to about 20,000,000 prescriptions.\n   - **2015**: Peak at approximately 22,000,000 prescriptions, the highest point on the graph.\n   - **2016**: Slight decrease to around 21,000,000 prescriptions.\n   - **2017**: Noticeable drop to around 19,000,000 prescriptions.\n   - **2018**: Marginal recovery to about 20,000,000 prescriptions.\n   - **2019**: Small drop again to about 19,500,000 prescriptions.\n   - **2020**: Stability continues around 19,000,000 prescriptions.\n   - **2021**: Slight increase back up to approximately 20,000,000 prescriptions.\n\n2. **Total Patients (red line)**:\n   - Throughout the period 2013 to 2021, the total number of patients remains relatively constant, fluctuating mildly around 10,000,000 patients. The t

In [7]:
prompt = 'Describe the given graphs with as much as details as possible'
# Caption text for the figure, given to the model as extra context.
# Despite its name, this is not sent as a system-role message: it is appended
# to the user prompt below.
system_message = "Average monthly rainfall for the period 2000-2018 (bar chart in blue, Tenkodogo station), air temperatures for the period 2000-2018 (red line with dots, Fada N'Gourma station) and satellite-derived surface water temperatures for the period 2000-2018 (blue line with crosses). Annual rainfall from 2000 to 2018 is shown in the inset (no data are available for 2001, 2003-2005 and 2010). https://doi.org/10.1371/journal.pntd.0009634.g002"
# Put a blank line between the instruction and the caption so the two texts
# do not run together ("...as possibleAverage monthly...").
analyze_document_png(graph2, prompt + "\n\n" + system_message)

"The graph provided depicts three key metrics over the period 2000-2018: average monthly rainfall (measured at Tenkodogo station), air temperatures (measured at Fada N'Gourma station), and satellite-derived surface water temperatures. It also includes an inset showing annual rainfall from 2000 to 2018, with some years missing data.\n\n### Main Graph:\n- **Average Monthly Rainfall (2000-2018)**:\n  - Represented by blue bars.\n  - Rainfall is lowest in January, February, and December (near zero).\n  - Rainfall begins to increase in May, peaks in August at approximately 250 mm, and then decreases rapidly through September to November.\n  - Significant rainfall is observed from June to September, indicating these months as the rainy season.\n\n- **Air Temperatures (2000-2018)**:\n  - Represented by a red line with dots.\n  - Air temperatures show seasonal variation:\n    - Peaks in April around 30.5°C.\n    - Drop significantly to the lowest point of around 26.5°C in August, during the pe

In [8]:
# Same generic description request, applied to the third image.
prompt = 'Describe the given graph with as much as details as possible'
analyze_document_png(encoded_image=graph3, prompt=prompt)

"The graph provides detailed information on PM₂.₅ concentrations across India in the year 2000. PM₂.₅ refers to particulate matter less than 2.5 micrometers in diameter, which is a critical indicator of air pollution.\n\n### Map Details\nThe map of India illustrates the concentration of PM₂.₅ across various regions with the following color coding:\n- Dark green (0 to 5 µg/m³)\n- Light green (5 to 20 µg/m³)\n- Yellow-green (20 to 30 µg/m³)\n- Yellow (30 to 40 µg/m³)\n- Light orange (40 to 50 µg/m³)\n- Darker orange (50 to 60 µg/m³)\n- Light red (60 to 70 µg/m³)\n- Darker red (70 to 80 µg/m³)\n- Dark red (80 to 90 µg/m³)\n- Darkest red (90 to 100 µg/m³)\n\n### Population Exposure\nThe bar chart on the top right shows the percentage of India's population exposed to different pollution levels:\n- 25.8% of the population is exposed to PM₂.₅ concentrations between 20 to 30 µg/m³.\n- 19.7% of the population is exposed to concentrations between 30 to 40 µg/m³.\n- 14.7% of the population is exp

In [9]:
# Re-query the third image with a more targeted prompt that asks the model to
# point out the heavily polluted areas.
prompt = 'Describe the landscape in the given image and highlight the areas with huge polutions'
analyze_document_png(encoded_image=graph3, prompt=prompt)

'The image presents a map of India that conveys air quality information, specifically the reanalyzed PM₂.₅ (particulate matter) concentrations for the year 2000. Below is a detailed description of the landscape and pollution levels:\n\n### Landscape Description\n- The map covers the entire country of India, marked with state boundaries.\n- The Arabian Sea is labeled to the southwest, and the Bay of Bengal is to the southeast.\n- The map spans from latitudes 10°N to 35°N and longitudes 70°E to 95°E.\n\n### Areas with High Pollution\nThe map uses color-coding to depict PM₂.₅ concentration levels in micrograms per cubic meter (µg/m³). The darkest red areas indicate the highest pollution levels:\n\n1. **Red Zone (80-100 µg/m³)**\n   - **Highlighted Area:** Northern India, especially the region around Delhi and parts of Uttar Pradesh and Bihar.\n   - **Pollution Level:** Extremely high PM₂.₅ concentration indicating severe pollution.\n\n2. **Orange Zone (60-80 µg/m³)**\n   - **Highlighted A