# Image to Text with LCEL
### (with GPT-4o and maybe others)

Inspired by: https://tykimos.github.io/2024/05/15/image_descriptions_with_gpt_4o_and_lcel/

In [None]:
import base64
from pathlib import Path

from dotenv import load_dotenv
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.messages.base import BaseMessage
from langchain_core.output_parsers import StrOutputParser

from python.ai_core.llm import get_llm

# Load settings from a local .env file into the process environment.
# verbose=True asks python-dotenv to warn when no .env file is found.
load_dotenv(verbose=True)

# NOTE(review): `!` executes the command in a temporary subshell, so this
# export does not reach the running kernel's environment or `sys.path`, and it
# runs after the project import above anyway — confirm whether it is needed.
!export PYTHONPATH=":./python"

###  Chain to query an image 

In [None]:
def gen_prompt(param_dict: dict) -> list[BaseMessage]:
    """Build the chat messages for a question about a single image.

    Args:
        param_dict: Mapping read for two keys: ``"question"`` (the text part
            of the user message) and ``"image_url"`` (the URL placed in the
            image part). Both values are interpolated into strings.

    Returns:
        A two-element list: a ``SystemMessage`` carrying the fixed assistant
        instruction, followed by a ``HumanMessage`` whose content is a text
        part and then an image part.

    Raises:
        KeyError: If ``"question"`` or ``"image_url"`` is missing.
    """
    question_part = {"type": "text", "text": f"{param_dict['question']}"}
    image_part = {
        "type": "image_url",
        "image_url": {"url": f"{param_dict['image_url']}"},
    }
    instruction = (
        "You are a helpful assistant that kindly explains images and answers questions provided by the user."
    )
    return [
        SystemMessage(content=instruction),
        HumanMessage(content=[question_part, image_part]),
    ]


# Pick the model behind the chain. Keep exactly one `get_llm` call active:
# an earlier active call would build a model whose result is thrown away
# (and may fail if that provider's credentials are not configured).
# llm = get_llm(llm_id="gpt_4o_openai")
# Does not work:
# llm = get_llm(llm_id="gpt_4o_edenai")
# llm = get_llm(llm_id="gpt_4_azure")
llm = get_llm(llm_id="llava_phi3_ollama")

# LCEL pipeline: input dict -> messages (gen_prompt) -> model -> plain string.
chain = gen_prompt | llm | StrOutputParser()

### Embed the image in the message

In [None]:
# Location of the sample picture sent to the model by the cells below.
# NOTE(review): REPO is an absolute path on one machine — adjust it locally.
# Repository-relative alternative kept for reference:
# IMAGE_PATH = "use_case_data/railway/network rail.png"

REPO = Path("/mnt/c/Users/a184094/OneDrive - Eviden/_ongoing/training GenAI/")
IMAGE_PATH = REPO.joinpath("network rail.png")


def encode_image(image_path: Path) -> str:
    """Return the contents of a file as a base64 string.

    Args:
        image_path: Path of the file to read; it is opened in binary mode.

    Returns:
        The file's bytes, base64-encoded and decoded to text as UTF-8.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(image_path, "rb") as fh:
        raw_bytes = fh.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")


# Encode the sample image once; the cells below reuse this base64 string.
base64_image = encode_image(IMAGE_PATH)

In [None]:
# Ask the chain about the picture. The image travels inline as a data URL, so
# the declared media type has to match the file: IMAGE_PATH points to a PNG.
response = chain.invoke(
    {
        "question": "Please describe this junction.",
        "image_url": f"data:image/png;base64,{base64_image}",
    }
)
print(response)

### New LangChain API?
Binding the image with `llm.bind(images=...)` seems to work for Ollama only.

In [None]:
# Attach the image to the model as a bound `images` argument instead of
# embedding it in the message content.
llm_with_image_context = llm.bind(images=[base64_image])  # base64_image is the base64-encoded image from the cell above
response = llm_with_image_context.invoke("Please describe this rail junction.")
# `llm` is invoked directly here (no StrOutputParser), so this prints whatever
# object the model returns rather than a plain string.
print(response)

**EXPERIMENT**

In [None]:
import os

import requests

# Call the Eden AI multimodal chat endpoint directly, without LangChain.
# The API key is read from the environment; a missing key raises KeyError here.
headers = {"Authorization": f"Bearer {os.environ['EDENAI_API_KEY']}"}
url = "https://api.edenai.run/v2/multimodal/chat"


# Reuse the shared helper instead of repeating the read-and-encode logic.
base64_image = encode_image(IMAGE_PATH)
payload = {
    "providers": "openai, google",
    "messages": [
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "content": {"text": "Describe this image please!"},
                },
                {
                    "type": "media_base64",
                    "content": {
                        "media_base64": base64_image,
                        "media_type": "image/png",
                    },
                },
            ],
        }
    ],
    "chatbot_global_action": "",
}

# Without a timeout, requests waits indefinitely if the service stops
# responding; 60 s leaves room for the image to be processed.
response = requests.post(url, json=payload, headers=headers, timeout=60)
# Surface HTTP failures (bad key, quota, ...) as an HTTPError instead of a
# confusing KeyError on the provider lookup below.
response.raise_for_status()
result = response.json()
print(result["openai"]["generated_text"])

In [None]:
# Show the "google" provider's answer from the same `result` dictionary.
print(result["google"]["generated_text"])