In [None]:
!pip install langchain openai langchain-experimental langchain-openai pillow open_clip_torch torch matplotlib --quiet

In [None]:
!pip install transformers --quiet

In [None]:
from langchain.vectorstores import SingleStoreDB
import os

from langchain_experimental.open_clip import OpenCLIPEmbeddings

# Assemble the connection string as user:password@host:port/database and
# publish it through the SINGLESTOREDB_URL environment variable.
# NOTE(review): the connection_* names are not defined in this notebook —
# presumably injected by the hosting notebook environment; confirm.
os.environ["SINGLESTOREDB_URL"] = "{}:{}@{}:{}/{}".format(
    connection_user,
    connection_password,
    connection_host,
    connection_port,
    connection_default_database,
)

In [None]:
from langchain_openai import ChatOpenAI

from getpass import getpass

# Never store a real key in the notebook: reuse a key that is already exported
# in the environment, otherwise prompt for it without echoing the input.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

In [None]:
# Vector store that uses OpenCLIP to produce the embeddings.
# NOTE(review): no connection arguments are passed here — presumably the
# SINGLESTOREDB_URL environment variable set earlier is picked up; confirm.
vectorstore = SingleStoreDB(
    embedding=OpenCLIPEmbeddings(),
)

In [None]:
import requests

# NOTE(review): this URL looks like a placeholder — replace it with a real,
# reachable image URL before running.
url = "https://www.image.123566685.jpg"
# Always pass a timeout: without one, requests waits indefinitely on a server
# that accepts the connection but never answers, hanging the notebook.
response = requests.get(url, timeout=30)

# Only write the file when the server answered 200 OK; otherwise report the
# status code and leave the working directory untouched.
if response.status_code == 200:
    with open("downloaded_image.jpg", "wb") as f:
        f.write(response.content)
    print("Download successful. Image saved as 'downloaded_image.jpg'")
else:
    print(f"Failed to download the image. Status code: {response.status_code}")

Download successful. Image saved as 'downloaded_image.jpg'


In [None]:
def plt_img_local(image_path):
    """Display image from a local file path.

    Args:
        image_path: Path of the image file to render in the cell output.
    """
    # Imported locally because the notebook's own IPython.display import sits
    # in a later cell; without this a fresh top-to-bottom run fails with
    # NameError on `Image`.
    from IPython.display import Image, display

    display(Image(filename=image_path))

# Preview the file written by the download cell.
plt_img_local(image_path='downloaded_image.jpg')

In [None]:
# `path` is not assigned anywhere earlier in this notebook, so a fresh
# "Restart & Run All" failed here with NameError. Keep a value the user has
# already set; otherwise fall back to the working directory, which is where
# the download cell writes 'downloaded_image.jpg'.
try:
    path
except NameError:
    path = os.getcwd()

# Get image URIs with .jpg extension only (direct children of `path`, not
# recursive), sorted so the ingestion order is reproducible.
image_uris = sorted(
    os.path.join(path, image_name)
    for image_name in os.listdir(path)
    if image_name.endswith(".jpg")
)

In [None]:
# Ingest the collected image files into the vector store.
# NOTE(review): a later search prints a file path as page_content, so the URI
# appears to be what is stored as each document's text — confirm.
vectorstore.add_images(uris=image_uris)

[]

In [None]:
# Text-to-image retrieval: search the stored images with a plain-text query.
query = "Show me an image with a woman and two children"
docs = vectorstore.similarity_search(query)

# An empty store (for example, no .jpg files were ingested) yields no hits;
# report that instead of failing with IndexError on docs[0].
if docs:
    print(docs[0].page_content)
else:
    print("No matching images found.")

/home/jovyan/downloaded_image.jpg


In [None]:
import io
import re

from IPython.display import display, Image
from langchain_core.runnables import RunnableLambda, RunnablePassthrough

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage

import base64

def encode_image_base64(image_path):
    """Read the file at ``image_path`` and return its bytes as a base64 string."""
    with open(image_path, mode="rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")


def get_image_urls(docs):
    """
    Base64-encode the image file referenced by each document.

    Each document's ``page_content`` is treated as a local file path. The
    result is a dict with a single key, ``'images'``, holding the encoded
    strings in the same order as ``docs``.
    """
    return {
        'images': [encode_image_base64(doc.page_content) for doc in docs]
    }


def img_prompt_func(data_dict):
    """
    Build the multi-modal prompt for the chat model.

    Reads the base64 images from ``data_dict["context"]["images"]`` and the
    user's question from ``data_dict["question"]``. Returns a one-element list
    holding a HumanMessage whose content is every image (as a JPEG data URL)
    followed by a single text part with the question.
    """
    # `or []` mirrors the original truthiness guard: a falsy image collection
    # (empty list, None) simply contributes no image parts.
    image_parts = [
        {
            "type": "image_url",
            "image_url": {"url": f"data:image/jpeg;base64,{encoded}"},
        }
        for encoded in data_dict["context"]["images"] or []
    ]

    # The question always goes last, after all images.
    question_part = {
        "type": "text",
        "text": f"User-provided question: {data_dict['question']}\n\n",
    }

    return [HumanMessage(content=image_parts + [question_part])]


def multi_modal_rag_chain(retriever, model_name="gpt-4-vision-preview", max_tokens=1024):
    """
    Build the multi-modal RAG chain.

    The chain receives the user's question, fetches documents through
    ``retriever``, base64-encodes the image files they reference, packs the
    images and the question into one prompt, sends it to the chat model and
    parses the reply into a plain string.

    Args:
        retriever: Runnable retriever; the ``page_content`` of each returned
            document is opened as an image file path.
        model_name: Name of the OpenAI chat model. Defaults to the value that
            was previously hard-coded.
            NOTE(review): that default is a preview model name — confirm it
            is still served, or pass a current vision-capable model.
        max_tokens: Upper bound on the tokens generated for the answer.

    Returns:
        The composed runnable chain.
    """

    # Multi-modal LLM; temperature 0 keeps the descriptions as repeatable as
    # the model allows.
    model = ChatOpenAI(temperature=0, model=model_name, max_tokens=max_tokens)

    # RAG pipeline: the input question is both sent to the retriever (to build
    # the image context) and passed through unchanged for the prompt.
    chain = (
        {
            "context": retriever | RunnableLambda(get_image_urls),
            "question": RunnablePassthrough(),
        }
        | RunnableLambda(img_prompt_func)
        | model
        | StrOutputParser()
    )

    return chain


# Expose the vector store as a retriever and build the multi-modal RAG chain
# on top of it.
retriever = vectorstore.as_retriever()
chain_multimodal_rag = multi_modal_rag_chain(retriever=retriever)

In [None]:
query = "Show me portrait pictures and describe them as a an expert photographer."
# The retriever is a Runnable (the chain definition pipes it with `|`), so
# call `invoke`; `get_relevant_documents` is deprecated in current LangChain.
docs = retriever.invoke(query)