In [None]:
# ! pip install setuptools==69.5.1 numpy==1.21.3 torch torchvision ftfy faiss-cpu==1.7.4 openai-clip langchain langchain-community langchain-experimental langchain-openai open_clip_torch 'arize-phoenix[evals]'

In [1]:
from langchain_core.documents import Document
import glob
paths = glob.glob('../images/*.jpeg', recursive=True)
from langchain_community.vectorstores import FAISS

In [2]:
from dotenv import load_dotenv
import os

load_dotenv()
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]

In [3]:
import phoenix as px
session = px.launch_app()

🌍 To view the Phoenix app in your browser, visit http://localhost:6006/
📖 For more information on how to use Phoenix, check out https://docs.arize.com/phoenix


In [4]:
from phoenix.trace.langchain import LangChainInstrumentor

LangChainInstrumentor().instrument()

In [5]:
from langchain_experimental.open_clip import OpenCLIPEmbeddings
import base64

In [6]:
lc_docs = []
def encode_image(path):
    with open(path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode("utf-8")

for path in paths:
    doc = Document(
        page_content=encode_image(path),
        lookup_str = '',
        metadata ={
            'source': path
        },
        lookup_index=0
    )
    lc_docs.append(doc)

In [7]:
vector_store = FAISS.from_documents(lc_docs, embedding=OpenCLIPEmbeddings())

In [8]:
retriever = vector_store.as_retriever()

In [9]:
import base64
import io
from io import BytesIO

import numpy as np
from PIL import Image

def resize_base64_image(base64_string, size=(128, 128)):
    """
    Resize an image encoded as a Base64 string.

    Args:
    base64_string (str): Base64 string of the original image.
    size (tuple): Desired size of the image as (width, height).

    Returns:
    str: Base64 string of the resized image.
    """
    # Decode the Base64 string
    img_data = base64.b64decode(base64_string)
    img = Image.open(io.BytesIO(img_data))

    # Resize the image
    resized_img = img.resize(size, Image.LANCZOS)

    # Save the resized image to a bytes buffer
    buffered = io.BytesIO()
    resized_img.save(buffered, format=img.format)

    # Encode the resized image to Base64
    return base64.b64encode(buffered.getvalue()).decode("utf-8")


def is_base64(s):
    """Check if a string is Base64 encoded"""
    try:
        return base64.b64encode(base64.b64decode(s)) == s.encode()
    except Exception:
        return False


def split_image_text_types(docs):
    """Split numpy array images and texts"""
    images = []
    text = []
    for doc in docs:
        doc = doc.page_content  # Extract Document contents
        if is_base64(doc):
            # Resize image to avoid OAI server error
            images.append(
                resize_base64_image(doc, size=(250, 250))
            )  # base64 encoded str
        else:
            text.append(doc)
    return {"images": images, "texts": text}

In [10]:
from operator import itemgetter

from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_openai import ChatOpenAI


def prompt_func(data_dict):
    # Joining the context texts into a single string
    formatted_texts = "\n".join(data_dict["context"]["texts"])
    messages = []

    # Adding image(s) to the messages if present
    if data_dict["context"]["images"]:
        image_message = {
            "type": "image_url",
            "image_url": {
                "url": f"data:image/jpeg;base64,{data_dict['context']['images'][0]}"
            },
        }
        messages.append(image_message)

    # Adding the text message for analysis
    text_message = {
        "type": "text",
        "text": (
            "As an animal lover, your task is to analyze and interpret images of cute animals, "
            "Please use your extensive knowledge and analytical skills to provide a "
            "summary that includes:\n"
            "- A detailed description of the visual elements in the image.\n"
            f"User-provided keywords: {data_dict['question']}\n\n"
            "Text and / or tables:\n"
            f"{formatted_texts}"
        ),
    }
    messages.append(text_message)

    return [HumanMessage(content=messages)]


foundation = ChatOpenAI(temperature=0, model="gpt-4o-mini", max_tokens=1024)

# RAG pipeline
chain = (
    {
        "context": retriever | RunnableLambda(split_image_text_types),
        "question": RunnablePassthrough(),
    }
    | RunnableLambda(prompt_func)
    | foundation
    | StrOutputParser()
)

In [11]:
chain.invoke("german shepard")

ERROR [openinference.instrumentation.langchain._tracer] Failed to get attribute.
Traceback (most recent call last):
  File "/Users/yujian/Documents/workspace/rag_cookbooks/p310/lib/python3.10/site-packages/openinference/instrumentation/langchain/_tracer.py", line 274, in wrapper
    yield from wrapped(*args, **kwargs)
  File "/Users/yujian/Documents/workspace/rag_cookbooks/p310/lib/python3.10/site-packages/openinference/instrumentation/langchain/_tracer.py", line 426, in _parse_message_data
    assert isinstance(content, str), f"expected str, found {type(content)}"
AssertionError: expected str, found <class 'list'>


"The image features a German Shepherd, a breed known for its intelligence and versatility. Here’s a detailed description of the visual elements:\n\n### Description of the German Shepherd:\n- **Coloration**: The dog has a classic coat pattern, predominantly black and tan. The back is mostly black, while the sides and legs exhibit a rich tan color.\n- **Build**: The German Shepherd has a strong, athletic build with a well-defined musculature. Its body is slightly longer than it is tall, giving it a balanced appearance.\n- **Head**: The dog has a broad head with a pronounced muzzle. Its ears are erect and pointed, typical of the breed, contributing to its alert expression.\n- **Eyes**: The eyes are dark and expressive, conveying intelligence and curiosity.\n- **Tail**: The tail is bushy and hangs down, with a slight curve at the end, which is characteristic of the breed.\n- **Posture**: The dog stands confidently on a lush green lawn, with its body slightly turned to the side, showcasing 

In [12]:
docs = retriever.invoke("german shepard", k=3)

for doc in docs:
    print(doc.metadata)

{'source': '../images/dog_1.jpeg'}
{'source': '../images/cat_5.jpeg'}
{'source': '../images/cat_4.jpeg'}
{'source': '../images/cat_3.jpeg'}


In [13]:
chain.invoke("cat laying down on white background")

ERROR [openinference.instrumentation.langchain._tracer] Failed to get attribute.
Traceback (most recent call last):
  File "/Users/yujian/Documents/workspace/rag_cookbooks/p310/lib/python3.10/site-packages/openinference/instrumentation/langchain/_tracer.py", line 274, in wrapper
    yield from wrapped(*args, **kwargs)
  File "/Users/yujian/Documents/workspace/rag_cookbooks/p310/lib/python3.10/site-packages/openinference/instrumentation/langchain/_tracer.py", line 426, in _parse_message_data
    assert isinstance(content, str), f"expected str, found {type(content)}"
AssertionError: expected str, found <class 'list'>


"The image features a cat lying down on a soft, light-colored surface, likely a carpet. Here’s a detailed description of the visual elements:\n\n- **Subject**: The main focus is a domestic cat, characterized by its striking green eyes and a mix of gray and brown fur with distinct tabby stripes.\n- **Positioning**: The cat is sprawled out comfortably, with its body elongated and one paw extended forward, suggesting a relaxed and content demeanor.\n- **Background**: The background is neutral, enhancing the cat's features without distraction. The light color of the surface contrasts nicely with the cat's fur.\n- **Lighting**: The lighting appears soft and natural, highlighting the cat's fur texture and the vividness of its eyes.\n- **Expression**: The cat's expression seems calm and curious, with its ears perked up slightly, indicating alertness while still being at ease.\n\nOverall, the image captures a serene moment, showcasing the cat's beauty and relaxed nature."

In [14]:
docs = retriever.invoke("cat laying down on white background", k=3)

for doc in docs:
    print(doc.metadata)

{'source': '../images/cat_4.jpeg'}
{'source': '../images/dog_1.jpeg'}
{'source': '../images/cat_5.jpeg'}
{'source': '../images/cat_3.jpeg'}


In [15]:
docs = retriever.invoke("cat showing teeth with open mouth", k=3)

for doc in docs:
    print(doc.metadata)

{'source': '../images/cat_4.jpeg'}
{'source': '../images/dog_1.jpeg'}
{'source': '../images/cat_5.jpeg'}
{'source': '../images/dog_4.jpeg'}


In [16]:
chain.invoke("cat showing teeth with open mouth")

ERROR [openinference.instrumentation.langchain._tracer] Failed to get attribute.
Traceback (most recent call last):
  File "/Users/yujian/Documents/workspace/rag_cookbooks/p310/lib/python3.10/site-packages/openinference/instrumentation/langchain/_tracer.py", line 274, in wrapper
    yield from wrapped(*args, **kwargs)
  File "/Users/yujian/Documents/workspace/rag_cookbooks/p310/lib/python3.10/site-packages/openinference/instrumentation/langchain/_tracer.py", line 426, in _parse_message_data
    assert isinstance(content, str), f"expected str, found {type(content)}"
AssertionError: expected str, found <class 'list'>


"The image features a domestic cat lying on a soft, light-colored carpet. Here are the visual elements observed:\n\n- **Cat's Appearance**: The cat has a tabby coat with a mix of gray and brown stripes. Its fur appears soft and well-groomed.\n- **Eyes**: The cat has striking green eyes that stand out against its fur, giving it an alert and curious expression.\n- **Mouth**: The cat's mouth is slightly open, revealing its teeth, which adds a playful or inquisitive vibe to its demeanor.\n- **Pose**: The cat is stretched out comfortably, with one paw extended, suggesting relaxation and contentment.\n- **Background**: The background is softly blurred, focusing attention on the cat. There is a hint of a blue toy in the background, indicating a playful environment.\n\nOverall, the image captures a moment of tranquility and playfulness, showcasing the cat's personality and charm."