In [1]:
from langchain_core.documents import Document
from langchain_community.vectorstores import FAISS
from langchain_experimental.open_clip import OpenCLIPEmbeddings
import glob
import base64

paths = glob.glob('../images/*.jpeg', recursive=True)

In [2]:
lc_docs = []
def encode_image(path):
    with open(path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode("utf-8")

for path in paths:
    doc = Document(
        page_content=encode_image(path),
        metadata ={
            'source': path
        }
    )

    lc_docs.append(doc)

In [3]:
vector_store = FAISS.from_documents(lc_docs, embedding=OpenCLIPEmbeddings())

In [4]:
retriever = vector_store.as_retriever()

In [6]:
docs = retriever.invoke(encode_image("../images/cat_1.jpeg"), k=4) # cat 1

for doc in docs:
    print(doc.metadata)

{'source': '../images/cat_1.jpeg'}
{'source': '../images/cat_2.jpeg'}
{'source': '../images/dog_2.jpeg'}
{'source': '../images/dog_5.jpeg'}
