In [1]:
! pip install trulens_eval llama_index ftfy regex tqdm git+https://github.com/openai/CLIP.git torch torchvision matplotlib scikit-image

Collecting git+https://github.com/openai/CLIP.git
  Cloning https://github.com/openai/CLIP.git to /tmp/pip-req-build-e75l1479
  Running command git clone --filter=blob:none --quiet https://github.com/openai/CLIP.git /tmp/pip-req-build-e75l1479
  Resolved https://github.com/openai/CLIP.git to commit a1d071733d7111c9c014f024669f959182114e33
  Preparing metadata (setup.py) ... [?25l[?25hdone


In [2]:
!pip install matplotlib scikit-image -q


In [3]:
!pip install pyarrow>=12.0.0

In [4]:
!pip install milvus pymilvus -q  # for Milvus


In [5]:
from PIL import Image
import matplotlib.pyplot as plt
import pandas as pd

In [6]:
!pip install pypdf



In [7]:
!pip install replicate



In [8]:
!pip install transformers



In [9]:
!pip install accelerate



In [10]:
!pip install --upgrade pyarrow




Set keys

In [11]:
from google.colab import userdata


In [12]:
import os


In [13]:
os.environ["REPLICATE_API_TOKEN"] = userdata.get('REPLICATE_API_TOKEN')

Initializing model and Initializing Clients

In [14]:
from llama_index import Document, SimpleDirectoryReader
from llama_index.indices.multi_modal.base import MultiModalVectorStoreIndex
from llama_index.multi_modal_llms.replicate_multi_modal import ReplicateMultiModal
from llama_index.prompts import PromptTemplate
import matplotlib.pyplot as plt
import pandas as pd
import json

# Initialize Fuyu-8B MultiModal LLM
fuyu_mm_llm = ReplicateMultiModal(
    model="lucataco/fuyu-8b:42f23bc876570a46f5a90737086fbc4c3f79dd11753a28eaa39544dd391815e9",
    max_new_tokens=500,
    temperature=0.1,
    num_input_files=1,
    top_p=0.9,
    num_beams=1,
    repetition_penalty=1,
)




In [15]:
CLUSTER_ENDPOINT=userdata.get('CLUSTER_ENDPOINT')
TOKEN=userdata.get('TOKEN')


In [16]:
from pymilvus import connections
connections.connect(
   alias = 'default',
   uri=CLUSTER_ENDPOINT,
   token=TOKEN)

Loading the data


In [17]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [18]:
from llama_index import SimpleDirectoryReader, Document

# context images
image_path = "/content/drive/MyDrive/NDVI/NDVI_imgs"
image_with_text_documents = SimpleDirectoryReader(image_path).load_data()
image_documents = SimpleDirectoryReader(image_path).load_data()

# context text
text_path = "/content/drive/MyDrive/NDVI/NDVI-analysis"
text_documents = SimpleDirectoryReader(text_path).load_data()

Adding fuyu-8b descriptions

In [19]:
load_previously_generated_text_descriptions = False

In [20]:
import torch

# Check if GPU is available and set as default device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cpu


In [21]:

from llama_index.schema import ImageDocument
import tqdm
import json
from llama_index import SimpleDirectoryReader, Document

# Assuming image_with_text_documents is already loaded using SimpleDirectoryReader
# Now let's add the text descriptions manually

# Define the path to save the descriptions
description_save_path = "/content/drive/MyDrive/NDVI/color_ndvi/ndvi_image_descriptions.json"

# Check if descriptions file exists and load it
if os.path.exists(description_save_path):
    with open(description_save_path, "r") as f:
        image_descriptions = json.load(f)
        # If descriptions exist, update image_with_text_documents with loaded descriptions
        for img_doc in image_with_text_documents:
            filename = os.path.basename(img_doc.image_path)
            if filename in image_descriptions:
                img_doc.text = image_descriptions[filename]

# Manually add descriptions to each image document
for img_doc in tqdm.tqdm(image_with_text_documents):
    # Skip if the document already has a description
    if getattr(img_doc, 'text', None):
        continue

    # Prompt for manual text description input
    print(f"Enter description for {os.path.basename(img_doc.image_path)}:")
    description = input()  # Manually input the description
    img_doc.text = description  # Assign the inputted description to the ImageDocument

# Save the updated descriptions back to JSON
# Creating a dictionary with filename as key and description as value
desc_dict = {os.path.basename(doc.image_path): doc.text for doc in image_with_text_documents}
with open(description_save_path, "w") as f:
    json.dump(desc_dict, f)

100%|██████████| 22/22 [00:00<00:00, 179174.15it/s]


Making nodes

In [22]:
from llama_index.node_parser import SentenceSplitter

node_parser = SentenceSplitter.from_defaults()

image_nodes = node_parser.get_nodes_from_documents(image_documents)
image_with_text_nodes = node_parser.get_nodes_from_documents(image_with_text_documents)
text_nodes = node_parser.get_nodes_from_documents(text_documents)

In [23]:
print(image_with_text_nodes)

[ImageNode(id_='bd18ae63-f5b2-4eb5-b741-d662c5342ad9', embedding=None, metadata={'file_path': '/content/drive/MyDrive/NDVI/NDVI_imgs/NDVI.jpg', 'file_name': 'NDVI.jpg', 'file_type': 'image/jpeg', 'file_size': 11106, 'creation_date': '2023-12-10', 'last_modified_date': '2023-12-10', 'last_accessed_date': '2023-12-10'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='44e84ffb-586b-473b-8068-c744f655fff3', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'file_path': '/content/drive/MyDrive/NDVI/NDVI_imgs/NDVI.jpg', 'file_name': 'NDVI.jpg', 'file_type': 'image/jpeg', 'file_size': 11106, 'creation_date': '2023-12-10', 'last_modified_date': '2023-12-10', 'last_accessed_date': '2023-12-10'}, hash='fb3bd05e091adc49

Build Multi-modal index and vector store to index both text and images

In [24]:
from llama_index.vector_stores import MilvusVectorStore
from llama_index import SimpleDirectoryReader, StorageContext

image_store = MilvusVectorStore(uri = CLUSTER_ENDPOINT, token = TOKEN, collection_name = "image_store", dim=512, overwrite=True)

text_store = MilvusVectorStore(uri = CLUSTER_ENDPOINT, token = TOKEN, collection_name = "text_store", dim=384, overwrite=True)

storage_context = StorageContext.from_defaults(vector_store=text_store,image_store=image_store)


DEBUG:pymilvus.milvus_client.milvus_client:Created new connection using: 0bb47d1b932145ae93bf9cb3073d53d0
DEBUG:pymilvus.milvus_client.milvus_client:Successfully created collection: image_store
DEBUG:pymilvus.milvus_client.milvus_client:Successfully created an index on collection: image_store
DEBUG:pymilvus.milvus_client.milvus_client:Created new connection using: 1b2e877e292a4416bca2792fcd6943ef
DEBUG:pymilvus.milvus_client.milvus_client:Successfully created collection: text_store
DEBUG:pymilvus.milvus_client.milvus_client:Successfully created an index on collection: text_store


Make Multimodal Vector Store Index

In [25]:
from llama_index.embeddings import HuggingFaceEmbedding
from llama_index import ServiceContext
# loads BAAI/bge-small-en
# embed_model = HuggingFaceEmbedding()
from llama_index.llms import Replicate

llm = Replicate(
    model="a16z-infra/llama13b-v2-chat:df7690f1994d94e96ad9d568eac121aecf50684a0b0963b25a41cc40061269e5"
)
# loads BAAI/bge-small-en-v1.5
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model,)


In [26]:
# Create the MultiModal index
from llama_index.indices.multi_modal.base import MultiModalVectorStoreIndex
ndvi_storage_context = StorageContext.from_defaults(vector_store=text_store)

img_index = MultiModalVectorStoreIndex(nodes=image_with_text_nodes + text_nodes, is_image_to_text=True, storage_context=ndvi_storage_context, service_context=service_context, show_progress=True)


Generating embeddings:   0%|          | 0/291 [00:00<?, ?it/s]

Generating image embeddings:   0%|          | 0/22 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/22 [00:00<?, ?it/s]

Test this by commenting out previous index and uncommenting this next section then restarting session - due to limitations in zilliz

In [27]:
#ndvi_storage_context = StorageContext.from_defaults(vector_store=text_store)
#MultiModalVectorStoreIndex(image_nodes + text_nodes, storage_context = storage_context,service_context = service_context, show_progress=True, image_vector_store=image_store,  image_embed_model = "clip")

#ndvi_index = MultiModalVectorStoreIndex(nodes=image_with_text_nodes + text_nodes, is_image_to_text=True, storage_context=ndvi_storage_context, service_context=service_context, show_progress=True)

Retrieve


In [28]:
def plot_images(image_paths):
    images_shown = 0
    plt.figure(figsize=(16, 12))
    for img_path in image_paths:
        if os.path.isfile(img_path):
            image = Image.open(img_path)

            plt.subplot(5, 5, images_shown + 1)
            plt.imshow(image)
            plt.xticks([])
            plt.yticks([])

            images_shown += 1
            if images_shown >= 25:
                break


In [29]:
from llama_index.prompts import PromptTemplate
from llama_index.multi_modal_llms.replicate_multi_modal import ReplicateMultiModal

from llama_index.query_engine import SimpleMultiModalQueryEngine
from llama_index import QueryBundle
# Initialize Milvus vector stores for text and images
image_store = MilvusVectorStore(uri=CLUSTER_ENDPOINT, token=TOKEN, collection_name="image_store")
text_store = MilvusVectorStore(uri=CLUSTER_ENDPOINT, token=TOKEN, collection_name="text_store")

# Initialize storage context
storage_context = StorageContext.from_defaults(vector_store=text_store, image_store=image_store)

# Initialize LLM and embedding models
# Initialize Fuyu-8B MultiModal LLM
fuyu_mm_llm = ReplicateMultiModal(
    model="yorickvp/llava-13b:2facb4a474a0462c15041b78b1ad70952ea46b5ec6ad29583c0b29dbd4249591",
    max_new_tokens=500,
    temperature=0.1,
    num_input_files=1,
    top_p=0.9,
    num_beams=1,
    repetition_penalty=1,
)

embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
service_context = ServiceContext.from_defaults(llm=llm,embed_model=embed_model)

# Initialize the MultiModal index
img_vector_index = MultiModalVectorStoreIndex.from_vector_store(
    vector_store=text_store,
    service_context=service_context,
    image_vector_store=image_store,
    image_embed_model="clip"
)

# Define the prompt template for NDVI map analysis
qa_tmpl_str = (
    "Given an NDVI map, use the details in the image to answer the user's query.\n"
    "Query: {query_str}\n"
    "Response: "
)
qa_tmpl = PromptTemplate(qa_tmpl_str)
input_image = "/content/drive/MyDrive/NDVI/NDVI_imgs/Converted_NDVI.jpg"
plot_images([input_image])
# instantiate a retriever
retriever_engine = img_vector_index.as_retriever(
    similarity_top_k=3, image_similarity_top_k=3
)

# get images semantically similar to our own
retrieval_results = retriever_engine.retrieve("How do I analyze the following map: there are dense vegetation but most of it is sparse")

from llama_index.response.notebook_utils import display_source_node
from llama_index.schema import ImageNode

retrieved_image = []
for res_node in retrieval_results:
    if isinstance(res_node.node, ImageNode):
        retrieved_image.append(res_node.node.metadata["file_path"])
    else:
        display_source_node(res_node, source_length=200)

plot_images(retrieved_image)


DEBUG:pymilvus.milvus_client.milvus_client:Created new connection using: 430d42a419c04ae0b83f2539965cf3d5
DEBUG:pymilvus.milvus_client.milvus_client:Created new connection using: 7ef669070b8b4f128b70eaee67bab016


**Node ID:** 920164ce-600d-4540-8059-7b424bd67d57<br>**Similarity:** 0.784331202507019<br>**Text:** NDVI map shows a variation in greenness. The majority of the area is covered in a light green shade, suggesting that the vegetation is generally sparse or not very dense. This could be characterist...<br>

**Node ID:** aeda6731-82f9-4207-b03f-d43381149ac0<br>**Similarity:** 0.7670001983642578<br>**Text:** the presence of dark green along the edges suggests dense vegetation, possibly indicating the boundaries of a forested area or a well-established field. The majority of the area, however, is covere...<br>

**Node ID:** caaf387b-5870-479a-aee4-010321d7a864<br>**Similarity:** 0.7513655424118042<br>**Text:** In the NDVI map, there are areas of light to medium green interspersed with areas of dark green, which indicates a mix of vegetation density. The darker green areas are indicative of dense, healthy...<br>

<Figure size 1600x1200 with 0 Axes>

<Figure size 1600x1200 with 0 Axes>