In [1]:
import os
from dotenv import load_dotenv
from llama_index.llms.openai import OpenAI
from llama_index.multi_modal_llms.openai import OpenAIMultiModal

load_dotenv()
        
openai_mm_llm = OpenAIMultiModal(
    model="gpt-4o-mini",
)

In [2]:
import os
from pdf2image import convert_from_path

# Path to the folder containing PDF files
pdf_folder = 'data_store/pdf_data/'
# Path to the folder where images will be saved
image_folder = 'data_store/images_data/'

# Create the 'images_data' folder if it doesn't exist
if not os.path.exists(image_folder):
    os.makedirs(image_folder)

# Loop through all the files in the pdf_folder
for pdf_file in os.listdir(pdf_folder):
    if pdf_file.endswith(".pdf"):
        pdf_path = os.path.join(pdf_folder, pdf_file)
        
        # Convert PDF pages to images
        images = convert_from_path(pdf_path)
        
        # Save each page as an image
        for i, image in enumerate(images):
            image_file_name = os.path.join(image_folder, f"{os.path.splitext(pdf_file)[0]}_page_{i + 1}.jpg")
            image.save(image_file_name, 'JPEG')

print("PDF to image conversion completed.")


PDF to image conversion completed.


In [3]:
from llama_index.core import SimpleDirectoryReader
image_dir='./data_store/images_data'
image_documents = SimpleDirectoryReader(image_dir).load_data()

print(image_documents)

[ImageDocument(id_='7da1e081-3874-4ea1-887f-a80c8eb50a46', embedding=None, metadata={'file_path': 'c:\\Users\\Sagar G R\\BrandBaiston\\dataAnalyst-agent\\dataanalyst_agent\\data_store\\images_data\\chart1_page_1.jpg', 'file_name': 'chart1_page_1.jpg', 'file_type': 'image/jpeg', 'file_size': 8304, 'creation_date': '2024-10-18', 'last_modified_date': '2024-10-20'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, text='', mimetype='text/plain', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n', image=None, image_path='c:\\Users\\Sagar G R\\BrandBaiston\\dataAnalyst-agent\\dataanalyst_agent\\data_store\\images_data\\chart1_page_1.jpg', image_url=None, image_mimetype=None

In [4]:
from llama_index.core.indices import MultiModalVectorStoreIndex
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.core import SimpleDirectoryReader, StorageContext
from llama_index.embeddings.nomic import NomicEmbedding

import qdrant_client

# Create a local Qdrant vector store
client = qdrant_client.QdrantClient(path="./qdrant_store")


  from .autonotebook import tqdm as notebook_tqdm


In [5]:
text_store = QdrantVectorStore(
    client=client, collection_name="text_collection"
)
image_store = QdrantVectorStore(
    client=client, collection_name="image_collection"
)

storage_context = StorageContext.from_defaults(
    # vector_store=text_store,
    image_store=image_store
)

# embedding_model = NomicEmbedding(
#     api_key="nk-n7HB3TCeSHPpU6mptDYVhbwhSPaWS_NuWb3DSkNdMdk",
#     model_name="nomic-embed-text-v1.5",
#     vision_model_name="nomic-embed-vision-v1.5",
# )

# Create the MultiModal index
documents = SimpleDirectoryReader(image_folder).load_data()
index = MultiModalVectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
    # embed_model=embedding_model,
    # image_embed_model=embedding_model,
)



In [None]:
# # Load text and image documents from local folder
# documents = SimpleDirectoryReader(image_folder).load_data()
# # Create the MultiModal index
# index = MultiModalVectorStoreIndex.from_documents(
#     documents,
#     storage_context=storage_context,
# )

In [1]:
from llama_index.multi_modal_llms.openai import OpenAIMultiModal
from llama_index.core import PromptTemplate
from llama_index.core.query_engine import SimpleMultiModalQueryEngine

query_str = "can you find the example commments that are negative"
# "give me an overview of the points where we're doing worse than in the last reporting period"
# "what is the total nuber of conversation and tell me what percenteage is positive and what percentage is negative"

retriever_engine = index.as_retriever(
    # similarity_top_k=3, 
    image_similarity_top_k=3
)

# # retrieve more information from the GPT4V response
retrieval_results = retriever_engine.retrieve(query_str)

# # if you only need image retrieval without text retrieval
# # you can use `text_to_image_retrieve`
# retrieval_results = retriever_engine.text_to_image_retrieve(query_str)

print(retrieval_results)



qa_tmpl_str = (
    "You are a Data analyst that specializes in reading data-heavy reports and extracting insights regarding the social media activity on a media brand space.\n"
    "Evaluate if the query from the user is analytical in nature. If not, reply to the user politely and re-steer the conversation to be about analytics or follow up on analytics. Do not allow the conversation to diverge from these topics.\n"
    "Context information is below.\n"
    "---------------------\n"
    "{context_str}\n"
    "---------------------\n"
    "Given the context information and not prior knowledge, "
    "answer the query.\n"
    "Query: {query_str}\n"
    "Answer: "
)
qa_tmpl = PromptTemplate(qa_tmpl_str)

query_engine = index.as_query_engine(
    llm=openai_mm_llm,similarity_top_k=2, 
    image_similarity_top_k=5
)

response = query_engine.query(query_str)

NameError: name 'index' is not defined

In [9]:
print(str(response))

Here are some examples of negative comments from the provided context:

1. "This platform used to be great, but it’s really gone downhill."
2. "Can’t believe I waited for this, what a disaster."
3. "I didn’t love it, but it wasn’t as bad as people are saying."
4. "This series was a waste of time. So disappointed."
5. "The character development in this show is insane!"
6. "This show had so much potential but totally flopped."
7. "This was just painful to watch. Horrible writing."
8. "I regret watching this. Do better."
9. "The selection is terrible lately, what’s happening?"

These comments express dissatisfaction or disappointment regarding various shows, platforms, or content.


In [10]:
from llama_index.core.tools import QueryEngineTool,ToolMetadata

query_tool = QueryEngineTool(
    query_engine=index.as_query_engine(llm=openai_mm_llm,similarity_top_k=2, image_similarity_top_k=5),
    metadata=ToolMetadata(
        name=f"vector_tool",
        description=(
            "Useful to retrieve relevand data to answer the question"
            # "Useful to lookup any information regarding the image"
        ),
    ),
)

In [16]:
from llama_index.core.agent import ReActAgent
context = """\
"System:You are a Data analyst that specializes in reading data-heavy reports and extracting insights regarding the social media activity on a media brand space.\n"
    "Evaluate if the query from the user is analytical in nature. If not, reply to the user politely and re-steer the conversation to be about analytics or follow up on analytics. Do not allow the conversation to diverge from these topics.\n"
    "Context information is below.\n"
    "---------------------\n"
    "{context_str}\n"
    "---------------------\n"
    "Given the context information and not prior knowledge, "
    "answer the query.\n"
    "Query: {query_str}\n"
    "Answer: "
"""
agent = ReActAgent.from_tools(
    [query_tool], 
    verbose=True,
    context=context

)



In [18]:
response = agent.chat(
#    "what are people so mad about that we have so many negative comments? can you list some sample comments"
    "give me an overview of the points where we're doing worse than in the last reporting period."
)

print(response)

> Running step d86a518d-8f16-4ce5-8fe7-74d3e469d121. Step input: give me an overview of the points where we're doing worse than in the last reporting period.
[1;3;38;5;200mThought: The current language of the user is: English. I need to use a tool to help me answer the question.
Action: vector_tool
Action Input: {'input': "points where we're doing worse than in the last reporting period"}
[0m[1;3;34mObservation: Based on the provided context, here are the points where we're doing worse than in the last reporting period:

1. **Negative Conversations**: There has been an increase in negative conversations, with a total of **28,862** negative comments, which reflects an **81.8%** increase compared to the previous period.

2. **Hidden Comments by Tag**: The number of harmful comments has also increased significantly, particularly in categories such as:
   - **Spam**: Increased by **171%**.
   - **Personal Attacks/Bullying**: Increased by **125.2%**.
   - **Distributing/Molesting**: Incr

In [19]:

from llama_index.core.memory import (
    VectorMemory,
    SimpleComposableMemory,
    ChatMemoryBuffer,
)
from llama_index.core.llms import ChatMessage
from llama_index.embeddings.openai import OpenAIEmbedding

vector_memory = VectorMemory.from_defaults(
    vector_store=None,  # leave as None to use default in-memory vector store
    embed_model=OpenAIEmbedding(),
    retriever_kwargs={"similarity_top_k": 2},
)

chat_memory_buffer = ChatMemoryBuffer.from_defaults()

composable_memory = SimpleComposableMemory.from_defaults(
    primary_memory=chat_memory_buffer,
    secondary_memory_sources=[vector_memory],
)

In [21]:

from llama_index.llms.openai import OpenAI
from llama_index.core.tools import FunctionTool
from llama_index.core.agent import FunctionCallingAgent

agent = ReActAgent.from_tools(
    [query_tool], 
    verbose=True,
    context=context,
    memory=composable_memory)



In [26]:
response = agent.chat("hi whats my name")

> Running step a63a8d50-f013-46ff-9e62-d549a8245671. Step input: hi whats my name
[1;3;38;5;200mThought: The current language of the user is: English. I need to use a tool to help me answer the question.
Action: vector_tool
Action Input: {'input': "user's name"}
[0m[1;3;34mObservation: I'm unable to determine the user's name based on the provided context information.
[0m> Running step 5b9422a9-6ab1-474e-850d-348c8e5a5cc8. Step input: None
[1;3;38;5;200mThought: I cannot answer the question with the provided tools.
Answer: I cannot determine your name based on the information provided.
[0m