# Multi Modal RAG Demo
This notebook runs each step of the Multi-Modal RAG workflow in isolation, primarily for debugging and development purposes.

## Setup

In [None]:
import os
import importlib
from IPython.display import Markdown, display
import ipywidgets as widgets
import logging

from llama_index.llms.azure_openai import AzureOpenAI
from llama_index.multi_modal_llms.azure_openai import AzureOpenAIMultiModal
from llama_index.core.vector_stores.types import VectorStoreQueryMode
from llama_parse import LlamaParse

from dotenv import load_dotenv
load_dotenv(override=True)

import sys
sys.path.append('..') 

import src.tools as tools
importlib.reload(tools)
from src.tools import get_index_docs_summary, MultimodalQueryEngine, display_query_and_multimodal_response, download_sharepoint_files

# Root logger: ERROR threshold, timestamped records written to a stream handler.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[logging.StreamHandler()],
    level=logging.ERROR,
)

# This notebook's own logger is set to DEBUG, independently of the root threshold.
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

In [3]:
# Passed to the retriever as `similarity_top_k` in the execution section.
similarity_top_k = 3

# Azure OpenAI connection settings, read from the process environment.
# Each value is None when the corresponding variable is not set.
AZURE_OPENAI_CHAT_COMPLETION_DEPLOYED_MODEL_NAME = os.environ.get("AZURE_OPENAI_CHAT_COMPLETION_DEPLOYED_MODEL_NAME")
AZURE_OPENAI_ENDPOINT = os.environ.get("AZURE_OPENAI_ENDPOINT")
AZURE_OPENAI_API_KEY = os.environ.get("AZURE_OPENAI_API_KEY")

In [4]:
# Text client: used in the execution section to pick which index a query
# should be routed to. The same deployment name is supplied for both
# `model` and `deployment_name`.
llm = AzureOpenAI(
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    api_key=AZURE_OPENAI_API_KEY,
    api_version="2024-08-01-preview",
    deployment_name=AZURE_OPENAI_CHAT_COMPLETION_DEPLOYED_MODEL_NAME,
    model=AZURE_OPENAI_CHAT_COMPLETION_DEPLOYED_MODEL_NAME,
)

# Multi-modal client: handed to MultimodalQueryEngine in the execution section.
# Uses the same deployment, endpoint, key and API version as `llm`.
azure_openai_mm_llm = AzureOpenAIMultiModal(
    api_base=AZURE_OPENAI_ENDPOINT,
    api_key=AZURE_OPENAI_API_KEY,
    api_version="2024-08-01-preview",
    engine=AZURE_OPENAI_CHAT_COMPLETION_DEPLOYED_MODEL_NAME,
    model=AZURE_OPENAI_CHAT_COMPLETION_DEPLOYED_MODEL_NAME,
    max_new_tokens=4096,
)

## Indexing Pipeline

In [None]:
# 1. Fetch the source documents.
# The returned dict is indexed by the selected index key in the execution
# section to print the document's URL.
# NOTE(review): presumably this downloads SharePoint files into LOCAL_BASE_DIR,
# which is defined in src.tools and not visible here — confirm.
document_url_dict = download_sharepoint_files()

In [None]:
# Inspect the dict returned by download_sharepoint_files(); it is later indexed
# by the selected index key to print the document URL.
document_url_dict

In [None]:
# 2. Build or load indexes and summaries
# Returns two objects that are both looked up by the same index key in the
# execution section: `indexes` (key -> index) and `document_summary_dict`
# (key -> description used in the routing prompt).
# NOTE(review): whether indexes are built or loaded is decided inside
# src.tools.get_index_docs_summary — not visible here.
indexes, document_summary_dict = get_index_docs_summary()

In [None]:
# Inspect the available indexes; the execution section picks one of these by key.
indexes

In [None]:
# Inspect the per-index descriptions; these are listed in the routing prompt
# and rendered as Markdown when the sources are displayed.
document_summary_dict

## Execution

In [None]:
# 1. Input your query
# Free-text box for the question. If it is left empty, the next cell falls back
# to its built-in sample query.
query_widget = widgets.Textarea(
    description="Query:",
    placeholder="Type your query here...",
    value="",
    layout=widgets.Layout(height='100px', width='100%'),
)

# Display the widget
display(query_widget)

In [None]:
# 2. Processing the query
# Use the text typed into the widget; fall back to the sample question when the
# widget is empty or contains only whitespace.
query = query_widget.value.strip() or (
    "in terms of scaling AI/ML what percentage of executives disagree with this statement?"
)

# Create the prompt: list every index key with its description and ask the LLM
# to answer with the best-matching key only.
prompt = (
    f"Given the query '{query}', determine the most appropriate index (key) in the dictionary "
    f"based on the description (value) that corresponds best to the query. "
    f"Return only the index (key) as the output, nothing else. "
    f"Options: {', '.join([f'Index: {key}, Description: {value}' for key, value in document_summary_dict.items()])}"
    )

# Call the LLM endpoint with the prompt
raw_selection = llm.complete(prompt).text.strip()

# The reply is free text, so it may not be a bare key: the model can wrap it in
# quotes/backticks or echo the "Index:" label used in the prompt. Try the raw
# reply first so keys that legitimately contain such characters still match.
selected_index = raw_selection
if selected_index not in indexes:
    cleaned = raw_selection
    if cleaned.lower().startswith("index:"):
        cleaned = cleaned[len("index:"):]
    selected_index = cleaned.strip().strip("'\"`").strip()

# Fail with an actionable message instead of a bare KeyError on the lookup.
if selected_index not in indexes:
    raise ValueError(
        f"LLM returned {raw_selection!r}, which does not match any known index. "
        f"Available indexes: {sorted(map(str, indexes))}"
    )
print(selected_index)

index = indexes[selected_index]

# Create and initialize the query engine
query_engine = MultimodalQueryEngine(
    retriever=index.as_retriever(
        vector_store_query_mode=VectorStoreQueryMode.DEFAULT, similarity_top_k=similarity_top_k 
    ),
    multi_modal_llm=azure_openai_mm_llm,
)

# Execute the query
response = query_engine.query(query)

In [None]:
# 3. Display the response
# Renders the `response.response` attribute of the query result as Markdown.
display(Markdown(response.response))

In [None]:
# 4. Display the sources (1)
# NOTE(review): 8 and 20 are positional arguments whose meaning is defined by
# display_query_and_multimodal_response in src.tools (not visible here) —
# confirm and consider passing them by keyword.
display_query_and_multimodal_response(response, 8, 20)

In [None]:
# 5. Display the sources (2)
# Show which document was selected, where it lives, and its stored summary.
print("Retrieved document:", selected_index, "", sep="\n")

print("Url to the document:")
print(document_url_dict[selected_index], end="\n\n")

print("Summary of the document:")
display(Markdown(document_summary_dict[selected_index]))

In [None]:
# 6. Display the sources (3)
# For every source node of the response, print its `page_num` metadata entry
# and the length of its text.
logger.info("\nSource Nodes:")
for source_node in response.source_nodes:
    page_number = source_node.metadata['page_num']
    print(f"\nPage Number: {page_number}")
    text_length = len(source_node.text)
    print(f"Text Content Length: {text_length}")

## Token Retrieval
The token retrieved in this step can be used to send POST requests to the backend API using tools such as FastAPI Swagger for testing purposes.

In [None]:
import os
from azure.identity import ClientSecretCredential
from azure.core.exceptions import AzureError

from dotenv import load_dotenv
load_dotenv(override=True)


# App-registration settings, read from the environment (.env is loaded above).
tenant_id = os.getenv("TENANT_ID")
client_id = os.getenv("APP_REGISTRATION_CLIENT_ID")
client_secret = os.getenv("APP_REGISTRATION_CLIENT_SECRET")
resource_scope = os.getenv("APP_REGISTRATION_RESOURCE_SCOPE")

# Get token
credential = ClientSecretCredential(tenant_id, client_id, client_secret)

try:
    token = credential.get_token(resource_scope)
except AzureError as e:
    print("Failed to get token:", str(e))
else:
    # Print only when this run actually obtained a token. Printing after a
    # failure would raise NameError on a fresh kernel, or show a stale token
    # left over from an earlier run of this cell.
    # The printed value is a credential: clear this cell's output before
    # sharing or committing the notebook.
    print(token.token) # use this token to authenticate before sending requests to the API