# Adaptive Multi-Modal RAG Example

This notebook demonstrates the Adaptive Multi-Modal RAG architecture that handles inputs and outputs across multiple modalities (text, image, audio, video). It dynamically adjusts its retrieval and generation strategies based on the input modality and context.

In [None]:
# Import required modules
import numpy as np
import hashlib
from datetime import datetime, timedelta
from src.rag_specialized.adaptive_multimodal.adaptive_multimodal_rag import (
    AdaptiveMultiModalRAG, MultiModalDocument, MultiModalQuery, ModalityType
)

## Initialize the Adaptive Multi-Modal RAG System

In [None]:
# Build the RAG system with its default configuration (no constructor arguments);
# every later cell in this notebook uses this `adaptive_rag` instance.
adaptive_rag = AdaptiveMultiModalRAG()
print("Adaptive Multi-Modal RAG system initialized!")

## Create Sample Multi-Modal Documents

In [None]:
# Build the sample corpus. Each spec tuple is
# (id, text, source, topic, author); all three documents use the TEXT modality.
documents = [
    MultiModalDocument(
        id=doc_id,
        text_content=body,
        metadata={"source": source, "topic": topic, "author": author},
        modality_type=ModalityType.TEXT
    )
    for doc_id, body, source, topic, author in (
        (
            "doc1",
            "Machine learning is a subset of artificial intelligence that focuses on algorithms that can learn from data. It encompasses supervised, unsupervised, and reinforcement learning techniques.",
            "AI textbook",
            "ML Basics",
            "Dr. Smith",
        ),
        (
            "doc2",
            "Deep learning uses neural networks with multiple layers to model complex patterns in data. These networks can automatically discover representations needed for feature detection or classification.",
            "Deep Learning Course",
            "Neural Networks",
            "Prof. Johnson",
        ),
        (
            "doc3",
            "Natural language processing enables computers to understand, interpret, and generate human language in a valuable way. NLP combines computational linguistics with machine learning and deep learning models.",
            "NLP Guide",
            "Language Models",
            "Dr. Williams",
        ),
    )
]

print(f"Created {len(documents)} sample multi-modal documents")

## Add Documents to the RAG System

In [None]:
# Register the sample corpus with the RAG system; the value returned by
# add_documents() is reported below as the number of documents added.
num_added = adaptive_rag.add_documents(documents)
print(f"Added {num_added} documents to the Adaptive Multi-Modal RAG system")

## Create and Execute Queries

In [None]:
# Create a text-based query
text_query = MultiModalQuery(
    text_query="What is machine learning?",
    preferred_modality=ModalityType.TEXT
)

# NOTE: no query embedding is built by hand here. adaptive_rag.query() is called
# with only the MultiModalQuery and k, so a manually constructed vector would
# never reach the system. (An MD5 digest reinterpreted as float32 yields just
# 4 values, possibly NaN/Inf, so it is not a meaningful embedding anyway.)
# Presumably the system encodes the query internally -- confirm against
# AdaptiveMultiModalRAG.query.

# Execute the query, requesting up to k=3 results
result = adaptive_rag.query(text_query, k=3)

# Report the generated answer together with the metrics exposed on the result
print("Query Results:")
print(f"Answer: {result.answer}")
print(f"Confidence: {result.confidence:.3f}")
print(f"Latency: {result.latency_ms:.2f}ms")
print(f"Sources: {len(result.sources)} documents retrieved")

In [None]:
# Create another query about deep learning
dl_query = MultiModalQuery(
    text_query="Explain deep learning concepts",
    preferred_modality=ModalityType.TEXT
)

# NOTE: no hand-built query embedding is needed. adaptive_rag.query() receives
# only the MultiModalQuery and k, so a separately computed vector would be
# unused. (Hash bytes reinterpreted as float32 are not a meaningful embedding
# in any case.)

# Execute the query, requesting up to k=2 results
dl_result = adaptive_rag.query(dl_query, k=2)

# Report the answer, its metrics, and which modalities contributed to it
print("Deep Learning Query Results:")
print(f"Answer: {dl_result.answer}")
print(f"Confidence: {dl_result.confidence:.3f}")
print(f"Latency: {dl_result.latency_ms:.2f}ms")
print(f"Modalities used: {[m.value for m in dl_result.modalities_used]}")

## Explore the System's Internal State

In [None]:
# Inspect what the retriever currently holds
retriever = adaptive_rag.retriever

print(f"Number of documents in system: {len(retriever.documents)}")

# The embedding matrix may be None; in that case print the literal text 'None'
embedding_matrix = retriever.embeddings
shape_text = 'None' if embedding_matrix is None else embedding_matrix.shape
print(f"Embedding matrix shape: {shape_text}")

# This count is the number of processors registered on the encoder
print(f"Number of unique modalities: {len(retriever.encoder.processors)}")

# One summary line per stored document: 1-based position, id, modality, text length
for position, document in enumerate(retriever.documents, start=1):
    print(f"Document {position}: {document.id} - {document.modality_type.value} - {len(document.text_content)} chars")

## Performance Analysis

In [None]:
# Run a small batch of text queries and collect the latency and confidence
# reported on each result, then summarise them.
queries = [
    "What is machine learning?",
    "How does deep learning work?",
    "Explain natural language processing",
    "What are neural networks?",
    "Describe supervised learning"
]

latencies = []
confidences = []

for query_text in queries:
    query = MultiModalQuery(text_query=query_text, preferred_modality=ModalityType.TEXT)

    # No manual embedding is computed: adaptive_rag.query() is given only the
    # query object and k, so a hand-built vector would never be consumed.
    # A loop-specific name is used so the `result` variable produced by the
    # first query cell is not silently overwritten when this cell runs.
    perf_result = adaptive_rag.query(query, k=2)
    latencies.append(perf_result.latency_ms)
    confidences.append(perf_result.confidence)

# Aggregate statistics over the batch (np.std is the population std, ddof=0)
print(f"Average query latency: {np.mean(latencies):.2f}ms")
print(f"Latency std deviation: {np.std(latencies):.2f}ms")
print(f"Average confidence: {np.mean(confidences):.3f}")
print(f"Confidence std deviation: {np.std(confidences):.3f}")

## Summary

In this notebook, we explored the Adaptive Multi-Modal RAG architecture:

1. **Initialization**: Created an instance of the AdaptiveMultiModalRAG system
2. **Document Addition**: Added sample documents (all of the text modality in this example) with different content and metadata
3. **Query Processing**: Executed text-based queries and received responses
4. **System Analysis**: Examined the internal state of the system
5. **Performance Evaluation**: Measured query latency and confidence metrics

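The Adaptive Multi-Modal RAG system processed the queries and returned responses. Note that this notebook exercises only the text modality; the architecture is designed to adapt its retrieval strategy based on the input modality, but image, audio, and video inputs are not demonstrated here.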