# Continual Learning RAG Example

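This notebook demonstrates the Continual Learning RAG architecture, which adapts and improves over time without forgetting previously learned information. The architecture incorporates techniques such as elastic weight consolidation and experience replay; the examples below configure it with experience replay. Query embeddings in this notebook are simple hash-derived placeholders rather than semantic embeddings, so the retrieved sources illustrate the API rather than real relevance.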

In [None]:
# Import required modules
import numpy as np
import hashlib
from datetime import datetime, timedelta
from src.rag_specialized.continual_learning.continual_learning_rag import (
    ContinualLearningRAG, ContinualDocument, ContinualQuery, ForgettingMechanism
)

## Initialize the Continual Learning RAG System

In [None]:
# Configure the system with the EXPERIENCE_REPLAY forgetting mechanism and an
# experience buffer of size 100.
continual_rag = ContinualLearningRAG(
    experience_buffer_size=100,
    forgetting_mechanism=ForgettingMechanism.EXPERIENCE_REPLAY,
)
print("Continual Learning RAG system initialized!")

# Read the effective settings back from the retriever to confirm the configuration.
_retriever = continual_rag.retriever
print(f"Forgetting mechanism: {_retriever.forgetting_mechanism.mechanism.value}")
print(f"Experience buffer size: {_retriever.experience_buffer.capacity}")

## Create Sample Continual Learning Documents

In [None]:
# Sample corpus, one row per document:
# (id, content, importance_score, domain, difficulty, topic)
document_specs = [
    (
        "doc1",
        "Machine learning is a subset of artificial intelligence that focuses on algorithms that can learn from data. It encompasses supervised, unsupervised, and reinforcement learning techniques.",
        0.8, "AI", 0.5, "ML Basics",
    ),
    (
        "doc2",
        "Deep learning uses neural networks with multiple layers to model complex patterns in data. These networks can automatically discover representations needed for feature detection or classification.",
        0.9, "Deep Learning", 0.7, "Neural Networks",
    ),
    (
        "doc3",
        "Natural language processing enables computers to understand, interpret, and generate human language in a valuable way. NLP combines computational linguistics with machine learning and deep learning models.",
        0.7, "NLP", 0.6, "Language Models",
    ),
    (
        "doc4",
        "Computer vision is a field of artificial intelligence that trains computers to interpret and understand the visual world. Using digital images from cameras and videos and deep learning models, machines can accurately identify and classify objects.",
        0.8, "Computer Vision", 0.6, "Image Recognition",
    ),
]

# Turn each row into a ContinualDocument; domain, difficulty and topic are
# carried in the document's metadata dict.
documents = [
    ContinualDocument(
        id=doc_id,
        content=content,
        importance_score=importance,
        metadata={"domain": domain, "difficulty": difficulty, "topic": topic},
    )
    for doc_id, content, importance, domain, difficulty, topic in document_specs
]

# Summarise what was built: one line per document, numbered from 1
print(f"Created {len(documents)} sample continual learning documents")
for position, doc in enumerate(documents, start=1):
    print(f"  Doc {position}: {doc.id} - {doc.metadata['domain']} - Importance: {doc.importance_score}")

## Add Documents to the RAG System

In [None]:
# Register the sample documents with the RAG system.
# The value returned by add_documents is reported below as the number added.
num_added = continual_rag.add_documents(documents)
print(f"Added {num_added} continual learning documents to the system")

## Create and Execute Continual Learning Queries

In [None]:
# Create a continual learning query
ml_query = ContinualQuery(
    text="What is machine learning?",
    domain="AI",
    difficulty=0.4
)

# Build a deterministic placeholder embedding from the query text. This is a
# hash, not a semantic embedding. An MD5 digest is always 16 bytes, i.e. exactly
# four float32 values, so the vector only ever needs zero-padding up to 384.
query_digest = hashlib.md5(ml_query.text.encode()).digest()
raw_components = np.frombuffer(query_digest, dtype=np.float32)
query_embedding = np.zeros(384, dtype=np.float32)
# Arbitrary hash bits can decode to NaN or +/-inf; zero those out so they
# cannot propagate into whatever arithmetic the retriever performs.
query_embedding[:raw_components.size] = np.nan_to_num(
    raw_components, nan=0.0, posinf=0.0, neginf=0.0
)

# Execute the query (k=2)
ml_result = continual_rag.query(ml_query, query_embedding, k=2)

# Report the fields of the returned result
print("Machine Learning Query:")
print(f"Query: {ml_query.text}")
print(f"Answer: {ml_result.answer}")
print(f"Adaptation needed: {ml_result.adaptation_needed}")
print(f"Confidence: {ml_result.confidence:.3f}")
print(f"Latency: {ml_result.latency_ms:.2f}ms")
print(f"Sources: {len(ml_result.sources)} documents retrieved")

In [None]:
# Create another query about deep learning
dl_query = ContinualQuery(
    text="Explain deep learning concepts",
    domain="Deep Learning",
    difficulty=0.7
)

# Build a deterministic placeholder embedding from the query text. This is a
# hash, not a semantic embedding. An MD5 digest is always 16 bytes, i.e. exactly
# four float32 values, so the vector only ever needs zero-padding up to 384.
dl_query_digest = hashlib.md5(dl_query.text.encode()).digest()
dl_raw_components = np.frombuffer(dl_query_digest, dtype=np.float32)
dl_query_embedding = np.zeros(384, dtype=np.float32)
# Arbitrary hash bits can decode to NaN or +/-inf; zero those out so they
# cannot propagate into whatever arithmetic the retriever performs.
dl_query_embedding[:dl_raw_components.size] = np.nan_to_num(
    dl_raw_components, nan=0.0, posinf=0.0, neginf=0.0
)

# Execute the query (k=2)
dl_result = continual_rag.query(dl_query, dl_query_embedding, k=2)

# Report the fields of the returned result
print("\nDeep Learning Query:")
print(f"Query: {dl_query.text}")
print(f"Answer: {dl_result.answer}")
print(f"Adaptation needed: {dl_result.adaptation_needed}")
print(f"Sources: {len(dl_result.sources)} documents retrieved")

## Simulate Learning Over Time

In [None]:
# Simulate multiple interactions to demonstrate learning
print("Simulating learning over multiple interactions...")

# One (query text, domain, difficulty) tuple per simulated interaction
interaction_queries = [
    ("How does supervised learning work?", "ML", 0.5),
    ("What are convolutional neural networks?", "CV", 0.8),
    ("Explain transformer models", "NLP", 0.7),
    ("What is reinforcement learning?", "RL", 0.6),
    ("How does backpropagation work?", "ML", 0.7)
]

for i, (query_text, domain, difficulty) in enumerate(interaction_queries):
    query = ContinualQuery(text=query_text, domain=domain, difficulty=difficulty)

    # Deterministic placeholder embedding (a hash, not a semantic embedding).
    # An MD5 digest is always 16 bytes = four float32 values, so the vector
    # only needs zero-padding up to 384 dimensions.
    query_digest = hashlib.md5(query_text.encode()).digest()
    raw_components = np.frombuffer(query_digest, dtype=np.float32)
    query_emb = np.zeros(384, dtype=np.float32)
    # Arbitrary hash bits can decode to NaN or +/-inf; zero those out so they
    # cannot propagate into whatever arithmetic the retriever performs.
    query_emb[:raw_components.size] = np.nan_to_num(
        raw_components, nan=0.0, posinf=0.0, neginf=0.0
    )

    result = continual_rag.query(query, query_emb, k=2)

    # Only append an ellipsis when the preview really is truncated
    preview = query_text if len(query_text) <= 30 else f"{query_text[:30]}..."
    print(f"  Interaction {i+1}: '{preview}' -> Adaptation needed: {result.adaptation_needed}")

# Size of the retriever's experience buffer after the simulated interactions
print(f"\nExperience buffer now contains {len(continual_rag.retriever.experience_buffer.buffer)} experiences")

## Check Learning Status

In [None]:
# Fetch the current learning snapshot from the system
learning_status = continual_rag.get_learning_status()

# Headline figures, emitted as a single block of text
status_lines = [
    "Learning Status:",
    f"  Total documents: {learning_status['total_documents']}",
    f"  Total experiences: {learning_status['total_experiences']}",
    f"  Total queries processed: {learning_status['total_queries_processed']}",
    f"  Average performance: {learning_status['average_performance']:.3f}",
    f"  Domains handled: {learning_status['domains_handled']}",
    f"  Adaptation needed: {learning_status['adaptation_needed']}",
]
print("\n".join(status_lines))

# Nested performance summary (average / min / max)
perf_stats = learning_status['performance_stats']
perf_avg, perf_min, perf_max = (perf_stats[name] for name in ("average", "min", "max"))
print(f"  Performance stats: avg={perf_avg:.3f}, min={perf_min:.3f}, max={perf_max:.3f}")

## Performance Analysis

In [None]:
# Perform multiple queries to analyze performance over time
test_queries = [
    "What is machine learning?",
    "How do neural networks work?",
    "Explain natural language processing",
    "What is computer vision?",
    "Describe reinforcement learning"
]

# Per-query measurements collected from each result
latencies = []
confidences = []
adaptation_needed = []

for i, query_text in enumerate(test_queries):
    query = ContinualQuery(text=query_text, domain="General", difficulty=0.5)

    # Deterministic placeholder embedding (a hash, not a semantic embedding).
    # An MD5 digest is always 16 bytes = four float32 values, so the vector
    # only needs zero-padding up to 384 dimensions.
    query_digest = hashlib.md5(query_text.encode()).digest()
    raw_components = np.frombuffer(query_digest, dtype=np.float32)
    query_emb = np.zeros(384, dtype=np.float32)
    # Arbitrary hash bits can decode to NaN or +/-inf; zero those out so they
    # cannot propagate into whatever arithmetic the retriever performs.
    query_emb[:raw_components.size] = np.nan_to_num(
        raw_components, nan=0.0, posinf=0.0, neginf=0.0
    )

    result = continual_rag.query(query, query_emb, k=2)
    latencies.append(result.latency_ms)
    confidences.append(result.confidence)
    adaptation_needed.append(result.adaptation_needed)

    # Print progress; only append an ellipsis when the preview is truncated
    preview = query_text if len(query_text) <= 20 else f"{query_text[:20]}..."
    print(f"Query {i+1}: '{preview}' -> Latency: {result.latency_ms:.2f}ms, Confidence: {result.confidence:.3f}")

# Aggregate statistics over all test queries
print("\nOverall Performance:")
print(f"Average query latency: {np.mean(latencies):.2f}ms")
print(f"Latency std deviation: {np.std(latencies):.2f}ms")
print(f"Average confidence: {np.mean(confidences):.3f}")
print(f"Adaptation needed in {sum(adaptation_needed)}/{len(adaptation_needed)} queries")

## Trigger System Adaptation

In [None]:
# Add more documents to trigger adaptation
additional_docs = [
    ContinualDocument(
        id="doc5",
        content="Large language models are transformer-based neural networks trained on vast amounts of text data. They can generate human-like text and perform various natural language tasks.",
        importance_score=0.9,
        metadata={"domain": "LLM", "difficulty": 0.8, "topic": "Transformers"}
    ),
    ContinualDocument(
        id="doc6",
        content="Generative AI models can create new content such as text, images, music, or other data based on patterns learned from training data.",
        importance_score=0.8,
        metadata={"domain": "GenAI", "difficulty": 0.7, "topic": "Content Generation"}
    )
]

# Report the count returned by add_documents (the same way the first batch of
# documents is reported) instead of assuming every submitted document was added.
num_additional_added = continual_rag.add_documents(additional_docs)
print(f"Added {num_additional_added} additional documents")

# Run the adaptation step; its return value is reported as whether an
# adaptation was performed.
adaptation_performed = continual_rag.adapt()
print(f"System adaptation performed: {adaptation_performed}")

# Check updated learning status
updated_status = continual_rag.get_learning_status()
print(f"Updated total documents: {updated_status['total_documents']}")

## Summary

In this notebook, we explored the Continual Learning RAG architecture:

1. **Initialization**: Created an instance of the ContinualLearningRAG system with experience replay
2. **Continual Learning Documents**: Added documents with importance scores and domain metadata
3. **Learning Simulation**: Executed multiple queries to simulate learning over time
4. **Learning Status**: Examined the system's learning progress and experience accumulation
5. **Performance Evaluation**: Measured query latency, confidence, and adaptation needs
6. **System Adaptation**: Demonstrated how the system adapts based on accumulated experiences

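In this notebook, the Continual Learning RAG system accumulated experiences from the queries it processed and ran its adaptation step after new documents were added; these are the mechanisms it relies on to adapt over time while preventing catastrophic forgetting. Note that the notebook does not itself measure forgetting, so retention of earlier knowledge should be evaluated separately.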