In [None]:
# PRE-STEP: Install Required Dependencies
%pip install langchain
%pip install langgraph
%pip install langchain-openai
%pip install chromadb
%pip install python-dotenv
%pip install pydantic

In [None]:
# Run this if you want to run from a clean slate
# This clears ALL generated data and memory stores
# WARNING: destructive - everything listed below is deleted without confirmation.

import shutil
import os


def _remove_path(path):
    """Delete ``path`` if it exists and report whether anything was removed.

    Regular files are removed with ``os.remove``; anything else that exists
    (i.e. a directory) is removed recursively with ``shutil.rmtree``.

    Args:
        path: File or directory path, relative to the current working directory.

    Returns:
        True if the path existed and was deleted, False if it did not exist.
    """
    if not os.path.exists(path):
        return False
    if os.path.isfile(path):
        os.remove(path)
    else:
        shutil.rmtree(path)
    return True


print("🧹 Starting complete cleanup...")

# 1. Clear the domain memory store (vector database)
if _remove_path("./domain_investment/domain_memory_store"):
    print("  ✓ Cleared domain memory store")

# 2. Clear the old memory stores from testing (if they exist)
test_dirs = [
    "./baseline_memory_store",  # From Cell 1
    "./full_memory_store",       # Old name from earlier versions
    "./investment_memory_store"  # Another old name
]
for dir_path in test_dirs:
    if _remove_path(dir_path):
        print(f"  ✓ Cleared {dir_path}")

# 3. Clear generated data (optional - comment out if you want to keep the data)
if os.path.exists("./domain_investment/investment_advisor_data"):
    # Only clear the generated files, not the directory itself
    data_files = [
        "./domain_investment/investment_advisor_data/conversations.jsonl",
        "./domain_investment/investment_advisor_data/extracted_patterns.json",
        "./domain_investment/investment_advisor_data/test_scenarios.json",
        "./domain_investment/investment_advisor_data/statistics.json"
    ]
    for file_path in data_files:
        _remove_path(file_path)
    print("  ✓ Cleared generated data files")

# 4. Clear any Chroma persistence files in the root directory
chroma_files = [
    "chroma.sqlite3",
    ".chroma"
]
for file_path in chroma_files:
    if _remove_path(file_path):
        print(f"  ✓ Cleared {file_path}")

print("\n✅ Cleanup complete!")
# The agent is created in Cell 1 and this notebook has no Cell 6, so a fresh
# run has to start from Cell 1.
print("   You can now restart from Cell 1 with a completely fresh system")
print("\n   Note: The code files (.py) are preserved - only runtime data was cleared")

In [None]:
# Cell 1: Initialize the Full CoALA Agent with All Memory Types
"""
This cell sets up a complete agent with:
- Episodic Memory: Stores past conversations
- Semantic Memory: Extracts and stores facts
- Procedural Memory: Learns strategies and patterns
"""

import os
import sys
import json

# Import the complete CoALA system
from coala_agent import CoALAAgent
from domain_investment.investment_advisor_agent import InvestmentAdvisorAgent
from domain_investment.investment_advisor_data import EnhancedInvestmentAdvisorDataGenerator
from domain_investment.investor_test_scenarios import (
    process_baseline_conversations, test_agent_with_queries,
    process_performance_feedback, process_remaining_conversations,
    test_hierarchical_retrieval, get_key_achievements
)

from dotenv import load_dotenv

# load_dotenv() copies the variables found in env.txt into os.environ, so no
# manual re-assignment is needed afterwards. Assigning os.getenv(...) back into
# os.environ would raise an opaque "TypeError: str expected, not NoneType"
# when the key is absent, so fail early with an actionable message instead.
load_dotenv(dotenv_path='env.txt')
if not os.getenv('OPENAI_API_KEY'):
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to env.txt (OPENAI_API_KEY=...) "
        "or export it in the environment before running this cell."
    )

# Create a FRESH agent for testing
print("🚀 Initializing Full CoALA Agent with All Memory Types")
print("=" * 60)

# Create domain agent which encapsulates investment-specific logic
domain_agent = InvestmentAdvisorAgent()

# Set up memory storage directory (created on first run, reused afterwards)
domain_memory_dir = os.path.join(domain_agent.domain_dir, "domain_memory_store")
os.makedirs(domain_memory_dir, exist_ok=True)

# Create the full agent with all memory systems
full_agent = CoALAAgent(
    domain_agent=domain_agent,
    model_name="gpt-4.1-mini",
    temperature=0.0,
    persist_directory=domain_memory_dir,
    optimization_algorithm="prompt_memory"  # Can be "gradient" or "metaprompt"
)

print("✅ Full CoALA agent initialized with:")
print(f"  • Domain: {full_agent.domain_agent.__class__.__name__}")
print(f"  • Memory Store: {domain_memory_dir}")

# Verify initial state; .get(...) with defaults tolerates a stats dict that
# lacks either section.
initial_stats = full_agent.get_memory_stats()
print(f"\n📊 Initial state:")
print(f"  Episodic/Semantic docs: {initial_stats.get('episodic_semantic', {}).get('total_documents', 0)}")
print(f"  Procedural strategies: {initial_stats.get('procedural', {}).get('total_strategies', 0)}")

# Test with a simple query
test_response = full_agent.process_message(
    "I'm thinking about rebalancing my portfolio. I'm 35 with moderate risk tolerance.",
    user_id="test_client_001"
)
print(f"\n🧪 Test Response: {test_response}")

In [None]:
# Cell 2: Load or Generate Synthetic Investment Advisor Data
"""
This cell loads realistic conversation data that simulates various types of
investment advisory interactions, including both successful and failed conversations.
"""

# Get data directory from domain agent
data_dir = domain_agent.data_dir
conversations_file = os.path.join(data_dir, "conversations.jsonl")

if os.path.exists(conversations_file):
    # Load existing data
    print(f"📂 Loading existing conversation data from {data_dir}...")
    # JSON Lines: one JSON object per line. Blank lines (e.g. a trailing
    # newline at end of file) are skipped instead of crashing json.loads.
    with open(conversations_file, 'r', encoding='utf-8') as f:
        conversations = [json.loads(line) for line in f if line.strip()]
else:
    # Generate new data
    print(f"🔨 Generating new conversation data...")
    generator = EnhancedInvestmentAdvisorDataGenerator(seed=42)
    data = generator.export_realistic_data()
    conversations = data['conversations']

    # Convert to dict format (generated items may be plain objects, not dicts)
    conversations = [
        conv if isinstance(conv, dict) else conv.__dict__
        for conv in conversations
    ]

# Every statistic below divides by len(conversations) and the sample section
# indexes conversations[0]; stop with a clear message rather than a
# ZeroDivisionError / IndexError when there is nothing to work with.
if not conversations:
    raise ValueError(
        f"No conversations available (looked for {conversations_file}); "
        "cannot compute statistics or continue with processing."
    )

# Compute each summary figure once and reuse it in the report below
total_conversations = len(conversations)
unique_users = len(set(c['user_id'] for c in conversations))

# TESTING:
print(f"✅ Loaded {total_conversations} conversations")
print(f"👥 Unique users: {unique_users}")

# Display data statistics
success_rate = sum(1 for c in conversations if c['feedback']['success']) / total_conversations
avg_satisfaction = sum(c['feedback']['satisfaction_score'] for c in conversations) / total_conversations

print(f"\n📊 Data Overview:")
print(f"  Data location: {data_dir}")
print(f"  Total conversations: {total_conversations}")
print(f"  Unique users: {unique_users}")
print(f"  Success rate: {success_rate:.1%}")
print(f"  Avg satisfaction: {avg_satisfaction:.1f}/5.0")

# Examine a sample conversation
# NOTE(review): assumes each conversation has at least two messages (user
# first, assistant second) - confirm against the data generator.
sample_conv = conversations[0]
print(f"\n🔍 Sample Conversation:")
print(f"  User {sample_conv['user_id']}: {sample_conv['messages'][0]['content']}")
print(f"  Assistant: {sample_conv['messages'][1]['content']}")
print(f"  Success: {sample_conv['feedback']['success']}")
print(f"  Satisfaction: {sample_conv['feedback']['satisfaction_score']}/5.0")
# Summing the signal values counts the truthy ones when they are booleans.
print(f"  Behavioral signals: {sum(sample_conv['behavioral_signals'].values())} active")

print(f"\n✅ Data ready for processing by full agent\n  Will be stored in: {domain_agent.memory_dir}")

In [None]:
# Cell 3: Process Baseline Conversations to Establish Initial Learning
"""
Runs the first batch of conversations through the agent so that all three
memory systems start out with baseline memories and learning patterns.
"""
print("📚 Processing baseline conversations...")
results = process_baseline_conversations(full_agent, conversations, num_baseline=30)

# --- What the baseline pass stored and extracted ---
print("\n✅ Baseline processing complete")
print(f"  Episodic memories stored: {results['baseline_count']}")
print(
    f"  Semantic facts extracted: {results['facts_extracted']} "
    f"(errors: {results['extraction_errors']})"
)

# --- Strategies learned per scope; missing keys are reported as 0 ---
learning = results['learning_summary']
processed = results['processed']
print(f"  Global strategies: {learning.get('global', 0)}")
scoped_rows = (
    # (label, key in `learning`, key in `processed`, unit shown in the output)
    ("User", "user", "users", "users"),
    ("Community", "community", "communities", "communities"),
    ("Task", "task", "tasks", "task types"),
)
for scope_label, learned_key, processed_key, unit in scoped_rows:
    learned_count = learning.get(learned_key, 0)
    covered_count = processed.get(processed_key, 0)
    print(f"  {scope_label} strategies: {learned_count} (for {covered_count} {unit})")

# --- Community sizes; communities without members are not listed ---
print("\n👥 Community Membership:")
for community_id, members in full_agent.procedural_memory.community_members.items():
    if not members:
        continue
    print(f"  {community_id}: {len(members)} members")

# --- Procedural memory totals after the baseline pass ---
final_stats = full_agent.procedural_memory.get_stats()
print("\n📊 Procedural Memory Statistics:")
print(f"  Total strategies: {final_stats['total_strategies']}")
print(f"  By scope: {final_stats['by_scope']}")
print(f"  Avg success rate: {final_stats['avg_success_rate']:.2f}")

In [None]:
# Cell 4: Test Performance and Trigger Adaptations
"""
Queries the agent using the strategies it has learned so far, then feeds it
performance feedback so the resulting strategy adaptations can be reported.
"""
print("🧪 Testing agent with learned strategies...")

# Run the predefined test queries and show each query/response pair
test_results = test_agent_with_queries(full_agent)

for result in test_results:
    print(f"\n❓ {result['query']}")
    print(f"💬 {result['response']}")
    strategy = result['strategy']
    if not strategy:
        # No strategy attached to this response - nothing more to report
        continue
    print(f"   → Using: {strategy['source']} ({strategy['confidence']:.0%})")

# Feed performance feedback to the agent to trigger adaptations
print("\n📊 Processing performance feedback...")

adaptations, adaptation_details = process_performance_feedback(
    full_agent,
    conversations,
    start_idx=30,
    end_idx=40,
)

# One line per adapted strategy with its updated rate
for detail in adaptation_details:
    print(f"  ✓ {detail['strategy']} → {detail['new_rate']:.0%}")

print(f"\n✅ {adaptations} strategies adapted")

# Memory totals after testing and adaptation
final_stats = full_agent.get_memory_stats()
print("\n📚 Final Memory State:")
episodic_semantic_docs = final_stats['episodic_semantic'].get('total_documents', 0)
print(f"  Episodic/Semantic: {episodic_semantic_docs} documents")
procedural_strategies = final_stats['procedural'].get('total_strategies', 0)
print(f"  Procedural: {procedural_strategies} strategies")
print(f"  Data location: {full_agent.domain_agent.data_dir}")
print(f"  Memory location: {full_agent.domain_agent.memory_dir}")

In [None]:

# Cell 5: Complete Learning Progression and Hierarchical Retrieval
"""
This final cell demonstrates the complete learning progression and shows how
the agent uses hierarchical retrieval to provide personalized responses.
"""
print("📊 COMPLETE LEARNING PROGRESSION ANALYSIS")
print(f"\n🔄 Processing additional conversations...")
# NOTE(review): Cell 4 passes start_idx=30 / end_idx=40 to the feedback step
# while this call passes 50 and 100, so conversations 40-49 appear to be used
# by neither step - confirm that gap is intentional.
num_processed, learned = process_remaining_conversations(full_agent, conversations, 50, 100)
print(f"\n✅ Learning complete ({num_processed} conversations processed):")
for scope, count in learned.items():
    # Scopes that learned nothing new are omitted from the report
    if count:
        print(f"  {scope.capitalize()}: {count} new strategies")

final_stats = full_agent.procedural_memory.get_stats()
print(f"\n📈 FINAL PROCEDURAL MEMORY STATISTICS:")
print(f"  Total strategies learned: {final_stats['total_strategies']}")
print(f"  Breakdown by scope:")
for scope, count in final_stats['by_scope'].items():
    print(f"    {scope.capitalize()}: {count}")
# Two-decimal formatting, matching how Cell 3 reports the same statistic
print(f"  Average success rate: {final_stats['avg_success_rate']:.2f}")
print(f"  Total adaptations: {final_stats['total_adaptations']}")
print(f"  Segments discovered: {', '.join(final_stats['segments'])}")

print("🔬 DEMONSTRATING FULL MEMORY INTEGRATION")
query = "I'm worried about market volatility. Should I move to safer investments?"
test_results = test_hierarchical_retrieval(full_agent, query)
for result in test_results:
    print(f"\n👤 {result['description']}\n   User ID: {result['user_id']}")
    # Strategy details are shown only when a strategy is attached to the result
    if result['strategy']:
        print(f"   Strategy source: {result['strategy']['source']}")
        print(f"   Scope: {result['strategy']['scope']}")
        print(f"   Confidence: {result['strategy']['confidence']:.1%}")

print("\n📊 STRATEGY PERFORMANCE BY SCOPE:")
full_agent.procedural_memory.show_strategy_performance()

print("🎯 KEY ACHIEVEMENTS DEMONSTRATED:")
for achievement in get_key_achievements():
    print(f"✓ {achievement}")

# A get_memory_stats() call used to precede this line, but its result was
# overwritten immediately and never read, so only the call whose result is
# actually reported is kept.
# NOTE(review): the 'algorithm' and 'total_optimizations' keys read below are
# presumably specific to get_all_memory_stats() - confirm against CoALAAgent.
memory_stats = full_agent.get_all_memory_stats()
print(f"\n📊 COMPLETE MEMORY SYSTEM STATISTICS:")
print(f"  Episodic/Semantic documents: {memory_stats['episodic_semantic'].get('total_documents', 'N/A')}")
print(f"  Procedural strategies: {memory_stats['procedural']['total_strategies']}")
print(f"  Optimization algorithm: {memory_stats['procedural']['algorithm']}")
print(f"  Total optimizations: {memory_stats['procedural']['total_optimizations']}")