In [7]:
# Financial RAG System Demo Notebook
# Run this in Jupyter to interactively test the system

import sys
import json
from IPython.display import display, HTML, JSON
import pandas as pd

# Import our main classes (assuming main.py is in the same directory)
from main import (
    SECDataCollector, DocumentProcessor, VectorStore, 
    FinancialAgent, Document, QueryResult
)

# Setup and initialization: announce start-up. This cell only imports the
# classes; the components themselves are constructed in a later cell.
# NOTE(review): if one of the imports above fails with
# "cannot import name 'TypeIs' from 'typing_extensions'", the installed
# typing_extensions is presumably older than the release that introduced
# TypeIs (4.10.0) - confirm the version and upgrade it in the kernel's env.
print("🚀 Initializing Financial RAG System...")

ImportError: cannot import name 'TypeIs' from 'typing_extensions' (/opt/anaconda3/lib/python3.11/site-packages/typing_extensions.py)

In [None]:
def create_mock_data():
    """Build one mock annual-report ``Document`` per company and fiscal year.

    The figures are hard-coded demo values for GOOGL, MSFT and NVDA for the
    years 2022 and 2023. Each document's text is a short "Management
    Discussion and Analysis" style summary assembled line by line, so the
    text carries no source-code indentation and no whitespace-only lines for
    segment metrics a company does not report.

    Returns:
        list: six ``Document`` objects in dictionary insertion order
        (company, then year), each with ``chunk_id`` set to
        ``"<company>_<year>_detailed"``.
    """
    # More detailed mock data for better demo
    company_data = {
        'GOOGL': {
            '2022': {
                'revenue': '282.8 billion',
                'operating_margin': '25.2%',
                'rd_spending': '39.5 billion (14% of revenue)',
                'cloud_revenue': 'Google Cloud generated $26.3 billion',
                'advertising': 'Search and other advertising revenues were $162.5 billion'
            },
            '2023': {
                'revenue': '307.4 billion',
                'operating_margin': '29.8%',
                'rd_spending': '43.1 billion (14% of revenue)',
                'cloud_revenue': 'Google Cloud generated $33.1 billion',
                'advertising': 'Search and other advertising revenues were $175.0 billion'
            }
        },
        'MSFT': {
            '2022': {
                'revenue': '198.3 billion',
                'operating_margin': '37.9%',
                'rd_spending': '24.5 billion (12% of revenue)',
                'cloud_revenue': 'Microsoft Cloud revenue was $91.2 billion',
                'azure': 'Azure and other cloud services revenue grew 40%'
            },
            '2023': {
                'revenue': '211.9 billion',
                'operating_margin': '42.1%',
                'rd_spending': '27.2 billion (13% of revenue)',
                'cloud_revenue': 'Microsoft Cloud revenue was $111.6 billion',
                'azure': 'Azure and other cloud services revenue grew 31%'
            }
        },
        'NVDA': {
            '2022': {
                'revenue': '27.0 billion',
                'operating_margin': '15.0%',
                'rd_spending': '7.3 billion (27% of revenue)',
                'datacenter': 'Data Center revenue was $15.0 billion',
                'ai_focus': 'Significant investments in AI and machine learning infrastructure'
            },
            '2023': {
                'revenue': '60.9 billion',
                'operating_margin': '32.5%',
                'rd_spending': '8.7 billion (14% of revenue)',
                'datacenter': 'Data Center revenue was $47.5 billion',
                'ai_focus': 'Leading provider of AI computing platforms and accelerated computing'
            }
        }
    }

    mock_documents = []
    for company, years_data in company_data.items():
        for year, metrics in years_data.items():
            # Same candidates, in the same order, as before: the primary
            # segment line is cloud revenue, falling back to data center.
            segment_candidates = (
                metrics.get('cloud_revenue', metrics.get('datacenter', '')),
                metrics.get('advertising', ''),
                metrics.get('azure', ''),
                metrics.get('ai_focus', ''),
            )
            # Drop metrics this company does not report instead of emitting
            # blank filler lines into the document text.
            segment_lines = [line for line in segment_candidates if line]

            # Assemble the text from explicit lines rather than an indented
            # triple-quoted literal, which would embed the source indentation
            # (and a leading newline) into every document.
            content_lines = [
                f"{company} Annual Report {year} - Management Discussion and Analysis",
                "",
                "Financial Performance Overview:",
                f"Total revenue for fiscal year {year} was {metrics['revenue']}.",
                f"Operating margin improved to {metrics['operating_margin']} in {year}.",
                "",
                "Research and Development:",
                f"Research and development expenses were {metrics['rd_spending']}.",
                "We continue to invest heavily in innovation and new technologies.",
                "",
                "Business Segments:",
                *segment_lines,
                "",
                "Our strategic focus remains on long-term growth and market leadership.",
            ]

            mock_documents.append(
                Document(
                    content="\n".join(content_lines),
                    company=company,
                    year=year,
                    chunk_id=f"{company}_{year}_detailed",
                )
            )

    return mock_documents

# Initialize system components. Order matters: the documents are created
# first, then loaded into the store, and the store is handed to the agent.
print("📊 Creating mock financial data...")
mock_docs = create_mock_data()

# Load the mock documents into a fresh VectorStore.
# NOTE(review): presumably add_documents embeds/indexes them - confirm in main.py.
print("🔍 Building vector store...")
vector_store = VectorStore()
vector_store.add_documents(mock_docs)

# ask_question() looks up this module-level `agent` name at call time, so this
# cell must have run before any query is asked.
print("🤖 Initializing financial agent...")
agent = FinancialAgent(vector_store)

print("✅ System ready!")

In [None]:
# Interactive query helper
def ask_question(query):
    """Send ``query`` to the module-level ``agent`` and print a formatted report.

    The report shows the answer, the reasoning, the sub-queries (only when
    more than one was executed) and one two-line entry per source.

    Returns:
        The object returned by ``agent.process_query(query)``, or ``None`` if
        anything raised while querying or printing (the error message is
        printed instead of propagating).
    """
    divider = "=" * 60
    print(f"\n🔍 Query: {query}")
    print(divider)

    try:
        response = agent.process_query(query)

        # Answer first, then the agent's explanation of how it got there
        print(f"💡 Answer: {response.answer}")
        print(f"\n🧠 Reasoning: {response.reasoning}")

        # A single sub-query means no decomposition happened; stay quiet then
        sub_queries = response.sub_queries
        if len(sub_queries) > 1:
            print("\n📋 Sub-queries executed:")
            for position, sub_query in enumerate(sub_queries, start=1):
                print(f"   {position}. {sub_query}")

        # Supporting documents with their score and a 100-char preview
        sources = response.sources
        print(f"\n📚 Sources ({len(sources)} documents):")
        for position, src in enumerate(sources, start=1):
            label = f"{src['company']} {src['year']}"
            print(f"   {position}. {label} (score: {src['score']:.3f})")
            print(f"      {src['excerpt'][:100]}...")

        # Hand the structured result back for further analysis
        return response

    except Exception as error:
        print(f"❌ Error: {error}")
        return None

# Demo queries: one single-fact lookup plus several that need a comparison
# across years or across companies.
demo_queries = [
    "What was NVIDIA's total revenue in fiscal year 2023?",
    "How much did Microsoft's cloud revenue grow from 2022 to 2023?",
    "Which company had the highest operating margin in 2023?",
    "Compare R&D spending as percentage of revenue across all companies in 2023",
    "What percentage of Google's revenue came from advertising in 2023?"
]

print("\n🎯 Running Demo Queries:")
print("=" * 60)

# Store results for analysis. The list is rebuilt from scratch on every run of
# this cell, so re-running it does not accumulate duplicates.
results = []
for demo_query in demo_queries:
    demo_result = ask_question(demo_query)
    # ask_question returns None when a query failed; keep only successful
    # results so every entry exposes .sub_queries and .sources.
    if demo_result is not None:
        results.append(demo_result)

In [None]:
# Analysis helper
def analyze_agent_performance():
    """Print a summary of the module-level ``results`` list.

    Reports how many queries ran, how many used exactly one sub-query versus
    more than one, lists the decomposed queries, and prints how many sources
    were cited per (company, year) pair.
    """
    print("\n📈 Agent Performance Analysis:")
    print("=" * 40)

    # Partition in one pass; entries with zero sub-queries land in neither list
    multi_step = []
    single_step = []
    for entry in results:
        step_count = len(entry.sub_queries)
        if step_count > 1:
            multi_step.append(entry)
        elif step_count == 1:
            single_step.append(entry)

    print(f"Total queries processed: {len(results)}")
    print(f"Simple queries (no decomposition): {len(single_step)}")
    print(f"Complex queries (decomposed): {len(multi_step)}")

    if multi_step:
        print("\nQuery decomposition examples:")
    for entry in multi_step:
        preview = entry.query[:50]
        print(f"• '{preview}...' → {len(entry.sub_queries)} sub-queries")

    # Source distribution: flatten every result's sources into one list
    cited = [source for entry in results for source in entry.sources]
    if not cited:
        return

    source_table = pd.DataFrame(cited)
    print("\nSource distribution:")
    per_filing = source_table.groupby(['company', 'year']).size()
    print(per_filing.to_string())

# Run analysis only when at least one result was collected; with an empty
# `results` list the summary is skipped entirely.
if results:
    analyze_agent_performance()

print("\n🎉 Demo complete! Try asking your own questions using ask_question('your query here')")

In [None]:
# Closing banner plus two copy-pasteable example calls, emitted by a single
# print call with one argument per output line.
print(
    "\n" + "=" * 60,
    "INTERACTIVE SECTION - Try your own queries!",
    "=" * 60,
    "\nExample usage:",
    "result = ask_question('What was Google\\'s revenue growth from 2022 to 2023?')",
    "result = ask_question('Compare operating margins across all three companies')",
    sep="\n",
)