# In [None]:
import os
import json
import time
import asyncio
from datetime import datetime
from strands import Agent, tool
from strands.models.anthropic import AnthropicModel
import httpx
from typing import Dict, Any, List, Optional
import numpy as np
from scipy import stats
import pandas as pd

# Configuration: base URL of the backend API. The tool functions below build
# their request URLs from this value (e.g. f"{BASE_URL}/api/graphs/").
BASE_URL = "http://localhost:8000"  # Change this to your deployed URL as needed

# ============= STATISTICAL ANALYSIS FUNCTIONS =============

def calculate_statistical_measures(data: List[Dict], field: str) -> Dict[str, float]:
    """Summarise one numeric property across a list of event records.

    Each record is expected to look like ``{'properties': {field: value}}``.
    Records where the property is absent or ``None`` are ignored.

    Returns a dict with mean, std (population), median, q25, q75, min, max,
    cv (std / mean, or 0 when the mean is 0), skewness and kurtosis.
    An empty dict is returned when there is nothing to summarise or when any
    error occurs (for instance a value that cannot be converted to float).
    """
    try:
        samples = []
        for record in data:
            raw = record.get('properties', {}).get(field)
            if raw is not None:
                samples.append(float(raw))

        if not samples:
            return {}

        arr = np.array(samples)
        mean = np.mean(arr)
        std = np.std(arr)

        summary = {
            'mean': float(mean),
            'std': float(std),
            'median': float(np.median(arr)),
            'q25': float(np.percentile(arr, 25)),
            'q75': float(np.percentile(arr, 75)),
            'min': float(np.min(arr)),
            'max': float(np.max(arr)),
        }
        # Coefficient of variation is undefined for a zero mean; report 0 then.
        if mean != 0:
            summary['cv'] = float(std / mean)
        else:
            summary['cv'] = 0
        summary['skewness'] = float(stats.skew(arr))
        summary['kurtosis'] = float(stats.kurtosis(arr))
        return summary
    except Exception:
        # Best-effort helper: any failure yields "no statistics".
        return {}

def detect_outliers_zscore(data: List[Dict], field: str, threshold: float = 3.0) -> Dict[str, Any]:
    """Detect outliers in a numeric property using the Z-score method.

    Args:
        data: Event records shaped like ``{'properties': {field: value}}``.
            Records where the property is absent or ``None`` are ignored.
        field: Name of the numeric property to inspect.
        threshold: Absolute Z-score above which a value counts as an outlier.

    Returns:
        A dict with ``outlier_count``, ``outlier_percentage`` (0-100),
        ``z_threshold`` and ``max_z_score``. When fewer than three values are
        available, or when an error occurs, only the first three keys are
        returned with zero counts (no ``max_z_score``).
    """
    no_outliers = {'outlier_count': 0, 'outlier_percentage': 0, 'z_threshold': threshold}
    try:
        values = []
        for item in data:
            raw = item.get('properties', {}).get(field)
            if raw is not None:
                values.append(float(raw))

        if len(values) < 3:
            return no_outliers

        arr = np.asarray(values, dtype=float)
        if arr.max() == arr.min():
            # All values identical: the standard deviation is 0, so the
            # Z-score would be 0/0 = NaN. Every point sits exactly on the
            # mean, so report a Z-score of 0 instead of leaking NaN.
            z_scores = np.zeros_like(arr)
        else:
            z_scores = np.abs(stats.zscore(arr))

        flagged = int(np.sum(z_scores > threshold))
        return {
            'outlier_count': flagged,
            'outlier_percentage': float(flagged / len(values) * 100),
            'z_threshold': threshold,
            'max_z_score': float(np.max(z_scores))
        }
    except Exception:
        # Deliberate best-effort: malformed records yield "no outliers".
        return no_outliers

def perform_trend_analysis(data: List[Dict], time_field: str, value_field: str) -> Dict[str, Any]:
    """Fit a linear trend (ordinary least squares) to a time/value series.

    Args:
        data: Event records shaped like ``{'properties': {...}}``.
        time_field: Property holding the (numeric) time coordinate.
        value_field: Property holding the numeric value.

    Returns:
        A dict with ``slope``, ``r_squared``, ``p_value``, ``correlation``,
        ``trend_strength`` (strong / moderate / weak by |r| > 0.7 / > 0.4),
        ``trend_direction`` (increasing / decreasing / flat) and
        ``significance`` (p < 0.05). An empty dict is returned when fewer than
        three complete pairs exist or when any error occurs.
    """
    try:
        time_values = []
        values = []
        for item in data:
            props = item.get('properties', {})
            raw_time = props.get(time_field)
            raw_value = props.get(value_field)
            # Skip only genuinely missing entries. A plain truthiness check
            # would also discard legitimate zeros (t == 0 or value == 0).
            if raw_time is None or raw_value is None or raw_time == '' or raw_value == '':
                continue
            time_values.append(float(raw_time))
            values.append(float(raw_value))

        if len(time_values) < 3:
            return {}

        slope, intercept, r_value, p_value, std_err = stats.linregress(time_values, values)

        if abs(r_value) > 0.7:
            strength = 'strong'
        elif abs(r_value) > 0.4:
            strength = 'moderate'
        else:
            strength = 'weak'

        # A zero slope is neither increasing nor decreasing.
        if slope > 0:
            direction = 'increasing'
        elif slope < 0:
            direction = 'decreasing'
        else:
            direction = 'flat'

        return {
            'slope': float(slope),
            'r_squared': float(r_value ** 2),
            'p_value': float(p_value),
            'correlation': float(r_value),
            'trend_strength': strength,
            'trend_direction': direction,
            'significance': 'significant' if p_value < 0.05 else 'not_significant'
        }
    except Exception:
        # Deliberate best-effort: bad input yields "no trend information".
        return {}

def analyze_categorical_distribution(data: List[Dict], field: str) -> Dict[str, Any]:
    """Describe how evenly the values of a categorical property are spread.

    Records whose property is missing or falsy are ignored. The result holds
    the number of distinct categories, the most frequent ``(category, count)``
    pair, the Shannon entropy in bits, the entropy normalised by its maximum
    (``log2`` of the category count) and an evenness label
    (even > 0.8, moderate > 0.5, otherwise skewed).

    Returns an empty dict when no usable values exist or on any error.
    """
    from collections import Counter

    try:
        labels = []
        for record in data:
            label = record.get('properties', {}).get(field)
            if label:
                labels.append(label)

        if not labels:
            return {}

        tally = Counter(labels)
        n_labels = len(labels)

        # Shannon entropy over the observed category shares.
        shares = [occurrences / n_labels for occurrences in tally.values()]
        entropy = -sum(share * np.log2(share) for share in shares if share > 0)

        # With a single category the maximum entropy is 0; avoid dividing by it.
        ceiling = np.log2(len(tally))
        if ceiling > 0:
            normalized = entropy / ceiling
        else:
            normalized = 0

        if normalized > 0.8:
            evenness = 'even'
        elif normalized > 0.5:
            evenness = 'moderate'
        else:
            evenness = 'skewed'

        if tally:
            top = tally.most_common(1)[0]
        else:
            top = None

        return {
            'unique_categories': len(tally),
            'most_common': top,
            'entropy': float(entropy),
            'normalized_entropy': float(normalized),
            'distribution_evenness': evenness
        }
    except Exception:
        # Best-effort helper: any failure yields "no distribution info".
        return {}

def test_correlation(data: List[Dict], field1: str, field2: str) -> Dict[str, Any]:
    """Test the correlation between two numeric properties.

    Args:
        data: Event records shaped like ``{'properties': {...}}``.
        field1: Name of the first numeric property.
        field2: Name of the second numeric property.

    Returns:
        A dict with Pearson and Spearman coefficients and p-values,
        ``correlation_strength`` (strong / moderate / weak by |Pearson r|
        > 0.7 / > 0.4) and two plain ``bool`` significance flags (p < 0.05).
        An empty dict is returned when fewer than three complete pairs exist
        or when any error occurs.
    """
    try:
        values1 = []
        values2 = []
        for item in data:
            props = item.get('properties', {})
            raw1 = props.get(field1)
            raw2 = props.get(field2)
            # Skip only genuinely missing entries. A plain truthiness check
            # would also discard pairs in which either value is a real 0.
            if raw1 is None or raw2 is None or raw1 == '' or raw2 == '':
                continue
            values1.append(float(raw1))
            values2.append(float(raw2))

        if len(values1) < 3:
            return {}

        # Pearson correlation (linear relationship)
        pearson_r, pearson_p = stats.pearsonr(values1, values2)

        # Spearman correlation (rank-based, monotonic relationship)
        spearman_r, spearman_p = stats.spearmanr(values1, values2)

        if abs(pearson_r) > 0.7:
            strength = 'strong'
        elif abs(pearson_r) > 0.4:
            strength = 'moderate'
        else:
            strength = 'weak'

        return {
            'pearson_correlation': float(pearson_r),
            'pearson_p_value': float(pearson_p),
            'spearman_correlation': float(spearman_r),
            'spearman_p_value': float(spearman_p),
            'correlation_strength': strength,
            # bool(): the comparison yields numpy.bool_, which json.dumps
            # cannot serialise.
            'pearson_significant': bool(pearson_p < 0.05),
            'spearman_significant': bool(spearman_p < 0.05)
        }
    except Exception:
        # Deliberate best-effort: bad input yields "no correlation info".
        return {}


# The "test_" prefix matches pytest's default discovery pattern; mark the
# helper as not-a-test so importing it into a test module does not make pytest
# try to run it (and fail on the "missing fixtures" data/field1/field2).
test_correlation.__test__ = False

# ============= API-CALLING TOOLS =============

@tool
async def get_recent_financial_data(limit: int = 16) -> dict:
    """
    Get the most recent financial data rows for analysis.
    
    Args:
        limit (int): Number of recent rows to fetch (default: 16, max: 100; larger values are capped at 100)
    
    Returns:
        dict: Financial data response with events
    """
    try:
        # Cap the request size at 100 rows.
        limit = min(limit, 100)

        async with httpx.AsyncClient() as client:
            response = await client.get(
                f"{BASE_URL}/api/agent-query",
                params={"limit": limit},
                timeout=30.0
            )
            response.raise_for_status()
            payload = response.json()

        # The body is either an object carrying the rows under "events" or the
        # rows themselves; resolve that once instead of once per field.
        records = payload.get("events", payload) if isinstance(payload, dict) else payload
        record_count = len(records)

        return {
            "status": "success",
            "timestamp": datetime.now().isoformat(),
            "data": records,
            "count": record_count,
            "message": f"Retrieved {record_count} financial records"
        }

    except httpx.HTTPStatusError as e:
        # Non-2xx response raised by raise_for_status().
        return {
            "status": "error",
            "timestamp": datetime.now().isoformat(),
            "data": [],
            "count": 0,
            "message": f"HTTP {e.response.status_code} error: {str(e)}"
        }
    except Exception as e:
        # Connection problems, timeouts, invalid JSON, unexpected body shape.
        return {
            "status": "error",
            "timestamp": datetime.now().isoformat(),
            "data": [],
            "count": 0,
            "message": f"Error retrieving financial data: {str(e)}"
        }

@tool
async def get_current_graphs() -> dict:
    """
    Get all currently displayed graphs from the dashboard.
    
    Returns:
        dict: Response containing list of current graphs with their configurations
    """
    # NOTE(review): the docstring above is left verbatim; presumably the @tool
    # decorator exposes it to the model as the tool description - confirm.

    def failure(message: str) -> dict:
        # Shared shape of both error responses: an empty graph list plus the reason.
        return {
            "status": "error",
            "timestamp": datetime.now().isoformat(),
            "graphs": [],
            "count": 0,
            "message": message
        }

    try:
        async with httpx.AsyncClient() as client:
            response = await client.get(f"{BASE_URL}/api/graphs/", timeout=30.0)
            response.raise_for_status()
            graphs = response.json()

            # Debug trace of what the dashboard returned.
            print("GOT GRAPHS")
            print(graphs)

            return {
                "status": "success",
                "timestamp": datetime.now().isoformat(),
                "graphs": graphs,
                "count": len(graphs),
                "message": f"Retrieved {len(graphs)} graphs from dashboard"
            }
    except httpx.HTTPStatusError as http_err:
        # Non-2xx response raised by raise_for_status().
        print(f'ERROR: {http_err}')
        return failure(f"HTTP {http_err.response.status_code} error: {str(http_err)}")
    except Exception as err:
        # Anything else: connection failure, timeout, invalid JSON, ...
        print(f'ERROR: {err}')
        return failure(f"Error retrieving graphs: {str(err)}")

@tool
async def remove_graph(graph_id: str) -> dict:
    """
    Remove a graph by its ID from the dashboard.
    
    Args:
        graph_id (str): ID of the graph to remove
    
    Returns:
        dict: Removal result with confirmation
    """
    try:
        async with httpx.AsyncClient() as client:
            response = await client.delete(
                f"{BASE_URL}/api/graphs/{graph_id}",
                timeout=30.0
            )
            response.raise_for_status()

        # Past raise_for_status() the removal has succeeded. A successful
        # DELETE may carry no body at all (e.g. 204 No Content) or a non-JSON
        # one, so the body is only an optional source for a nicer message and
        # must never turn a successful removal into an error.
        message = f"Successfully removed graph {graph_id}"
        if response.content:
            try:
                body = response.json()
            except ValueError:
                body = None
            if isinstance(body, dict):
                message = body.get("message", message)

        return {
            "status": "success",
            "timestamp": datetime.now().isoformat(),
            "removed_id": graph_id,
            "message": message
        }

    except httpx.HTTPStatusError as e:
        # 404 gets a dedicated message so the caller knows the ID was unknown.
        if e.response.status_code == 404:
            return {
                "status": "error",
                "timestamp": datetime.now().isoformat(),
                "removed_id": None,
                "message": f"Graph {graph_id} not found"
            }
        return {
            "status": "error",
            "timestamp": datetime.now().isoformat(),
            "removed_id": None,
            "message": f"HTTP {e.response.status_code} error: {str(e)}"
        }
    except Exception as e:
        # Connection problems, timeouts and other unexpected failures.
        return {
            "status": "error",
            "timestamp": datetime.now().isoformat(),
            "removed_id": None,
            "message": f"Error removing graph: {str(e)}"
        }

@tool
async def add_new_graph(type: str, title: str, sql_query: str, justification: str, extra: Optional[Dict] = None) -> dict:
    """
    Add a new graph to the financial dashboard with statistical justification.
    
    Args:
        type (str): Type of graph (bar, line, pie, area, scatter, scatter3d)
        title (str): Title for the graph
        sql_query (str): SQL query to fetch data for the graph
        justification (str): Statistical justification including numerical test results
        extra (Optional[Dict]): Additional parameters like axis labels
    
    Returns:
        dict: Addition result with new graph details
    """
    # NOTE(review): the docstring above is left verbatim; presumably the @tool
    # decorator exposes it to the model as the tool description - confirm.

    def failure(message: str) -> dict:
        # Shared shape of both error responses.
        return {
            "status": "error",
            "timestamp": datetime.now().isoformat(),
            "new_graph_id": None,
            "graph": None,
            "message": message
        }

    try:
        # Request body; "extra" is only sent when the caller supplied something.
        payload = {
            "type": type,
            "title": title,
            "sql_query": sql_query,
            "justification": justification,
            **({"extra": extra} if extra else {}),
        }

        async with httpx.AsyncClient() as client:
            response = await client.post(f"{BASE_URL}/api/graphs/", json=payload, timeout=30.0)
            response.raise_for_status()
            created = response.json()

            return {
                "status": "success",
                "timestamp": datetime.now().isoformat(),
                "new_graph_id": created.get("id"),
                "graph": created,
                "message": f"Successfully added graph: {title}"
            }
    except httpx.HTTPStatusError as http_err:
        # Non-2xx response raised by raise_for_status().
        print('ERROR!!!!')
        print(http_err)
        return failure(f"HTTP {http_err.response.status_code} error: {str(http_err)}")
    except Exception as err:
        # Anything else: connection failure, timeout, invalid JSON, ...
        print(err)
        return failure(f"Error adding graph: {str(err)}")

@tool
async def update_graph(graph_id: str, type: str, sql_query: str, justification: str, title: Optional[str] = None, extra: Optional[Dict] = None) -> dict:
    """
    Update an existing graph's configuration with statistical justification.
    
    Args:
        graph_id (str): ID of graph to update
        type (str): New graph type (bar, line, pie, area, scatter, scatter3d)
        sql_query (str): New SQL query for the graph
        justification (str): Statistical justification including numerical test results
        title (Optional[str]): New title for the graph
        extra (Optional[Dict]): Additional parameters like axis labels
    
    Returns:
        dict: Update result with changes made
    """
    # NOTE(review): the docstring above is left verbatim; presumably the @tool
    # decorator exposes it to the model as the tool description - confirm.

    def failure(message: str) -> dict:
        # Shared shape of every error response.
        return {
            "status": "error",
            "timestamp": datetime.now().isoformat(),
            "updated_graph_id": None,
            "message": message
        }

    try:
        # Mandatory fields first; title/extra are only sent when provided.
        changes = {
            "type": type,
            "sql_query": sql_query,
            "justification": justification
        }
        for key, value in (("title", title), ("extra", extra)):
            if value:
                changes[key] = value

        async with httpx.AsyncClient() as client:
            response = await client.put(
                f"{BASE_URL}/api/graphs/{graph_id}",
                json=changes,
                timeout=30.0
            )
            response.raise_for_status()
            refreshed = response.json()

            return {
                "status": "success",
                "timestamp": datetime.now().isoformat(),
                "updated_graph_id": graph_id,
                "graph": refreshed,
                "changes": changes,
                "message": f"Successfully updated graph {graph_id}"
            }
    except httpx.HTTPStatusError as http_err:
        # 404 gets a dedicated message so the caller knows the ID was unknown.
        status_code = http_err.response.status_code
        if status_code == 404:
            return failure(f"Graph {graph_id} not found")
        return failure(f"HTTP {status_code} error: {str(http_err)}")
    except Exception as err:
        # Anything else: connection failure, timeout, invalid JSON, ...
        return failure(f"Error updating graph: {str(err)}")

# ============= MODEL & AGENT CONFIGURATION =============

def create_agent(api_key: str, max_graphs: int = 4):
    """
    Create the financial analysis agent with API tools.
    
    Args:
        api_key (str): Anthropic API key
        max_graphs (int): Maximum number of graphs the agent is told to keep
            on the dashboard (default: 4)
    
    Returns:
        Agent: Configured agent instance
    """
    
    model = AnthropicModel(
        client_args={
            "api_key": api_key,
        },
        max_tokens=4096,
        model_id="claude-sonnet-4-20250514",  # Updated to use available model
        params={
            "temperature": 0.3,
        },
    )
    
    # Only include the API-calling tools
    agent = Agent(
        model=model, 
        tools=[
            get_recent_financial_data,
            get_current_graphs,
            remove_graph,
            add_new_graph,
            update_graph
        ],
        # The graph limit reaches the model only through the system prompt.
        system_prompt=get_enhanced_financial_prompt(max_graphs),
    )
    
    return agent

# ============= ENHANCED PROMPT WITH STATISTICAL ANALYSIS =============

def get_enhanced_financial_prompt(max_graphs: int) -> str:
    """
    Returns the comprehensive financial analysis prompt with embedded statistical analysis logic.
    
    Args:
        max_graphs (int): Maximum number of graphs to maintain; stated to the
            model as the dashboard capacity in Step 4b of the prompt
        
    Returns:
        str: Detailed financial analysis prompt with statistical requirements
    """
    return f"""
You are a sophisticated Financial Data Analysis Agent with advanced statistical capabilities. Execute comprehensive graph management with rigorous statistical justification for every decision.

## STATISTICAL ANALYSIS REQUIREMENTS

### When analyzing financial data, you MUST perform these statistical tests:

1. **Descriptive Statistics**: Calculate mean, median, std dev, quartiles, skewness, kurtosis
2. **Outlier Detection**: Use Z-score analysis (threshold=3.0) to identify anomalous transactions
3. **Trend Analysis**: Perform linear regression for time series (R², p-values, slope significance)
4. **Distribution Analysis**: Calculate entropy and distribution evenness for categories
5. **Correlation Testing**: Pearson and Spearman correlation with significance tests
6. **Variance Analysis**: Coefficient of variation to measure relative variability

### Justification Format Requirements:

For every `add_new_graph()` and `update_graph()` call, the `justification` parameter MUST include:

```
Statistical Analysis Results:
- Sample size: [N] observations
- Key metric statistics: mean=[X], std=[Y], CV=[Z]%
- Outlier detection: [N] outliers ([X]%) detected using Z-score>3.0
- Trend analysis: R²=[X], p-value=[Y], slope=[Z] ([significant/not significant])
- Distribution: entropy=[X], evenness=[even/moderate/skewed]
- Correlation: r=[X], p=[Y] ([strong/moderate/weak] and [significant/not significant])
- Business impact: [High/Medium/Low] - [specific reason]
```

## GRAPH TYPES & SQL FORMAT REQUIREMENTS

### Table
Always use the table 'events'. The SQL query should always use FROM events.

### Queries
For accessing properties in events, use the format properties->>'property_name' for text and (properties->>'property_name')::NUMERIC for numeric values.

### Available Graph Types:
1. **bar** - Category vs Value comparison
   - SQL Format: (category TEXT, value NUMERIC)
   - Use for: Department expenses, regional comparisons, categorical metrics
   - Extra params: y_axis_label

2. **line** - Time series trends  
   - SQL Format: (time INT8, value NUMERIC)
   - Use for: Revenue over time, customer growth, trend analysis
   - Extra params: x_axis_label, y_axis_label

3. **pie** - Composition/Share of whole
   - SQL Format: (slice TEXT, value NUMERIC)  
   - Use for: Revenue breakdown, market share, portfolio composition

4. **area** - Time series with filled area
   - SQL Format: (time INT8, value NUMERIC)
   - Use for: Cumulative trends, volume over time
   - Extra params: x_axis_label, y_axis_label

5. **scatter** - Correlation/Distribution analysis
   - SQL Format: (x_value NUMERIC, y_value NUMERIC, size NUMERIC NULL, label TEXT NULL)
   - Use for: Risk vs return, cost vs volume correlations
   - Extra params: x_axis_label, y_axis_label
   
## ENHANCED WORKFLOW EXECUTION WITH STATISTICAL ANALYSIS

### 1. CURRENT GRAPH ANALYSIS & STATISTICAL VALIDATION

**Step 1a: Get Current State**
- Use `get_current_graphs()` to retrieve all existing graphs
- Analyze each graph's statistical validity and current performance

**Step 1b: Statistical Re-evaluation of Existing Graphs**
For each existing graph, mentally perform:
- Freshness analysis: Is the underlying pattern still statistically significant?
- Efficiency analysis: Can the SQL query capture stronger statistical relationships?
- Type optimization: Does the current graph type best represent the statistical pattern?

If statistical improvements are identified, use `update_graph()` with full statistical justification.

### 2. COMPREHENSIVE STATISTICAL DATA ANALYSIS

**Step 2a: Data Acquisition & Preparation**
- Use `get_recent_financial_data(limit=16)` to fetch latest financial records
- Prepare data for statistical analysis

**Step 2b: Multi-Dimensional Statistical Discovery**
Systematically analyze for these patterns:

**ANOMALY DETECTION:**
- Z-score analysis for transaction amounts (identify values >3σ from mean)
- Frequency anomalies in categorical data
- Time-based clustering analysis

**TREND & CORRELATION ANALYSIS:**
- Time series regression analysis (R², significance tests)
- Cross-variable correlation matrices
- Leading indicator identification

**DISTRIBUTION PATTERNS:**
- Categorical distribution entropy calculations
- Variance decomposition analysis
- Seasonal/cyclical pattern detection

### 3. STATISTICAL-BASED GRAPH PRIORITIZATION

**Step 3a: Statistical Significance Ranking**
Rank discovered patterns by:
- **Statistical Significance**: p-values < 0.05 get priority
- **Effect Size**: R² > 0.5 for correlations, Cohen's d > 0.8 for differences
- **Business Impact**: Revenue/risk implications
- **Novelty Score**: Unique insights not currently displayed

**Step 3b: Rigorous Graph Configuration**
For each high-priority pattern:
- Select graph type based on statistical characteristics
- Design SQL queries optimized for statistical clarity
- Calculate comprehensive justification metrics
- Ensure statistical assumptions are met

### 4. STRATEGIC PORTFOLIO MANAGEMENT WITH STATISTICS

**Step 4a: Portfolio Statistical Assessment**
- Evaluate current graphs' statistical strength and relevance
- Identify redundant or statistically weak visualizations
- Assess coverage across fraud detection, performance metrics, and market analysis

**Step 4b: Evidence-Based Graph Management**
- The dashboard may hold at most {max_graphs} graphs at any time; never exceed this limit
- Remove graphs with weakest statistical foundation if at capacity
- Add new graphs with strongest statistical justification
- Always include complete statistical justification in API calls

### 5. DETAILED STATISTICAL REPORTING

Document your complete analysis including:
- **Statistical Summary**: All test results and significance levels
- **Decision Matrix**: How statistical evidence influenced graph choices
- **Methodology**: Which statistical tests were applied and why
- **Confidence Levels**: Uncertainty quantification for each insight
- **Actionable Recommendations**: Business decisions supported by statistical evidence

## CRITICAL STATISTICAL REQUIREMENTS

1. **Mandatory Justification**: Every `add_new_graph()` and `update_graph()` call MUST include detailed statistical justification with numerical results
2. **Significance Testing**: Only create graphs for statistically significant patterns (p < 0.05 preferred)
3. **Effect Size**: Prioritize patterns with meaningful effect sizes (R² > 0.3, Cohen's d > 0.5)
4. **Sample Size Validation**: Acknowledge limitations when N < 30
5. **Multiple Testing**: Consider Bonferroni correction when testing multiple hypotheses
6. **Statistical Assumptions**: Verify normality, independence, and homoscedasticity as appropriate

Execute this statistically-rigorous workflow to maintain an evidence-based financial intelligence dashboard with maximum analytical value.
"""

# ============= MAIN EXECUTION FUNCTIONS =============

def run_graph_management_agent(
    api_key: str,
    interval_seconds: int = 20, 
    max_iterations: Optional[int] = 1, 
    max_graphs: int = 4
):
    """
    Runs the enhanced financial graph management agent continuously.
    
    Args:
        api_key (str): Anthropic API key
        interval_seconds (int): How often to run the agent (default: 20 seconds)
        max_iterations (Optional[int]): Maximum number of iterations (default: 1, None for infinite)
        max_graphs (int): Maximum number of graphs to maintain (default: 4)
    """
    print("🚀 Starting Enhanced Statistical Financial Analysis Agent...")
    print(f"📊 Update interval: {interval_seconds} seconds")
    print(f"📈 Max graphs: {max_graphs}")
    print(f"🔄 Max iterations: {'Unlimited' if max_iterations is None else max_iterations}")
    print(f"🌐 API Base URL: {BASE_URL}")
    print("📊 Statistical Analysis: Advanced statistical tests enabled")
    print(f"⏰ Start time: {datetime.now()}")
    print("=" * 80)
    
    # Create the agent; max_graphs is forwarded so the limit announced above
    # is the one the agent is actually instructed to respect.
    agent = create_agent(api_key, max_graphs=max_graphs)
    
    iteration = 0
    
    try:
        while max_iterations is None or iteration < max_iterations:
            iteration += 1
            
            print(f"\n{'='*15} 📊 STATISTICAL ANALYSIS CYCLE {iteration} {'='*15}")
            print(f"🕐 Starting comprehensive statistical analysis at {datetime.now()}")
            
            try:
                # The agent acts through its tools; its return value is not used here.
                agent("Start the statistical workflow with comprehensive analysis and justification for every graph decision")
                print(f"✅ [CYCLE {iteration}] Statistical financial analysis completed successfully")
                
            except Exception as e:
                # One failed cycle must not stop the whole loop.
                print(f"❌ [CYCLE {iteration}] Agent error: {e}")
            
            # Sleep only when another cycle will follow.
            if max_iterations is None or iteration < max_iterations:
                print(f"⏳ [CYCLE {iteration}] Waiting {interval_seconds} seconds until next analysis...")
                print("-" * 80)
                time.sleep(interval_seconds)
    
    except KeyboardInterrupt:
        print(f"\n\n🛑 Received interrupt signal. Stopping after {iteration} iterations.")
        print(f"🏁 End time: {datetime.now()}")
    except Exception as e:
        print(f"\n\n💥 Unexpected error in analysis loop: {e}")
        print(f"🏁 Stopped after {iteration} iterations at {datetime.now()}")

# ============= MAIN ENTRY POINT =============

if __name__ == "__main__":
    # getpass keeps the secret from being echoed to the terminal or stored in
    # notebook output, unlike input().
    from getpass import getpass

    # Get API key from environment or prompt user
    api_key = os.getenv("ANTHROPIC_API_KEY", "")
    
    if not api_key:
        print("⚠️  No ANTHROPIC_API_KEY found in environment variables.")
        api_key = getpass("Please enter your Anthropic API key: ").strip()
    
    if not api_key:
        print("❌ API key is required to run the agent.")
        # SystemExit instead of exit(): exit() is a helper injected by the
        # `site` module and is not guaranteed to exist (e.g. `python -S`).
        raise SystemExit(1)
    
    iterations = 10
    print(f"🎯 Running Enhanced Statistical Financial Analysis Agent for {iterations} iterations...")
    run_graph_management_agent(
        api_key=api_key,
        interval_seconds=20, 
        max_iterations=iterations, 
        max_graphs=4
    )