# Autonomous Incident-to-Resolution Ops Agent Demo

This notebook demonstrates the end-to-end flow of the autonomous agent system.
It simulates an infrastructure incident, detects it, diagnoses the root cause, and executes a remediation action.

In [None]:
import sys
import os
# Add the parent directory (resolved relative to the current working directory) to sys.path so the project modules below can be imported
sys.path.append('..')

from simulation.grafana import MockGrafana
from simulation.infrastructure import MockInfrastructure
from simulation.ticketing import MockTicketing
from agents.monitor import MonitorAgent
from agents.diagnoser import DiagnoserAgent
from agents.remediator import RemediationAgent
from agents.auditor import AuditorAgent
from agents.safety import SafetyLayer

## 1. Initialize System Components
We set up the mock environment and the agents.

In [None]:
# Name of the service that the later cells monitor, break, and repair.
service_name = "payment-service"

# Mock backends from the simulation package (built left to right).
grafana, infra, ticketing = MockGrafana(), MockInfrastructure(), MockTicketing()

# Agents, each constructed with the backend it is given to work with.
monitor = MonitorAgent(grafana)
diagnoser = DiagnoserAgent(infra)
safety = SafetyLayer()  # passed to the remediation agent on the next line
remediator = RemediationAgent(infra, safety)
auditor = AuditorAgent(ticketing)

print("System Initialized.")

## 2. Check Normal State
Verify that the service is currently healthy.

In [None]:
# Baseline check before any anomaly is injected; a falsy result is reported as healthy.
issues = monitor.check_health(service_name)
print(
    f"{service_name} has issues: {issues}"
    if issues
    else f"{service_name} is Healthy."
)

## 3. Inject Anomaly
We simulate a memory leak by injecting a high `memory` metric value (95.0) and a `java.lang.OutOfMemoryError` entry into the service's error log.

In [None]:
print("Injecting Memory Leak...")

# Two signals are injected: a "memory" metric value and an OutOfMemoryError log line.
memory_reading = 95.0
oom_log_line = "java.lang.OutOfMemoryError: Java heap space"

grafana.inject_anomaly(service_name, "memory", memory_reading)
infra.inject_log_error(service_name, oom_log_line)

print("Anomaly Injected.")

## 4. Run Agent Loop
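The agents now run in sequence: the monitor detects the issue, the auditor opens a ticket and records each subsequent step, the diagnoser identifies the root cause and recommends an action, and the remediator attempts that action.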

In [None]:
# Step 1 - Monitor: check the service's health again now that the anomaly is in place.
issues = monitor.check_health(service_name)

if not issues:
    print("No issues detected.")
else:
    print(f"[Monitor] ALERT: Detected {issues}")

    # Step 2 - Auditor: open a ticket; `ticket_id` is read again by the verification cell.
    ticket_id = auditor.log_incident(service_name, issues)
    print(f"[Auditor] Created Ticket: {ticket_id}")

    # Step 3 - Diagnoser: obtain a diagnosis and record it on the ticket.
    diagnosis = diagnoser.diagnose(service_name, issues)
    auditor.log_diagnosis(ticket_id, diagnosis)
    print(f"[Diagnoser] Root Cause: {diagnosis['root_cause']}")
    action = diagnosis["recommended_action"]
    print(f"[Diagnoser] Recommended Action: {action}")

    # Step 4 - Remediator: attempt the recommended action and log the outcome.
    print(f"[Remediator] Attempting to execute: {action}")
    success = remediator.remediate(service_name, action)
    auditor.log_action(ticket_id, action, success)

    print(
        "[Remediator] Action executed successfully."
        if success
        else "[Remediator] Action failed."
    )

## 5. Verify Resolution
Check the ticket status and the service logs.

In [None]:
# `ticket_id` is only assigned by the agent-loop cell when the monitor reports
# issues. Look it up defensively so this cell does not raise NameError on a
# fresh kernel where no incident was logged.
current_ticket_id = globals().get("ticket_id")

if current_ticket_id is None:
    print("No ticket to verify: the agent loop did not log an incident.")
else:
    ticket = ticketing.get_ticket(current_ticket_id)
    print(f"Ticket Status: {ticket['status']}")
    print("Ticket History:")
    for comment in ticket['comments']:
        print(f" - {comment['text']}")

# The service logs are shown regardless of whether a ticket exists.
print("\nRecent Logs:")
print(infra.get_logs(service_name, lines=3))