# Trends.Earth API - System Monitoring & Rate Limiting

This notebook focuses on testing system monitoring, rate limiting, and administrative features of the Trends.Earth API.

## Table of Contents
1. [Setup and Configuration](#setup)
2. [System Status Monitoring](#system-status)
3. [Docker Swarm Monitoring](#swarm-monitoring)
4. [Rate Limiting Tests](#rate-limiting)
5. [Performance Testing](#performance)

## Setup and Configuration {#setup}

In [None]:
# Import the shared utilities
import time

from IPython.display import HTML, display
import matplotlib.pyplot as plt
import pandas as pd
from trends_earth_api_utils import (
    TEST_USERS,
    TrendsEarthAPIClient,
    display_system_overview,
    get_rate_limit_status,
    get_swarm_status,
    get_system_status,
    test_rate_limiting,
)

# Configuration: base URL of the API instance the notebook talks to
API_URL = "http://localhost:5000"  # Update this for your environment

print(f"🌍 Trends.Earth API URL: {API_URL}")

# Build the shared client and authenticate with the admin test account;
# the monitoring cells below reuse this `client` object.
client = TrendsEarthAPIClient(API_URL)
admin_user = TEST_USERS["admin"]
login_result = client.login(admin_user["email"], admin_user["password"])

# Report the outcome; a falsy login result is reported but does not stop the notebook
if not login_result:
    for failure_line in (
        "❌ Admin login failed - some tests will be skipped",
        "   System monitoring typically requires admin privileges",
    ):
        print(failure_line)
else:
    print(f"✅ Logged in as admin: {admin_user['email']}")

## System Status Monitoring {#system-status}

In [None]:
# Ask the shared helper to render the system overview for the logged-in
# client, then print a 50-character rule to close off the section.
print("🖥️  Getting system overview...")
display_system_overview(client)

print(f"\n{'=' * 50}")

In [None]:
# Fetch up to 20 status log entries (requested with sort="-timestamp"),
# tabulate the first 10, and compare the first two entries as a trend.
print("📊 Getting system status history...")

status_logs = get_system_status(client, per_page=20, sort="-timestamp")

if not status_logs:
    print("❌ Could not retrieve system status logs")
else:
    print(f"✅ Retrieved {len(status_logs)} status log entries")

    # One table row per entry; timestamps are cut to 19 characters
    # (presumably "YYYY-MM-DDTHH:MM:SS" — confirm against the API format)
    table_rows = []
    for entry in status_logs[:10]:
        raw_timestamp = entry.get("timestamp")
        table_rows.append(
            {
                "Timestamp": raw_timestamp[:19] if raw_timestamp else "N/A",
                "Active Executions": entry.get("executions_active", 0),
                "Running Executions": entry.get("executions_running", 0),
                "Ready Executions": entry.get("executions_ready", 0),
                "Total Users": entry.get("users_count", 0),
                "Total Scripts": entry.get("scripts_count", 0),
            }
        )
    status_df = pd.DataFrame(table_rows)

    display(HTML(status_df.to_html(index=False)))

    # Trend = difference between entry 0 (treated as the latest) and entry 1
    if len(status_logs) > 1:
        newest, prior = status_logs[0], status_logs[1]

        print("\n📈 Recent Trends:")

        tracked_metrics = (
            ("Active Executions", "executions_active"),
            ("Running Executions", "executions_running"),
            ("Total Users", "users_count"),
        )
        # Compute every delta before printing any of them
        deltas = [
            newest.get(key, 0) - prior.get(key, 0) for _, key in tracked_metrics
        ]

        for (label, key), delta in zip(tracked_metrics, deltas):
            # Non-negative deltas get an explicit "+"; negatives carry their own "-"
            sign = "+" if delta >= 0 else ""
            print(f"   {label}: {newest.get(key, 0)} ({sign}{delta})")

In [None]:
# Visualize system metrics over time.
# Relies on `status_logs` from the status-history cell (requested there with
# sort="-timestamp", so index 0 is presumably the newest entry).
# The threshold is ">= 5" so that it agrees with the "need at least 5"
# message printed in the else branch.
if status_logs and len(status_logs) >= 5:
    print("📊 Creating system metrics visualization...")

    try:
        # First 10 entries, reversed so the plot runs oldest -> newest;
        # entries without a timestamp are skipped entirely.
        plot_logs = [
            log for log in reversed(status_logs[:10]) if log.get("timestamp")
        ]
        timestamps = [log["timestamp"][:16] for log in plot_logs]  # YYYY-MM-DD HH:MM

        if timestamps:
            # Create the plot
            fig, ax = plt.subplots(figsize=(12, 6))
            x_positions = range(len(timestamps))

            # (log key, matplotlib format string, legend label) per plotted line
            series = (
                ("executions_active", "b-o", "Active Executions"),
                ("executions_running", "g-s", "Running Executions"),
                ("executions_ready", "r-^", "Ready Executions"),
            )
            for key, line_format, label in series:
                ax.plot(
                    x_positions,
                    [log.get(key, 0) for log in plot_logs],
                    line_format,
                    label=label,
                    markersize=4,
                )

            ax.set_xlabel("Time")
            ax.set_ylabel("Number of Executions")
            ax.set_title("System Execution Metrics Over Time")
            ax.legend()
            ax.grid(True, alpha=0.3)

            # Thin out the x-axis labels: step is len // 5 (minimum 1), so
            # with up to 10 points at most every second label is drawn.
            step = max(1, len(timestamps) // 5)
            tick_positions = list(range(0, len(timestamps), step))
            ax.set_xticks(tick_positions)
            ax.set_xticklabels([timestamps[i] for i in tick_positions], rotation=45)

            plt.tight_layout()
            plt.show()

            print("✅ System metrics visualization created")
        else:
            print("⚠️  No valid timestamps for visualization")

    except Exception as e:
        # Best-effort cell: a plotting failure is reported, not raised,
        # so the rest of the notebook can still run.
        print(f"⚠️  Could not create visualization: {e}")
else:
    print("⚠️  Insufficient data for visualization (need at least 5 status logs)")

## Docker Swarm Monitoring {#swarm-monitoring}

In [None]:
# Get Docker Swarm status, show it as a table, and print a simple health summary
print("🐳 Getting Docker Swarm status...")

swarm_info = get_swarm_status(client)

if swarm_info:
    print("✅ Docker Swarm information retrieved:")

    # Display swarm overview: one row per key, with None rendered as "N/A"
    swarm_df = pd.DataFrame(
        [
            {
                "Metric": k.replace("_", " ").title(),
                "Value": str(v) if v is not None else "N/A",
            }
            for k, v in swarm_info.items()
        ]
    )

    display(HTML(swarm_df.to_html(index=False)))

    # Analyze swarm health
    print("\n🏥 Swarm Health Analysis:")

    is_active = swarm_info.get("swarm_active", False)
    # The table above already allows for None values, so coalesce the counts
    # to 0 here; comparing None with an int would raise TypeError.
    total_nodes = swarm_info.get("total_nodes") or 0
    managers = swarm_info.get("total_managers") or 0
    workers = swarm_info.get("total_workers") or 0

    if is_active:
        print("   ✅ Swarm is active")

        if managers >= 1:
            print(f"   ✅ Sufficient managers ({managers})")
        else:
            print(f"   ⚠️  Low manager count ({managers})")

        if total_nodes > 0:
            print(f"   ✅ Nodes available ({total_nodes} total, {workers} workers)")
        else:
            print("   ⚠️  No nodes detected")

        # Optional metrics: "services" may be a list (counted) or an int (used as-is)
        if "services" in swarm_info:
            services = swarm_info["services"]
            if isinstance(services, list):
                print(f"   📋 Services running: {len(services)}")
            elif isinstance(services, int):
                print(f"   📋 Services running: {services}")

        # "tasks" may be a list (counted) or a dict carrying a "total" key
        if "tasks" in swarm_info:
            tasks = swarm_info["tasks"]
            if isinstance(tasks, (list, dict)):
                task_count = (
                    len(tasks) if isinstance(tasks, list) else tasks.get("total", 0)
                )
                print(f"   🎯 Active tasks: {task_count}")
    else:
        print("   ❌ Swarm is not active")
else:
    print("❌ Could not retrieve Docker Swarm status")
    print("   This may be normal if not running in swarm mode")

## Rate Limiting Tests {#rate-limiting}

In [None]:
# Query the rate limiter's current status and show it: as a two-column
# table when the helper returns a dict, otherwise as a single printed line.
print("🚦 Getting rate limiting status...")

rate_status = get_rate_limit_status(client)

if not rate_status:
    print("❌ Could not retrieve rate limiting status")
    print("   This feature may require superadmin privileges")
else:
    print("✅ Rate limiting status retrieved:")

    if not isinstance(rate_status, dict):
        print(f"   Status: {rate_status}")
    else:
        # Humanize each key ("some_key" -> "Some Key"); None becomes "N/A"
        metric_rows = []
        for metric, value in rate_status.items():
            metric_rows.append(
                {
                    "Metric": metric.replace("_", " ").title(),
                    "Value": "N/A" if value is None else str(value),
                }
            )
        rate_df = pd.DataFrame(metric_rows)

        display(HTML(rate_df.to_html(index=False)))

In [None]:
# Test rate limiting with different request patterns
print("🧪 Testing rate limiting patterns...")

# Test 1: Burst requests (10 back-to-back calls to /user/me, no delay)
print("\n1️⃣  Burst Request Test (10 requests, no delay):")
# Normalize a falsy return (e.g. None or []) to an empty list so the
# statistics below, and any later len() calls, never fail.
burst_results = (
    test_rate_limiting(client, endpoint="/user/me", requests_count=10, delay=0) or []
)

# Analyze burst results
burst_rate_limited = sum(1 for r in burst_results if r.get("rate_limited", False))
# Guard the average: dividing by len() of an empty result list would raise
# ZeroDivisionError.
# NOTE(review): output labels this value "ms" — confirm the helper reports milliseconds.
burst_avg_time = (
    sum(r.get("response_time", 0) for r in burst_results) / len(burst_results)
    if burst_results
    else 0.0
)

print(f"   Rate limited requests: {burst_rate_limited}/{len(burst_results)}")
print(f"   Average response time: {burst_avg_time:.2f}ms")

In [None]:
# Test 2: Sustained requests (15 calls to /user/me spaced 0.5s apart)
print("\n2️⃣  Sustained Request Test (15 requests, 0.5s delay):")
# Normalize a falsy return (e.g. None or []) to an empty list so the
# statistics below, and any later len() calls, never fail.
sustained_results = (
    test_rate_limiting(client, endpoint="/user/me", requests_count=15, delay=0.5)
    or []
)

# Analyze sustained results
sustained_rate_limited = sum(
    1 for r in sustained_results if r.get("rate_limited", False)
)
# Guard the average: dividing by len() of an empty result list would raise
# ZeroDivisionError.
# NOTE(review): output labels this value "ms" — confirm the helper reports milliseconds.
sustained_avg_time = (
    sum(r.get("response_time", 0) for r in sustained_results)
    / len(sustained_results)
    if sustained_results
    else 0.0
)

print(f"   Rate limited requests: {sustained_rate_limited}/{len(sustained_results)}")
print(f"   Average response time: {sustained_avg_time:.2f}ms")

In [None]:
# Test 3: Different endpoints (8 requests each, 0.2s apart)
print("\n3️⃣  Multi-endpoint Test:")

# (API path, human-readable name) pairs to probe
test_endpoints = [
    ("/user/me", "User Profile"),
    ("/script", "Script List"),
    ("/execution/user", "User Executions"),
]

# Per-endpoint statistics keyed by display name; read by the summary cell
endpoint_results = {}

for endpoint, name in test_endpoints:
    print(f"\n   Testing {name} ({endpoint}):")
    # Normalize a falsy return to an empty list so the statistics never fail
    results = (
        test_rate_limiting(client, endpoint=endpoint, requests_count=8, delay=0.2)
        or []
    )

    total = len(results)
    rate_limited = sum(1 for r in results if r.get("rate_limited", False))
    # Both ratios divide by the result count; fall back to 0 when the helper
    # returned nothing, instead of raising ZeroDivisionError.
    avg_time = sum(r.get("response_time", 0) for r in results) / total if total else 0.0
    rate_limited_pct = rate_limited / total * 100 if total else 0.0

    endpoint_results[name] = {
        "total": total,
        "rate_limited": rate_limited,
        "avg_response_time": avg_time,
    }

    print(f"     Rate limited: {rate_limited}/{total} ({rate_limited_pct:.1f}%)")
    print(f"     Avg response: {avg_time:.2f}ms")

In [None]:
# Create rate limiting summary from the burst, sustained and per-endpoint cells
print("\n📊 Rate Limiting Test Summary:")
print("=" * 40)


def _format_percent(part, whole):
    """Return part/whole as a percentage string like "12.5%" ("0.0%" when whole is 0)."""
    ratio = part / whole * 100 if whole else 0.0
    return f"{ratio:.1f}%"


# Summary table: one row per request pattern
summary_data = [
    {
        "Test Type": "Burst (no delay)",
        "Requests": len(burst_results),
        "Rate Limited": burst_rate_limited,
        "Rate Limited %": _format_percent(burst_rate_limited, len(burst_results)),
        "Avg Response (ms)": f"{burst_avg_time:.2f}",
    },
    {
        "Test Type": "Sustained (0.5s delay)",
        "Requests": len(sustained_results),
        "Rate Limited": sustained_rate_limited,
        "Rate Limited %": _format_percent(
            sustained_rate_limited, len(sustained_results)
        ),
        "Avg Response (ms)": f"{sustained_avg_time:.2f}",
    },
]

summary_df = pd.DataFrame(summary_data)
display(HTML(summary_df.to_html(index=False)))

# Endpoint comparison: one row per entry recorded by the multi-endpoint cell
if endpoint_results:
    print("\n🎯 Endpoint Comparison:")
    endpoint_df = pd.DataFrame(
        [
            {
                "Endpoint": name,
                "Total Requests": data["total"],
                "Rate Limited": data["rate_limited"],
                "Rate Limited %": _format_percent(data["rate_limited"], data["total"]),
                "Avg Response (ms)": f"{data['avg_response_time']:.2f}",
            }
            for name, data in endpoint_results.items()
        ]
    )

    display(HTML(endpoint_df.to_html(index=False)))

## Performance Testing {#performance}

In [None]:
# Test concurrent requests with multiple clients
print("🚀 Testing concurrent request performance...")


def make_concurrent_requests(client_id, num_requests=5):
    """Log in as the regular test user and time sequential GET /user/me calls.

    Each call builds its own client, so several calls can run side by side
    (one per worker thread) without sharing a session.

    Args:
        client_id: Identifier echoed back in the result so the caller can
            tell the workers apart.
        num_requests: Number of GET /user/me requests to issue.

    Returns:
        ``{"client_id": ..., "error": "Login failed"}`` when login returns a
        falsy value; otherwise ``{"client_id": ..., "results": [...]}`` where
        each entry has ``request`` (1-based index) and ``success``, plus
        ``status_code`` and ``response_time`` (milliseconds) for requests
        that returned a response, or ``error`` for requests that raised.
    """
    test_client = TrendsEarthAPIClient(API_URL)
    regular_user = TEST_USERS["regular"]

    # Login
    login_success = test_client.login(regular_user["email"], regular_user["password"])
    if not login_success:
        return {"client_id": client_id, "error": "Login failed"}

    results = []
    try:
        for i in range(num_requests):
            try:
                # perf_counter is monotonic, so the measured interval cannot
                # be distorted by wall-clock adjustments the way time.time() can.
                start_time = time.perf_counter()
                response = test_client.make_request("GET", "/user/me")
                elapsed_ms = (time.perf_counter() - start_time) * 1000

                results.append(
                    {
                        "request": i + 1,
                        "status_code": response.status_code,
                        "response_time": elapsed_ms,
                        "success": response.status_code < 400,
                    }
                )
            except Exception as e:
                # Record the failure and keep going so one bad request does
                # not abort the whole run for this client.
                results.append({"request": i + 1, "error": str(e), "success": False})

            time.sleep(0.1)  # Small delay between requests
    finally:
        # Logout even if the loop is interrupted, so the session is released
        test_client.logout()

    return {"client_id": client_id, "results": results}


# Run concurrent tests with 3 clients
print("📊 Starting concurrent test with 3 clients (5 requests each)...")

import concurrent.futures

# One worker thread per simulated client; results are collected in
# completion order, so client ids may appear out of order below.
concurrent_results = []
with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
    futures = [executor.submit(make_concurrent_requests, i, 5) for i in range(3)]
    concurrent_results = [
        future.result() for future in concurrent.futures.as_completed(futures)
    ]

# Analyze concurrent results
print("\n📈 Concurrent Request Analysis:")

total_requests = 0
total_successful = 0
response_times = []

for client_result in concurrent_results:
    client_id = client_result["client_id"]

    # Workers that could not log in report an error instead of results
    if "error" in client_result:
        print(f"   Client {client_id}: {client_result['error']}")
        continue

    results = client_result["results"]
    successful = sum(1 for r in results if r.get("success", False))

    # Average only over requests that actually produced a timing. Failed
    # requests carry no "response_time", so dividing by len(results) would
    # understate the average (and divide by zero on an empty list).
    timed = [r["response_time"] for r in results if "response_time" in r]
    avg_time = sum(timed) / len(timed) if timed else 0.0

    print(
        f"   Client {client_id}: {successful}/{len(results)} successful, avg {avg_time:.2f}ms"
    )

    total_requests += len(results)
    total_successful += successful
    response_times.extend(timed)

# response_times is non-empty only if at least one request was counted,
# so total_requests is > 0 inside this branch.
if response_times:
    avg_response_time = sum(response_times) / len(response_times)
    min_response_time = min(response_times)
    max_response_time = max(response_times)

    print("\n🎯 Overall Performance:")
    print(f"   Total Requests: {total_requests}")
    print(
        f"   Successful: {total_successful} ({(total_successful / total_requests * 100):.1f}%)"
    )
    print(f"   Average Response Time: {avg_response_time:.2f}ms")
    print(f"   Min Response Time: {min_response_time:.2f}ms")
    print(f"   Max Response Time: {max_response_time:.2f}ms")
else:
    print("❌ No valid response times collected")

In [None]:
# Test system under different load patterns
print("\n⚡ Testing system responsiveness under load...")

# Baseline test - single request
print("\n📊 Baseline Performance:")
# perf_counter is monotonic, which makes it the right clock for intervals
baseline_start = time.perf_counter()
try:
    response = client.make_request("GET", "/user/me")
    baseline_time = (time.perf_counter() - baseline_start) * 1000
    print(f"   Single request: {baseline_time:.2f}ms (status: {response.status_code})")
except Exception as e:
    print(f"   Baseline test failed: {e}")
    baseline_time = 0  # sentinel: no baseline available for the comparison below

# Load test - rapid sequential requests
print("\n🔄 Load Test (20 sequential requests):")
load_times = []  # per-request latency in ms; only requests that returned a response
load_start = time.perf_counter()

for i in range(20):
    try:
        req_start = time.perf_counter()
        response = client.make_request("GET", "/user/me")
        req_time = (time.perf_counter() - req_start) * 1000
        load_times.append(req_time)

        if response.status_code == 429:
            print(f"   Request {i + 1}: Rate limited after {req_time:.2f}ms")
        elif i % 5 == 0:  # Print every 5th request
            print(f"   Request {i + 1}: {req_time:.2f}ms")

    except Exception as e:
        print(f"   Request {i + 1}: Error - {e}")

    time.sleep(0.05)  # 50ms delay

# Wall time for the whole loop, including the 50ms sleeps between requests
total_load_time = (time.perf_counter() - load_start) * 1000

if load_times:
    avg_load_time = sum(load_times) / len(load_times)
    # Build the ratio text separately so the label is always printed; with
    # the conditional wrapped around the whole f-string, a failed baseline
    # printed a bare "N/A" line.
    if baseline_time > 0:
        baseline_ratio = f"{(avg_load_time / baseline_time):.2f}x"
    else:
        baseline_ratio = "N/A"

    print("\n📊 Load Test Results:")
    print(f"   Requests completed: {len(load_times)}/20")
    print(f"   Total time: {total_load_time:.2f}ms")
    print(f"   Average per request: {avg_load_time:.2f}ms")
    print(f"   Performance vs baseline: {baseline_ratio}")
    print(
        f"   Throughput: {len(load_times) / (total_load_time / 1000):.2f} requests/sec"
    )

In [None]:
# Closing cell: re-read the most recent status entry, print the run summary,
# and end the admin session.
print("\n🔍 Final system status check after testing...")

# Request a single entry (per_page=1, sort="-timestamp") and show its fields
final_status = get_system_status(client, per_page=1, sort="-timestamp")
if final_status:
    latest_status = final_status[0]
    print("📊 Current System State:")
    # (label, key in the status entry, fallback when the key is absent)
    for label, key, fallback in (
        ("Active Executions", "executions_active", 0),
        ("Running Executions", "executions_running", 0),
        ("Ready Executions", "executions_ready", 0),
        ("Timestamp", "timestamp", "N/A"),
    ):
        print(f"   {label}: {latest_status.get(key, fallback)}")

# Summary: printed unconditionally, whatever the earlier cells reported
print("\n📊 SYSTEM MONITORING & RATE LIMITING TEST SUMMARY")
print("=" * 65)
for summary_line in (
    "✅ System status monitoring tested",
    "✅ Docker Swarm monitoring tested",
    "✅ Rate limiting functionality tested",
    "✅ Performance under various loads tested",
    "✅ Concurrent request handling tested",
):
    print(summary_line)

# Logout
client.logout()
print("\n🎉 System Monitoring and Rate Limiting tests completed!")