In [None]:
# Print a short status message.
print("Setup complete.")

# Reliability in Practice - Demo with GPT-5-Mini

**Focus**: retries with jitter, timeouts, concurrency, budget caps using GPT-5-Mini via AskSage

This notebook demonstrates practical techniques for building reliable AI applications that can handle failures gracefully and operate within budget constraints using GPT-5-Mini through the AskSage platform.

## Learning Objectives
- Implement retry mechanisms with exponential backoff and jitter
- Add timeouts to prevent hanging operations
- Manage concurrent API calls safely
- Implement budget caps and cost monitoring
- Build resilient error handling patterns
- Use GPT-5-Mini via AskSage for AI operations

In [None]:
# Install required packages for Google Colab
!pip install asksageclient tenacity asyncio aiohttp requests matplotlib pandas

import asyncio
import concurrent.futures
import json
import logging
import os
import random
import threading
import time
from dataclasses import dataclass
from datetime import datetime, timedelta
from typing import List, Dict, Optional, Callable

import aiohttp
import matplotlib.pyplot as plt
import pandas as pd
from asksageclient import AskSageClient
from tenacity import retry, stop_after_attempt, wait_exponential, wait_random, retry_if_exception_type

# Set up logging: INFO level, timestamped records, one module-level logger
# shared by the cells below.
LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)
logger = logging.getLogger(__name__)

print("✅ All packages installed and modules imported successfully!")

In [None]:
# Initialize AskSage client with credentials.
# Credentials are read from environment variables:
#   ASKSAGE_API_KEY  - should be set in your environment
#   ASKSAGE_BASE_URL - falls back to 'https://api.asksage.ai' when unset
# NOTE(review): the keyword names passed to AskSageClient below should be
# confirmed against the installed asksageclient version.

try:
    ask_sage_client = AskSageClient(
        api_key=os.getenv('ASKSAGE_API_KEY'),
        base_url=os.getenv('ASKSAGE_BASE_URL', 'https://api.asksage.ai'),
    )
    print("✅ AskSage client initialized successfully!")

    # Test the connection and get available models
    models_response = ask_sage_client.get_models()
    if 'response' not in models_response:
        print("⚠️  Could not retrieve model list")
    else:
        available_models = models_response['response']
        print(f"📋 Available models: {len(available_models)} total")

        # Check if gpt-5-mini is available
        if 'gpt-5-mini' in available_models:
            print("✅ GPT-5-Mini is available!")
        else:
            # List at most the first 10 names, then summarise the remainder.
            print("⚠️  GPT-5-Mini not found. Available models:")
            for model in available_models[:10]:
                print(f"  - {model}")
            if len(available_models) > 10:
                print(f"  ... and {len(available_models) - 10} more")

except Exception as e:
    # Any failure (construction or the model query) is reported, not raised,
    # so the rest of the notebook can still be read through.
    print(f"❌ Error initializing AskSage client: {e}")
    print("Please ensure your ASKSAGE_API_KEY and ASKSAGE_BASE_URL are set correctly")

## 1. Retry Mechanisms with Jitter

Retries are essential for handling transient failures, but naive retry patterns can create thundering herd problems. Adding jitter helps distribute retry attempts.

In [None]:
# Simulate an unreliable AI API using GPT-5-Mini
class UnreliableAIAPI:
    """Flaky front end for the AskSage client, used to exercise reliability patterns.

    Every request first sleeps for a random 0.1-0.5 seconds and then, with
    probability ``1 - success_rate``, raises one of ``FAILURE_TYPES``.
    Otherwise it queries GPT-5-Mini through the module-level
    ``ask_sage_client`` (or builds a simulated answer when ``use_gpt5_mini``
    is False).

    The instance is shared by thread pools elsewhere in this notebook, so the
    call counter is guarded by a lock and each call remembers its own number.
    """

    # (exception class, message) pairs the simulated failure path picks from.
    FAILURE_TYPES = [
        (ConnectionError, "Network connection failed"),
        (TimeoutError, "Request timed out"),
        (ValueError, "Invalid request format"),
        (Exception, "Rate limit exceeded")
    ]

    # System prompt per task type; unknown task types fall back to "general".
    SYSTEM_PROMPTS = {
        "analysis": "You are an expert analyst. Provide concise, factual analysis.",
        "creative": "You are a creative assistant. Be imaginative and engaging.",
        "technical": "You are a technical expert. Provide accurate, detailed explanations.",
        "general": "You are a helpful AI assistant. Be clear and informative."
    }

    def __init__(self, success_rate=0.7, use_gpt5_mini=True):
        """
        Args:
            success_rate: Probability (0..1) that a request skips the simulated failure.
            use_gpt5_mini: When False, no AskSage call is made and a simulated
                response string is returned instead.
        """
        self.success_rate = success_rate
        self.call_count = 0
        self.use_gpt5_mini = use_gpt5_mini
        self._count_lock = threading.Lock()

    def make_ai_request(self, prompt: str, task_type: str = "general"):
        """Perform one (possibly failing) AI request.

        Args:
            prompt: User message to send.
            task_type: Key into ``SYSTEM_PROMPTS``.

        Returns:
            dict with keys ``prompt``, ``result``, ``task_type``, ``timestamp``
            (ISO-8601 string) and ``call_number`` (1-based number of this call).

        Raises:
            ConnectionError, TimeoutError, ValueError, Exception: simulated
                failures. Errors raised by the AskSage call itself are logged
                and folded into ``result`` instead of being raised.
        """
        # Take this call's number while holding the lock; reading
        # self.call_count again at the end would report whatever other
        # threads had incremented it to in the meantime.
        with self._count_lock:
            self.call_count += 1
            call_number = self.call_count

        # Simulate network delay
        time.sleep(random.uniform(0.1, 0.5))

        # Random failure based on success rate
        if random.random() > self.success_rate:
            error_type, error_msg = random.choice(self.FAILURE_TYPES)
            raise error_type(error_msg)

        # Make actual AI request using GPT-5-Mini
        try:
            if self.use_gpt5_mini:
                system_prompt = self.SYSTEM_PROMPTS.get(task_type, self.SYSTEM_PROMPTS["general"])

                # Use AskSage client to query GPT-5-Mini
                response = ask_sage_client.query(
                    message=prompt,
                    model="gpt-5-mini",  # Specify GPT-5-Mini
                    system_prompt=system_prompt,
                    max_tokens=150  # Keep responses concise for demo
                )

                if 'response' in response:
                    ai_result = response['response']
                else:
                    ai_result = "AI response received but format unexpected"
            else:
                # Fallback simulated response
                ai_result = f"Simulated AI response for: {prompt[:50]}..."

        except Exception as ai_error:
            # Deliberate best effort: the demo keeps running even when the
            # real backend is unreachable or misconfigured.
            logger.warning(f"AI API error: {ai_error}")
            ai_result = f"AI processing failed: {str(ai_error)[:100]}..."

        return {
            "prompt": prompt,
            "result": ai_result,
            "task_type": task_type,
            "timestamp": datetime.now().isoformat(),
            "call_number": call_number
        }

# Initialize our unreliable AI API
ai_api = UnreliableAIAPI(success_rate=0.6)  # 60% success rate

print("Unreliable AI API initialized with 60% success rate using GPT-5-Mini")

In [None]:
# Retry with exponential backoff and jitter for AI requests
@retry(
    stop=stop_after_attempt(5),
    # Exponential backoff (clamped to 1-10s) plus 0-2s of random jitter.
    # The jitter term must be random: summing two wait_exponential strategies
    # is still fully deterministic, so concurrent callers would keep retrying
    # at exactly the same moments.
    wait=wait_exponential(multiplier=1, min=1, max=10) + wait_random(0, 2),
    retry=retry_if_exception_type((ConnectionError, TimeoutError))
)
def reliable_ai_call(prompt: str, task_type: str = "general"):
    """Call ``ai_api.make_ai_request`` with up to 5 attempts.

    Only ``ConnectionError`` and ``TimeoutError`` are retried; any other
    exception propagates on the first occurrence.

    Args:
        prompt: User message to send.
        task_type: Task category forwarded to the API wrapper.

    Returns:
        The dict returned by ``ai_api.make_ai_request``.

    Raises:
        tenacity.RetryError: when all 5 attempts fail with a retryable error.
    """
    logger.info(f"Attempting AI call for task '{task_type}': {prompt[:50]}...")
    return ai_api.make_ai_request(prompt, task_type)

# Test retry mechanism with various AI prompts.
# Each entry is a (prompt, task_type) pair.
test_prompts = [
    ("Explain the concept of machine learning in simple terms.", "technical"),
    ("Write a creative short story about a robot learning to paint.", "creative"),
    ("Analyze the pros and cons of renewable energy sources.", "analysis")
]

results = []
failures = []

print("Testing retry mechanism with GPT-5-Mini...\n")

for i, (prompt, task_type) in enumerate(test_prompts, 1):
    try:
        start_time = time.time()
        result = reliable_ai_call(prompt, task_type)
        end_time = time.time()
        elapsed = end_time - start_time

        record = {
            "prompt": prompt,
            "result": result,
            "duration": elapsed,
            "success": True,
            "task_type": task_type
        }
        results.append(record)

        print(f"✅ Success for prompt {i} ({task_type}) after {elapsed:.2f}s")
        print(f"   Response preview: {result['result'][:100]}...\n")

    except Exception as e:
        # Covers exhausted retries as well as non-retryable errors.
        failure = {"prompt": prompt, "error": str(e), "task_type": task_type}
        failures.append(failure)
        logger.error(f"❌ Failed for prompt {i} ({task_type}): {e}")

print(f"\n📊 Results: {len(results)} successes, {len(failures)} failures")
print(f"Total API calls made: {ai_api.call_count}")

## 2. Timeout Implementation

Timeouts prevent operations from hanging indefinitely and help maintain system responsiveness.

In [None]:
import signal
from contextlib import contextmanager

class TimeoutException(Exception):
    """Raised by ``timeout`` when the guarded block runs longer than allowed."""


@contextmanager
def timeout(duration):
    """Abort the enclosed block with ``TimeoutException`` after ``duration`` seconds.

    Implemented with SIGALRM, so it is only usable where that signal exists
    and only from the main thread (``signal.signal`` raises ``ValueError``
    elsewhere). Do not use it inside thread-pool workers.

    Args:
        duration: Time limit in seconds. Fractional values are supported;
            0 disarms the timer, i.e. no limit is enforced.

    Raises:
        TimeoutException: inside the ``with`` body once the limit is exceeded.
    """
    def timeout_handler(signum, frame):
        raise TimeoutException(f"Operation timed out after {duration} seconds")

    # Set the signal handler
    old_handler = signal.signal(signal.SIGALRM, timeout_handler)

    try:
        # setitimer accepts floats, whereas signal.alarm only takes whole
        # seconds. Arming happens inside the try so that a failure here still
        # restores the previous handler.
        signal.setitimer(signal.ITIMER_REAL, duration)
        yield
    finally:
        # Disarm the timer first, then restore the old handler
        signal.setitimer(signal.ITIMER_REAL, 0)
        signal.signal(signal.SIGALRM, old_handler)

# Function with timeout wrapper for AI requests
def ai_call_with_timeout(prompt: str, task_type: str = "general", timeout_seconds: int = 10):
    """Run ``ai_api.make_ai_request`` under the ``timeout`` context manager.

    Args:
        prompt: User message to send.
        task_type: Task category forwarded to the API wrapper.
        timeout_seconds: Time limit handed to ``timeout``.

    Returns:
        The dict returned by ``ai_api.make_ai_request``.

    Raises:
        TimeoutException: logged as a warning, then re-raised unchanged.
    """
    try:
        with timeout(timeout_seconds):
            response = ai_api.make_ai_request(prompt, task_type)
    except TimeoutException as exc:
        logger.warning(f"Timeout for prompt '{prompt[:50]}...': {exc}")
        raise
    return response

# Test timeout mechanism
print("Testing timeout mechanism with GPT-5-Mini...")
test_prompt = "Explain quantum computing and its potential applications in detail."

try:
    result = ai_call_with_timeout(test_prompt, "technical", timeout_seconds=8)
    response_preview = result['result'][:200]
    print("✅ Call completed successfully!")
    print(f"📝 Response: {response_preview}...")
except TimeoutException as e:
    print(f"⏰ Timeout occurred: {e}")
except Exception as e:
    # Simulated API failures (connection errors etc.) end up here.
    print(f"❌ Other error: {e}")

## 3. Concurrency Management

Managing concurrent requests helps balance throughput with resource constraints and rate limits.

In [None]:
# Concurrent AI request manager for GPT-5-Mini
class ConcurrentAIManager:
    """Runs AI requests on a thread pool while throttling how often they start."""

    def __init__(self, max_workers=5, rate_limit_per_second=10):
        """
        Args:
            max_workers: Size of the thread pool used per batch.
            rate_limit_per_second: Maximum request starts within any one-second window.
        """
        self.max_workers = max_workers
        self.rate_limit = rate_limit_per_second
        self.last_request_times = []
        self.lock = threading.Lock()

    def _enforce_rate_limit(self):
        """Block until another request may start, then record its start time.

        The lock is held for the whole wait, so throttled callers queue up
        behind one another.
        """
        with self.lock:
            now = time.time()

            # Keep only the starts from the last second
            recent = []
            for stamp in self.last_request_times:
                if now - stamp < 1.0:
                    recent.append(stamp)
            self.last_request_times = recent

            if len(recent) >= self.rate_limit:
                # Wait until the oldest start in the window is one second old
                wait_for = 1.0 - (now - recent[0])
                if wait_for > 0:
                    time.sleep(wait_for)

            self.last_request_times.append(time.time())

    def make_concurrent_requests(self, prompt_tasks):
        """
        Make concurrent AI requests.

        Args:
            prompt_tasks: List of (prompt, task_type) tuples

        Returns:
            List with one entry per task, in completion order: either the
            result of ``reliable_ai_call`` or a dict with ``error``,
            ``prompt`` and ``task_type`` keys.
        """
        def single_request(prompt_task):
            prompt, task_type = prompt_task
            self._enforce_rate_limit()
            try:
                return reliable_ai_call(prompt, task_type)
            except Exception as e:
                return {"error": str(e), "prompt": prompt, "task_type": task_type}

        collected = []
        with concurrent.futures.ThreadPoolExecutor(max_workers=self.max_workers) as pool:
            # Map each future back to the task it was created for
            pending = {}
            for task in prompt_tasks:
                pending[pool.submit(single_request, task)] = task

            for finished in concurrent.futures.as_completed(pending):
                try:
                    collected.append(finished.result())
                except Exception as e:
                    # Reached only when the worker failed outside its own
                    # try block (e.g. while unpacking the task tuple).
                    origin = pending[finished]
                    collected.append({"error": str(e), "prompt": origin[0], "task_type": origin[1]})

        return collected

import threading

# Test concurrent requests with various AI tasks
manager = ConcurrentAIManager(max_workers=3, rate_limit_per_second=5)

# Create diverse test requests as (prompt, task_type) pairs
test_requests = [
    ("Summarize the benefits of exercise.", "analysis"),
    ("Write a haiku about technology.", "creative"),
    ("Explain how neural networks work.", "technical"),
    ("What are the main causes of climate change?", "analysis"),
    ("Create a short dialogue between two AI assistants.", "creative"),
    ("Describe the process of photosynthesis.", "technical"),
    ("List pros and cons of remote work.", "analysis"),
    ("Write a limerick about coding.", "creative")
]

print(f"Testing concurrent processing of {len(test_requests)} AI requests...\n")

start_time = time.time()
concurrent_results = manager.make_concurrent_requests(test_requests)
end_time = time.time()
elapsed = end_time - start_time

# Entries carrying an "error" key are failures; everything else succeeded.
successful, failed = [], []
for outcome in concurrent_results:
    if "error" in outcome:
        failed.append(outcome)
    else:
        successful.append(outcome)

print(f"\n📊 Concurrent processing completed in {elapsed:.2f}s")
print(f"✅ Successful requests: {len(successful)}")
print(f"❌ Failed requests: {len(failed)}")
print(f"🔄 Total API calls: {ai_api.call_count}")

# Show sample successful responses
if successful:
    print("\n📋 Sample successful responses:")
    for position, result in enumerate(successful[:3], 1):
        label = result.get('task_type', 'unknown')
        preview = result.get('result', 'N/A')[:100]
        print(f"  {position}. [{label}] {preview}...")

## 4. Budget Caps and Cost Monitoring

Implementing budget controls prevents runaway costs and provides visibility into API usage.

In [None]:
@dataclass
class AIUsage:
    """Running usage totals kept by ``BudgetManager``."""

    # Number of calls recorded, successful or not
    calls_made: int = 0
    # Accumulated estimated cost
    total_cost: float = 0.0
    successful_calls: int = 0
    failed_calls: int = 0
    # Accumulated estimated tokens (prompt + response)
    total_tokens: int = 0
    # When tracking began; None until the owner sets it
    start_time: Optional[datetime] = None

class BudgetManager:
    """Tracks estimated token spend against a fixed budget.

    All counters live in ``self.usage`` and are read and written under
    ``self.lock``.
    """

    def __init__(self, daily_budget=50.0, cost_per_1k_tokens=0.0015):
        """Initialize budget manager.

        Args:
            daily_budget: Maximum daily budget in USD
            cost_per_1k_tokens: Cost per 1000 tokens for GPT-5-Mini
        """
        self.daily_budget = daily_budget
        self.cost_per_1k_tokens = cost_per_1k_tokens
        self.usage = AIUsage(start_time=datetime.now())
        self.lock = threading.Lock()

    def estimate_tokens(self, text: str) -> int:
        """Rough estimation of tokens (approximately 4 characters per token)"""
        characters = len(text)
        return characters // 4

    def can_make_call(self, estimated_prompt_tokens: int = 100) -> bool:
        """Return True when a call of the estimated size still fits the budget."""
        with self.lock:
            extra = (estimated_prompt_tokens / 1000) * self.cost_per_1k_tokens
            return self.usage.total_cost + extra <= self.daily_budget

    def record_call(self, success: bool, prompt: str = "", response: str = ""):
        """Add one finished call to the running totals.

        Args:
            success: Whether the call succeeded.
            prompt: Prompt text, used for the token estimate.
            response: Response text, used for the token estimate.
        """
        with self.lock:
            usage = self.usage
            usage.calls_made += 1

            # Estimate tokens used and what they cost
            tokens_used = self.estimate_tokens(prompt) + self.estimate_tokens(response)
            spent = (tokens_used / 1000) * self.cost_per_1k_tokens

            usage.total_tokens += tokens_used
            usage.total_cost += spent

            if not success:
                usage.failed_calls += 1
            else:
                usage.successful_calls += 1

    def get_usage_report(self) -> Dict:
        """Return a snapshot of the totals plus derived rates and averages."""
        with self.lock:
            usage = self.usage
            elapsed_minutes = (datetime.now() - usage.start_time).total_seconds() / 60
            calls = usage.calls_made
            spent = usage.total_cost
            # Guard the per-call averages against division by zero
            calls_or_one = max(1, calls)
            return {
                "total_calls": calls,
                "successful_calls": usage.successful_calls,
                "failed_calls": usage.failed_calls,
                "success_rate": usage.successful_calls / calls_or_one * 100,
                "total_cost": spent,
                "total_tokens": usage.total_tokens,
                "budget_remaining": self.daily_budget - spent,
                "budget_used_percent": (spent / self.daily_budget) * 100,
                "runtime_minutes": elapsed_minutes,
                # The divisor is clamped to at least one minute
                "calls_per_minute": calls / max(1, elapsed_minutes),
                "avg_cost_per_call": spent / calls_or_one
            }

# Budget-aware AI caller
class BudgetAwareAI:
    """Refuses to call the AI API once the budget manager reports no headroom."""

    def __init__(self, budget_manager: BudgetManager):
        self.budget_manager = budget_manager

    def make_call(self, prompt: str, task_type: str = "general"):
        """Make one budget-checked request and record its outcome.

        Args:
            prompt: User message to send.
            task_type: Task category forwarded to the API wrapper.

        Returns:
            The dict returned by ``ai_api.make_ai_request``.

        Raises:
            ValueError: when the estimated cost no longer fits the budget.
            Exception: any error from the request, re-raised after the failed
                call has been recorded.
        """
        tracker = self.budget_manager

        # Prompt estimate plus 150 tokens assumed for the response
        expected_tokens = tracker.estimate_tokens(prompt) + 150
        if not tracker.can_make_call(expected_tokens):
            raise ValueError("Budget limit reached - cannot make more calls")

        try:
            result = ai_api.make_ai_request(prompt, task_type)
            response_text = result.get('result', '')
            tracker.record_call(success=True, prompt=prompt, response=response_text)
        except Exception:
            tracker.record_call(success=False, prompt=prompt, response="")
            raise
        return result

# Test budget management
budget_mgr = BudgetManager(daily_budget=2.0, cost_per_1k_tokens=0.0015)  # Small budget for testing
budget_ai = BudgetAwareAI(budget_mgr)

print("Testing budget management with GPT-5-Mini...\n")

test_prompts_budget = [
    "Explain artificial intelligence.",
    "Write a short poem about nature.",
    "Describe the solar system.",
    "What is machine learning?",
    "Create a simple recipe for cookies."
]

for call_no, prompt in enumerate(test_prompts_budget, 1):
    try:
        result = budget_ai.make_call(prompt, "general")
        print(f"✅ Call {call_no} successful: {result['result'][:60]}...")
    except ValueError as e:
        # ValueError ends the loop: it is treated as "budget exhausted".
        print(f"💰 Budget limit reached at call {call_no}: {e}")
        break
    except Exception as e:
        print(f"❌ Call {call_no} failed: {e}")

# Print usage report
report = budget_mgr.get_usage_report()
print("\n📊 Budget Usage Report:")
print("=" * 40)
for key, value in report.items():
    # Only floats get special formatting; the suffix depends on the key name.
    if not isinstance(value, float):
        line = f"{key}: {value}"
    elif 'percent' in key or 'rate' in key:
        line = f"{key}: {value:.1f}%"
    elif 'cost' in key:
        line = f"{key}: ${value:.4f}"
    else:
        line = f"{key}: {value:.2f}"
    print(line)

## 5. Comprehensive Reliability System

Combining all techniques into a production-ready reliability system for GPT-5-Mini.

In [None]:
class ReliableAIClient:
    """Batch client combining budget checks, timeouts, retries and a thread pool.

    ``process_batch`` runs requests on worker threads, so:

    * the per-request timeout is enforced by waiting on a future rather than
      with the SIGALRM-based ``timeout`` context manager, because
      ``signal.signal`` raises ``ValueError`` outside the main thread;
    * every update to ``self.metrics`` goes through ``_bump``, which holds a lock.

    Metric meanings:
        requests_attempted: tasks handed to ``process_batch``.
        requests_successful: tasks that returned a result.
        requests_failed: tasks that ended in an exception of any kind
            (budget rejection, exhausted retries, non-retryable error).
        budget_limited: attempts rejected by the budget check.
        timeouts: attempts that exceeded ``timeout_seconds``.
        retries_triggered: attempts that ended in a ConnectionError/TimeoutError
            raised by the API call itself.
        total_response_time: summed wall-clock seconds over all tasks,
            including retry waits.
    """

    def __init__(self, budget_manager, max_workers=3, timeout_seconds=10):
        """
        Args:
            budget_manager: Object providing ``estimate_tokens``,
                ``can_make_call`` and ``record_call``.
            max_workers: Thread-pool size used by ``process_batch``.
            timeout_seconds: Time limit for a single API attempt.
        """
        self.budget_manager = budget_manager
        self.max_workers = max_workers
        self.timeout_seconds = timeout_seconds
        self.metrics = {
            "requests_attempted": 0,
            "requests_successful": 0,
            "requests_failed": 0,
            "budget_limited": 0,
            "timeouts": 0,
            "retries_triggered": 0,
            "total_response_time": 0.0
        }
        self._metrics_lock = threading.Lock()

    def _bump(self, key, amount=1):
        """Add ``amount`` to one metric while holding the metrics lock."""
        with self._metrics_lock:
            self.metrics[key] += amount

    def _call_with_timeout(self, prompt, task_type):
        """Run one API call on a helper thread and wait at most ``timeout_seconds``.

        Raises:
            TimeoutException: when the call has not finished in time. The
                helper thread cannot be interrupted; it is abandoned and
                finishes in the background.
        """
        runner = concurrent.futures.ThreadPoolExecutor(max_workers=1)
        try:
            future = runner.submit(ai_api.make_ai_request, prompt, task_type)
            finished, _ = concurrent.futures.wait([future], timeout=self.timeout_seconds)
            if not finished:
                raise TimeoutException(
                    f"Operation timed out after {self.timeout_seconds} seconds"
                )
            # Re-raises whatever the API call itself raised
            return future.result()
        finally:
            # Never block on a call that has just been declared timed out
            runner.shutdown(wait=False)

    @retry(
        stop=stop_after_attempt(3),
        # Exponential backoff (1-5s) plus up to 1s of random jitter; two
        # summed wait_exponential terms would be deterministic.
        wait=wait_exponential(multiplier=1, min=1, max=5) + wait_random(0, 1),
        retry=retry_if_exception_type((ConnectionError, TimeoutError))
    )
    def _make_single_request(self, prompt: str, task_type: str = "general"):
        """One budget-checked, time-limited attempt; retried by tenacity.

        Raises:
            ValueError: the budget check failed (not retried).
            ConnectionError, TimeoutError: retryable failures; after the last
                attempt tenacity raises ``RetryError`` to the caller instead.
            Exception: any other error from the API call (not retried).
        """
        estimated_tokens = self.budget_manager.estimate_tokens(prompt) + 150

        if not self.budget_manager.can_make_call(estimated_tokens):
            self._bump("budget_limited")
            raise ValueError("Budget limit exceeded")

        try:
            result = self._call_with_timeout(prompt, task_type)

            response_text = result.get('result', '')
            self.budget_manager.record_call(success=True, prompt=prompt, response=response_text)
            self._bump("requests_successful")
            return result

        except TimeoutException:
            self._bump("timeouts")
            self.budget_manager.record_call(success=False, prompt=prompt, response="")
            # Convert to the built-in type so the retry policy picks it up
            raise TimeoutError("Request timed out")

        except (ConnectionError, TimeoutError):
            self._bump("retries_triggered")
            self.budget_manager.record_call(success=False, prompt=prompt, response="")
            raise

        except Exception:
            # The failed request itself is counted once, in process_batch
            self.budget_manager.record_call(success=False, prompt=prompt, response="")
            raise

    def process_batch(self, prompt_tasks):
        """
        Process a batch of (prompt, task_type) tuples.

        Returns:
            List of dicts in completion order. Each has ``prompt``,
            ``task_type``, ``success``, ``duration`` and ``timestamp``, plus
            ``result`` on success or ``error`` on failure.
        """
        results = []

        def process_single(prompt_task):
            prompt, task_type = prompt_task
            self._bump("requests_attempted")
            start_time = time.time()

            try:
                result = self._make_single_request(prompt, task_type)
                duration = time.time() - start_time
                self._bump("total_response_time", duration)

                return {
                    "prompt": prompt,
                    "task_type": task_type,
                    "result": result,
                    "success": True,
                    "duration": duration,
                    "timestamp": datetime.now().isoformat()
                }
            except Exception as e:
                duration = time.time() - start_time
                self._bump("total_response_time", duration)
                # Count each failed task exactly once, whatever the cause, so
                # success and failure rates add up to 100%.
                self._bump("requests_failed")

                return {
                    "prompt": prompt,
                    "task_type": task_type,
                    "error": str(e),
                    "success": False,
                    "duration": duration,
                    "timestamp": datetime.now().isoformat()
                }

        with concurrent.futures.ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            futures = {executor.submit(process_single, pt): pt for pt in prompt_tasks}

            for future in concurrent.futures.as_completed(futures):
                results.append(future.result())

        return results

    def get_metrics(self):
        """Return the raw counters plus success/failure rates and mean response time."""
        with self._metrics_lock:
            snapshot = dict(self.metrics)

        total_attempted = snapshot["requests_attempted"]
        if total_attempted > 0:
            success_rate = (snapshot["requests_successful"] / total_attempted) * 100
            failure_rate = (snapshot["requests_failed"] / total_attempted) * 100
            avg_response_time = snapshot["total_response_time"] / total_attempted
        else:
            success_rate = failure_rate = avg_response_time = 0

        return {
            **snapshot,
            "success_rate_percent": success_rate,
            "failure_rate_percent": failure_rate,
            "avg_response_time_seconds": avg_response_time
        }

# Create a comprehensive test with GPT-5-Mini
comprehensive_budget = BudgetManager(daily_budget=5.0, cost_per_1k_tokens=0.0015)
reliable_client = ReliableAIClient(comprehensive_budget, max_workers=4)

# Create a diverse set of test tasks as (prompt, task_type) pairs
comprehensive_test_batch = [
    ("Summarize the key principles of sustainable development.", "analysis"),
    ("Write a creative story about a time-traveling scientist.", "creative"),
    ("Explain how blockchain technology works.", "technical"),
    ("Compare and contrast different programming paradigms.", "technical"),
    ("Create a motivational speech for entrepreneurs.", "creative"),
    ("Analyze the impact of social media on society.", "analysis"),
    ("Describe the process of cellular respiration.", "technical"),
    ("Write a dialogue between a customer and support agent.", "creative"),
    ("Evaluate the pros and cons of electric vehicles.", "analysis"),
    ("Explain the concept of quantum entanglement simply.", "technical")
]

print("🚀 Running comprehensive reliability test with GPT-5-Mini...\n")
print(f"Processing {len(comprehensive_test_batch)} diverse AI tasks...\n")

start_time = time.time()
batch_results = reliable_client.process_batch(comprehensive_test_batch)
end_time = time.time()

# Analyze results: split the batch by its "success" flag
successful_results, failed_results = [], []
for entry in batch_results:
    if entry["success"]:
        successful_results.append(entry)
    else:
        failed_results.append(entry)

print("\n🔍 Comprehensive Test Results:")
print("=" * 50)
print(f"Total processing time: {end_time - start_time:.2f} seconds")
print(f"✅ Successful requests: {len(successful_results)}")
print(f"❌ Failed requests: {len(failed_results)}")

# Show sample results by task type (first successful result of each type)
if successful_results:
    print("\n📋 Sample Results by Task Type:")
    for task_type in {r['task_type'] for r in successful_results}:
        sample = next(r for r in successful_results if r['task_type'] == task_type)
        print(f"\n  [{task_type.upper()}] Sample Response:")
        print(f"  Prompt: {sample['prompt'][:60]}...")
        print(f"  Response: {sample['result']['result'][:120]}...")

# Print client metrics
print("\n📈 Client Performance Metrics:")
print("=" * 50)
metrics = reliable_client.get_metrics()
for key, value in metrics.items():
    # Formatting is chosen from the key name first, then from the value type.
    if "percent" in key:
        line = f"{key}: {value:.1f}%"
    elif "time" in key and "seconds" in key:
        line = f"{key}: {value:.3f}s"
    elif isinstance(value, float):
        line = f"{key}: {value:.2f}"
    else:
        line = f"{key}: {value}"
    print(line)

# Print final budget report
print("\n💰 Final Budget Report:")
print("=" * 50)
final_report = comprehensive_budget.get_usage_report()
for key, value in final_report.items():
    if not isinstance(value, float):
        line = f"{key}: {value}"
    elif 'percent' in key or 'rate' in key:
        line = f"{key}: {value:.1f}%"
    elif 'cost' in key:
        line = f"{key}: ${value:.4f}"
    else:
        line = f"{key}: {value:.2f}"
    print(line)

## Summary

This demo covered key reliability patterns for production AI applications using **GPT-5-Mini via AskSage**:

### 🔄 **Retries with Jitter**
- Exponential backoff spaces retries further apart so a struggling service is not hammered
- Jitter adds randomization so that clients do not all retry at the same moment (the thundering herd problem)
- Selective retry logic for different error types
- Applied to GPT-5-Mini API calls through AskSage

### ⏰ **Timeout Management**
- Prevents hanging operations during AI processing
- Maintains system responsiveness
- Configurable timeout duration per call (`timeout_seconds`)

### 🚀 **Concurrency Control**
- Thread pool management for parallel AI processing
- Rate limiting to respect AskSage API constraints
- Resource management and cleanup
- Efficient handling of multiple AI task types

### 💰 **Budget Caps**
- Real-time cost tracking based on token usage
- GPT-5-Mini specific pricing considerations
- Usage metrics and detailed reporting
- Automatic request blocking when limits reached

### 🛡️ **Comprehensive Reliability**
- Integration of all reliability patterns
- Detailed metrics and monitoring for AI operations
- Production-ready error handling for AskSage integration
- Support for different AI task types (analysis, creative, technical)

### 🤖 **GPT-5-Mini Integration**
- AskSage client setup and configuration
- Model-specific prompt engineering
- Token estimation and cost calculation
- Task-type specific system prompts

These patterns are essential for building robust, cost-effective AI applications that can handle real-world failures and constraints gracefully while leveraging the power of GPT-5-Mini through the AskSage platform.

In [None]:
# Final verification - test AskSage connection and available models
print("🔍 Final System Verification")
print("=" * 40)

try:
    # Check AskSage connection
    models = ask_sage_client.get_models()
    if 'response' in models:
        available_models = models['response']
        print("✅ Connected to AskSage successfully")
        print(f"📋 Total available models: {len(available_models)}")

        # Check for GPT-5-Mini specifically (case-insensitive substring match)
        gpt5_models = [name for name in available_models if 'gpt-5' in name.lower()]
        if not gpt5_models:
            print("⚠️  GPT-5-Mini not found. Consider using gpt-4o-mini or gpt-4o instead.")
            gpt4_models = [name for name in available_models if 'gpt-4' in name.lower()]
            if gpt4_models:
                print(f"🔄 Alternative GPT-4 models: {gpt4_models[:5]}")
        else:
            print(f"🎯 GPT-5 models found: {gpt5_models}")

    # Summary of reliability features implemented
    print("\n🛡️ Reliability Features Implemented:")
    features = [
        "✅ Retry mechanisms with exponential backoff and jitter",
        "✅ Timeout protection for AI operations",
        "✅ Concurrent request management with rate limiting",
        "✅ Budget tracking and cost monitoring",
        "✅ Comprehensive error handling and metrics",
        "✅ AskSage integration with model selection",
        "✅ Task-type specific prompt engineering"
    ]

    for feature in features:
        print(f"  {feature}")

    print("\n🎉 Reliability demo setup complete!")
    print("💡 Ready for production AI applications with GPT-5-Mini via AskSage")

except Exception as e:
    # Connection or credential problems are reported rather than raised.
    print(f"❌ Error during final verification: {e}")
    print("🔧 Please check your AskSage credentials and connection")