In [None]:
# Banner cell: emits a single status line.
print("Setup complete.")

# Prompt Patch Loop Demo

## Learning Objectives
- See systematic prompt improvement in action
- Watch AI iterate and refine its own prompts
- Understand the patch-test-improve cycle
- Learn automated prompt optimization techniques

## The Demo: Self-Improving Prompts

We'll demonstrate:
1. **Initial Prompt** - Start with a basic prompt
2. **Performance Analysis** - Identify weaknesses
3. **Automated Patching** - AI suggests improvements
4. **Testing Loop** - Validate improvements
5. **Convergence** - Reach optimal performance

In [None]:
# Setup and imports
!pip install asksageclient pip_system_certs
from google.colab import drive
drive.mount('/content/drive')

import os
import json
import time
import tiktoken
from pathlib import Path
from typing import Dict, List, Any

# Import our AskSage client
from asksageclient import AskSageClient

# Credentials are read from the Colab secrets store rather than hard-coded.
from google.colab import userdata

api_key = userdata.get('ASKSAGE_API_KEY')
email = userdata.get('ASKSAGE_EMAIL')

# Build the AskSage client used for every query in this notebook, plus a
# tiktoken encoder resolved for the "gpt-4" model name.
client = AskSageClient(email=email, api_key=api_key)
tokenizer = tiktoken.encoding_for_model("gpt-4")

print(
    "AskSage client initialized successfully",
    "Ready to showcase AI capabilities...",
    sep="\n",
)

## Task: Product Review Analysis

We'll use product review sentiment analysis as our test case for prompt optimization.

In [None]:
# Labelled reviews used to evaluate each prompt version.
# Every row is (review text, expected sentiment label, expected score) and is
# expanded into a dict with the keys "review", "expected_sentiment" and
# "expected_score", in that order.
test_reviews = [
    dict(zip(("review", "expected_sentiment", "expected_score"), row))
    for row in (
        (
            "This product is amazing! Works exactly as advertised. Fast shipping too.",
            "positive",
            0.9,
        ),
        (
            "Terrible quality. Broke after one day. Waste of money.",
            "negative",
            0.1,
        ),
        (
            "It's okay. Does what it's supposed to do but nothing special.",
            "neutral",
            0.5,
        ),
        (
            "Love the design but the functionality is lacking. Mixed feelings.",
            "mixed",
            0.6,
        ),
        (
            "Outstanding customer service! Product had issues but they fixed everything quickly.",
            "positive",
            0.8,
        ),
    )
]

review_count = len(test_reviews)
print(f"Test dataset loaded: {review_count} reviews")
print("Expected outcomes defined for validation")

## Iteration 1: Basic Prompt

Start with a simple, basic prompt:

In [None]:
# Initial basic prompt
def create_basic_prompt(review_text):
    """Return the baseline sentiment prompt for one review.

    The review text is placed after a fixed one-line instruction; the result
    starts and ends with a newline.
    """
    template = "\nAnalyze the sentiment of this review: {}\n"
    return template.format(review_text)

# Test basic prompt
# Sends every labelled review through the baseline prompt and stores the raw
# model reply next to the expected label in `basic_results`.
print("=== ITERATION 1: BASIC PROMPT ===")
basic_results = []

# Test GPT-5-mini
print("=== TESTING GPT-5-mini ===")
start_time = time.time()  # NOTE: recorded but not reported by this cell

for i, test_case in enumerate(test_reviews):
    prompt = create_basic_prompt(test_case['review'])

    # The request has to live inside the loop: one query per review.
    response = client.query(
        message=prompt,
        system_prompt="You are concise.",
        temperature=0.1,
        model="gpt-5-mini",
        live=0,
        limit_references=0,
    )

    # A missing/None "message" is treated as an empty reply instead of
    # raising AttributeError on .strip().
    result = (response.get("message") or "").strip()
    basic_results.append({
        'review': test_case['review'][:50] + '...',
        'expected': test_case['expected_sentiment'],
        'actual': result,
        'prompt_version': 'basic'
    })

    print(f"Review {i+1}: Expected {test_case['expected_sentiment']}")
    print(f"Got: {result[:100]}...")
    print()

print(f"Basic prompt tested on {len(basic_results)} reviews")

## AI-Powered Prompt Analysis

Let AI analyze the basic prompt's performance and suggest improvements:

In [None]:
# AI analyzes prompt performance
# Asks the model to critique the basic prompt using the iteration-1 results
# and to return a JSON object that includes an improved prompt.
import re  # imported here so the cell can be re-run on its own

analysis_prompt = f"""
Analyze this prompt's performance and suggest specific improvements.

CURRENT PROMPT:
{create_basic_prompt('[REVIEW_TEXT]')}

TEST RESULTS:
{json.dumps(basic_results, indent=2)}

Provide analysis in JSON format:
{{
  "performance_issues": [
    {{
      "issue": "string",
      "impact": "High|Medium|Low",
      "examples": ["list of examples"]
    }}
  ],
  "improvement_suggestions": [
    {{
      "suggestion": "string",
      "rationale": "string",
      "expected_improvement": "string"
    }}
  ],
  "improved_prompt": "string"
}}

The improved_prompt MUST contain the literal placeholder [REVIEW_TEXT] where the review text belongs.
"""

print("=== AI PROMPT ANALYSIS ===")
# Test GPT-5-mini
print("=== TESTING GPT-5-mini ===")
start_time = time.time()  # NOTE: recorded but not reported by this cell

analysis_response = client.query(
    message=analysis_prompt,
    system_prompt="You are concise.",
    temperature=0.1,
    model="gpt-5-mini",
    live=0,
    limit_references=0,
)

# A missing/None "message" is treated as an empty reply.
analysis_result = (analysis_response.get("message") or "").strip()
print(analysis_result)

# Parse the analysis
# Defaults are assigned first so the names always exist afterwards, even when
# the reply contains no usable JSON; the fallback prompt is the basic prompt.
issues = []
suggestions = []
improved_prompt = create_basic_prompt('[REVIEW_TEXT]')
analysis_data = None

# Greedy match from the first "{" to the last "}" in the reply.
json_match = re.search(r'\{.*\}', analysis_result, re.DOTALL)
if json_match:
    try:
        analysis_data = json.loads(json_match.group())
    except json.JSONDecodeError as err:
        print(f"\n✗ Could not parse analysis JSON: {err}")
else:
    print("\n✗ No JSON object found in the analysis response")

if isinstance(analysis_data, dict):
    issues = analysis_data.get('performance_issues') or []
    suggestions = analysis_data.get('improvement_suggestions') or []
    suggested_prompt = analysis_data.get('improved_prompt')

    print(f"\n✓ Identified {len(issues)} performance issues")
    print(f"✓ Generated {len(suggestions)} improvement suggestions")
    if suggested_prompt:
        improved_prompt = suggested_prompt
        print("✓ Created improved prompt version")
    else:
        print("✗ No improved prompt returned; keeping the basic prompt")
else:
    print("✗ Keeping the basic prompt as the improved prompt")

## Iteration 2: AI-Improved Prompt

Test the AI-suggested improvements:

In [None]:
# Test improved prompt
def create_improved_prompt(review_text):
    """Fill the AI-suggested prompt with one review.

    Reads the module-level ``improved_prompt`` and substitutes every
    ``[REVIEW_TEXT]`` placeholder with ``review_text``. When the suggested
    prompt contains no placeholder, the review is appended after the prompt
    so it is still part of the request instead of being silently dropped.
    """
    placeholder = '[REVIEW_TEXT]'
    if placeholder in improved_prompt:
        return improved_prompt.replace(placeholder, review_text)
    # Without the placeholder, str.replace would return the prompt unchanged.
    return f"{improved_prompt}\n\nReview: {review_text}"

# Sends every labelled review through the AI-suggested prompt and stores the
# raw model reply next to the expected label in `improved_results`.
print("=== ITERATION 2: AI-IMPROVED PROMPT ===")
print("Improved prompt:")
print(improved_prompt)
print()

improved_results = []

# Test GPT-5-mini
print("=== TESTING GPT-5-mini ===")
start_time = time.time()  # NOTE: recorded but not reported by this cell

for i, test_case in enumerate(test_reviews):
    prompt = create_improved_prompt(test_case['review'])

    # The request has to live inside the loop: one query per review.
    response = client.query(
        message=prompt,
        system_prompt="You are concise.",
        temperature=0.1,
        model="gpt-5-mini",
        live=0,
        limit_references=0,
    )

    # A missing/None "message" is treated as an empty reply instead of
    # raising AttributeError on .strip().
    result = (response.get("message") or "").strip()
    improved_results.append({
        'review': test_case['review'][:50] + '...',
        'expected': test_case['expected_sentiment'],
        'actual': result,
        'prompt_version': 'improved_v1'
    })

    print(f"Review {i+1}: Expected {test_case['expected_sentiment']}")
    print(f"Got: {result[:100]}...")
    print()

print(f"Improved prompt tested on {len(improved_results)} reviews")

## Performance Comparison

Compare the performance of both prompt versions:

In [None]:
# AI compares prompt performance
# Sends both result sets to the model and asks for a JSON verdict on whether
# another optimization round is needed.
import re  # imported here so the cell does not depend on an earlier cell's import

comparison_prompt = f"""
Compare the performance of these two prompt versions and determine if further optimization is needed.

BASIC PROMPT RESULTS:
{json.dumps(basic_results, indent=2)}

IMPROVED PROMPT RESULTS:
{json.dumps(improved_results, indent=2)}

Provide comparison analysis in JSON format:
{{
  "performance_comparison": {{
    "basic_prompt_score": "number 1-10",
    "improved_prompt_score": "number 1-10",
    "improvement_percentage": "number",
    "key_improvements": ["list of improvements"]
  }},
  "remaining_issues": [
    {{
      "issue": "string",
      "severity": "High|Medium|Low"
    }}
  ],
  "optimization_status": "Complete|Needs_Further_Work",
  "next_iteration_suggestions": ["list if needed"],
  "final_optimized_prompt": "string if optimization complete"
}}
"""

print("=== PERFORMANCE COMPARISON ===")
# Test GPT-5-mini
print("=== TESTING GPT-5-mini ===")
start_time = time.time()  # NOTE: recorded but not reported by this cell

comparison_response = client.query(
    message=comparison_prompt,
    system_prompt="You are concise.",
    temperature=0.1,
    model="gpt-5-mini",
    live=0,
    limit_references=0,
)

# A missing/None "message" is treated as an empty reply.
comparison_result = (comparison_response.get("message") or "").strip()
print(comparison_result)

# Parse comparison results
# Defaults are assigned first so the names exist even when parsing fails.
comparison_data = None
performance = {}
status = 'Unknown'

# Greedy match from the first "{" to the last "}" in the reply.
json_match = re.search(r'\{.*\}', comparison_result, re.DOTALL)
if json_match:
    try:
        comparison_data = json.loads(json_match.group())
    except json.JSONDecodeError as err:
        print(f"\n✗ Could not parse comparison JSON: {err}")
else:
    print("\n✗ No JSON object found in the comparison response")

if isinstance(comparison_data, dict):
    # `or {}` covers an explicit null for "performance_comparison".
    performance = comparison_data.get('performance_comparison') or {}
    status = comparison_data.get('optimization_status', 'Unknown')

    print(f"\n✓ Basic prompt score: {performance.get('basic_prompt_score', 0)}/10")
    print(f"✓ Improved prompt score: {performance.get('improved_prompt_score', 0)}/10")
    print(f"✓ Improvement: {performance.get('improvement_percentage', 0)}%")
    print(f"✓ Optimization status: {status}")

## Patch Loop Summary

### The Automated Improvement Process:

**1. Initial Testing**
- Started with basic prompt
- Tested on representative dataset
- Collected performance data

**2. AI-Powered Analysis**
- AI identified specific weaknesses
- Generated targeted improvement suggestions
- Created optimized prompt version

**3. Iterative Testing**
- Tested improved prompt
- Compared performance metrics
- Determined if further optimization needed

**4. Convergence**
- Reached satisfactory performance
- Or identified need for additional iterations

### Key Benefits:

**Systematic Improvement**
- Data-driven optimization process
- Objective performance measurement
- Consistent improvement methodology

**AI-Assisted Analysis**
- Identifies issues humans might miss
- Suggests specific, actionable improvements
- Accelerates optimization process

**Scalable Process**
- Can be automated for production use
- Works with any prompt type
- Continuous improvement capability

### Production Applications:
- **A/B Testing**: Compare prompt versions automatically
- **Performance Monitoring**: Detect prompt degradation
- **Continuous Optimization**: Self-improving systems
- **Quality Assurance**: Validate prompt changes

This demonstrates how AI can improve AI - creating self-optimizing prompt systems that get better over time.