In [None]:
# Banner cell: only emits a fixed message; no setup work happens in this cell itself.
print('Setup complete.')

# Golden Test Generation Demo

## Learning Objectives
- Generate comprehensive test suites using AI
- Create "golden" reference tests for legacy code
- Build automated test validation and coverage analysis
- Design test-driven refactoring workflows

## The Challenge: Testing Legacy Code

Golden tests capture the current behavior of existing code as a baseline, then verify that refactored code still behaves the same way. This demo walks through four steps:
1. **Behavior Analysis** - Understanding what code actually does
2. **Test Generation** - Creating comprehensive test coverage
3. **Edge Case Discovery** - Finding boundary conditions and error cases
4. **Regression Prevention** - Ensuring refactors don't break functionality

In [None]:
# Install required packages
!pip install asksageclient pip_system_certs pytest coverage rich tiktoken

In [None]:
# ================================
# 🔐 Cell 1 — Load secrets (Colab) + pricing + token utils
# ================================
import os, time, csv
from typing import Optional, Dict
import tiktoken

from google.colab import userdata

# Required secrets. `userdata.get` raises on its own when a secret is missing or
# the notebook has not been granted access, so the checks below only need to
# catch secrets that exist but are empty.
ASKSAGE_API_KEY = userdata.get("ASKSAGE_API_KEY")
ASKSAGE_EMAIL = userdata.get("ASKSAGE_EMAIL")

# Optional secret: a missing base URL means "use the client's default", so the
# not-found error is translated into None instead of aborting the cell.
try:
    ASKSAGE_BASE_URL = userdata.get("ASKSAGE_BASE_URL")
except userdata.SecretNotFoundError:
    ASKSAGE_BASE_URL = None

# Explicit raises rather than `assert`, which is stripped when Python runs with -O.
if not ASKSAGE_API_KEY:
    raise ValueError("ASKSAGE_API_KEY not provided.")
if not ASKSAGE_EMAIL:
    raise ValueError("ASKSAGE_EMAIL not provided.")

print("✓ Secrets loaded")
print("  • EMAIL:", ASKSAGE_EMAIL)
print("  • BASE URL:", ASKSAGE_BASE_URL or "(default)")

# Pricing (USD per 1,000,000 tokens)
# Keyed by model name; each entry holds the rate for prompt tokens
# ("input_per_m") and for completion tokens ("output_per_m").
# cost_usd() rejects any model that is not listed here.
PRICES_PER_M = {
    "gpt-5": {"input_per_m": 1.25, "output_per_m": 10.00},
    "gpt-5-mini": {"input_per_m": 0.25, "output_per_m": 2.00},
}

# Shared tokenizer, created once so every count_tokens() call reuses it.
enc = tiktoken.get_encoding("o200k_base")

def count_tokens(text: str) -> int:
    """Return how many tokens `enc` produces for `text`.

    Falsy input (``None`` or an empty string) is treated as the empty string.
    """
    safe_text = text if text else ""
    token_ids = enc.encode(safe_text)
    return len(token_ids)

def cost_usd(model: str, input_tokens: int, output_tokens: int) -> float:
    """Return the USD cost of one request for a model listed in PRICES_PER_M.

    Args:
        model: Key into PRICES_PER_M.
        input_tokens: Number of prompt tokens.
        output_tokens: Number of completion tokens.

    Raises:
        ValueError: If `model` has no entry in PRICES_PER_M.
    """
    rates = PRICES_PER_M.get(model)
    if rates is None:
        raise ValueError(f"Unknown model: {model}")

    # Rates are quoted per million tokens, so scale the counts down first.
    input_cost = (input_tokens / 1_000_000) * rates["input_per_m"]
    output_cost = (output_tokens / 1_000_000) * rates["output_per_m"]
    return input_cost + output_cost

In [None]:
# ================================
# 🔧 Cell 2 — Import bootcamp_common and setup AskSage client
# ================================
import sys
# The relative entry is resolved against the kernel's current working
# directory, so this only works when the notebook runs from its expected folder.
# It must be appended before the bootcamp_common import below.
sys.path.append('../../../')  # Adjust path to reach bootcamp_common

from bootcamp_common.ask_sage import AskSageClient

# Initialize AskSage client
# NOTE(review): in the cells visible here `client` is not used again; the
# GoldenTestGenerator builds its own OpenAI client instead — confirm intent.
client = AskSageClient(
    api_key=ASKSAGE_API_KEY,
    base_url=ASKSAGE_BASE_URL
)

print("✓ AskSage client initialized")

In [None]:
import os
import ast
import inspect
import subprocess
from typing import Dict, List, Optional, Any, Tuple
from dataclasses import dataclass

import openai
from rich.console import Console
from rich.panel import Panel
from rich.syntax import Syntax
from rich.table import Table
from rich.progress import Progress, SpinnerColumn, TextColumn

# Single rich Console shared by the cells below (the generator class and the
# demo script both print through this module-level name).
console = Console()
print("🧪 Golden test generator loading...")

## Target Code: Email Validator Function

In [None]:
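# Legacy function under test, kept as source text so it can be both executed and
# displayed. Its current behavior, quirks included, is what the golden tests pin
# down, so do not "fix" it here.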
target_code = '''
import re
from typing import Dict, List, Tuple, Optional

def validate_email_list(emails: List[str], strict_mode: bool = False) -> Dict[str, any]:
    """Validate a list of email addresses and return detailed results."""
    
    valid_emails = []
    invalid_emails = []
    warnings = []
    
    # Basic email regex (simplified)
    email_pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
    
    for email in emails:
        if not email or not isinstance(email, str):
            invalid_emails.append({"email": email, "reason": "Invalid type or empty"})
            continue
            
        email = email.strip().lower()
        
        if len(email) > 254:  # RFC 5321 limit
            invalid_emails.append({"email": email, "reason": "Too long"})
            continue
            
        if re.match(email_pattern, email):
            # Additional checks in strict mode
            if strict_mode:
                local, domain = email.split('@')
                
                # Check local part length
                if len(local) > 64:
                    invalid_emails.append({"email": email, "reason": "Local part too long"})
                    continue
                
                # Warn about suspicious patterns
                if '..' in email or email.startswith('.') or email.endswith('.'):
                    warnings.append({"email": email, "warning": "Suspicious dot pattern"})
                
                # Check for disposable email domains
                disposable_domains = ['10minutemail.com', 'tempmail.org', 'guerrillamail.com']
                if any(domain.endswith(d) for d in disposable_domains):
                    warnings.append({"email": email, "warning": "Disposable email domain"})
            
            valid_emails.append(email)
        else:
            invalid_emails.append({"email": email, "reason": "Invalid format"})
    
    return {
        "valid_count": len(valid_emails),
        "invalid_count": len(invalid_emails),
        "warning_count": len(warnings),
        "valid_emails": valid_emails,
        "invalid_emails": invalid_emails,
        "warnings": warnings,
        "success_rate": len(valid_emails) / len(emails) if emails else 0
    }
'''

# Execute the code to make function available
# `target_code` is the fixed literal defined above, not external input. Running
# exec at the top level of the cell defines validate_email_list in the
# notebook's global namespace, which is where
# GoldenTestGenerator.generate_test_cases looks it up via globals().
exec(target_code)

# Show the source that was just loaded, with syntax highlighting.
console.print("📧 [bold blue]Target Function Loaded[/bold blue]")
syntax = Syntax(target_code, "python", theme="monokai", line_numbers=True)
console.print(Panel(syntax, title="validate_email_list function", border_style="blue"))
print("\n🎯 Ready to generate comprehensive golden tests!")

## Golden Test Generator

In [None]:
@dataclass
class TestCase:
    """A single golden test: the call to make and the output recorded for it."""
    # Name of the generated pytest method, e.g. "test_happy_path_1".
    name: str
    # Keyword arguments passed to the function under test.
    inputs: Dict[str, Any]
    # Value the function returned for `inputs` when the test was captured.
    expected_output: Any
    # Category label for reporting.
    test_type: str  # 'happy_path', 'edge_case', 'error_case'
    # Human-readable summary shown in tables and generated docstrings.
    description: str

class GoldenTestGenerator:
    """Generate comprehensive test suites for existing functions"""
    
    def __init__(self):
        self.setup_client()
        self.generated_tests = {}
    
    def setup_client(self):
        """Setup API client"""
        if os.getenv('OPENAI_API_KEY'):
            try:
                self.client = openai.OpenAI()
                self.has_api = True
                console.print("✅ OpenAI client configured")
            except Exception as e:
                self.has_api = False
                console.print(f"⚠️ Using mock responses: {e}")
        else:
            self.has_api = False
            console.print("💡 No API key found, using mock responses")
    
    def analyze_function_behavior(self, code: str, function_name: str) -> Dict[str, Any]:
        """Analyze function to understand its behavior and edge cases"""
        
        analysis_prompt = f"""Analyze this Python function to understand its behavior:

{code}

For function '{function_name}', identify:
1. Input parameters and their types/constraints
2. Return value structure and possible values
3. Happy path scenarios (normal usage)
4. Edge cases (boundary conditions, unusual inputs)
5. Error conditions (invalid inputs, exceptions)
6. Side effects or special behaviors

Provide specific test scenarios for comprehensive coverage."""
        
        if self.has_api:
            response = self.client.chat.completions.create(
                model="gpt-4",
                messages=[{"role": "user", "content": analysis_prompt}],
                max_tokens=800,
                temperature=0.3
            )
            analysis = response.choices[0].message.content
        else:
            # Mock analysis
            analysis = """Function Analysis:
Input: emails (List[str]), strict_mode (bool, default False)
Output: Dict with validation results

Happy Path:
- List of valid email addresses
- Mix of valid/invalid emails
- Strict mode enabled/disabled

Edge Cases:
- Empty list
- Single email
- Very long emails (>254 chars)
- Emails with edge case formats
- Non-string inputs

Error Conditions:
- None/null inputs
- Invalid data types
- Malformed email patterns"""
        
        return {"analysis": analysis}
    
    def generate_test_cases(self, code: str, function_name: str) -> List[TestCase]:
        """Generate comprehensive test cases based on function analysis"""
        
        console.print(f"[yellow]Generating test cases for {function_name}...[/yellow]")
        
        # Get actual function for testing
        func = globals().get(function_name)
        if not func:
            console.print(f"[red]Function {function_name} not found![/red]")
            return []
        
        test_cases = []
        
        # Happy path tests
        happy_path_inputs = [
            {"emails": ["user@example.com", "test@domain.org"], "strict_mode": False},
            {"emails": ["valid@email.com"], "strict_mode": True},
            {"emails": ["good@test.co.uk", "also@valid.net", "another@site.info"], "strict_mode": False}
        ]
        
        for i, inputs in enumerate(happy_path_inputs):
            try:
                expected = func(**inputs)
                test_cases.append(TestCase(
                    name=f"test_happy_path_{i+1}",
                    inputs=inputs,
                    expected_output=expected,
                    test_type="happy_path",
                    description=f"Normal usage with {len(inputs['emails'])} emails"
                ))
            except Exception as e:
                console.print(f"[red]Error in happy path test {i}: {e}[/red]")
        
        # Edge case tests
        edge_cases = [
            {"emails": [], "strict_mode": False},  # Empty list
            {"emails": ["user@example.com"], "strict_mode": False},  # Single email
            {"emails": ["a" * 250 + "@example.com"], "strict_mode": False},  # Long email
            {"emails": ["test@10minutemail.com"], "strict_mode": True},  # Disposable email
            {"emails": [".user@example.com", "user@example."], "strict_mode": True},  # Dot patterns
        ]
        
        for i, inputs in enumerate(edge_cases):
            try:
                expected = func(**inputs)
                test_cases.append(TestCase(
                    name=f"test_edge_case_{i+1}",
                    inputs=inputs,
                    expected_output=expected,
                    test_type="edge_case",
                    description=f"Edge case: {list(inputs.keys())}"
                ))
            except Exception as e:
                console.print(f"[red]Error in edge case test {i}: {e}[/red]")
        
        # Error case tests  
        error_cases = [
            {"emails": [None, "", 123], "strict_mode": False},  # Invalid types
            {"emails": ["invalid-email", "@domain.com", "user@"], "strict_mode": False},  # Malformed
        ]
        
        for i, inputs in enumerate(error_cases):
            try:
                expected = func(**inputs)
                test_cases.append(TestCase(
                    name=f"test_error_case_{i+1}",
                    inputs=inputs,
                    expected_output=expected,
                    test_type="error_case",
                    description=f"Error handling: invalid inputs"
                ))
            except Exception as e:
                # For functions that should handle errors gracefully
                console.print(f"[yellow]Function raised exception for error case {i}: {e}[/yellow]")
        
        return test_cases
    
    def generate_pytest_code(self, test_cases: List[TestCase], function_name: str) -> str:
        """Generate pytest code from test cases"""
        
        pytest_code = f'''import pytest
from your_module import {function_name}

class Test{function_name.title()}:
    """Comprehensive test suite for {function_name} function."""
    
'''
        
        for test_case in test_cases:
            pytest_code += f'''    def {test_case.name}(self):
        """Test: {test_case.description}"""
        inputs = {test_case.inputs}
        expected = {test_case.expected_output}
        
        result = {function_name}(**inputs)
        assert result == expected
        
        # Additional assertions for key metrics
        assert 'valid_count' in result
        assert 'invalid_count' in result
        assert result['valid_count'] + result['invalid_count'] == len(inputs['emails'])

'''
        
        return pytest_code
    
    def create_golden_test_suite(self, code: str, function_name: str) -> Dict[str, Any]:
        """Create complete golden test suite"""
        
        with Progress(SpinnerColumn(), TextColumn("[progress.description]{task.description}")) as progress:
            task = progress.add_task("Creating golden test suite...", total=None)
            
            # Analyze function
            analysis = self.analyze_function_behavior(code, function_name)
            
            # Generate test cases
            test_cases = self.generate_test_cases(code, function_name)
            
            # Generate pytest code
            pytest_code = self.generate_pytest_code(test_cases, function_name)
            
            progress.update(task, completed=100)
        
        return {
            "analysis": analysis,
            "test_cases": test_cases,
            "pytest_code": pytest_code,
            "coverage_estimate": len(test_cases)
        }

# Initialize generator
# Constructing it runs setup_client(), which prints whether an OpenAI client
# was configured or mock responses will be used.
golden_test_gen = GoldenTestGenerator()
print("🧪 Golden test generator ready!")

## Demo: Generate Golden Tests

In [None]:
# Generate comprehensive test suite
console.print("\n🧪 [bold blue]Generating Golden Test Suite[/bold blue]")

test_suite = golden_test_gen.create_golden_test_suite(target_code, "validate_email_list")

# Display limits for this cell: rows shown in the summary table and characters
# shown of the generated pytest source.
MAX_SUMMARY_ROWS = 8
MAX_PREVIEW_CHARS = 1500

# Display results
console.print(f"\n[green]✅ Generated {len(test_suite['test_cases'])} test cases![/green]")

# Show test case summary
test_summary = Table(title="Generated Test Cases")
test_summary.add_column("Test Name")
test_summary.add_column("Type")
test_summary.add_column("Description")
test_summary.add_column("Expected")

for test_case in test_suite['test_cases'][:MAX_SUMMARY_ROWS]:
    golden = test_case.expected_output
    if isinstance(golden, dict):
        expected_summary = f"valid: {golden.get('valid_count', 0)}, invalid: {golden.get('invalid_count', 0)}"
    else:
        # Golden values that are not dicts have no counts to summarize.
        expected_summary = repr(golden)

    description = test_case.description
    if len(description) > 40:
        description = description[:40] + "..."

    test_summary.add_row(
        test_case.name,
        test_case.test_type,
        description,
        expected_summary
    )

console.print(test_summary)

# Say so when the table is a partial view, instead of dropping rows silently.
hidden_rows = len(test_suite['test_cases']) - MAX_SUMMARY_ROWS
if hidden_rows > 0:
    console.print(f"[dim]... and {hidden_rows} more test cases not shown[/dim]")

# Show generated pytest code
console.print("\n[yellow]Generated Pytest Code:[/yellow]")
pytest_preview = test_suite['pytest_code']
if len(pytest_preview) > MAX_PREVIEW_CHARS:
    # Only claim there is more when the source really was cut off.
    pytest_preview = pytest_preview[:MAX_PREVIEW_CHARS] + "\n# ... more tests ..."
pytest_syntax = Syntax(pytest_preview, "python", theme="monokai", line_numbers=True)
console.print(Panel(pytest_syntax, title="test_validate_email_list.py", border_style="green"))

# Test coverage analysis
console.print("\n📊 [bold blue]Test Coverage Analysis[/bold blue]")
coverage_table = Table(title="Coverage Metrics")
coverage_table.add_column("Metric")
coverage_table.add_column("Value")
coverage_table.add_column("Status")

test_types = {}
for test_case in test_suite['test_cases']:
    test_types[test_case.test_type] = test_types.get(test_case.test_type, 0) + 1

# Status reflects the actual counts: a category with no tests is flagged
# rather than reported as covered.
total_cases = len(test_suite['test_cases'])
coverage_table.add_row(
    "Total Test Cases",
    str(total_cases),
    "✅ Good" if total_cases else "⚠️ None generated",
)
for metric_label, type_key in (
    ("Happy Path Tests", "happy_path"),
    ("Edge Case Tests", "edge_case"),
    ("Error Case Tests", "error_case"),
):
    type_count = test_types.get(type_key, 0)
    coverage_table.add_row(
        metric_label,
        str(type_count),
        "✅ Covered" if type_count else "⚠️ Missing",
    )

console.print(coverage_table)

print("\n🎯 Golden test suite generation complete!")

## Key Takeaways: Golden Test Generation

### 🎯 **Golden Test Strategy**

1. **Capture Current Behavior**: Document what code actually does today
2. **Comprehensive Coverage**: Test happy paths, edge cases, and error conditions
3. **Regression Prevention**: Ensure refactors maintain existing functionality
4. **Behavior Documentation**: Tests serve as executable specifications
5. **Confidence Building**: Enable safe refactoring with test safety net

### 🧪 **Test Generation Best Practices**

- **Function Analysis**: Understand inputs, outputs, and side effects
- **Edge Case Discovery**: Identify boundary conditions and corner cases
- **Error Path Testing**: Verify graceful handling of invalid inputs
- **Performance Considerations**: Include tests for performance-critical paths
- **Maintainable Tests**: Generate readable, well-documented test code

### 🔧 **Implementation Tips**

- **Start Small**: Begin with critical functions before expanding
- **Validate Tests**: Ensure generated tests actually pass with current code
- **Review Coverage**: Use coverage tools to identify gaps
- **Iterate**: Refine test generation based on real-world usage
- **Team Review**: Have domain experts validate test scenarios

## Next: AI UI Demo

Ready to see how AI can help build user interfaces and interactive applications?