# VectorEmbeddingService Test Results

This notebook sends a set of labelled test prompts to each intent classification API route and compares how accurately each route predicts the expected intent.

## API Routes Tested
- `/api/intent/embedding` - Vector embedding-based classification
- `/api/intent/slm` - Small Language Model classification
- `/api/intent/hybrid` - Hybrid approach (vector + SLM)


## Setup

Load test prompts and import required libraries.


In [1]:
!pip install --upgrade pip
!pip install requests



In [2]:
import requests
import json
from pathlib import Path
from datetime import datetime
from typing import List, Dict, Any
import time

# Load test prompts from data/testPrompts.json
# Each prompt has {intent, text} structure
relative_prompts_path = Path('data') / 'testPrompts.json'
prompts_path = relative_prompts_path

# The relative path is resolved against the current working directory. If the
# file is not there (e.g. the notebook was started from a subdirectory), walk
# up through the parent directories and use the first match.
if not prompts_path.exists():
    for candidate_root in Path.cwd().parents:
        candidate_path = candidate_root / relative_prompts_path
        if candidate_path.exists():
            prompts_path = candidate_path
            break
    else:
        raise FileNotFoundError(
            f"Could not find {relative_prompts_path} in {Path.cwd()} or any parent directory"
        )

with open(prompts_path, 'r', encoding='utf-8') as f:
    prompts_data = json.load(f)

# The file wraps the prompt list in a top-level 'testPrompts' key.
test_prompts = prompts_data['testPrompts']

print(f"Loaded {len(test_prompts)} test prompts from {prompts_path}")
print("Prompts structure: {intent, text}")


Loaded 21 test prompts from data/testPrompts.json
Prompts structure: {intent, text}


## Test Functions

Functions to test each API route.


In [3]:
def test_embedding_api(prompts: List[Dict[str, Any]], api_url: str = 'http://localhost:3000/api/intent/embedding') -> List[Dict[str, Any]]:
    """Test the embedding API route."""
    results = []
    
    for prompt_obj in prompts:
        text = prompt_obj.get('text', '')
        expected_intent = prompt_obj.get('intent')
        
        try:
            response = requests.post(
                api_url,
                json={'message': text},
                headers={'Content-Type': 'application/json'},
                timeout=10
            )
            response.raise_for_status()
            
            data = response.json()
            actual_intent = data.get('intent', {}).get('intent', 'N/A')
            is_correct = None
            if expected_intent:
                is_correct = actual_intent == expected_intent
            
            results.append({
                'text': text,
                'expectedIntent': expected_intent or 'N/A',
                'actualIntent': actual_intent,
                'isCorrect': is_correct,
                'score': data.get('intent', {}).get('score', 0),
                'candidates': data.get('candidates', []),
                'route': 'embedding'
            })
        except requests.exceptions.RequestException as e:
            results.append({
                'text': text,
                'expectedIntent': expected_intent or 'N/A',
                'error': str(e),
                'route': 'embedding'
            })
        except Exception as e:
            results.append({
                'text': text,
                'expectedIntent': expected_intent or 'N/A',
                'error': f'Unexpected error: {str(e)}',
                'route': 'embedding'
            })
    
    return results

print("Embedding API test function defined")


Embedding API test function defined


In [4]:
def test_slm_api(prompts: List[Dict[str, Any]], api_url: str = 'http://localhost:3000/api/intent/slm') -> List[Dict[str, Any]]:
    """Test the SLM API route."""
    results = []
    
    for prompt_obj in prompts:
        text = prompt_obj.get('text', '')
        expected_intent = prompt_obj.get('intent')
        
        try:
            response = requests.post(
                api_url,
                json={'message': text},
                headers={'Content-Type': 'application/json'},
                timeout=10
            )
            response.raise_for_status()
            
            data = response.json()
            classification = data.get('classification', {})
            actual_intent = classification.get('intent', 'N/A')
            is_correct = None
            if expected_intent:
                is_correct = actual_intent == expected_intent
            
            results.append({
                'text': text,
                'expectedIntent': expected_intent or 'N/A',
                'actualIntent': actual_intent,
                'isCorrect': is_correct,
                'classification': classification,
                'route': 'slm'
            })
        except requests.exceptions.RequestException as e:
            results.append({
                'text': text,
                'expectedIntent': expected_intent or 'N/A',
                'error': str(e),
                'route': 'slm'
            })
        except Exception as e:
            results.append({
                'text': text,
                'expectedIntent': expected_intent or 'N/A',
                'error': f'Unexpected error: {str(e)}',
                'route': 'slm'
            })
    
    return results

print("SLM API test function defined")


SLM API test function defined


In [5]:
def test_hybrid_api(prompts: List[Dict[str, Any]], api_url: str = 'http://localhost:3000/api/intent/hybrid') -> List[Dict[str, Any]]:
    """Test the hybrid API route."""
    results = []
    
    for prompt_obj in prompts:
        text = prompt_obj.get('text', '')
        expected_intent = prompt_obj.get('intent')
        
        try:
            response = requests.post(
                api_url,
                json={'message': text},
                headers={'Content-Type': 'application/json'},
                timeout=10
            )
            response.raise_for_status()
            
            data = response.json()
            vector_matches = data.get('vectorMatches', [])
            slm_result = data.get('slm', {})
            
            # Get intents from both approaches
            vector_intent = vector_matches[0].get('intent', 'N/A') if vector_matches else 'N/A'
            slm_intent = slm_result.get('intent', 'N/A')
            
            # Use SLM intent as primary (hybrid approach)
            actual_intent = slm_intent if slm_intent != 'N/A' else vector_intent
            is_correct = None
            if expected_intent:
                is_correct = actual_intent == expected_intent
            
            results.append({
                'text': text,
                'expectedIntent': expected_intent or 'N/A',
                'actualIntent': actual_intent,
                'vectorIntent': vector_intent,
                'slmIntent': slm_intent,
                'isCorrect': is_correct,
                'vectorMatches': vector_matches,
                'slmResult': slm_result,
                'route': 'hybrid'
            })
        except requests.exceptions.RequestException as e:
            results.append({
                'text': text,
                'expectedIntent': expected_intent or 'N/A',
                'error': str(e),
                'route': 'hybrid'
            })
        except Exception as e:
            results.append({
                'text': text,
                'expectedIntent': expected_intent or 'N/A',
                'error': f'Unexpected error: {str(e)}',
                'route': 'hybrid'
            })
    
    return results

print("Hybrid API test function defined")


Hybrid API test function defined


## Analysis Functions

Functions to analyze and display test results.


In [11]:
def analyze_results(results: List[Dict[str, Any]], route_name: str = '') -> Dict[str, Any]:
    """Print a summary of test results and return the computed metrics.

    Args:
        results: Result dicts. A dict containing an ``error`` key counts as
            a failed call; any other dict counts as a successful call and is
            read for ``expectedIntent``, ``actualIntent`` and ``isCorrect``.
        route_name: Optional label shown in the printed report header.

    Returns:
        Dict with ``total``, ``successful``, ``errors``, ``correct``,
        ``incorrect``, ``accuracy`` (percentage, or ``None`` when nothing
        was graded), ``intent_distribution`` (predicted intent -> count) and
        ``accuracy_by_intent`` (expected intent -> ``{'correct', 'total'}``).
    """
    route_label = f" ({route_name})" if route_name else ""

    success_count = 0
    error_count = 0
    correct_count = 0
    incorrect_count = 0
    intent_distribution = {}
    accuracy_by_intent = {}

    for result in results:
        if 'error' in result:
            error_count += 1
            continue

        success_count += 1
        expected = result.get('expectedIntent', 'N/A')
        actual = result.get('actualIntent', 'N/A')
        is_correct = result.get('isCorrect')

        # Track intent distribution
        if actual != 'N/A':
            intent_distribution[actual] = intent_distribution.get(actual, 0) + 1

        # Only prompts that carry an expected intent are graded.
        if expected and expected != 'N/A':
            if is_correct is True:
                correct_count += 1
            elif is_correct is False:
                incorrect_count += 1

            # Track accuracy by expected intent
            if is_correct is not None:
                stats = accuracy_by_intent.setdefault(expected, {'correct': 0, 'total': 0})
                stats['total'] += 1
                if is_correct:
                    stats['correct'] += 1

    # Errors are excluded from the accuracy denominator.
    graded_count = correct_count + incorrect_count
    accuracy = (correct_count / graded_count * 100) if graded_count > 0 else None

    # Identify the route in the report so summaries of different routes can
    # be told apart.
    print(f'\n=== TEST RESULTS SUMMARY{route_label} ===\n')

    print('\n=== SUMMARY STATISTICS ===')
    print(f'Total Prompts: {len(results)}')
    print(f'Successful: {success_count}')
    print(f'Errors: {error_count}')

    if accuracy is not None:
        print(f'\nAccuracy: {correct_count}/{graded_count} ({accuracy:.2f}%)')
        print(f'  Correct: {correct_count}')
        print(f'  Incorrect: {incorrect_count}')

    print('\nIntent Distribution (Predicted):')
    for intent, count in sorted(intent_distribution.items(), key=lambda x: x[1], reverse=True):
        print(f'  {intent}: {count}')

    if accuracy_by_intent:
        print('\nAccuracy by Intent:')
        for intent, stats in sorted(accuracy_by_intent.items()):
            acc = (stats['correct'] / stats['total'] * 100) if stats['total'] > 0 else 0
            print(f'  {intent}: {stats["correct"]}/{stats["total"]} ({acc:.2f}%)')

    return {
        'total': len(results),
        'successful': success_count,
        'errors': error_count,
        'correct': correct_count,
        'incorrect': incorrect_count,
        'accuracy': accuracy,
        'intent_distribution': intent_distribution,
        'accuracy_by_intent': accuracy_by_intent
    }

print("Results analysis function defined")


Results analysis function defined


## Run Tests

Execute tests against all API routes. Make sure your Next.js server is running (`npm run dev`) and reachable at `http://localhost:3000`, the default base URL used by the test functions.


In [12]:
# Run every loaded prompt through the embedding route, then summarise.
print("Testing Embedding API...")
embedding_results = test_embedding_api(test_prompts)
embedding_summary = analyze_results(embedding_results, 'Embedding')

# Bundle raw results, summary and a run timestamp for the optional export step.
embedding_test_data = dict(
    results=embedding_results,
    summary=embedding_summary,
    timestamp=datetime.now().isoformat(),
    route='embedding',
)


Testing Embedding API...

=== TEST RESULTS SUMMARY (Embedding) ===


=== SUMMARY STATISTICS ===
Total Prompts: 21
Successful: 21
Errors: 0

Accuracy: 21/21 (100.00%)
  Correct: 21
  Incorrect: 0

Intent Distribution (Predicted):
  direct_product_search: 3
  attribute_based_search: 3
  problem_solving_search: 3
  comparison_search: 3
  project_based_search: 3
  bulk_or_budget_search: 3
  price_query: 3

Accuracy by Intent:
  attribute_based_search: 3/3 (100.00%)
  bulk_or_budget_search: 3/3 (100.00%)
  comparison_search: 3/3 (100.00%)
  direct_product_search: 3/3 (100.00%)
  price_query: 3/3 (100.00%)
  problem_solving_search: 3/3 (100.00%)
  project_based_search: 3/3 (100.00%)


In [None]:
# Run every loaded prompt through the SLM route, then summarise.
print("Testing SLM API...")
slm_results = test_slm_api(test_prompts)
slm_summary = analyze_results(slm_results, 'SLM')

# Bundle raw results, summary and a run timestamp for the optional export step.
slm_test_data = dict(
    results=slm_results,
    summary=slm_summary,
    timestamp=datetime.now().isoformat(),
    route='slm',
)


In [None]:
# Run every loaded prompt through the hybrid route, then summarise.
print("Testing Hybrid API...")
hybrid_results = test_hybrid_api(test_prompts)
hybrid_summary = analyze_results(hybrid_results, 'Hybrid')

# Bundle raw results, summary and a run timestamp for the optional export step.
hybrid_test_data = dict(
    results=hybrid_results,
    summary=hybrid_summary,
    timestamp=datetime.now().isoformat(),
    route='hybrid',
)


## Compare Results

Compare performance across all three API routes.


In [None]:
# Compare all routes
print("\n=== ROUTE COMPARISON ===\n")

routes = [
    ('Embedding', embedding_summary),
    ('SLM', slm_summary),
    ('Hybrid', hybrid_summary)
]

print(f"{'Route':<15} {'Accuracy':<15} {'Correct':<10} {'Incorrect':<12} {'Errors':<10}")
print("-" * 65)

for route_name, summary in routes:
    accuracy = summary.get('accuracy')
    accuracy_str = f"{accuracy:.2f}%" if accuracy is not None else "N/A"
    correct = summary.get('correct', 0)
    incorrect = summary.get('incorrect', 0)
    errors = summary.get('errors', 0)
    
    print(f"{route_name:<15} {accuracy_str:<15} {correct:<10} {incorrect:<12} {errors:<10}")

# Find best route
best_route = max(routes, key=lambda x: x[1].get('accuracy', 0) if x[1].get('accuracy') is not None else 0)
print(f"\nBest performing route: {best_route[0]} ({best_route[1].get('accuracy', 0):.2f}% accuracy)")

## Export Results

Export test results to JSON files for further analysis.


In [None]:
def export_results(results: List[Dict[str, Any]], summary: Dict[str, Any], filename: str):
    """Write results and their summary to ``filename`` as JSON.

    The file holds the export timestamp, the number of results, the results
    themselves and the summary. The same dict is returned to the caller.
    """
    payload = dict(
        timestamp=datetime.now().isoformat(),
        total_prompts=len(results),
        results=results,
        summary=summary,
    )

    # ensure_ascii=False keeps non-ASCII prompt text readable in the file.
    with open(filename, 'w', encoding='utf-8') as out_file:
        json.dump(payload, out_file, indent=2, ensure_ascii=False)

    print(f' Results exported to {filename}')
    return payload

# Export all results
# Uncomment to export
# export_results(embedding_results, embedding_summary, 'embedding_test_results.json')
# export_results(slm_results, slm_summary, 'slm_test_results.json')
# export_results(hybrid_results, hybrid_summary, 'hybrid_test_results.json')

# Export combined comparison
# combined_output = {
#     'timestamp': datetime.now().isoformat(),
#     'embedding': embedding_test_data,
#     'slm': slm_test_data,
#     'hybrid': hybrid_test_data
# }
# with open('all_routes_test_results.json', 'w', encoding='utf-8') as f:
#     json.dump(combined_output, f, indent=2, ensure_ascii=False)
# print(' Combined results exported to all_routes_test_results.json')


## Detailed Analysis with Pandas

Display results in tabular format (optional, requires pandas).


In [None]:
# Display results in a more readable format using pandas (optional)
try:
    import pandas as pd
    
    # Create DataFrame for embedding results
    df_embedding = pd.DataFrame([
        {
            'text': r.get('text', 'N/A'),
            'expected_intent': r.get('expectedIntent', 'N/A'),
            'actual_intent': r.get('actualIntent', 'N/A'),
            'is_correct': r.get('isCorrect'),
            'score': r.get('score', 0),
            'error': r.get('error', '')
        }
        for r in embedding_results
    ])
    
    print("\n=== EMBEDDING RESULTS TABLE ===")
    print(df_embedding.to_string(index=False))
    
    # Create comparison DataFrame
    comparison_data = []
    for i, prompt_obj in enumerate(test_prompts):
        text = prompt_obj.get('text', '')
        expected = prompt_obj.get('intent', 'N/A')
        
        embedding_result = embedding_results[i] if i < len(embedding_results) else {}
        slm_result = slm_results[i] if i < len(slm_results) else {}
        hybrid_result = hybrid_results[i] if i < len(hybrid_results) else {}
        
        comparison_data.append({
            'text': text[:50] + '...' if len(text) > 50 else text,
            'expected': expected,
            'embedding': embedding_result.get('actualIntent', 'N/A'),
            'slm': slm_result.get('actualIntent', 'N/A'),
            'hybrid': hybrid_result.get('actualIntent', 'N/A'),
            'embedding_correct': embedding_result.get('isCorrect'),
            'slm_correct': slm_result.get('isCorrect'),
            'hybrid_correct': hybrid_result.get('isCorrect')
        })
    
    df_comparison = pd.DataFrame(comparison_data)
    print("\n=== ROUTE COMPARISON TABLE ===")
    print(df_comparison.to_string(index=False))
    
except ImportError:
    print("pandas not available. Install with: pip install pandas")
