# OCR Libraries Benchmark: Comprehensive Performance Analysis

## Overview
This notebook provides a detailed benchmark comparison of popular OCR (Optical Character Recognition) libraries:
- **Pytesseract** - Python wrapper for Google's Tesseract OCR engine
- **EasyOCR** - Ready-to-use OCR with 80+ language support
- **Keras-OCR** - OCR pipeline built with Keras
- **TrOCR** - Transformer-based OCR from Microsoft
- **docTR** - Document Text Recognition library

## Evaluation Metrics
- Character Error Rate (CER)
- Word Error Rate (WER)
- Processing Speed
- Memory Usage
- Language Support
- Accuracy on different text types

## 1. Environment Setup and Installation

In [1]:
# Install required packages
!pip install -q pytesseract pillow
!pip install -q easyocr
!pip install -q keras-ocr
!pip install -q transformers torch
!pip install -q python-doctr[torch]
!pip install -q pandas matplotlib seaborn tqdm
!pip install -q Levenshtein memory-profiler

# pytesseract is only a Python wrapper: the Tesseract engine itself must be installed separately and be on your PATH:
# Ubuntu/Debian: sudo apt-get install tesseract-ocr
# Windows: Download from https://github.com/UB-Mannheim/tesseract/wiki
# macOS: brew install tesseract


[notice] A new release of pip is available: 25.1.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip

[notice] A new release of pip is available: 25.1.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip

[notice] A new release of pip is available: 25.1.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip

[notice] A new release of pip is available: 25.1.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip

[notice] A new release of pip is available: 25.1.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip

[notice] A new release of pip is available: 25.1.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip

[notice] A new release of pip is available: 25.1.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
import os
import sys
import time
import json
import warnings
warnings.filterwarnings('ignore')

from pathlib import Path
from typing import List, Dict, Tuple, Any
from dataclasses import dataclass, field
from collections import defaultdict
import traceback
import psutil
import gc

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

from PIL import Image, ImageDraw, ImageFont
import cv2

# Metrics
import Levenshtein
from memory_profiler import memory_usage

# Global plotting defaults shared by every figure in this notebook
sns.set_style('whitegrid')
plt.rcParams.update({
    'figure.figsize': (12, 6),
    'font.size': 10,
})

## 2. Benchmark Configuration

In [3]:
@dataclass
class BenchmarkConfig:
    """Settings container for the OCR benchmark.

    Every list-valued field is built by a default factory, so each instance
    owns its own list and can be modified without affecting other instances.
    """

    # Text categories the benchmark is organised around.
    # ``[...].copy`` is a bound method that hands out a fresh list per instance.
    test_categories: List[str] = field(default_factory=[
        'printed_text',
        'handwritten',
        'scene_text',
        'distorted_text',
        'multi_language',
        'low_quality',
    ].copy)

    # Candidate image resolutions as (width, height) pairs.
    image_sizes: List[Tuple[int, int]] = field(default_factory=[
        (640, 480),
        (1280, 720),
        (1920, 1080),
    ].copy)

    # Language codes of interest.
    languages: List[str] = field(default_factory=['en', 'fr', 'de', 'es', 'zh'].copy)

    # Names of the metrics reported per test.
    metrics: List[str] = field(default_factory=[
        'accuracy',
        'cer',  # Character Error Rate
        'wer',  # Word Error Rate
        'processing_time',
        'memory_usage',
        'confidence_score',
    ].copy)

    # Where results (CSV, figures) and generated test images are stored.
    output_dir: Path = Path('./benchmark_results')
    data_dir: Path = Path('./benchmark_data')

config = BenchmarkConfig()
# Make sure both working directories exist before anything is written to them
for _benchmark_dir in (config.output_dir, config.data_dir):
    _benchmark_dir.mkdir(exist_ok=True)

## 3. Test Data Generation

In [4]:
class TestDataGenerator:
    """Generate synthetic text images (with ground truth) for OCR benchmarking.

    Each text in ``self.test_texts`` is rendered onto a white canvas with
    Pillow, optionally distorted, and saved below ``output_dir`` in one
    sub-directory per text category.
    """

    def __init__(self, output_dir: Path, seed: int = None):
        """
        Args:
            output_dir: Directory under which the category folders are created.
            seed: Optional seed for the random distortions (noise, rotation).
                ``None`` (the default) gives a different dataset on every
                run; pass an int to make the generated images reproducible.
        """
        self.output_dir = output_dir
        # Private generator so distortions do not depend on (or disturb)
        # NumPy's global random state.
        self._rng = np.random.default_rng(seed)
        self.test_texts = {
            'simple': [
                "The quick brown fox jumps over the lazy dog",
                "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
                "abcdefghijklmnopqrstuvwxyz",
                "0123456789",
                "Hello World! This is a test."
            ],
            'complex': [
                "Email: user@example.com | Phone: +1-234-567-8900",
                "Price: $99.99 (Save 20%!)",
                "Date: 2024-01-15 Time: 14:30:00",
                "Special chars: @#$%^&*()_+-=[]{}|;:',.<>?/"
            ],
            'paragraph': [
                """Lorem ipsum dolor sit amet, consectetur adipiscing elit. 
                Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.""",
                """Machine learning is a subset of artificial intelligence that 
                enables systems to learn and improve from experience."""
            ]
        }

    def create_text_image(self, text: str, font_size: int = 24,
                         distortion: str = None, image_size: Tuple[int, int] = (800, 200)):
        """Render ``text`` centred in black on a white RGB image.

        Args:
            text: Text to draw.
            font_size: Point size used when the TrueType font can be loaded.
            distortion: Optional distortion name forwarded to
                ``apply_distortion``; falsy values leave the image untouched.
            image_size: Canvas size as (width, height).

        Returns:
            The rendered (and possibly distorted) ``PIL.Image.Image``.
        """
        img = Image.new('RGB', image_size, color='white')
        draw = ImageDraw.Draw(img)

        # Arial is not installed everywhere; only a font-loading failure
        # should trigger the fallback, not arbitrary errors.
        try:
            font = ImageFont.truetype("arial.ttf", font_size)
        except (OSError, ImportError):
            font = ImageFont.load_default()

        # Centre the text using its rendered bounding box
        left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
        x = (image_size[0] - (right - left)) // 2
        y = (image_size[1] - (bottom - top)) // 2

        draw.text((x, y), text, fill='black', font=font)

        if distortion:
            img = self.apply_distortion(img, distortion)

        return img

    def apply_distortion(self, img: Image.Image, distortion_type: str) -> Image.Image:
        """Return a distorted copy of ``img``.

        Supported ``distortion_type`` values are ``'noise'``, ``'blur'``,
        ``'rotation'`` and ``'perspective'``. Any other value returns the
        image content unchanged.
        """
        img_array = np.array(img)

        if distortion_type == 'noise':
            # Add zero-mean Gaussian noise in a signed/float domain and clip
            # afterwards. Casting the noise itself to uint8 would wrap every
            # negative sample to a value near 255 and wash the image out.
            noise = self._rng.normal(0, 25, img_array.shape)
            noisy = img_array.astype(np.float32) + noise
            img_array = np.clip(noisy, 0, 255).astype(np.uint8)

        elif distortion_type == 'blur':
            img_array = cv2.GaussianBlur(img_array, (5, 5), 0)

        elif distortion_type == 'rotation':
            # Small random rotation about the image centre, padded with white
            angle = self._rng.uniform(-5, 5)
            center = (img_array.shape[1] // 2, img_array.shape[0] // 2)
            matrix = cv2.getRotationMatrix2D(center, angle, 1.0)
            img_array = cv2.warpAffine(img_array, matrix,
                                      (img_array.shape[1], img_array.shape[0]),
                                      borderValue=(255, 255, 255))

        elif distortion_type == 'perspective':
            # Pull the two bottom corners inwards by 10% of the width
            h, w = img_array.shape[:2]
            pts1 = np.float32([[0, 0], [w, 0], [0, h], [w, h]])
            pts2 = np.float32([[0, 0], [w, 0],
                              [int(0.1 * w), h],
                              [int(0.9 * w), h]])
            matrix = cv2.getPerspectiveTransform(pts1, pts2)
            img_array = cv2.warpPerspective(img_array, matrix, (w, h),
                                           borderValue=(255, 255, 255))

        return Image.fromarray(img_array)

    def generate_test_dataset(self):
        """Render and save every text in every variant.

        For each text one undistorted image plus a ``noise``, ``blur`` and
        ``rotation`` variant is written to ``<output_dir>/<category>/``.

        Returns:
            ``pd.DataFrame`` with one row per saved image and the columns
            ``path``, ``text`` (ground truth), ``category`` and ``distortion``.
        """
        # (file-name suffix, distortion passed to the renderer, label in the frame)
        variants = [('normal', None, 'none')]
        variants += [(name, name, name) for name in ('noise', 'blur', 'rotation')]

        test_data = []
        for category, texts in self.test_texts.items():
            category_dir = self.output_dir / category
            # parents=True so a not-yet-existing output_dir does not fail here
            category_dir.mkdir(parents=True, exist_ok=True)

            for i, text in enumerate(texts):
                for suffix, distortion, label in variants:
                    img = self.create_text_image(text, distortion=distortion)
                    img_path = category_dir / f"{category}_{i}_{suffix}.png"
                    img.save(img_path)
                    test_data.append({
                        'path': str(img_path),
                        'text': text,
                        'category': category,
                        'distortion': label
                    })

        return pd.DataFrame(test_data)

# Build the synthetic dataset and report how many images each group contains
generator = TestDataGenerator(output_dir=config.data_dir)
test_df = generator.generate_test_dataset()
images_per_group = test_df.groupby(['category', 'distortion']).size()
print(f"Generated {len(test_df)} test images")
print(images_per_group)

Generated 44 test images
category   distortion
complex    blur          4
           noise         4
           none          4
           rotation      4
paragraph  blur          2
           noise         2
           none          2
           rotation      2
simple     blur          5
           noise         5
           none          5
           rotation      5
dtype: int64


## 4. Evaluation Metrics

In [5]:
class OCRMetrics:
    """Stateless helpers that score an OCR hypothesis against a reference text."""

    @staticmethod
    def character_error_rate(reference: str, hypothesis: str) -> float:
        """Return the character-level edit distance divided by ``len(reference)``.

        For an empty reference the result is 0.0 when the hypothesis is empty
        as well and 1.0 otherwise.
        """
        ref_length = len(reference)
        if ref_length:
            return Levenshtein.distance(reference, hypothesis) / ref_length
        return 1.0 if hypothesis else 0.0

    @staticmethod
    def word_error_rate(reference: str, hypothesis: str) -> float:
        """Return the word-level edit distance divided by the reference word count.

        Both texts are split on whitespace. A reference without words scores
        0.0 against a hypothesis without words and 1.0 against anything else.
        """
        expected_words = reference.split()
        produced_words = hypothesis.split()
        if expected_words:
            edits = Levenshtein.distance(expected_words, produced_words)
            return edits / len(expected_words)
        return 1.0 if produced_words else 0.0

    @staticmethod
    def accuracy(reference: str, hypothesis: str) -> float:
        """Return 1.0 for an exact string match, otherwise 0.0."""
        return float(reference == hypothesis)

    @staticmethod
    def precision_recall_f1(reference: str, hypothesis: str) -> Dict[str, float]:
        """Return precision, recall and F1 over the *distinct* characters used.

        Both strings are reduced to sets of characters, so order and
        repetition are ignored. An empty hypothesis scores 0.0 on all three.
        """
        expected = set(reference)
        produced = set(hypothesis)

        if not produced:
            return {'precision': 0.0, 'recall': 0.0, 'f1': 0.0}

        shared = len(expected & produced)
        precision = shared / len(produced)
        recall = shared / len(expected) if expected else 0

        denominator = precision + recall
        f1 = 2 * (precision * recall) / denominator if denominator != 0 else 0

        return {'precision': precision, 'recall': recall, 'f1': f1}


metrics = OCRMetrics()

## 5. OCR Library Implementations

In [6]:
class OCRWrapper:
    """Common interface that every OCR library adapter implements.

    Subclasses override ``initialize`` and ``process_image``; ``cleanup`` is
    an optional hook that does nothing by default.
    """

    def __init__(self, name: str):
        # Engines start out unusable until initialize() succeeds
        self.is_initialized = False
        self.name = name

    def initialize(self):
        """Load the underlying OCR engine (must be overridden)."""
        raise NotImplementedError()

    def process_image(self, image_path: str) -> Dict[str, Any]:
        """Run OCR on the image at ``image_path`` (must be overridden)."""
        raise NotImplementedError()

    def cleanup(self):
        """Release engine resources; the base implementation is a no-op."""
        return None

In [7]:
class PytesseractWrapper(OCRWrapper):
    """Wrapper for Pytesseract"""

    def __init__(self):
        super().__init__("Pytesseract")

    def initialize(self):
        """Import pytesseract and verify that the Tesseract binary is reachable.

        Failures are reported on stdout and leave ``is_initialized`` False
        instead of raising.
        """
        try:
            import pytesseract
            self.pytesseract = pytesseract
            # Raises if the tesseract executable is missing / not on PATH
            self.pytesseract.get_tesseract_version()
            self.is_initialized = True
        except Exception as e:
            print(f"Failed to initialize Pytesseract: {e}")
            self.is_initialized = False

    def process_image(self, image_path: str) -> Dict[str, Any]:
        """Run Tesseract on one image.

        Returns:
            Dict with ``text`` (recognised words joined by single spaces),
            ``confidence`` (mean word confidence scaled to 0-1) and
            ``raw_data`` (the dict returned by ``image_to_data``). On failure
            the dict holds empty text, confidence 0 and an ``error`` message.
        """
        if not self.is_initialized:
            return {'text': '', 'confidence': 0, 'error': 'Not initialized'}

        try:
            # Context manager so the image file handle is released promptly
            with Image.open(image_path) as img:
                data = self.pytesseract.image_to_data(
                    img, output_type=self.pytesseract.Output.DICT
                )

            words = []
            confidences = []

            for word, raw_conf in zip(data['text'], data['conf']):
                if not word.strip():
                    continue
                words.append(word)
                # Depending on the pytesseract version 'conf' entries may be
                # ints, floats or numeric strings, so normalise before use.
                conf = float(raw_conf)
                # Tesseract uses -1 for "no confidence"; 0 is a real score
                if conf >= 0:
                    confidences.append(conf)

            text = ' '.join(words)
            avg_confidence = float(np.mean(confidences)) if confidences else 0.0

            return {
                'text': text,
                'confidence': avg_confidence / 100,  # Normalize to 0-1
                'raw_data': data
            }
        except Exception as e:
            return {'text': '', 'confidence': 0, 'error': str(e)}

In [None]:
class EasyOCRWrapper(OCRWrapper):
    """Wrapper for EasyOCR"""

    def __init__(self, languages=None):
        """
        Args:
            languages: Iterable of language codes handed to ``easyocr.Reader``.
                Defaults to ``['en']``. The value is copied, so the wrapper
                never shares (or mutates) a list owned by the caller.
        """
        super().__init__("EasyOCR")
        # A literal list as the default argument would be one object shared
        # by every instance; build a fresh list per wrapper instead.
        self.languages = ['en'] if languages is None else list(languages)

    def initialize(self):
        """Create the CPU-only EasyOCR reader.

        Failures are reported on stdout and leave ``is_initialized`` False
        instead of raising.
        """
        try:
            import easyocr
            self.reader = easyocr.Reader(self.languages, gpu=False)
            self.is_initialized = True
        except Exception as e:
            print(f"Failed to initialize EasyOCR: {e}")
            self.is_initialized = False

    def process_image(self, image_path: str) -> Dict[str, Any]:
        """Run EasyOCR on one image.

        Returns:
            Dict with ``text`` (detected fragments joined by spaces),
            ``confidence`` (mean fragment confidence) and ``boxes`` (the raw
            ``readtext`` result). An image without detections yields empty
            text and confidence 0; on failure an ``error`` message is added.
        """
        if not self.is_initialized:
            return {'text': '', 'confidence': 0, 'error': 'Not initialized'}

        try:
            results = self.reader.readtext(image_path)

            if not results:
                return {'text': '', 'confidence': 0}

            # Each result is unpacked as (bounding box, text, confidence)
            texts = [text for _bbox, text, _conf in results]
            confidences = [conf for _bbox, _text, conf in results]

            return {
                'text': ' '.join(texts),
                # Plain float rather than a NumPy scalar for easy serialisation
                'confidence': float(np.mean(confidences)),
                'boxes': results
            }
        except Exception as e:
            return {'text': '', 'confidence': 0, 'error': str(e)}

In [9]:
class KerasOCRWrapper(OCRWrapper):
    """Adapter exposing the Keras-OCR pipeline through the OCRWrapper API."""

    def __init__(self):
        super().__init__("Keras-OCR")

    def initialize(self):
        """Build the Keras-OCR pipeline; report failures on stdout."""
        try:
            import keras_ocr
            self.pipeline = keras_ocr.pipeline.Pipeline()
            self.is_initialized = True
        except Exception as e:
            print(f"Failed to initialize Keras-OCR: {e}")
            self.is_initialized = False

    def process_image(self, image_path: str) -> Dict[str, Any]:
        """Recognise the text in one image.

        Returns a dict with ``text`` (predicted words joined by spaces, in the
        order the pipeline returned them), a fixed placeholder ``confidence``
        and the raw ``predictions``. Errors are returned, not raised.
        """
        if not self.is_initialized:
            return {'text': '', 'confidence': 0, 'error': 'Not initialized'}

        try:
            import keras_ocr

            # The pipeline works on batches, so wrap the single image in a list
            loaded = keras_ocr.tools.read(image_path)
            groups = self.pipeline.recognize([loaded])

            if not groups or not groups[0]:
                return {'text': '', 'confidence': 0}

            predictions = groups[0]
            # Each prediction is a (word, box) pair; only the word is needed
            joined = ' '.join(word for word, _box in predictions)

            return {
                'text': joined,
                # Keras-OCR doesn't provide confidence scores directly,
                # so a constant placeholder is reported instead.
                'confidence': 0.5,
                'predictions': predictions
            }
        except Exception as e:
            return {'text': '', 'confidence': 0, 'error': str(e)}

In [None]:
class TrOCRWrapper(OCRWrapper):
    """Adapter exposing Microsoft's TrOCR model through the OCRWrapper API."""

    def __init__(self):
        super().__init__("TrOCR")

    def initialize(self):
        """Load the TrOCR processor and model; report failures on stdout."""
        try:
            from transformers import TrOCRProcessor, VisionEncoderDecoderModel

            # The small printed-text checkpoint keeps processing fast
            checkpoint = 'microsoft/trocr-small-printed'
            self.processor = TrOCRProcessor.from_pretrained(checkpoint)
            self.model = VisionEncoderDecoderModel.from_pretrained(checkpoint)
            self.is_initialized = True
        except Exception as e:
            print(f"Failed to initialize TrOCR: {e}")
            self.is_initialized = False

    def process_image(self, image_path: str) -> Dict[str, Any]:
        """Transcribe one image.

        Returns a dict with the decoded ``text`` and a fixed placeholder
        ``confidence``. Errors are returned in the dict, not raised.
        """
        if not self.is_initialized:
            return {'text': '', 'confidence': 0, 'error': 'Not initialized'}

        try:
            rgb_image = Image.open(image_path).convert('RGB')

            # Image -> pixel tensor -> generated token ids -> decoded string
            encoded = self.processor(images=rgb_image, return_tensors="pt")
            token_ids = self.model.generate(encoded.pixel_values)
            decoded = self.processor.batch_decode(
                token_ids, skip_special_tokens=True
            )

            return {
                'text': decoded[0],
                'confidence': 0.7,  # Placeholder confidence
            }
        except Exception as e:
            return {'text': '', 'confidence': 0, 'error': str(e)}

In [None]:
class DocTRWrapper(OCRWrapper):
    """Adapter exposing the docTR OCR predictor through the OCRWrapper API."""

    def __init__(self):
        super().__init__("docTR")

    def initialize(self):
        """Load the pretrained docTR predictor; report failures on stdout."""
        try:
            from doctr.io import DocumentFile
            from doctr.models import ocr_predictor

            self.model = ocr_predictor(pretrained=True)
            # Kept on the instance so process_image needs no re-import
            self.DocumentFile = DocumentFile
            self.is_initialized = True
        except Exception as e:
            print(f"Failed to initialize docTR: {e}")
            self.is_initialized = False

    def process_image(self, image_path: str) -> Dict[str, Any]:
        """Run docTR on one image.

        Returns a dict with ``text`` (all words joined by spaces),
        ``confidence`` (mean word confidence, 0 when nothing was found) and
        the raw ``result``. Errors are returned in the dict, not raised.
        """
        if not self.is_initialized:
            return {'text': '', 'confidence': 0, 'error': 'Not initialized'}

        try:
            document = self.DocumentFile.from_images(image_path)
            result = self.model(document)

            # Flatten the page -> block -> line -> word hierarchy
            found_words = [
                word
                for page in result.pages
                for block in page.blocks
                for line in block.lines
                for word in line.words
            ]
            word_confidences = [word.confidence for word in found_words]

            return {
                'text': ' '.join(word.value for word in found_words),
                'confidence': np.mean(word_confidences) if word_confidences else 0,
                'result': result
            }
        except Exception as e:
            return {'text': '', 'confidence': 0, 'error': str(e)}

## 6. Benchmark Runner

In [12]:
class BenchmarkRunner:
    """Run benchmarks for all OCR libraries"""

    def __init__(self, ocr_wrappers: List[OCRWrapper], test_data: pd.DataFrame):
        """
        Args:
            ocr_wrappers: Wrappers to benchmark; each is initialised by
                ``run_benchmarks``.
            test_data: Frame with the columns ``path``, ``text``,
                ``category`` and ``distortion`` (one row per test image).
        """
        self.ocr_wrappers = ocr_wrappers
        self.test_data = test_data
        self.results = []
        self.metrics_calc = OCRMetrics()

    def run_single_test(self, ocr_wrapper: OCRWrapper, image_path: str,
                       ground_truth: str) -> Dict[str, Any]:
        """Run one wrapper on one image and score the output.

        The OCR call is executed exactly once: ``memory_usage`` samples the
        process memory while it runs and hands back its return value
        (``retval=True``). The wall-clock time is measured around the OCR
        call itself, so it contains neither a second OCR pass nor the
        profiler's start-up overhead.

        Returns:
            Dict with the predicted text, confidence, CER, WER, accuracy,
            precision/recall/F1, processing time in seconds, peak sampled
            memory and the wrapper's error message (``None`` if absent).
        """
        timing = {}

        def timed_process():
            started = time.perf_counter()
            try:
                return ocr_wrapper.process_image(image_path)
            finally:
                timing['elapsed'] = time.perf_counter() - started

        mem_usage, result = memory_usage(
            timed_process, interval=0.1, timeout=30, retval=True
        )
        processing_time = timing['elapsed']

        # Extract text from result
        predicted_text = result.get('text', '')
        confidence = result.get('confidence', 0)

        # Calculate metrics
        cer = self.metrics_calc.character_error_rate(ground_truth, predicted_text)
        wer = self.metrics_calc.word_error_rate(ground_truth, predicted_text)
        accuracy = self.metrics_calc.accuracy(ground_truth, predicted_text)
        pr_metrics = self.metrics_calc.precision_recall_f1(ground_truth, predicted_text)

        return {
            'predicted_text': predicted_text,
            'confidence': confidence,
            'cer': cer,
            'wer': wer,
            'accuracy': accuracy,
            'precision': pr_metrics['precision'],
            'recall': pr_metrics['recall'],
            'f1': pr_metrics['f1'],
            'processing_time': processing_time,
            'memory_usage': max(mem_usage) if mem_usage else 0,
            'error': result.get('error', None)
        }

    def run_benchmarks(self):
        """Initialise every wrapper, run all tests and return the results.

        Wrappers that fail to initialise are skipped. A test that raises is
        recorded as a row holding only its metadata and the error message.
        Rows are appended to ``self.results``.

        Returns:
            ``pd.DataFrame`` built from ``self.results``.
        """
        print("Initializing OCR libraries...")
        for wrapper in self.ocr_wrappers:
            print(f"  Initializing {wrapper.name}...")
            wrapper.initialize()
            if wrapper.is_initialized:
                print(f"    ✓ {wrapper.name} initialized successfully")
            else:
                print(f"    ✗ {wrapper.name} failed to initialize")

        print("\nRunning benchmarks...")

        for _, row in tqdm(self.test_data.iterrows(), total=len(self.test_data)):
            image_path = row['path']
            ground_truth = row['text']

            for wrapper in self.ocr_wrappers:
                if not wrapper.is_initialized:
                    continue

                # Metadata attached to the row whether the test succeeds or not
                metadata = {
                    'library': wrapper.name,
                    'image_path': image_path,
                    'ground_truth': ground_truth,
                    'category': row['category'],
                    'distortion': row['distortion']
                }

                try:
                    test_result = self.run_single_test(wrapper, image_path, ground_truth)
                except Exception as e:
                    print(f"\nError testing {wrapper.name} on {image_path}: {e}")
                    self.results.append({**metadata, 'error': str(e)})
                else:
                    test_result.update(metadata)
                    self.results.append(test_result)

        # Cleanup
        for wrapper in self.ocr_wrappers:
            wrapper.cleanup()

        return pd.DataFrame(self.results)

## 7. Run Benchmarks

In [None]:
# One wrapper instance per library under test
ocr_wrappers = [
    wrapper_cls()
    for wrapper_cls in (
        PytesseractWrapper,
        EasyOCRWrapper,
        KerasOCRWrapper,
        TrOCRWrapper,
        DocTRWrapper,
    )
]

# Execute the whole suite against the generated dataset
runner = BenchmarkRunner(ocr_wrappers, test_df)
results_df = runner.run_benchmarks()

# Persist the raw per-test rows for later analysis
results_path = config.output_dir / 'benchmark_results.csv'
results_df.to_csv(results_path, index=False)
print(f"\nResults saved to {results_path}")

Initializing OCR libraries...
  Initializing Pytesseract...
Failed to initialize Pytesseract: tesseract is not installed or it's not in your PATH. See README file for more information.
    ✗ Pytesseract failed to initialize
  Initializing EasyOCR...


Using CPU. Note: This module is much faster with a GPU.
Downloading detection model, please wait. This may take several minutes depending upon your network connection.


Progress: |██████████████████████████████████████████████████| 100.0% Complete

Downloading recognition model, please wait. This may take several minutes depending upon your network connection.


Progress: |██████████████████████████████████████████████████| 100.0% Complete    ✓ EasyOCR initialized successfully
  Initializing Keras-OCR...


## 8. Analysis and Visualization

In [None]:
# Load results if needed
# results_df = pd.read_csv(config.output_dir / 'benchmark_results.csv')

# Keep only rows that carry the core metrics (rows from failed tests lack them)
required_columns = ['cer', 'wer', 'processing_time']
results_clean = results_df.dropna(subset=required_columns)

# Per-library aggregation plan: full spread for the error rates,
# mean (and std for timing) for everything else
aggregations = {
    'cer': ['mean', 'std', 'min', 'max'],
    'wer': ['mean', 'std', 'min', 'max'],
    'accuracy': 'mean',
    'processing_time': ['mean', 'std'],
    'memory_usage': 'mean',
    'confidence': 'mean',
}
summary_stats = results_clean.groupby('library').agg(aggregations).round(3)

banner = "="*80
print("\n" + banner)
print("BENCHMARK SUMMARY")
print(banner)
print(summary_stats)

In [None]:
# Visualization functions
def plot_performance_comparison():
    """Draw, save and show a 2x3 grid of per-library comparison plots.

    Reads the module-level ``results_clean`` frame and writes
    ``performance_comparison.png`` into ``config.output_dir``.
    """
    _, axes = plt.subplots(2, 3, figsize=(18, 10))

    def draw_error_boxplot(ax, column, title, ylabel):
        # Distribution of one error metric, one box per library
        results_clean.boxplot(column=column, by='library', ax=ax)
        ax.set_title(title)
        ax.set_xlabel('Library')
        ax.set_ylabel(ylabel)
        ax.grid(True, alpha=0.3)

    def draw_mean_bars(ax, column, color, title, xlabel):
        # Horizontal bars of the per-library mean, smallest value first
        means = results_clean.groupby('library')[column].mean().sort_values()
        means.plot(kind='barh', ax=ax, color=color)
        ax.set_title(title)
        ax.set_xlabel(xlabel)
        ax.grid(True, alpha=0.3)

    def draw_grouped_bars(ax, value, index, title, xlabel, ylabel):
        # Mean of `value` per `index` group, one bar per library
        table = results_clean.pivot_table(
            values=value,
            index=index,
            columns='library',
            aggfunc='mean'
        )
        table.plot(kind='bar', ax=ax)
        ax.set_title(title)
        ax.set_xlabel(xlabel)
        ax.set_ylabel(ylabel)
        ax.legend(title='Library', bbox_to_anchor=(1.05, 1), loc='upper left')
        ax.grid(True, alpha=0.3)

    # Top row: error rates and speed
    draw_error_boxplot(axes[0, 0], 'cer',
                       'Character Error Rate by Library', 'CER (lower is better)')
    draw_error_boxplot(axes[0, 1], 'wer',
                       'Word Error Rate by Library', 'WER (lower is better)')
    draw_mean_bars(axes[0, 2], 'processing_time', 'skyblue',
                   'Average Processing Time', 'Time (seconds)')

    # Bottom row: breakdowns by category / distortion, and memory
    draw_grouped_bars(axes[1, 0], 'accuracy', 'category',
                      'Accuracy by Text Category', 'Category', 'Accuracy')
    draw_grouped_bars(axes[1, 1], 'cer', 'distortion',
                      'CER by Distortion Type', 'Distortion', 'CER (lower is better)')
    draw_mean_bars(axes[1, 2], 'memory_usage', 'coral',
                   'Average Memory Usage', 'Memory (MB)')

    plt.suptitle('OCR Library Performance Comparison', fontsize=16, y=1.02)
    plt.tight_layout()
    plt.savefig(config.output_dir / 'performance_comparison.png', dpi=300, bbox_inches='tight')
    plt.show()

plot_performance_comparison()

In [None]:
# Create a performance heatmap
def plot_performance_heatmap():
    """Draw, save and show a heatmap of min-max normalised mean metrics.

    For every library the mean CER, WER, processing time and memory usage is
    computed from the module-level ``results_clean`` frame. Each metric is
    then rescaled to 0-1 across libraries (0 = best/lowest, 1 = worst/highest)
    and the figure is written to ``performance_heatmap.png`` in
    ``config.output_dir``. Nothing is drawn when there are no results.
    """
    # Prepare data for heatmap
    metrics_for_heatmap = ['cer', 'wer', 'processing_time', 'memory_usage']

    # Calculate mean values for each metric
    heatmap_data = results_clean.groupby('library')[metrics_for_heatmap].mean()

    if heatmap_data.empty:
        # An empty frame would make seaborn fail on an empty matrix
        print("No benchmark results available; skipping performance heatmap")
        return

    # Min-max normalise each metric; all metrics are "lower is better",
    # so no inversion is needed: the best library maps to 0.
    heatmap_norm = heatmap_data.copy()
    for col in heatmap_norm.columns:
        min_val = heatmap_norm[col].min()
        max_val = heatmap_norm[col].max()
        if max_val > min_val:
            heatmap_norm[col] = (heatmap_norm[col] - min_val) / (max_val - min_val)
        elif max_val == min_val:
            # Constant metric (e.g. only one library ran): every library ties,
            # so show 0 instead of leaving raw values on the 0-1 colour scale.
            heatmap_norm[col] = heatmap_norm[col] - min_val

    # Create heatmap
    plt.figure(figsize=(10, 6))
    sns.heatmap(heatmap_norm.T, annot=True, fmt='.2f', cmap='RdYlGn_r',
                cbar_kws={'label': 'Normalized Score (lower is better)'})
    plt.title('OCR Library Performance Heatmap\n(Normalized Metrics - Lower is Better)')
    plt.xlabel('Library')
    plt.ylabel('Metric')
    plt.tight_layout()
    plt.savefig(config.output_dir / 'performance_heatmap.png', dpi=300, bbox_inches='tight')
    plt.show()

plot_performance_heatmap()

In [None]:
# Create a radar chart for overall comparison
def plot_radar_chart():
    """Draw, save and show a radar chart with five scores per library.

    Scores are derived from the module-level ``results_clean`` frame and are
    all "1 is best": mean accuracy, speed and memory efficiency (one minus
    the library mean divided by the largest single measurement), robustness
    (one minus mean CER) and mean confidence. Every score is clamped to
    [0, 1] so it fits the fixed radial axis. The figure is written to
    ``radar_comparison.png`` in ``config.output_dir``.
    """
    from math import pi

    # Axis labels for the radar chart
    axis_labels = ['Accuracy', 'Speed', 'Memory\nEfficiency', 'Robustness', 'Confidence']

    def clamp_unit(score):
        # CER can exceed 1 (hypothesis longer than reference), which would
        # make "1 - CER" negative; missing values are drawn as 0.
        if pd.isna(score):
            return 0.0
        return float(min(max(score, 0.0), 1.0))

    def inverse_share(mean_value, worst_value):
        # A non-positive maximum means no library used any of the resource,
        # so all of them share the best score (and division by 0 is avoided).
        if worst_value <= 0:
            return 1.0
        return 1 - mean_value / worst_value

    # Loop-invariant reference values: the worst single measurement overall
    worst_time = results_clean['processing_time'].max()
    worst_memory = results_clean['memory_usage'].max()

    # Calculate scores for each library
    library_scores = {}
    for lib in results_clean['library'].unique():
        lib_data = results_clean[results_clean['library'] == lib]

        raw_scores = [
            lib_data['accuracy'].mean(),
            inverse_share(lib_data['processing_time'].mean(), worst_time),
            inverse_share(lib_data['memory_usage'].mean(), worst_memory),
            1 - lib_data['cer'].mean(),  # Inverse of error rate
            lib_data['confidence'].mean(),
        ]
        library_scores[lib] = [clamp_unit(score) for score in raw_scores]

    # One angle per axis, repeated at the end to close the polygon
    angles = [n / len(axis_labels) * 2 * pi for n in range(len(axis_labels))]
    angles += angles[:1]

    fig, ax = plt.subplots(figsize=(10, 10), subplot_kw=dict(projection='polar'))

    colors = plt.cm.Set2(range(len(library_scores)))

    for idx, (lib, scores) in enumerate(library_scores.items()):
        # Build a closed copy instead of mutating the stored score list
        closed_scores = scores + scores[:1]
        ax.plot(angles, closed_scores, 'o-', linewidth=2, label=lib, color=colors[idx])
        ax.fill(angles, closed_scores, alpha=0.25, color=colors[idx])

    ax.set_xticks(angles[:-1])
    ax.set_xticklabels(axis_labels)
    ax.set_ylim(0, 1)
    ax.set_yticks([0.2, 0.4, 0.6, 0.8, 1.0])
    ax.set_yticklabels(['0.2', '0.4', '0.6', '0.8', '1.0'])
    ax.grid(True)

    plt.legend(loc='upper right', bbox_to_anchor=(1.3, 1.1))
    plt.title('OCR Library Comparison - Radar Chart', size=16, y=1.08)
    plt.tight_layout()
    plt.savefig(config.output_dir / 'radar_comparison.png', dpi=300, bbox_inches='tight')
    plt.show()

plot_radar_chart()

## 9. Detailed Performance Report

In [None]:
def _best_library(per_library_means, *, highest=False):
    """Return the index label with the best mean, or "N/A" if there is none.

    NaN means are ignored. ``highest=True`` picks the maximum, otherwise the
    minimum is picked.
    """
    valid = per_library_means.dropna()
    if valid.empty:
        return "N/A"
    return valid.idxmax() if highest else valid.idxmin()


def generate_performance_report():
    """Generate a detailed performance report.

    Reads the module-level ``results_clean`` DataFrame (columns used:
    ``library``, ``category``, ``distortion``, ``accuracy``, ``cer``, ``wer``,
    ``processing_time``, ``memory_usage``, ``confidence``) and builds a plain
    text report with one section per library followed by recommendations.

    The report is written to ``config.output_dir / 'benchmark_report.txt'``
    (UTF-8), printed, and returned.

    Returns:
        str: The full report text.
    """
    # Per-library means are computed once and reused for both the "Strengths"
    # badges and the recommendations, instead of re-grouping in every loop
    # iteration.
    by_library = results_clean.groupby('library')
    mean_time = by_library['processing_time'].mean()
    mean_memory = by_library['memory_usage'].mean()
    mean_accuracy = by_library['accuracy'].mean()
    mean_cer = by_library['cer'].mean()

    # (per-library means, best value, badge). A library earns the badge when
    # its entry equals the best value *of the same Series*; comparing against
    # a mean computed through a different code path is not guaranteed to be
    # bit-for-bit equal. Ties still award the badge to every tied library.
    strength_checks = [
        (mean_time, mean_time.min(), "  ✓ Fastest processing speed"),
        (mean_memory, mean_memory.min(), "  ✓ Most memory efficient"),
        (mean_accuracy, mean_accuracy.max(), "  ✓ Highest accuracy"),
        (mean_cer, mean_cer.min(), "  ✓ Lowest character error rate"),
    ]

    def breakdown_lines(frame, column):
        """Return one accuracy/CER line per distinct value of ``column``."""
        lines = []
        for value in frame[column].unique():
            subset = frame[frame[column] == value]
            lines.append(f"  {value}: Accuracy={subset['accuracy'].mean():.2%}, "
                         f"CER={subset['cer'].mean():.3f}")
        return lines

    report = [
        "="*80,
        "OCR LIBRARY BENCHMARK REPORT",
        "="*80,
        "",
    ]

    for lib in results_clean['library'].unique():
        lib_data = results_clean[results_clean['library'] == lib]

        report.append(f"\n{lib}")
        report.append("-" * len(lib))

        # Overall Performance
        report.append(f"Overall Accuracy: {lib_data['accuracy'].mean():.2%}")
        report.append(f"Average CER: {lib_data['cer'].mean():.3f}")
        report.append(f"Average WER: {lib_data['wer'].mean():.3f}")
        report.append(f"Average Processing Time: {lib_data['processing_time'].mean():.3f}s")
        report.append(f"Average Memory Usage: {lib_data['memory_usage'].mean():.1f} MB")
        report.append(f"Average Confidence: {lib_data['confidence'].mean():.2%}")

        # Performance by Category
        report.append("\nPerformance by Category:")
        report.extend(breakdown_lines(lib_data, 'category'))

        # Performance by Distortion
        report.append("\nPerformance by Distortion:")
        report.extend(breakdown_lines(lib_data, 'distortion'))

        # Strengths
        report.append("\nStrengths:")
        for means, best, badge in strength_checks:
            # .get() yields None for a library missing from the grouped means,
            # which simply never matches.
            if means.get(lib) == best:
                report.append(badge)

        report.append("")

    # Summary Recommendations
    report.append("\n" + "="*80)
    report.append("RECOMMENDATIONS")
    report.append("="*80)

    # Find best library for each use case
    best_accuracy = _best_library(mean_accuracy, highest=True)
    best_speed = _best_library(mean_time)
    best_memory = _best_library(mean_memory)

    report.append(f"\nBest for Accuracy: {best_accuracy}")
    report.append(f"Best for Speed: {best_speed}")
    report.append(f"Best for Memory Efficiency: {best_memory}")

    # Handle distorted text. When no distorted samples exist the grouped
    # Series is empty and the recommendation falls back to "N/A" instead of
    # raising from idxmin().
    distorted_data = results_clean[results_clean['distortion'] != 'none']
    best_robust = _best_library(distorted_data.groupby('library')['cer'].mean())
    report.append(f"Best for Distorted Text: {best_robust}")

    report_text = "\n".join(report)

    # Save report. The text contains non-ASCII characters ("✓"), so the
    # encoding is pinned to UTF-8; the platform default (e.g. cp1252 on
    # Windows) cannot encode them.
    with open(config.output_dir / 'benchmark_report.txt', 'w', encoding='utf-8') as f:
        f.write(report_text)

    print(report_text)
    return report_text

# Build, save and print the report; the returned text is kept in `report`.
report = generate_performance_report()

## 10. Export Results

In [None]:
# Create comprehensive results package
def export_benchmark_results():
    """Export all benchmark results in various formats.

    Reads the module-level ``results_df`` and ``results_clean`` DataFrames and
    writes the following files into ``config.output_dir``:

    - ``detailed_results.csv``: every row of ``results_df``.
    - ``summary_statistics.csv``: per-library mean/std of each metric.
    - ``benchmark_results.json``: metadata, all rows, and the summary with
      flattened ``"<metric>_<stat>"`` keys.
    - ``library_rankings.csv``: per-metric ranks, their average
      (``Overall Score``, lower is better) and the resulting ``Final Rank``.

    The ranking table and the list of exported files are printed as well.
    """
    output_dir = config.output_dir

    # 1. Detailed CSV
    results_df.to_csv(output_dir / 'detailed_results.csv', index=False)

    # 2. Summary statistics
    summary_metrics = ['accuracy', 'cer', 'wer', 'processing_time',
                       'memory_usage', 'confidence']
    summary_df = results_clean.groupby('library').agg(
        {metric: ['mean', 'std'] for metric in summary_metrics}
    )
    summary_df.to_csv(output_dir / 'summary_statistics.csv')

    # 3. JSON format for programmatic access
    # The aggregated frame has (metric, stat) tuple column labels. json.dump
    # rejects tuple dictionary keys (``default=`` only covers values), so the
    # labels are flattened to "metric_stat" strings for the JSON copy only.
    summary_flat = summary_df.copy()
    summary_flat.columns = [f"{metric}_{stat}" for metric, stat in summary_df.columns]

    results_json = {
        'metadata': {
            'total_tests': len(results_df),
            'libraries_tested': list(results_df['library'].unique()),
            'categories': list(results_df['category'].unique()),
            'distortions': list(results_df['distortion'].unique())
        },
        'results': results_df.to_dict(orient='records'),
        'summary': summary_flat.to_dict()
    }

    with open(output_dir / 'benchmark_results.json', 'w', encoding='utf-8') as f:
        json.dump(results_json, f, indent=2, default=str)

    # 4. Create a ranking table
    # rank column -> (metric, ascending). Ascending order means the lowest
    # mean gets rank 1 (time, memory, CER); accuracy is ranked descending.
    rank_spec = {
        'Accuracy Rank': ('accuracy', False),
        'Speed Rank': ('processing_time', True),
        'Memory Rank': ('memory_usage', True),
        'CER Rank': ('cer', True),
    }
    rank_cols = list(rank_spec)

    ranking_df = pd.DataFrame({'Library': list(results_clean['library'].unique())})
    grouped = results_clean.groupby('library')

    for rank_col, (metric, ascending) in rank_spec.items():
        ordered = grouped[metric].mean().sort_values(ascending=ascending)
        position = {lib: rank for rank, lib in enumerate(ordered.index, 1)}
        ranking_df[rank_col] = ranking_df['Library'].map(position)

    # Calculate overall score (lower is better). Averaged ranks tie easily,
    # so a stable sort keeps the final order deterministic.
    ranking_df['Overall Score'] = ranking_df[rank_cols].mean(axis=1)
    ranking_df = ranking_df.sort_values('Overall Score', kind='mergesort')
    ranking_df['Final Rank'] = range(1, len(ranking_df) + 1)

    ranking_df.to_csv(output_dir / 'library_rankings.csv', index=False)

    print("\n" + "="*80)
    print("FINAL RANKINGS")
    print("="*80)
    print(ranking_df.to_string(index=False))

    print(f"\nAll results exported to: {config.output_dir}")
    # Includes the artifacts produced by the report and plotting steps, which
    # are not written by this function.
    exported_files = (
        'detailed_results.csv',
        'summary_statistics.csv',
        'benchmark_results.json',
        'library_rankings.csv',
        'benchmark_report.txt',
        'performance_comparison.png',
        'performance_heatmap.png',
        'radar_comparison.png',
    )
    for filename in exported_files:
        print(f"  - {filename}")

# Write the CSV/JSON exports to config.output_dir and print the final rankings.
export_benchmark_results()