I'll help you create an OCR system for restaurant bills/menus. Here's a complete Jupyter notebook for testing OCR accuracy and a FastAPI implementation:

## 1. OCR Algorithm Development Notebook

In [4]:
# !pip install opencv-python  # the PyPI package that provides the cv2 module

In [5]:
# restaurant_ocr_testing.ipynb
import json
import re
import time
import warnings

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pytesseract
from PIL import Image
# Suppress all warnings so library noise does not clutter the cell output.
warnings.filterwarnings(action='ignore')

# Tesseract binary location: uncomment and adjust the line below on Windows.
# On Linux/Mac the executable is usually already on PATH.
# pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'  # Windows

# Banner: title followed by a 50-character rule.
print("Restaurant Bill/Menu OCR Testing System\n" + "=" * 50)

ModuleNotFoundError: No module named 'cv2'

### Image Preprocessing Functions

In [None]:
def preprocess_image(image_path):
    """
    Load an image from disk and derive an OCR-ready binary version of it.

    Returns a tuple ``(img, gray, processed)``: the image as returned by
    ``cv2.imread``, its grayscale conversion, and the grayscale image after
    median blurring, Otsu thresholding and a morphological close then open.

    Raises ValueError when ``cv2.imread`` returns None for ``image_path``.
    """
    img = cv2.imread(image_path)
    if img is None:
        raise ValueError(f"Could not load image from {image_path}")

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Median blur (aperture 3) to suppress speckle, then Otsu binarisation;
    # only the thresholded image (second return value) is needed.
    binary = cv2.threshold(
        cv2.medianBlur(gray, 3),
        0,
        255,
        cv2.THRESH_BINARY + cv2.THRESH_OTSU,
    )[1]

    # Close, then open, using the same structuring element.
    # NOTE(review): a 1x1 kernel appears to make both operations no-ops —
    # confirm whether a larger kernel was intended.
    structuring_element = np.ones((1, 1), np.uint8)
    processed = binary
    for operation in (cv2.MORPH_CLOSE, cv2.MORPH_OPEN):
        processed = cv2.morphologyEx(processed, operation, structuring_element)

    return img, gray, processed

def display_images(original, processed, title="Image Comparison"):
    """
    Display original and processed images side by side

    Args:
        original: colour image in BGR channel order (it is converted to RGB
            before being shown).
        processed: image rendered with the gray colormap.
        title: heading drawn above both panels.
    """
    fig, axes = plt.subplots(1, 2, figsize=(15, 6))

    # The title argument was previously accepted but never rendered.
    fig.suptitle(title)

    axes[0].imshow(cv2.cvtColor(original, cv2.COLOR_BGR2RGB))
    axes[0].set_title('Original Image')
    axes[0].axis('off')

    axes[1].imshow(processed, cmap='gray')
    axes[1].set_title('Processed Image')
    axes[1].axis('off')

    plt.tight_layout()
    plt.show()

### OCR Extraction Functions

In [None]:
def extract_text_with_tesseract(image, config='--psm 6'):
    """
    Run Tesseract on ``image`` and return the recognised text with leading
    and trailing whitespace removed. ``config`` is forwarded to pytesseract.
    """
    raw_output = pytesseract.image_to_string(image, config=config)
    cleaned = raw_output.strip()
    return cleaned

def extract_detailed_data(image):
    """
    Return pytesseract's ``image_to_data`` result for ``image`` in dict form.
    """
    return pytesseract.image_to_data(
        image,
        output_type=pytesseract.Output.DICT,
    )

def extract_text_with_different_psm(image_path):
    """
    OCR one image under several Tesseract page segmentation modes.

    The image is preprocessed once; the result maps each PSM number to a dict
    holding the mode's description, the extracted text and its line count.
    """
    psm_descriptions = {
        3: 'Fully automatic page segmentation',
        4: 'Assume single column of text',
        6: 'Assume uniform block of text',
        8: 'Single word',
        11: 'Sparse text'
    }

    # Only the fully processed image (third element) is fed to Tesseract.
    processed_img = preprocess_image(image_path)[2]

    def run_mode(psm, description):
        # One OCR pass for a single segmentation mode.
        text = extract_text_with_tesseract(processed_img, f'--psm {psm}')
        return {
            'description': description,
            'text': text,
            'line_count': text.count('\n') + 1,
        }

    return {
        psm: run_mode(psm, description)
        for psm, description in psm_descriptions.items()
    }

### Restaurant Bill Parser

In [None]:
class RestaurantBillParser:
    """
    Convert raw OCR text from a restaurant bill or menu into structured dicts.

    Attributes:
        menu_items_keywords: common food/drink words (not consulted by the
            parsing methods of this class).
        price_pattern: regex source for a number with an optional leading
            ``$`` and up to two decimals.
        quantity_pattern: regex source capturing the count in ``"<n> x"``.
    """

    def __init__(self):
        self.menu_items_keywords = [
            'burger', 'pizza', 'pasta', 'salad', 'soup', 'sandwich',
            'chicken', 'beef', 'fish', 'rice', 'noodles', 'fries',
            'coffee', 'tea', 'juice', 'water', 'soda', 'beer', 'wine'
        ]

        self.price_pattern = r'\$?\d+\.?\d{0,2}'
        self.quantity_pattern = r'(\d+)\s*x'

    def parse_bill_text(self, text):
        """
        Parse OCR text into structured bill data

        Every non-empty line that contains a number is classified as either a
        summary line (subtotal / tax / total) or a purchased item; summary
        lines are never reported as items.

        Args:
            text: raw OCR output with lines separated by ``\\n``.

        Returns:
            dict with keys ``restaurant_name``, ``items``, ``subtotal``,
            ``tax``, ``total``, ``date`` and ``raw_text``. Each item is a
            dict with ``name``, ``quantity``, ``unit_price`` and
            ``total_price``. ``restaurant_name`` and ``date`` are left empty.
        """
        price_re = re.compile(self.price_pattern)
        quantity_re = re.compile(self.quantity_pattern, re.IGNORECASE)

        parsed_data = {
            'restaurant_name': '',
            'items': [],
            'subtotal': 0,
            'tax': 0,
            'total': 0,
            'date': '',
            'raw_text': text
        }
        items = []

        for raw_line in text.split('\n'):
            line = raw_line.strip()
            if not line:
                continue

            prices = price_re.findall(line)
            if not prices:
                continue

            # The amount is the right-most number, so "Tax 8% 1.08" yields
            # 1.08 rather than the percentage.
            amount = float(prices[-1].replace('$', ''))
            lowered = line.lower()

            # Summary lines are handled first so they are not also recorded
            # as items. 'subtotal' is tested before 'total' because the
            # latter is a substring of the former.
            if 'subtotal' in lowered:
                parsed_data['subtotal'] = amount
                continue
            if 'tax' in lowered:
                parsed_data['tax'] = amount
                continue
            if 'total' in lowered:
                parsed_data['total'] = amount
                continue

            quantity = 1
            name_source = line
            quantity_match = quantity_re.search(line)
            if quantity_match:
                # A zero count (e.g. the "00 x" inside "10.00 x") would
                # divide by zero below, so fall back to 1.
                quantity = int(quantity_match.group(1)) or 1
                # Drop the "<n> x" marker so it does not leak into the name.
                name_source = (
                    line[:quantity_match.start()]
                    + ' '
                    + line[quantity_match.end():]
                )

            item_name = price_re.sub('', name_source)
            item_name = re.sub(r'[^\w\s]', '', item_name)
            item_name = ' '.join(item_name.split())

            if len(item_name) <= 2:  # Filter out very short "items"
                continue

            items.append({
                'name': item_name,
                'quantity': quantity,
                'unit_price': amount / quantity,
                'total_price': amount
            })

        parsed_data['items'] = items
        return parsed_data

    def parse_menu_text(self, text):
        """
        Parse OCR text into structured menu data

        A line ending in ``:`` is always a section header. An all-uppercase
        line, or one containing a section keyword, is a header only when it
        carries no price, so priced lines such as ``"COLA $2.00"`` are kept
        as items. Items seen before any header are filed under ``'Main'``.

        Args:
            text: raw OCR output with lines separated by ``\\n``.

        Returns:
            dict with keys ``restaurant_name``, ``sections`` (section name ->
            list of ``{'name', 'price'}`` dicts), ``items`` (flat list with
            ``name``, ``price``, ``section``, ``description``) and
            ``raw_text``.
        """
        price_re = re.compile(self.price_pattern)
        header_keywords = ('appetizer', 'main', 'dessert', 'drink', 'entree')

        parsed_data = {
            'restaurant_name': '',
            'sections': {},
            'items': [],
            'raw_text': text
        }

        current_section = 'Main'
        items = []

        for raw_line in text.split('\n'):
            line = raw_line.strip()
            if not line:
                continue

            prices = price_re.findall(line)
            lowered = line.lower()

            is_header = line.endswith(':') or (
                not prices
                and (
                    line.isupper()
                    or any(keyword in lowered for keyword in header_keywords)
                )
            )
            if is_header:
                current_section = line.replace(':', '').strip()
                # setdefault keeps items already filed under a repeated header.
                parsed_data['sections'].setdefault(current_section, [])
                continue

            if not prices:
                continue

            item_text = price_re.sub('', line).strip()
            price = float(prices[0].replace('$', ''))

            items.append({
                'name': item_text,
                'price': price,
                'section': current_section,
                'description': ''
            })
            # setdefault also creates the implicit 'Main' section on demand.
            parsed_data['sections'].setdefault(current_section, []).append({
                'name': item_text,
                'price': price
            })

        parsed_data['items'] = items
        return parsed_data

### Testing and Accuracy Evaluation

In [None]:
def test_ocr_accuracy(test_images):
    """
    Run the OCR pipeline over each image path and collect summary metrics.

    For every path the image is preprocessed and displayed, text is taken
    from the PSM 6 pass, and that text is parsed as a menu (when the path
    contains "menu") or as a bill. Returns one dict per image; a failing
    image contributes a dict with ``error`` and ``parsed_successfully=False``.
    """
    parser = RestaurantBillParser()
    results = []

    for img_path in test_images:
        print(f"\nTesting: {img_path}")
        print("-" * 40)

        try:
            # Preprocess and show the before/after pair
            original, _gray, processed = preprocess_image(img_path)
            display_images(original, processed, f"OCR Processing: {img_path}")

            # PSM 6 is taken as the best mode (usually right for bills)
            best_text = extract_text_with_different_psm(img_path)[6]['text']

            print("Extracted Text:")
            print(best_text)
            print("\n" + "="*50)

            # Pick the parser from the file name
            is_menu = 'menu' in img_path.lower()
            data_type = 'menu' if is_menu else 'bill'
            parse = parser.parse_menu_text if is_menu else parser.parse_bill_text
            parsed_data = parse(best_text)

            print(f"Parsed {data_type.upper()} Data:")
            print(json.dumps(parsed_data, indent=2))

            # Basic size metrics for the extracted text
            items_found = len(parsed_data['items'])
            record = {
                'image': img_path,
                'text_length': len(best_text),
                'word_count': len(best_text.split()),
                'lines_extracted': sum(
                    1 for line in best_text.split('\n') if line.strip()
                ),
                'data_type': data_type,
                'parsed_successfully': items_found > 0,
                'items_found': items_found
            }

        except Exception as e:
            print(f"Error processing {img_path}: {str(e)}")
            record = {
                'image': img_path,
                'error': str(e),
                'parsed_successfully': False
            }

        results.append(record)

    return results

# Example usage
if __name__ == "__main__":
    # Sample inputs — replace with your test image paths
    test_images = ['sample_bill_1.jpg', 'sample_bill_2.jpg', 'sample_menu_1.jpg']

    # Run the OCR pipeline over every sample
    accuracy_results = test_ocr_accuracy(test_images)

    # Tabulate the per-image records and print them under a heading
    df_results = pd.DataFrame(accuracy_results)
    print("\nOCR Accuracy Results Summary:\n" + "=" * 50)
    print(df_results)

    # Share of images that produced at least one parsed item
    successful_parses = df_results.loc[df_results['parsed_successfully'] == True]
    success_rate = len(successful_parses) / len(df_results) * 100

    print(f"\nOverall Success Rate: {success_rate:.2f}%")

### Performance Optimization

In [None]:
def optimize_ocr_performance(img_path='sample_bill_1.jpg'):
    """
    Test different preprocessing techniques for optimal performance

    Args:
        img_path: image to benchmark on. Defaults to the sample bill path
            that used to be hard-coded inside the loop.

    Returns:
        pandas.DataFrame with one row per technique that ran successfully
        (``technique``, ``processing_time`` in seconds, ``text_length``,
        ``word_count``). The frame is empty when the image cannot be loaded
        or every technique fails; errors are printed, not raised.
    """
    techniques = {
        'default': lambda img: img,
        'denoise_only': lambda img: cv2.medianBlur(img, 3),
        'threshold_only': lambda img: cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1],
        'morphology_only': lambda img: cv2.morphologyEx(img, cv2.MORPH_CLOSE, np.ones((1,1), np.uint8))
    }

    performance_results = []

    # Load the grayscale image once; it is the same for every technique.
    try:
        _, gray, _ = preprocess_image(img_path)
    except Exception as e:
        print(f"Error loading {img_path}: {str(e)}")
        return pd.DataFrame(performance_results)

    for tech_name, tech_func in techniques.items():
        try:
            # Apply technique
            processed = tech_func(gray)

            # Time only the OCR call; perf_counter is a monotonic clock
            # intended for measuring short intervals.
            start_time = time.perf_counter()
            text = extract_text_with_tesseract(processed)
            elapsed = time.perf_counter() - start_time

            performance_results.append({
                'technique': tech_name,
                'processing_time': elapsed,
                'text_length': len(text),
                'word_count': len(text.split())
            })

        except Exception as e:
            # Best effort: report the failure and continue with the next one.
            print(f"Error with {tech_name}: {str(e)}")

    return pd.DataFrame(performance_results)

# Benchmark the preprocessing variants and print the resulting table
performance_df = optimize_ocr_performance()
print("Performance Comparison:", performance_df, sep="\n")

## 2. FastAPI Implementation

Here's the FastAPI implementation that uses the OCR algorithm:

In [None]:
# main.py
from fastapi import FastAPI, File, UploadFile, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
import uvicorn
import cv2
import pytesseract
import numpy as np
from PIL import Image
import io
import re
import json
from typing import List, Optional
import logging

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI(title="Restaurant OCR API", version="1.0.0")

# CORS middleware
# Any origin may call the API. Credentials are disabled because a wildcard
# origin must not be combined with credentialed requests; list explicit
# origins here if cookies or Authorization headers are ever needed.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

class MenuItem(BaseModel):
    # One priced entry of a menu, as returned inside MenuResponse.items.
    name: str
    price: float
    section: str  # name of the menu section the item is filed under
    description: Optional[str] = ""  # optional; defaults to an empty string

class BillItem(BaseModel):
    # One purchased line of a bill, as returned inside BillResponse.items.
    name: str
    quantity: int
    unit_price: float
    total_price: float

class MenuResponse(BaseModel):
    # Response body of the /ocr/menu endpoint.
    restaurant_name: str
    sections: dict  # untyped dict; the menu parser fills it with section name -> list of items
    items: List[MenuItem]
    raw_text: str  # OCR text the structured fields were parsed from
    confidence: float  # confidence value reported by the OCR step

class BillResponse(BaseModel):
    # Response body of the /ocr/bill endpoint.
    restaurant_name: str
    items: List[BillItem]
    subtotal: float
    tax: float
    total: float
    raw_text: str  # OCR text the structured fields were parsed from
    confidence: float  # confidence value reported by the OCR step

class RestaurantOCRProcessor:
    """
    OCR pipeline used by the API: image bytes -> binarised image -> text ->
    structured menu or bill data.

    Attributes:
        price_pattern: regex source for a number with an optional leading
            ``$`` and up to two decimals.
        quantity_pattern: regex source capturing the count in ``"<n> x"``.
    """

    def __init__(self):
        self.price_pattern = r'\$?\d+\.?\d{0,2}'
        self.quantity_pattern = r'(\d+)\s*x'

    def preprocess_image(self, image_bytes):
        """Preprocess uploaded image for OCR

        Decodes the bytes, converts to grayscale, applies a median blur and
        Otsu thresholding, and returns the thresholded image.

        Raises:
            ValueError: if the payload is empty or cannot be decoded as an
                image. Any other failure is logged and re-raised unchanged.
        """
        try:
            if not image_bytes:
                raise ValueError("Empty image data")

            # Convert bytes to numpy array
            nparr = np.frombuffer(image_bytes, np.uint8)
            img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
            if img is None:
                # imdecode signals an undecodable payload by returning None.
                raise ValueError("Could not decode image data")

            # Convert to grayscale
            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

            # Noise removal and thresholding
            denoised = cv2.medianBlur(gray, 3)
            _, thresh = cv2.threshold(denoised, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

            return thresh
        except Exception as e:
            logger.error(f"Image preprocessing error: {str(e)}")
            raise

    def extract_text(self, processed_image):
        """Extract text from processed image

        Returns:
            ``(text, avg_confidence)``: the stripped OCR text and the mean of
            the positive per-entry confidences, or ``0.0`` when there are
            none.
        """
        try:
            # Use Tesseract with optimized configuration
            custom_config = r'--psm 6 -c tessedit_char_whitelist=0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz$.,:/()- '
            text = pytesseract.image_to_string(processed_image, config=custom_config)

            # Get confidence data with the same configuration, so the score
            # describes the text that is actually returned.
            data = pytesseract.image_to_data(
                processed_image,
                config=custom_config,
                output_type=pytesseract.Output.DICT,
            )
            # float() accepts ints, floats and numeric strings such as
            # '96.5', which int() would reject.
            confidences = [
                value
                for value in (float(conf) for conf in data['conf'])
                if value > 0
            ]
            avg_confidence = sum(confidences) / len(confidences) if confidences else 0.0

            return text.strip(), avg_confidence
        except Exception as e:
            logger.error(f"OCR extraction error: {str(e)}")
            raise

    def parse_menu(self, text):
        """Parse OCR text as menu

        A line ending in ``:`` is always a section header. An all-uppercase
        line, or one containing a section keyword, is a header only when it
        carries no price, so priced lines such as ``"COLA $2.00"`` are kept
        as items. Items seen before any header are filed under ``'Main'``.

        Returns:
            dict with keys ``restaurant_name``, ``sections``, ``items`` and
            ``raw_text``; each item has ``name``, ``price`` and ``section``.
        """
        price_re = re.compile(self.price_pattern)
        header_keywords = ('appetizer', 'main', 'entree', 'dessert', 'drink', 'beverage')

        parsed_data = {
            'restaurant_name': '',
            'sections': {},
            'items': [],
            'raw_text': text
        }

        current_section = 'Main'

        for raw_line in text.split('\n'):
            line = raw_line.strip()
            if not line:
                continue

            prices = price_re.findall(line)
            lowered = line.lower()

            # Detect section headers
            is_header = line.endswith(':') or (
                not prices
                and (
                    line.isupper()
                    or any(keyword in lowered for keyword in header_keywords)
                )
            )
            if is_header:
                current_section = line.replace(':', '').strip()
                # setdefault keeps items already filed under a repeated header.
                parsed_data['sections'].setdefault(current_section, [])
                continue

            if not prices:
                continue

            # Extract items with prices
            item_text = price_re.sub('', line)
            item_text = re.sub(r'[^\w\s]', ' ', item_text)
            item_text = ' '.join(item_text.split())

            if len(item_text) > 2:
                item_data = {
                    'name': item_text,
                    'price': float(prices[0].replace('$', '')),
                    'section': current_section
                }

                parsed_data['items'].append(item_data)
                # setdefault also creates the implicit 'Main' section.
                parsed_data['sections'].setdefault(current_section, []).append(item_data)

        return parsed_data

    def parse_bill(self, text):
        """Parse OCR text as bill

        Every non-empty line that contains a number is classified as either a
        summary line (subtotal / tax / total or amount) or a purchased item.
        Classification no longer depends on how many numbers the line holds,
        so "Total 25.00" sets the total and "2 x Burger 10.00" is an item.

        Returns:
            dict with keys ``restaurant_name``, ``items``, ``subtotal``,
            ``tax``, ``total`` and ``raw_text``; each item has ``name``,
            ``quantity``, ``unit_price`` and ``total_price``.
        """
        price_re = re.compile(self.price_pattern)
        quantity_re = re.compile(self.quantity_pattern, re.IGNORECASE)

        parsed_data = {
            'restaurant_name': '',
            'items': [],
            'subtotal': 0.0,
            'tax': 0.0,
            'total': 0.0,
            'raw_text': text
        }

        for raw_line in text.split('\n'):
            line = raw_line.strip()
            if not line:
                continue

            prices = price_re.findall(line)
            if not prices:
                continue

            # The amount is the right-most number on the line.
            amount = float(prices[-1].replace('$', ''))
            line_lower = line.lower()

            # Extract totals; 'subtotal' is tested before 'total' because
            # the latter is a substring of the former.
            if 'subtotal' in line_lower:
                parsed_data['subtotal'] = amount
                continue
            if 'tax' in line_lower:
                parsed_data['tax'] = amount
                continue
            if 'total' in line_lower or 'amount' in line_lower:
                parsed_data['total'] = amount
                continue

            # Look for quantity
            quantity = 1
            name_source = line
            quantity_match = quantity_re.search(line)
            if quantity_match:
                # A zero count would divide by zero below; fall back to 1.
                quantity = int(quantity_match.group(1)) or 1
                # Drop the "<n> x" marker so it does not leak into the name.
                name_source = (
                    line[:quantity_match.start()]
                    + ' '
                    + line[quantity_match.end():]
                )

            item_name = price_re.sub('', name_source)
            item_name = re.sub(r'[^\w\s]', ' ', item_name)
            item_name = ' '.join(item_name.split())

            if len(item_name) > 2:
                parsed_data['items'].append({
                    'name': item_name,
                    'quantity': quantity,
                    'unit_price': round(amount / quantity, 2),
                    'total_price': amount
                })

        return parsed_data

# Initialize processor
# Single module-level instance used by the /ocr/* endpoint handlers.
processor = RestaurantOCRProcessor()

@app.get("/")
async def root():
    # Static payload naming the service and reporting it as active.
    payload = dict(message="Restaurant OCR API", status="active")
    return payload

@app.post("/ocr/menu", response_model=MenuResponse)
async def extract_menu(file: UploadFile = File(...)):
    """Extract menu data from image"""
    # Validate file type before entering the try block: raised inside it, the
    # 400 would be caught by the generic handler and re-reported as a 500.
    # A missing content type is treated as "not an image".
    if not (file.content_type or "").startswith('image/'):
        raise HTTPException(status_code=400, detail="File must be an image")

    try:
        # Read image
        image_bytes = await file.read()

        # Preprocess and extract text
        processed_image = processor.preprocess_image(image_bytes)
        text, confidence = processor.extract_text(processed_image)

        # Parse as menu
        parsed_data = processor.parse_menu(text)

        return MenuResponse(
            restaurant_name=parsed_data['restaurant_name'],
            sections=parsed_data['sections'],
            items=parsed_data['items'],
            raw_text=parsed_data['raw_text'],
            confidence=confidence
        )

    except Exception as e:
        # Any processing failure is logged and surfaced as a 500.
        logger.error(f"Menu extraction error: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Processing error: {str(e)}") from e

@app.post("/ocr/bill", response_model=BillResponse)
async def extract_bill(file: UploadFile = File(...)):
    """Extract bill data from image"""
    # Validate file type before entering the try block: raised inside it, the
    # 400 would be caught by the generic handler and re-reported as a 500.
    # A missing content type is treated as "not an image".
    if not (file.content_type or "").startswith('image/'):
        raise HTTPException(status_code=400, detail="File must be an image")

    try:
        # Read image
        image_bytes = await file.read()

        # Preprocess and extract text
        processed_image = processor.preprocess_image(image_bytes)
        text, confidence = processor.extract_text(processed_image)

        # Parse as bill
        parsed_data = processor.parse_bill(text)

        return BillResponse(
            restaurant_name=parsed_data['restaurant_name'],
            items=parsed_data['items'],
            subtotal=parsed_data['subtotal'],
            tax=parsed_data['tax'],
            total=parsed_data['total'],
            raw_text=parsed_data['raw_text'],
            confidence=confidence
        )

    except Exception as e:
        # Any processing failure is logged and surfaced as a 500.
        logger.error(f"Bill extraction error: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Processing error: {str(e)}") from e

@app.post("/ocr/text")
async def extract_text_only(file: UploadFile = File(...)):
    """Extract raw text from image"""
    # Validate file type before entering the try block: raised inside it, the
    # 400 would be caught by the generic handler and re-reported as a 500.
    # A missing content type is treated as "not an image".
    if not (file.content_type or "").startswith('image/'):
        raise HTTPException(status_code=400, detail="File must be an image")

    try:
        # Read image
        image_bytes = await file.read()

        # Preprocess and extract text
        processed_image = processor.preprocess_image(image_bytes)
        text, confidence = processor.extract_text(processed_image)

        return {
            "text": text,
            "confidence": confidence,
            "character_count": len(text),
            "line_count": len(text.split('\n'))
        }

    except Exception as e:
        # Any processing failure is logged and surfaced as a 500.
        logger.error(f"Text extraction error: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Processing error: {str(e)}") from e

# Script entry point: when the module is executed directly, serve the app
# with uvicorn on host 0.0.0.0, port 8000.
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)