# PaddleOCR Testing for OCR Q&A Segmentation

This notebook tests PaddleOCR for improved handwriting recognition.

**Python**: 3.10 (Colab) - Compatible with PaddleOCR ✅

In [None]:
# Clone repository
!git clone https://github.com/Abhigyan-Shekhar/ocr-qa-segmentation.git
%cd ocr-qa-segmentation

In [None]:
# Install dependencies
!pip install -q -r requirements.txt

In [None]:
# Test OCR engines
!python test_ocr.py

In [None]:
# Set up the OCR engine used by the cells below
from src.ocr_engine import OCREngine
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt

# Initialize OCR: prefer PaddleOCR and fall back to Tesseract if it cannot be
# set up. The resulting `ocr` object is used by the cells below.
print("Initializing OCR engine...")
try:
    ocr = OCREngine(engine='paddleocr')
    print("✓ Using PaddleOCR (best quality)")
except Exception as exc:
    # Catch Exception rather than using a bare `except:` so that
    # KeyboardInterrupt / SystemExit still stop the cell, and report why
    # PaddleOCR was skipped instead of silently hiding the failure.
    print(f"PaddleOCR unavailable ({type(exc).__name__}: {exc}); falling back to Tesseract.")
    ocr = OCREngine(engine='tesseract')
    print("✓ Using Tesseract (fallback)")

print("\n🎉 OCR engine ready for handwriting recognition!")

## Upload Your Test Image

Upload a handwritten exam image to test OCR quality:

In [None]:
from google.colab import files
import cv2

# Upload an image, display it, and print the text lines the OCR engine finds.
# Relies on `plt` and `ocr` from the earlier cells.

# Maximum number of recognised lines to print.
MAX_LINES_SHOWN = 10

# Opens Colab's file picker; the result maps each uploaded filename to its content.
uploaded = files.upload()

if not uploaded:
    print("No file uploaded - re-run this cell and choose an image.")
else:
    # Only the first uploaded file is processed.
    filename = next(iter(uploaded))

    # cv2.imread signals failure by returning None instead of raising, so
    # check explicitly; otherwise cvtColor fails with an opaque error.
    img = cv2.imread(filename)
    if img is None:
        raise ValueError(
            f"Could not read '{filename}' as an image - "
            "please upload a supported image file (e.g. PNG or JPEG)."
        )

    # Display (OpenCV loads images as BGR; matplotlib expects RGB)
    plt.figure(figsize=(12, 8))
    plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    plt.axis('off')
    plt.title('Uploaded Image')
    plt.show()

    # Extract text
    print("\nExtracting text...")
    lines = ocr.extract_lines(img)

    print(f"\nDetected {len(lines)} lines:\n")
    for i, line in enumerate(lines[:MAX_LINES_SHOWN], 1):
        print(f"{i}. {line.text} (confidence: {line.confidence:.2f})")

    # Make it explicit when the listing above has been truncated.
    if len(lines) > MAX_LINES_SHOWN:
        print(f"... ({len(lines) - MAX_LINES_SHOWN} more lines not shown)")