# 🚀 TrOCR Handwriting Recognition
## Microsoft's Transformer-based OCR for Handwritten Text

**Model**: [microsoft/trocr-base-handwritten](https://huggingface.co/microsoft/trocr-base-handwritten)

**What it does:**
- ✅ State-of-the-art handwriting recognition
- ✅ Works on single-line images
- ✅ Easy to use (just pip install)
- ✅ No dependency hell

## 1️⃣ Install Dependencies

In [None]:
!pip install -q transformers pillow opencv-python-headless matplotlib torch

In [None]:
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from PIL import Image
from google.colab import files

from transformers import TrOCRProcessor, VisionEncoderDecoderModel

print("‚úÖ Imports successful")

## 2️⃣ Load TrOCR Model

In [None]:
# Load the processor and the model from the same pretrained checkpoint.
# The generator is consumed left to right, so the processor is still
# loaded first and the model second.
print("üì• Loading TrOCR model...")
processor, model = (
    loader.from_pretrained('microsoft/trocr-base-handwritten')
    for loader in (TrOCRProcessor, VisionEncoderDecoderModel)
)
print("‚úÖ Model loaded!")

## 3️⃣ Upload Your Handwritten Page

In [None]:
# Ask for a page image through the Colab upload widget.
print("üì§ Upload your handwritten page")
uploaded_img = files.upload()

# Always bind img_filename so that later cells testing `if img_filename:`
# skip cleanly instead of raising NameError when nothing was uploaded.
img_filename = None

if uploaded_img:
    # The upload result is keyed by filename; use the first entry.
    img_filename = next(iter(uploaded_img))
    print(f"‚úÖ Uploaded: {img_filename}")

    # cv2.imread signals failure by returning None instead of raising, so
    # check before handing the result to cvtColor.
    img = cv2.imread(img_filename)
    if img is None:
        # Clear the name so downstream cells do not try to process the file.
        bad_filename, img_filename = img_filename, None
        raise ValueError(f"Could not read '{bad_filename}' as an image")

    # Display (OpenCV loads BGR; matplotlib expects RGB)
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    plt.figure(figsize=(12, 8))
    plt.imshow(img_rgb)
    plt.axis('off')
    plt.title('Original Image')
    plt.show()

## 4️⃣ Automatic Line Segmentation

In [None]:
def _find_line_spans(h_projection, threshold, min_line_height):
    """
    Find (start, end) row spans where the projection rises above threshold

    Args:
        h_projection: Per-row ink totals, one value per image row
        threshold: A span opens on a value above this and closes on a
            value below it
        min_line_height: Spans must be taller than this many rows to be kept
    Returns:
        List of (start_row, end_row) tuples, end_row exclusive
    """
    spans = []
    in_line = False
    line_start = 0

    for row, val in enumerate(h_projection):
        if not in_line and val > threshold:
            line_start = row
            in_line = True
        elif in_line and val < threshold:
            if row - line_start > min_line_height:
                spans.append((line_start, row))
            in_line = False

    # A span still open at the bottom edge is closed at the last row and
    # goes through the same height filter as every other span, so a thin
    # strip of noise at the bottom of the page is not reported as a line.
    total_rows = len(h_projection)
    if in_line and total_rows - line_start > min_line_height:
        spans.append((line_start, total_rows))

    return spans


def segment_lines(image_path, min_line_height=20):
    """
    Segment handwritten page into text lines using horizontal projection

    Args:
        image_path: Path of the page image (read with cv2.imread)
        min_line_height: Minimum height in rows; shorter spans are dropped
    Returns:
        (line_images, bboxes): line_images is a list of grayscale PIL
        Images, one per detected line; bboxes is a list of (x, y, w, h)
        tuples in page pixel coordinates, in the same order
    Raises:
        ValueError: If the image at image_path cannot be read
    """
    # Read image (cv2.imread returns None on failure instead of raising)
    img = cv2.imread(image_path)
    if img is None:
        raise ValueError(f"Could not read image: {image_path}")
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Denoise
    denoised = cv2.fastNlMeansDenoising(gray, h=10)

    # Binarize (inverted, so ink pixels are 255 and background is 0)
    binary = cv2.adaptiveThreshold(
        denoised, 255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV, 21, 10
    )

    # Horizontal projection: total ink per row; rows above 10% of the
    # busiest row count as part of a text line
    h_projection = np.sum(binary, axis=1)
    threshold = np.max(h_projection) * 0.1

    # Find line boundaries
    lines = _find_line_spans(h_projection, threshold, min_line_height)

    # Extract line images with PIL
    line_images = []
    bboxes = []

    for y_start, y_end in lines:
        # Add vertical padding, clamped to the page
        y_start = max(0, y_start - 5)
        y_end = min(gray.shape[0], y_end + 5)

        # Find horizontal boundaries from the columns that contain ink
        v_projection = np.sum(binary[y_start:y_end, :], axis=0)
        non_zero = np.where(v_projection > 0)[0]

        if len(non_zero) > 0:
            x_start = max(0, non_zero[0] - 10)
            x_end = min(gray.shape[1], non_zero[-1] + 10)

            # Crop from the grayscale page, not the binarized one
            line_img = gray[y_start:y_end, x_start:x_end]

            # Convert to PIL Image (TrOCR expects PIL)
            line_images.append(Image.fromarray(line_img))
            bboxes.append((x_start, y_start, x_end - x_start, y_end - y_start))

    return line_images, bboxes

print("‚úÖ Line segmentation function ready")

In [None]:
# Segment lines
# Start from empty results so the cells below can test `lines` safely even
# when no image has been uploaded.
lines, line_bboxes = [], []

# img_filename only exists once the upload cell has run; check for it
# before reading it so this cell skips instead of raising NameError.
if 'img_filename' in globals() and img_filename:
    print("üîç Detecting text lines...")
    lines, line_bboxes = segment_lines(img_filename)
    print(f"‚úÖ Found {len(lines)} text lines")

    # Visualize detected lines (OpenCV loads BGR; matplotlib expects RGB)
    img_display = cv2.imread(img_filename)
    img_display = cv2.cvtColor(img_display, cv2.COLOR_BGR2RGB)

    fig, ax = plt.subplots(figsize=(14, 10))
    ax.imshow(img_display)

    # Draw one labelled box per detected line
    for i, (x, y, w, h) in enumerate(line_bboxes):
        rect = Rectangle((x, y), w, h, linewidth=2,
                        edgecolor='lime', facecolor='none')
        ax.add_patch(rect)
        ax.text(x, y-5, f'Line {i+1}', color='lime',
               fontsize=12, fontweight='bold',
               bbox=dict(boxstyle='round', facecolor='black', alpha=0.7))

    ax.axis('off')
    ax.set_title(f'Detected {len(lines)} Lines', fontsize=16, fontweight='bold')
    plt.tight_layout()
    plt.show()

## 5️⃣ Run TrOCR on Each Line

In [None]:
def recognize_line(line_image):
    """
    Recognize text in a line image using TrOCR

    Args:
        line_image: PIL Image of text line (any mode; converted to RGB
            when it is not already RGB)
    Returns:
        Recognized text string
    """
    # segment_lines crops from a grayscale array, so its images are
    # single-channel. The Hugging Face TrOCR examples feed RGB images to
    # the processor, so convert before preprocessing.
    if line_image.mode != "RGB":
        line_image = line_image.convert("RGB")

    # Preprocess image
    pixel_values = processor(line_image, return_tensors="pt").pixel_values

    # Generate text
    generated_ids = model.generate(pixel_values)

    # Decode (a single image goes in, so take the first result)
    text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

    return text

print("‚úÖ Recognition function ready")

In [None]:
# Process all lines
# `lines` only exists once the segmentation cell has run; check for it
# before reading it so this cell skips instead of raising NameError.
if 'lines' in globals() and lines:
    print("\n" + "="*80)
    print("RECOGNITION RESULTS")
    print("="*80)

    # Recognized strings, in the same order as the detected lines
    full_text = []

    for line_no, line_img in enumerate(lines, start=1):
        print(f"\nüîÑ Processing Line {line_no}...", end=" ")

        # Recognize
        text = recognize_line(line_img)
        full_text.append(text)

        print("‚úÖ")

        # Display line and prediction
        fig, ax = plt.subplots(1, 1, figsize=(12, 2))
        ax.imshow(line_img, cmap='gray')
        ax.axis('off')
        ax.set_title(f"Line {line_no}: '{text}'", fontsize=12, fontweight='bold')
        plt.tight_layout()
        plt.show()

        print(f"üìù Text: {text}")
        print("-" * 80)

    # Numbered transcript of the whole page
    print("\n" + "="*80)
    print("FULL TEXT OUTPUT")
    print("="*80)
    for line_no, line in enumerate(full_text, 1):
        print(f"{line_no}. {line}")
    print("="*80)

## 6️⃣ Save Results

In [None]:
# Save to text file
# full_text only exists once the recognition cell has produced results.
if 'full_text' in locals():
    output_file = 'recognized_text.txt'

    # Write UTF-8 explicitly: recognized text may contain non-ASCII
    # characters, and the platform default encoding is not guaranteed to
    # handle them.
    with open(output_file, 'w', encoding='utf-8') as f:
        f.writelines(
            f"{i}. {line}\n" for i, line in enumerate(full_text, 1)
        )

    print(f"‚úÖ Results saved to {output_file}")

    # Download through the Colab file helper
    files.download(output_file)
    print("‚úÖ File downloaded!")

## 📊 Summary

**What this notebook does:**
1. **Auto-segments** your handwritten page into text lines
2. **Recognizes** each line using Microsoft's TrOCR
3. **Outputs** the complete text

**Why TrOCR?**
- State-of-the-art accuracy on handwriting
- Transformer-based (better than CNN+RNN)
- Pre-trained on large handwriting datasets
- Easy to use, no complex dependencies

**For your internship assignment:**
- ✅ This handles **Task 1: OCR**
- Next: Build **Task 2: Q&A Separation** (rule-based, no LLMs)