In [None]:
# install required libraries
!pip install google-cloud-vision openpyxl --quiet

In [None]:
# Step 1.1: Import and verify core libraries
import sys
import pkg_resources
import importlib

# Test essential libraries
required_packages = [
    'opencv-python',
    'pillow',
    'numpy',
    'pandas',
    'matplotlib',
    'scikit-learn',
    'tensorflow',  # or 'torch' if using PyTorch
    'pytesseract',
    'easyocr'
]

print("Checking required packages...")
missing_packages = []

for package in required_packages:
    try:
        if package == 'opencv-python':
            import cv2
            print(f"✓ OpenCV: {cv2.__version__}")
        elif package == 'pillow':
            import PIL
            print(f"✓ Pillow: {PIL.__version__}")
        elif package == 'numpy':
            import numpy as np
            print(f"✓ NumPy: {np.__version__}")
        elif package == 'pandas':
            import pandas as pd
            print(f"✓ Pandas: {pd.__version__}")
        elif package == 'matplotlib':
            import matplotlib
            print(f"✓ Matplotlib: {matplotlib.__version__}")
        elif package == 'scikit-learn':
            import sklearn
            print(f"✓ Scikit-learn: {sklearn.__version__}")
        elif package == 'tensorflow':
            import tensorflow as tf
            print(f"✓ TensorFlow: {tf.__version__}")
        elif package == 'pytesseract':
            import pytesseract
            print(f"✓ PyTesseract: Available")
        elif package == 'easyocr':
            import easyocr
            print(f"✓ EasyOCR: Available")
    except ImportError:
        missing_packages.append(package)
        print(f"✗ {package}: Not installed")

if missing_packages:
    print(f"\nMissing packages: {missing_packages}")
    print("Please install using: pip install", " ".join(missing_packages))
else:
    print("\n✓ All required packages are installed!")

In [None]:
# Step 1.2a: Install OpenCV
import subprocess
import sys

def install_package(package_name):
    try:
        subprocess.check_call([sys.executable, "-m", "pip", "install", package_name])
        print(f"✓ Successfully installed {package_name}")
        return True
    except subprocess.CalledProcessError as e:
        print(f"✗ Failed to install {package_name}: {e}")
        return False

# Install OpenCV first
print("Installing OpenCV...")
install_package("opencv-python")

In [None]:
# Step 1.2b: Install TensorFlow
print("Installing TensorFlow...")
install_package("tensorflow")

In [None]:
# Step 1.2c: Install OCR libraries
print("Installing PyTesseract...")
install_package("pytesseract")

In [None]:
# Step 1.2d: Install EasyOCR
print("Installing EasyOCR...")
install_package("easyocr")

In [None]:
# Step 1.3: Final verification of all installations
print("Verifying all installations...")

try:
    import cv2
    print(f"✓ OpenCV: {cv2.__version__}")
except ImportError as e:
    print(f"✗ OpenCV: {e}")

try:
    import tensorflow as tf
    print(f"✓ TensorFlow: {tf.__version__}")
except ImportError as e:
    print(f"✗ TensorFlow: {e}")

try:
    import pytesseract
    print(f"✓ PyTesseract: Available")
except ImportError as e:
    print(f"✗ PyTesseract: {e}")

try:
    import easyocr
    print(f"✓ EasyOCR: Available")
except ImportError as e:
    print(f"✗ EasyOCR: {e}")

print("\n" + "="*50)
print("HYBRID OCR SYSTEM READY!")
print("Local OCR: EasyOCR + PyTesseract")
print("Cloud OCR: Google Vision API")
print("="*50)

## Step 2: Project Structure Setup

In [None]:
# Step 2.1: Create project directory structure
import os
import json
from pathlib import Path

# Define project structure
project_structure = {
    'data': {
        'raw': 'Original check images',
        'processed': 'Preprocessed images',
        'train': 'Training dataset',
        'test': 'Test dataset',
        'validation': 'Validation dataset'
    },
    'models': 'Trained models storage',
    'outputs': {
        'results': 'OCR results',
        'logs': 'Processing logs',
        'reports': 'Analysis reports'
    },
    'config': 'Configuration files',
    'utils': 'Utility functions'
}

# Create directories
base_dir = Path('bank_check_ocr')
print("Creating project structure...")

def create_directories(structure, parent_path=base_dir):
    for key, value in structure.items():
        current_path = parent_path / key
        current_path.mkdir(parents=True, exist_ok=True)
        
        if isinstance(value, dict):
            create_directories(value, current_path)
        else:
            # Create a README file in each directory
            readme_path = current_path / 'README.md'
            if not readme_path.exists():
                readme_path.write_text(f"# {key.title()} Directory\n\n{value}\n")
        
        print(f"✓ Created: {current_path}")

create_directories(project_structure)
print(f"\n✓ Project structure created successfully!")
print(f"Base directory: {base_dir.absolute()}")

## Step 2.2: Configuration Setup

In [None]:
# Step 2.2: Create configuration file for hybrid OCR system
config = {
    "ocr_settings": {
        "local_ocr": {
            "easyocr": {
                "languages": ["en"],
                "gpu": False,
                "confidence_threshold": 0.5
            },
            "tesseract": {
                "config": "--oem 3 --psm 6",
                "confidence_threshold": 30
            }
        },
        "google_vision": {
            "api_key_path": "config/google_vision_api_key.json",
            "features": ["TEXT_DETECTION", "DOCUMENT_TEXT_DETECTION"],
            "confidence_threshold": 0.8
        }
    },
    "image_processing": {
        "resize_width": 1200,
        "resize_height": 800,
        "dpi": 300,
        "preprocessing_steps": [
            "grayscale",
            "noise_reduction",
            "contrast_enhancement",
            "deskew"
        ]
    },
    "hybrid_logic": {
        "use_local_first": True,
        "fallback_to_cloud": True,
        "confidence_comparison": True,
        "cost_optimization": True
    }
}

# Save configuration
config_path = base_dir / 'config' / 'ocr_config.json'
with open(config_path, 'w') as f:
    json.dump(config, f, indent=4)

print("✓ Configuration file created!")
print(f"Config saved to: {config_path}")
print("\nConfiguration preview:")
print(json.dumps(config, indent=2))

# Step 3: Initialize Hybrid OCR System

In [None]:
# Step 3.1: Initialize OCR engines
import cv2
import numpy as np
import easyocr
import pytesseract
from PIL import Image
import json
import time

# Load configuration
with open('bank_check_ocr/config/ocr_config.json', 'r') as f:
    config = json.load(f)

print("Initializing OCR engines...")

# Initialize EasyOCR (this will download models on first run)
print("Loading EasyOCR...")
try:
    reader = easyocr.Reader(['en'], gpu=False)
    print("✓ EasyOCR initialized successfully")
except Exception as e:
    print(f"✗ EasyOCR initialization failed: {e}")

# Test PyTesseract
print("Testing PyTesseract...")
try:
    # Create a simple test image
    test_img = np.ones((100, 300, 3), dtype=np.uint8) * 255
    cv2.putText(test_img, "TEST OCR", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 2)
    
    # Test tesseract
    pil_img = Image.fromarray(test_img)
    test_result = pytesseract.image_to_string(pil_img)
    print(f"✓ PyTesseract working - Test result: '{test_result.strip()}'")
    
except Exception as e:
    print(f"✗ PyTesseract test failed: {e}")

print("\n" + "="*50)
print("OCR ENGINES INITIALIZED!")
print("="*50)

In [None]:
# Step 3.1b: Install Tesseract executable
print("Installing Tesseract executable...")

# For Windows users (most common case)
import platform
import subprocess
import sys

system = platform.system()
print(f"Detected system: {system}")

if system == "Windows":
    print("Installing Tesseract for Windows...")
    try:
        # Install using conda-forge (most reliable for Windows)
        subprocess.check_call([sys.executable, "-m", "pip", "install", "tesseract"])
        print("✓ Tesseract package installed")
    except:
        print("Pip install failed, trying alternative...")
        
    # Alternative: Download and install instructions
    print("\nIf installation fails, please:")
    print("1. Download Tesseract from: https://github.com/UB-Mannheim/tesseract/wiki")
    print("2. Install it to C:\\Program Files\\Tesseract-OCR")
    print("3. Add C:\\Program Files\\Tesseract-OCR to your PATH")
    
elif system == "Linux":
    print("For Linux, run: sudo apt-get install tesseract-ocr")
    
elif system == "Darwin":  # macOS
    print("For macOS, run: brew install tesseract")

print("\nLet's try a different approach - using only EasyOCR for now...")

In [None]:
# Step 3.2: Create Hybrid OCR class (EasyOCR focused)
class HybridOCR:
    def __init__(self, config):
        self.config = config
        self.easyocr_reader = reader  # Use the reader we initialized above
        self.results_log = []
        self.tesseract_available = False
        
        # Test if Tesseract is available
        try:
            test_img = np.ones((50, 200, 3), dtype=np.uint8) * 255
            cv2.putText(test_img, "TEST", (20, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 2)
            pil_img = Image.fromarray(test_img)
            pytesseract.image_to_string(pil_img)
            self.tesseract_available = True
            print("✓ Tesseract is available")
        except:
            print("⚠ Tesseract not available - using EasyOCR only")
        
    def preprocess_image(self, image_path):
        """Preprocess image for better OCR results"""
        try:
            # Read image
            img = cv2.imread(str(image_path))
            if img is None:
                raise ValueError(f"Could not load image: {image_path}")
            
            # Convert to grayscale
            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            
            # Resize if needed
            height, width = gray.shape
            if width > self.config['image_processing']['resize_width']:
                scale = self.config['image_processing']['resize_width'] / width
                new_width = int(width * scale)
                new_height = int(height * scale)
                gray = cv2.resize(gray, (new_width, new_height))
            
            # Noise reduction
            denoised = cv2.medianBlur(gray, 3)
            
            # Contrast enhancement
            enhanced = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8)).apply(denoised)
            
            return enhanced
            
        except Exception as e:
            print(f"Preprocessing error: {e}")
            return None
    
    def easyocr_extract(self, image):
        """Extract text using EasyOCR"""
        try:
            results = self.easyocr_reader.readtext(image)
            
            extracted_data = []
            for (bbox, text, confidence) in results:
                if confidence >= self.config['ocr_settings']['local_ocr']['easyocr']['confidence_threshold']:
                    extracted_data.append({
                        'text': text,
                        'confidence': confidence,
                        'bbox': bbox,
                        'engine': 'easyocr'
                    })
            
            return extracted_data
            
        except Exception as e:
            print(f"EasyOCR extraction error: {e}")
            return []
    
    def process_image(self, image_path):
        """Main processing function"""
        print(f"Processing: {image_path}")
        
        # Preprocess image
        processed_img = self.preprocess_image(image_path)
        if processed_img is None:
            return None
        
        # Extract text using EasyOCR
        results = self.easyocr_extract(processed_img)
        
        # Log results
        self.results_log.append({
            'image_path': str(image_path),
            'timestamp': time.time(),
            'results': results,
            'engine_used': 'easyocr'
        })
        
        return results

# Initialize our hybrid OCR system
hybrid_ocr = HybridOCR(config)
print("✓ Hybrid OCR class created successfully!")
print(f"Tesseract available: {hybrid_ocr.tesseract_available}")

In [None]:
# Step 4.1: Create sample test images that simulate bank check text
import matplotlib.pyplot as plt
import os

def create_test_images():
    """Create sample images that simulate bank check elements"""
    
    # Create test images directory
    test_dir = Path('bank_check_ocr/data/test')
    test_dir.mkdir(exist_ok=True)
    
    # Test Image 1: Simple bank routing/account numbers
    img1 = np.ones((200, 600, 3), dtype=np.uint8) * 255
    cv2.putText(img1, "ROUTING: 021000021", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 0), 2)
    cv2.putText(img1, "ACCOUNT: 1234567890", (50, 100), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 0), 2)
    cv2.putText(img1, "CHECK #: 001", (50, 150), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 0), 2)
    cv2.imwrite(str(test_dir / 'test_check_1.png'), img1)
    
    # Test Image 2: Amount and date
    img2 = np.ones((200, 600, 3), dtype=np.uint8) * 255
    cv2.putText(img2, "DATE: 12/25/2024", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 0), 2)
    cv2.putText(img2, "AMOUNT: $1,250.00", (50, 100), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 0), 2)
    cv2.putText(img2, "PAY TO: JOHN SMITH", (50, 150), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 0), 2)
    cv2.imwrite(str(test_dir / 'test_check_2.png'), img2)
    
    # Test Image 3: Mixed fonts and sizes
    img3 = np.ones((250, 700, 3), dtype=np.uint8) * 255
    cv2.putText(img3, "BANK OF AMERICA", (50, 40), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 0), 2)
    cv2.putText(img3, "123 Main Street", (50, 80), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 1)
    cv2.putText(img3, "New York, NY 10001", (50, 110), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 1)
    cv2.putText(img3, "MEMO: Monthly Rent", (50, 200), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)
    cv2.imwrite(str(test_dir / 'test_check_3.png'), img3)
    
    print("✓ Created 3 test images")
    return [
        test_dir / 'test_check_1.png',
        test_dir / 'test_check_2.png',
        test_dir / 'test_check_3.png'
    ]

# Create test images
test_images = create_test_images()

# Display one test image to verify
plt.figure(figsize=(10, 4))
sample_img = cv2.imread(str(test_images[0]))
sample_img_rgb = cv2.cvtColor(sample_img, cv2.COLOR_BGR2RGB)
plt.imshow(sample_img_rgb)
plt.title("Sample Test Image")
plt.axis('off')
plt.show()

print(f"Test images created: {len(test_images)}")
for img_path in test_images:
    print(f"  - {img_path.name}")

In [None]:
# Step 4.2: Test OCR on our sample images
def test_ocr_system():
    """Test OCR system on sample images"""
    
    print("Testing OCR system on sample images...")
    print("=" * 60)
    
    for i, img_path in enumerate(test_images, 1):
        print(f"\nTest {i}: {img_path.name}")
        print("-" * 30)
        
        # Process image
        results = hybrid_ocr.process_image(img_path)
        
        if results:
            print(f"Found {len(results)} text elements:")
            for j, result in enumerate(results, 1):
                print(f"  {j}. Text: '{result['text']}'")
                print(f"     Confidence: {result['confidence']:.2f}")
                print(f"     Engine: {result['engine']}")
                print()
        else:
            print("No text detected!")
        
        print("-" * 30)
    
    print("\nOCR Test Complete!")
    print(f"Total images processed: {len(hybrid_ocr.results_log)}")

# Run the test
test_ocr_system()

In [None]:
# Step 5.1: Extract your bank cheque dataset
import zipfile
import os
from pathlib import Path

def extract_dataset():
    """Extract the bank cheque dataset"""
    
    # Look for the zip file
    zip_path = Path('bank_cheque_images_dataset.zip')
    
    if not zip_path.exists():
        print("Dataset zip file not found. Let's check what files are available:")
        current_dir = Path('.')
        for file in current_dir.iterdir():
            if file.suffix == '.zip':
                print(f"Found zip file: {file.name}")
                zip_path = file
                break
    
    if not zip_path.exists():
        print("No zip file found. Please make sure the dataset is uploaded.")
        return None
    
    print(f"Found dataset: {zip_path.name}")
    print(f"Size: {zip_path.stat().st_size / 1024 / 1024:.1f} MB")
    
    # Create extraction directory
    extract_dir = Path('bank_check_ocr/data/raw')
    extract_dir.mkdir(parents=True, exist_ok=True)
    
    # Extract the zip file
    try:
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            print("Extracting dataset...")
            zip_ref.extractall(extract_dir)
            print(f"✓ Dataset extracted to: {extract_dir}")
            
            # List contents
            print("\nDataset contents:")
            file_list = zip_ref.namelist()
            print(f"Total files: {len(file_list)}")
            
            # Show first few files
            for i, file_name in enumerate(file_list[:10]):
                print(f"  {i+1}. {file_name}")
            
            if len(file_list) > 10:
                print(f"  ... and {len(file_list) - 10} more files")
                
        return extract_dir
        
    except Exception as e:
        print(f"Error extracting dataset: {e}")
        return None

# Extract the dataset
dataset_path = extract_dataset()

## Step 5.2: Explore the Dataset Structure

In [None]:
# Step 5.2: Explore the extracted dataset
def explore_dataset(dataset_path):
    """Explore the structure of the extracted dataset"""
    
    if dataset_path is None:
        print("No dataset path provided")
        return
    
    print(f"Exploring dataset at: {dataset_path}")
    print("=" * 50)
    
    # Find all image files
    image_extensions = ['.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.tif']
    image_files = []
    
    for ext in image_extensions:
        image_files.extend(list(dataset_path.rglob(f'*{ext}')))
        image_files.extend(list(dataset_path.rglob(f'*{ext.upper()}')))
    
    print(f"Found {len(image_files)} image files")
    
    if image_files:
        print("\nFirst 5 image files:")
        for i, img_path in enumerate(image_files[:5]):
            relative_path = img_path.relative_to(dataset_path)
            file_size = img_path.stat().st_size / 1024  # KB
            print(f"  {i+1}. {relative_path} ({file_size:.1f} KB)")
        
        # Check directory structure
        print("\nDirectory structure:")
        directories = set()
        for img_path in image_files:
            relative_path = img_path.relative_to(dataset_path)
            if relative_path.parent != Path('.'):
                directories.add(str(relative_path.parent))
        
        if directories:
            for directory in sorted(directories):
                count = len([f for f in image_files if directory in str(f.relative_to(dataset_path))])
                print(f"  {directory}: {count} files")
        else:
            print("  All files in root directory")
    
    return image_files

# Explore the dataset
if dataset_path:
    image_files = explore_dataset(dataset_path)
else:
    print("Dataset extraction failed. Please check the zip file.")

In [None]:
# Step 5.3: Load and display sample cheque images
import matplotlib.pyplot as plt

def display_sample_images(image_files, num_samples=4):
    """Display sample images from the dataset"""
    
    if not image_files:
        print("No image files found!")
        return
    
    # Select sample images
    sample_images = image_files[:num_samples]
    
    # Create subplot
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))
    axes = axes.flatten()
    
    print("Loading sample cheque images...")
    
    for i, img_path in enumerate(sample_images):
        try:
            # Load image
            img = cv2.imread(str(img_path))
            if img is not None:
                # Convert BGR to RGB for matplotlib
                img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                
                # Display image
                axes[i].imshow(img_rgb)
                axes[i].set_title(f"{img_path.name}\n{img.shape[1]}x{img.shape[0]}")
                axes[i].axis('off')
                
                print(f"✓ Loaded {img_path.name}: {img.shape}")
            else:
                print(f"✗ Failed to load {img_path.name}")
                axes[i].text(0.5, 0.5, f"Failed to load\n{img_path.name}", 
                           ha='center', va='center', transform=axes[i].transAxes)
                axes[i].axis('off')
                
        except Exception as e:
            print(f"✗ Error loading {img_path.name}: {e}")
            axes[i].text(0.5, 0.5, f"Error loading\n{img_path.name}", 
                       ha='center', va='center', transform=axes[i].transAxes)
            axes[i].axis('off')
    
    plt.tight_layout()
    plt.show()
    
    return sample_images

# Display sample images
if 'image_files' in locals() and image_files:
    sample_images = display_sample_images(image_files)
else:
    print("Please run the dataset exploration code first!")

## Step 5: Basic OCR Testing

In [None]:
# Test OCR on your check images
def test_ocr_on_checks(image_files, num_tests=3):
    """Test OCR on actual check images"""
    
    test_images = image_files[:num_tests]
    
    for i, img_path in enumerate(test_images):
        print(f"\n=== Testing {img_path.name} ===")
        
        results = hybrid_ocr.process_image(img_path)
        
        if results:
            print(f"Detected {len(results)} text elements:")
            for j, result in enumerate(results, 1):
                print(f"{j}. '{result['text']}' (confidence: {result['confidence']:.2f})")
        else:
            print("No text detected")

# Run test
test_ocr_on_checks(image_files)

## Step 6: Advanced Preprocessing

In [None]:
# Enhanced preprocessing for better OCR
def advanced_preprocess(image_path):
    """Advanced preprocessing for check images"""
    
    img = cv2.imread(str(image_path))
    if img is None:
        return None
    
    # Convert to grayscale
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    
    # Gaussian blur to reduce noise
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    
    # Adaptive thresholding
    thresh = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, 
                                   cv2.THRESH_BINARY, 11, 2)
    
    # Morphological operations
    kernel = np.ones((2, 2), np.uint8)
    cleaned = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
    
    return cleaned

# Test advanced preprocessing
test_img = advanced_preprocess(image_files[0])
if test_img is not None:
    results = hybrid_ocr.easyocr_reader.readtext(test_img)
    print(f"Advanced preprocessing results: {len(results)} text elements found")

## Step 7: Install Google Vision API

In [None]:
# Install Google Cloud Vision
import subprocess
import sys

try:
    subprocess.check_call([sys.executable, "-m", "pip", "install", "google-cloud-vision"])
    print("✓ Google Vision API installed")
except Exception as e:
    print(f"Installation error: {e}")

## Step 8: Test Google Vision API

In [2]:
# Test Google Vision API with your key
def test_google_vision_setup():
    """Test Google Vision API setup"""
    
    try:
        from google.cloud import vision
        import os
        
        # Set your API key path
        key_path = "VisionApiKey.json"  # Your uploaded key file
        
        # Check if key file exists
        if not os.path.exists(key_path):
            print(f" API key file not found: {key_path}")
            return False
        
        # Set environment variable
        os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = key_path
        
        # Test API connection
        client = vision.ImageAnnotatorClient()
        print(" Google Vision API client created successfully!")
        
        return True
        
    except Exception as e:
        print(f" Google Vision setup error: {e}")
        return False

# Test the setup
google_vision_ready = test_google_vision_setup()

 Google Vision API client created successfully!


In [None]:
from google.cloud import vision
import os

# 🛠️ Set your API key path BEFORE running the loop
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "VisionApiKey.json"

def extract_text_google_vision(image_path):
    """
    Extracts text using Google Cloud Vision API from a given image.
    Returns a list of dictionaries with text and bounding box info.
    """
    try:
        client = vision.ImageAnnotatorClient()

        with open(image_path, 'rb') as image_file:
            content = image_file.read()

        image = vision.Image(content=content)
        response = client.text_detection(image=image)

        results = []
        if response.text_annotations:
            for annotation in response.text_annotations:
                text = annotation.description
                vertices = annotation.bounding_poly.vertices
                bbox = [(v.x, v.y) for v in vertices]

                results.append({
                    'text': text,
                    'bbox': bbox,
                    'engine': 'google_vision'
                })

        return results
    
    except Exception as e:
        print(f"❌ Error extracting text from {image_path}: {e}")
        return []


## Process All Images in a Folder

In [23]:
import zipfile

zip_path = "IDRBT_Cheque_Image_Dataset.zip"
extract_path = "cheques_india"

# Unzip if not already done
if not os.path.exists(extract_path):
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_path)
        print(f"✓ Extracted ZIP to folder: {extract_path}")
else:
    print(f"✓ Folder already exists: {extract_path}")


✓ Folder already exists: cheques_india


In [24]:
image_folder = extract_path  # ✅ Now this is the actual folder
output_records = []

for idx, filename in enumerate(os.listdir(image_folder)):
    if filename.lower().endswith(('.jpg', '.jpeg', '.png')):
        image_path = os.path.join(image_folder, filename)
        print(f"[{idx+1}] Processing: {filename}")
        
        ocr_results = extract_text_google_vision(image_path)

        for item in ocr_results:
            output_records.append({
                "image": filename,
                "text": item['text'],
                "bbox": item['bbox'],
                "engine": item['engine']
            })


In [25]:
df = pd.DataFrame(output_records)
df.to_excel("cheque_ocr_output2.xlsx", index=False)
print("✅ OCR results saved to cheque_ocr_output1.xlsx")


✅ OCR results saved to cheque_ocr_output1.xlsx


In [26]:
import os

image_folder = "cheques_india"  # or whatever your extracted folder is
output_records = []

valid_extensions = ('.jpg', '.jpeg', '.png')

for root, dirs, files in os.walk(image_folder):
    for filename in files:
        if filename.lower().endswith(valid_extensions):
            image_path = os.path.join(root, filename)
            print(f"🔍 Processing: {image_path}")
            
            ocr_results = extract_text_google_vision(image_path)

            if not ocr_results:
                print("⚠ No text detected!")
            else:
                for item in ocr_results:
                    output_records.append({
                        "image": filename,
                        "text": item['text'],
                        "bbox": item['bbox'],
                        "engine": item['engine']
                    })


In [27]:
import os
import pandas as pd

# 👇 Set your API key here again
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "VisionApiKey.json"

image_folder = "cheques"
output_records = []
valid_extensions = ('.jpg', '.jpeg', '.png')

for root, dirs, files in os.walk(image_folder):
    for filename in files:
        if filename.lower().endswith(valid_extensions):
            image_path = os.path.join(root, filename)
            print(f"🔍 Processing: {image_path}")
            try:
                ocr_results = extract_text_google_vision(image_path)

                if not ocr_results:
                    print("⚠ No text detected!")
                else:
                    for item in ocr_results:
                        output_records.append({
                            "image": filename,
                            "text": item['text'],
                            "bbox": item['bbox'],
                            "engine": item['engine']
                        })
            except Exception as e:
                print(f"❌ Error extracting text from {image_path}: {e}")

# Save output
df = pd.DataFrame(output_records)
if not df.empty:
    df.to_excel("cheque_ocr_output.xlsx", index=False)
    print("✅ OCR results saved to cheque_ocr_output.xlsx")
else:
    print("⚠ No OCR results to save.")


🔍 Processing: cheques\TestSet\X\X_013.jpeg
🔍 Processing: cheques\TestSet\X\X_014.jpeg
🔍 Processing: cheques\TestSet\X\X_015.jpeg
🔍 Processing: cheques\TestSet\X\X_016.jpeg
🔍 Processing: cheques\TestSet\X\X_017.jpeg
🔍 Processing: cheques\TestSet\X\X_018.jpeg
🔍 Processing: cheques\TestSet\X\X_019.jpeg
🔍 Processing: cheques\TestSet\X\X_020.jpeg
🔍 Processing: cheques\TestSet\X\X_021.jpeg
🔍 Processing: cheques\TestSet\X\X_022.jpeg
🔍 Processing: cheques\TestSet\X\X_023.jpeg
🔍 Processing: cheques\TestSet\X\X_024.jpeg
🔍 Processing: cheques\TestSet\X\X_025.jpeg
🔍 Processing: cheques\TestSet\X\X_026.jpeg
🔍 Processing: cheques\TestSet\X\X_027.jpeg
🔍 Processing: cheques\TestSet\X\X_028.jpeg
🔍 Processing: cheques\TestSet\X\X_029.jpeg
🔍 Processing: cheques\TestSet\X\X_030.jpeg
🔍 Processing: cheques\TestSet\X\X_031.jpeg
🔍 Processing: cheques\TestSet\X\X_032.jpeg
🔍 Processing: cheques\TestSet\X\X_033.jpeg
🔍 Processing: cheques\TestSet\X\X_034.jpeg
🔍 Processing: cheques\TestSet\X\X_035.jpeg
🔍 Processin

In [12]:
from collections import defaultdict

# Group text by image
image_text_map = defaultdict(str)

for record in output_records:
    image = record['image']
    text = record['text']
    image_text_map[image] += " " + text


In [28]:
import re

def extract_fields(text_block):
    return {
        "Loan Account Number": re.search(r'\b\d{9,15}\b', text_block),
        "IFSC Code": re.search(r'[A-Z]{4}0[A-Z0-9]{6}', text_block),
        "MICR Code": re.search(r'\b\d{9}\b', text_block),
        "Installment Date": re.search(r'\b\d{2}[-/]\d{2}[-/]\d{4}\b', text_block),
        "Instrument Amount": re.search(r'₹?\s?\d{1,3}(,\d{3})*(\.\d{2})?', text_block),
        "Account Name": re.search(r'Pay to the Order of\s+([A-Za-z ]+)', text_block),
    }


In [29]:
extracted_structured_data = []

for image, text in image_text_map.items():
    fields = extract_fields(text)
    structured = {"image": image}
    for key, match in fields.items():
        structured[key] = match.group(0) if match else None
    extracted_structured_data.append(structured)


In [31]:
df_structured = pd.DataFrame(extracted_structured_data)
df_structured.to_excel("cheque_final_output.xlsx", index=False)
print("Final OCR output saved!")


Final OCR output saved!
