In [1]:
import os
import cv2
import numpy as np
import pytesseract
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm
import json
import time
from datetime import datetime

def save_results_to_file(results, output_dir, timestamp):
    """Write per-image OCR results to a JSON file and a CSV summary.

    Two files are created inside ``output_dir``:

    * ``detailed_results_<timestamp>.json`` -- the complete ``results`` list.
    * ``summary_<timestamp>.csv`` -- one row per result with the columns
      ``filename,status,text_length,best_method``.

    Args:
        results: List of result dicts. Every dict must contain
            ``'filename'`` and ``'status'``; ``'text'`` and
            ``'best_method'`` are optional.
        output_dir: Existing directory the two files are written into.
        timestamp: String embedded in both file names.
    """
    # Local import keeps this function self-contained.
    import csv

    # Save detailed JSON results
    json_path = os.path.join(output_dir, f"detailed_results_{timestamp}.json")
    with open(json_path, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=4)

    csv_path = os.path.join(output_dir, f"summary_{timestamp}.csv")
    # newline='' hands line-ending control to the csv writer; os.linesep
    # keeps the platform line ending that plain text-mode writes produced.
    with open(csv_path, 'w', newline='', encoding='utf-8') as f:
        # csv.writer quotes fields containing commas, quotes or newlines,
        # so unusual file names can no longer corrupt a row.
        writer = csv.writer(f, lineterminator=os.linesep)
        writer.writerow(["filename", "status", "text_length", "best_method"])
        for r in results:
            # Only successful results carry recognised text.
            has_text = r['status'] == 'success' and 'text' in r
            text_length = len(r['text']) if has_text else 0
            writer.writerow([
                r['filename'],
                r['status'],
                text_length,
                r.get('best_method', ''),
            ])
            
class EnhancedOCR:
    """Batch OCR pipeline that preprocesses images and runs Tesseract.

    For every image found in ``input_folder`` the pipeline builds four
    preprocessed variants, saves them as PNG files, runs
    ``pytesseract.image_to_string`` on each variant and keeps the longest
    recognised text as the best result.
    """

    # Lower-case file extensions accepted as input images.
    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.tiff')
    # Option string passed to every pytesseract call.
    TESSERACT_CONFIG = r'--oem 3 --psm 6'
    # Images whose longest side exceeds this many pixels are downscaled.
    MAX_DIMENSION = 2000

    def __init__(self, input_folder, output_folder, num_threads=4):
        """Store the configuration and create the output directories.

        Args:
            input_folder: Directory that is scanned for input images.
            output_folder: Root directory for processed images and results.
            num_threads: Number of worker threads used for processing.
        """
        self.input_folder = input_folder
        self.output_folder = output_folder
        self.num_threads = num_threads
        self.create_output_dirs()

    def create_output_dirs(self):
        """Create ``processed_images`` and ``results`` under the output folder."""
        self.processed_dir = os.path.join(self.output_folder, 'processed_images')
        self.results_dir = os.path.join(self.output_folder, 'results')
        os.makedirs(self.processed_dir, exist_ok=True)
        os.makedirs(self.results_dir, exist_ok=True)

    def enhance_image(self, image):
        """Build the preprocessed variants of one image.

        Args:
            image: Image array as returned by ``cv2.imread`` (a 3-dimensional
                array is converted from BGR to grayscale; anything else is
                copied as-is).

        Returns:
            Dict with the keys ``'original'`` (grayscale, possibly
            downscaled), ``'enhanced'`` (CLAHE output), ``'binary_adaptive'``
            and ``'binary_otsu'`` (thresholded versions of the denoised
            image), or ``None`` when any step raises.
        """
        try:
            if len(image.shape) == 3:
                gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            else:
                gray = image.copy()

            # Cap the longest side so the later steps stay tractable.
            height, width = gray.shape
            longest_side = max(height, width)
            if longest_side > self.MAX_DIMENSION:
                scale = self.MAX_DIMENSION / longest_side
                gray = cv2.resize(gray, None, fx=scale, fy=scale)

            clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
            enhanced = clahe.apply(gray)

            denoised = cv2.fastNlMeansDenoising(enhanced)

            binary_adaptive = cv2.adaptiveThreshold(
                denoised, 255,
                cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                cv2.THRESH_BINARY, 11, 2
            )

            _, binary_otsu = cv2.threshold(
                denoised, 0, 255,
                cv2.THRESH_BINARY + cv2.THRESH_OTSU
            )

            return {
                'original': gray,
                'enhanced': enhanced,
                'binary_adaptive': binary_adaptive,
                'binary_otsu': binary_otsu
            }
        except Exception as e:
            # Best-effort: the caller turns None into an error result.
            print(f"Error in image enhancement: {str(e)}")
            return None

    def _ocr_variants(self, image_dict):
        """Run OCR on every variant, keeping texts and failures apart.

        Returns:
            Tuple ``(texts, errors)``: ``texts`` maps variant name to the
            stripped recognised text, ``errors`` maps variant name to an
            ``"Error: ..."`` message for variants whose OCR call raised.
        """
        texts = {}
        errors = {}
        for img_type, img in image_dict.items():
            try:
                text = pytesseract.image_to_string(img, config=self.TESSERACT_CONFIG)
                texts[img_type] = text.strip()
            except Exception as e:
                errors[img_type] = f"Error: {str(e)}"
        return texts, errors

    @staticmethod
    def _merge_results(image_dict, texts, errors):
        """Combine texts and error messages into one dict in variant order."""
        return {
            name: texts[name] if name in texts else errors[name]
            for name in image_dict
        }

    @staticmethod
    def _select_best(texts):
        """Return ``(variant, text)`` with the longest text, or ``None`` if empty."""
        if not texts:
            return None
        return max(texts.items(), key=lambda item: len(item[1]))

    def extract_text(self, image_dict):
        """Extract text from every preprocessing variant.

        Args:
            image_dict: Mapping of variant name to image array.

        Returns:
            Dict mapping each variant name to its recognised text, or to an
            ``"Error: ..."`` string when OCR failed for that variant.
        """
        texts, errors = self._ocr_variants(image_dict)
        return self._merge_results(image_dict, texts, errors)

    def process_single_image(self, image_file):
        """Process one image: load, preprocess, save variants, run OCR.

        Args:
            image_file: File name relative to ``self.input_folder``.

        Returns:
            Result dict. On success it holds ``'filename'``, ``'status'``
            (``'success'``), ``'best_method'``, ``'text'`` and
            ``'all_results'``. On failure it holds ``'filename'``,
            ``'status'`` (``'error'``) and ``'error'``; when OCR failed for
            every variant it also carries ``'all_results'``.
        """
        try:
            image_path = os.path.join(self.input_folder, image_file)
            image = cv2.imread(image_path)

            if image is None:
                return {
                    'filename': image_file,
                    'status': 'error',
                    'error': 'Failed to load image'
                }

            enhanced_images = self.enhance_image(image)
            if enhanced_images is None:
                return {
                    'filename': image_file,
                    'status': 'error',
                    'error': 'Failed to enhance image'
                }

            # NOTE: inputs that differ only by extension (a.jpg / a.png)
            # map to the same output names and overwrite each other.
            base_name = os.path.splitext(image_file)[0]
            for img_type, img in enhanced_images.items():
                output_path = os.path.join(
                    self.processed_dir,
                    f"{base_name}_{img_type}.png"
                )
                cv2.imwrite(output_path, img)

            texts, errors = self._ocr_variants(enhanced_images)
            text_results = self._merge_results(enhanced_images, texts, errors)

            # Only real OCR output may win; an "Error: ..." message must
            # never be reported as the recognised text.
            best_result = self._select_best(texts)
            if best_result is None:
                return {
                    'filename': image_file,
                    'status': 'error',
                    'error': 'OCR failed for every image variant',
                    'all_results': text_results
                }

            return {
                'filename': image_file,
                'status': 'success',
                'best_method': best_result[0],
                'text': best_result[1],
                'all_results': text_results
            }

        except Exception as e:
            # Worker boundary: one bad image must not abort the batch.
            return {
                'filename': image_file,
                'status': 'error',
                'error': str(e)
            }

    @staticmethod
    def _build_stats(total_images, results, processing_time):
        """Summarise a run; the average is 0 when no images were processed."""
        return {
            'total_images': total_images,
            'successful': sum(1 for r in results if r['status'] == 'success'),
            'failed': sum(1 for r in results if r['status'] == 'error'),
            'processing_time': processing_time,
            # Guard against an empty input folder (division by zero).
            'average_time_per_image': (
                processing_time / total_images if total_images else 0
            )
        }

    def process_all_images(self):
        """Process every image in the input folder using a thread pool.

        Writes the detailed results, the CSV summary and a
        ``processing_stats_<timestamp>.json`` file into ``self.results_dir``.

        Returns:
            Tuple ``(stats, results)``: the statistics dict and the list of
            per-image result dicts in completion order.
        """
        start_time = time.time()

        image_files = [
            f for f in os.listdir(self.input_folder)
            if f.lower().endswith(self.IMAGE_EXTENSIONS)
        ]

        results = []
        with ThreadPoolExecutor(max_workers=self.num_threads) as executor:
            futures = [
                executor.submit(self.process_single_image, image_file)
                for image_file in image_files
            ]

            with tqdm(total=len(image_files), desc="Processing images") as pbar:
                for future in as_completed(futures):
                    results.append(future.result())
                    pbar.update(1)

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        save_results_to_file(results, self.results_dir, timestamp)

        processing_time = time.time() - start_time
        stats = self._build_stats(len(image_files), results, processing_time)

        stats_path = os.path.join(self.results_dir, f"processing_stats_{timestamp}.json")
        with open(stats_path, 'w', encoding='utf-8') as f:
            json.dump(stats, f, indent=4)

        return stats, results
    
# Run the Tesseract-based pipeline on ./images and write everything to ./output.
input_folder = "images"
output_folder = "output"

ocr = EnhancedOCR(input_folder, output_folder, num_threads=4)
stats, results = ocr.process_all_images()

# Assemble the report first, then emit it with a single print call.
summary_lines = [
    "\nProcessing Summary:",
    f"Total images processed: {stats['total_images']}",
    f"Successful: {stats['successful']}",
    f"Failed: {stats['failed']}",
    f"Total processing time: {stats['processing_time']:.2f} seconds",
    f"Average time per image: {stats['average_time_per_image']:.2f} seconds",
]
print("\n".join(summary_lines))

In [5]:
import os
import cv2
import numpy as np
import easyocr
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm
import json
import time
from datetime import datetime

def save_results_to_file(results, output_dir, timestamp):
    """Write per-image OCR results to a JSON file and a CSV summary.

    Two files are created inside ``output_dir``:

    * ``detailed_results_<timestamp>.json`` -- the complete ``results`` list.
    * ``summary_<timestamp>.csv`` -- one row per result with the columns
      ``filename,status,text_length,best_method``.

    Args:
        results: List of result dicts. Every dict must contain
            ``'filename'`` and ``'status'``; ``'text'`` and
            ``'best_method'`` are optional.
        output_dir: Existing directory the two files are written into.
        timestamp: String embedded in both file names.
    """
    # Local import keeps this function self-contained.
    import csv

    # Save detailed JSON results
    json_path = os.path.join(output_dir, f"detailed_results_{timestamp}.json")
    with open(json_path, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=4)

    csv_path = os.path.join(output_dir, f"summary_{timestamp}.csv")
    # newline='' hands line-ending control to the csv writer; os.linesep
    # keeps the platform line ending that plain text-mode writes produced.
    with open(csv_path, 'w', newline='', encoding='utf-8') as f:
        # csv.writer quotes fields containing commas, quotes or newlines,
        # so unusual file names can no longer corrupt a row.
        writer = csv.writer(f, lineterminator=os.linesep)
        writer.writerow(["filename", "status", "text_length", "best_method"])
        for r in results:
            # Only successful results carry recognised text.
            has_text = r['status'] == 'success' and 'text' in r
            text_length = len(r['text']) if has_text else 0
            writer.writerow([
                r['filename'],
                r['status'],
                text_length,
                r.get('best_method', ''),
            ])
            
class EnhancedOCR:
    """Batch OCR pipeline that preprocesses images and runs EasyOCR.

    For every image found in ``input_folder`` the pipeline builds four
    preprocessed variants, saves them as PNG files, runs
    ``easyocr.Reader.readtext`` on each variant and keeps the longest
    recognised text as the best result.
    """

    # Lower-case file extensions accepted as input images.
    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.tiff')
    # Images whose longest side exceeds this many pixels are downscaled.
    MAX_DIMENSION = 2000

    def __init__(self, input_folder, output_folder, num_threads=4,
                 languages=('en',), gpu=True):
        """Store the configuration, build the reader, create output dirs.

        Args:
            input_folder: Directory that is scanned for input images.
            output_folder: Root directory for processed images and results.
            num_threads: Number of worker threads used for processing.
            languages: Language codes handed to ``easyocr.Reader``.
            gpu: Forwarded to ``easyocr.Reader`` as its ``gpu`` argument.
        """
        self.input_folder = input_folder
        self.output_folder = output_folder
        self.num_threads = num_threads
        # NOTE(review): this single Reader is shared by all worker threads;
        # confirm EasyOCR inference is safe under concurrent calls.
        self.reader = easyocr.Reader(list(languages), gpu=gpu)
        self.create_output_dirs()

    def create_output_dirs(self):
        """Create ``processed_images`` and ``results`` under the output folder."""
        self.processed_dir = os.path.join(self.output_folder, 'processed_images')
        self.results_dir = os.path.join(self.output_folder, 'results')
        os.makedirs(self.processed_dir, exist_ok=True)
        os.makedirs(self.results_dir, exist_ok=True)

    def enhance_image(self, image):
        """Build the preprocessed variants of one image.

        Args:
            image: Image array as returned by ``cv2.imread`` (a 3-dimensional
                array is converted from BGR to grayscale; anything else is
                copied as-is).

        Returns:
            Dict with the keys ``'original'`` (grayscale, possibly
            downscaled), ``'enhanced'`` (CLAHE output), ``'binary_adaptive'``
            and ``'binary_otsu'`` (thresholded versions of the denoised
            image), or ``None`` when any step raises.
        """
        try:
            if len(image.shape) == 3:
                gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            else:
                gray = image.copy()

            # Cap the longest side so the later steps stay tractable.
            height, width = gray.shape
            longest_side = max(height, width)
            if longest_side > self.MAX_DIMENSION:
                scale = self.MAX_DIMENSION / longest_side
                gray = cv2.resize(gray, None, fx=scale, fy=scale)

            clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
            enhanced = clahe.apply(gray)

            denoised = cv2.fastNlMeansDenoising(enhanced)

            binary_adaptive = cv2.adaptiveThreshold(
                denoised, 255,
                cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                cv2.THRESH_BINARY, 11, 2
            )

            _, binary_otsu = cv2.threshold(
                denoised, 0, 255,
                cv2.THRESH_BINARY + cv2.THRESH_OTSU
            )

            return {
                'original': gray,
                'enhanced': enhanced,
                'binary_adaptive': binary_adaptive,
                'binary_otsu': binary_otsu
            }
        except Exception as e:
            # Best-effort: the caller turns None into an error result.
            print(f"Error in image enhancement: {str(e)}")
            return None

    def _ocr_variants(self, image_dict):
        """Run OCR on every variant, keeping texts and failures apart.

        Returns:
            Tuple ``(texts, errors)``: ``texts`` maps variant name to the
            recognised fragments joined with spaces, ``errors`` maps variant
            name to an ``"Error: ..."`` message for variants that raised.
        """
        texts = {}
        errors = {}
        for img_type, img in image_dict.items():
            try:
                # Single-channel images are expanded to three channels
                # before being handed to the reader.
                if len(img.shape) == 2:
                    img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
                fragments = self.reader.readtext(img, detail=0)
                texts[img_type] = " ".join(fragments).strip()
            except Exception as e:
                errors[img_type] = f"Error: {str(e)}"
        return texts, errors

    @staticmethod
    def _merge_results(image_dict, texts, errors):
        """Combine texts and error messages into one dict in variant order."""
        return {
            name: texts[name] if name in texts else errors[name]
            for name in image_dict
        }

    @staticmethod
    def _select_best(texts):
        """Return ``(variant, text)`` with the longest text, or ``None`` if empty."""
        if not texts:
            return None
        return max(texts.items(), key=lambda item: len(item[1]))

    def extract_text(self, image_dict):
        """Extract text from every preprocessing variant using EasyOCR.

        Args:
            image_dict: Mapping of variant name to image array.

        Returns:
            Dict mapping each variant name to its recognised text, or to an
            ``"Error: ..."`` string when OCR failed for that variant.
        """
        texts, errors = self._ocr_variants(image_dict)
        return self._merge_results(image_dict, texts, errors)

    def process_single_image(self, image_file):
        """Process one image: load, preprocess, save variants, run OCR.

        Args:
            image_file: File name relative to ``self.input_folder``.

        Returns:
            Result dict. On success it holds ``'filename'``, ``'status'``
            (``'success'``), ``'best_method'``, ``'text'`` and
            ``'all_results'``. On failure it holds ``'filename'``,
            ``'status'`` (``'error'``) and ``'error'``; when OCR failed for
            every variant it also carries ``'all_results'``.
        """
        try:
            image_path = os.path.join(self.input_folder, image_file)
            image = cv2.imread(image_path)

            if image is None:
                return {
                    'filename': image_file,
                    'status': 'error',
                    'error': 'Failed to load image'
                }

            enhanced_images = self.enhance_image(image)
            if enhanced_images is None:
                return {
                    'filename': image_file,
                    'status': 'error',
                    'error': 'Failed to enhance image'
                }

            # NOTE: inputs that differ only by extension (a.jpg / a.png)
            # map to the same output names and overwrite each other.
            base_name = os.path.splitext(image_file)[0]
            for img_type, img in enhanced_images.items():
                output_path = os.path.join(
                    self.processed_dir,
                    f"{base_name}_{img_type}.png"
                )
                cv2.imwrite(output_path, img)

            texts, errors = self._ocr_variants(enhanced_images)
            text_results = self._merge_results(enhanced_images, texts, errors)

            # Only real OCR output may win; an "Error: ..." message must
            # never be reported as the recognised text.
            best_result = self._select_best(texts)
            if best_result is None:
                return {
                    'filename': image_file,
                    'status': 'error',
                    'error': 'OCR failed for every image variant',
                    'all_results': text_results
                }

            return {
                'filename': image_file,
                'status': 'success',
                'best_method': best_result[0],
                'text': best_result[1],
                'all_results': text_results
            }

        except Exception as e:
            # Worker boundary: one bad image must not abort the batch.
            return {
                'filename': image_file,
                'status': 'error',
                'error': str(e)
            }

    @staticmethod
    def _build_stats(total_images, results, processing_time):
        """Summarise a run; the average is 0 when no images were processed."""
        return {
            'total_images': total_images,
            'successful': sum(1 for r in results if r['status'] == 'success'),
            'failed': sum(1 for r in results if r['status'] == 'error'),
            'processing_time': processing_time,
            'average_time_per_image': (
                processing_time / total_images if total_images else 0
            )
        }

    def process_all_images(self):
        """Process every image in the input folder using a thread pool.

        Writes the detailed results, the CSV summary and a
        ``processing_stats_<timestamp>.json`` file into ``self.results_dir``.

        Returns:
            Tuple ``(stats, results)``: the statistics dict and the list of
            per-image result dicts in completion order.
        """
        start_time = time.time()

        image_files = [
            f for f in os.listdir(self.input_folder)
            if f.lower().endswith(self.IMAGE_EXTENSIONS)
        ]

        results = []
        with ThreadPoolExecutor(max_workers=self.num_threads) as executor:
            futures = [
                executor.submit(self.process_single_image, image_file)
                for image_file in image_files
            ]

            with tqdm(total=len(image_files), desc="Processing images") as pbar:
                for future in as_completed(futures):
                    results.append(future.result())
                    pbar.update(1)

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        save_results_to_file(results, self.results_dir, timestamp)

        processing_time = time.time() - start_time
        stats = self._build_stats(len(image_files), results, processing_time)

        stats_path = os.path.join(self.results_dir, f"processing_stats_{timestamp}.json")
        with open(stats_path, 'w', encoding='utf-8') as f:
            json.dump(stats, f, indent=4)

        return stats, results
    
# Run the EasyOCR-based pipeline on ./images and write everything to ./output1.
input_folder = "images"
output_folder = "output1"

ocr = EnhancedOCR(input_folder, output_folder, num_threads=4)
stats, results = ocr.process_all_images()

# Assemble the report first, then emit it with a single print call.
summary_lines = [
    "\nProcessing Summary:",
    f"Total images processed: {stats['total_images']}",
    f"Successful: {stats['successful']}",
    f"Failed: {stats['failed']}",
    f"Total processing time: {stats['processing_time']:.2f} seconds",
    f"Average time per image: {stats['average_time_per_image']:.2f} seconds",
]
print("\n".join(summary_lines))


  result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,
Processing images: 100%|██████████| 427/427 [48:21<00:00,  6.79s/it]


Processing Summary:
Total images processed: 427
Successful: 427
Failed: 0
Total processing time: 2901.19 seconds
Average time per image: 6.79 seconds



