# 1-With KERAS

In [3]:
# %pip (unlike !pip) installs into the environment of the running kernel.
%pip install paddleocr keras-ocr easyocr opencv-python-headless pandas paddlepaddle

Collecting paddlepaddle
  Downloading paddlepaddle-2.6.2-cp310-cp310-manylinux1_x86_64.whl.metadata (8.6 kB)
Collecting httpx (from paddlepaddle)
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting astor (from paddlepaddle)
  Downloading astor-0.8.1-py2.py3-none-any.whl.metadata (4.2 kB)
Collecting httpcore==1.* (from httpx->paddlepaddle)
  Downloading httpcore-1.0.5-py3-none-any.whl.metadata (20 kB)
Collecting h11<0.15,>=0.13 (from httpcore==1.*->httpx->paddlepaddle)
  Downloading h11-0.14.0-py3-none-any.whl.metadata (8.2 kB)
Downloading paddlepaddle-2.6.2-cp310-cp310-manylinux1_x86_64.whl (126.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m126.0/126.0 MB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading astor-0.8.1-py2.py3-none-any.whl (27 kB)
Downloading httpx-0.27.2-py3-none-any.whl (76 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.4/76.4 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading httpc

In [None]:

import cv2
import easyocr
import keras_ocr
from paddleocr import PaddleOCR
import pandas as pd
from datetime import datetime
import os

def ocr_with_paddle(img_path):
    """Run PaddleOCR on an image and return the recognised text.

    A new ``PaddleOCR`` engine (English, angle classifier enabled) is created
    on every call.

    Args:
        img_path: Path of the image file, passed straight to ``PaddleOCR.ocr``.

    Returns:
        str: Every recognised text fragment, each preceded by a single space
        (so a non-empty result starts with a space). An empty string is
        returned when there is nothing to iterate over.
    """
    ocr = PaddleOCR(lang='en', use_angle_cls=True)
    result = ocr.ocr(img_path)

    # Only the first entry of the result (the single input image) is used.
    # NOTE(review): PaddleOCR 2.x is reported to return [None] when no text is
    # detected; the old ``len(result[0])`` crashed with TypeError in that case.
    detections = result[0] if result else None
    if not detections:
        return ''

    # The text of a detection is read from detection[1][0].
    return ''.join(' ' + detection[1][0] for detection in detections)

def ocr_with_keras(img_path):
    """Recognise the text in an image with the keras-ocr pipeline.

    A new ``keras_ocr.pipeline.Pipeline`` is built on every call and is run on
    a one-element batch holding the image loaded by ``keras_ocr.tools.read``.

    Args:
        img_path: Location of the image, handed to ``keras_ocr.tools.read``.

    Returns:
        str: The recognised texts concatenated, each prefixed with one space.
    """
    recognizer = keras_ocr.pipeline.Pipeline()
    batch = [keras_ocr.tools.read(img_path)]
    # One image goes in, so only the first prediction list is relevant.
    word_boxes = recognizer.recognize(batch)[0]
    return ''.join(' ' + word for word, _box in word_boxes)

def get_grayscale(image):
    """Return ``image`` converted with OpenCV's ``COLOR_BGR2GRAY`` conversion."""
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return gray

def thresholding(src):
    """Apply ``cv2.threshold`` in ``THRESH_TOZERO`` mode (threshold 127, max 255).

    Only the image part of the ``cv2.threshold`` result is returned; the first
    element of the returned pair is discarded.
    """
    _retval, thresholded = cv2.threshold(src, 127, 255, cv2.THRESH_TOZERO)
    return thresholded

def ocr_with_easy(img_path):
    """Run EasyOCR (Thai + English) on a grayscale, thresholded copy of an image.

    A new ``easyocr.Reader`` is created on every call.

    Args:
        img_path: Path of the image file to load with ``cv2.imread``.

    Returns:
        str: The recognised text fragments joined with single spaces.

    Raises:
        ValueError: If ``cv2.imread`` cannot load ``img_path``.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise ValueError(f"Could not read image: {img_path}")

    # Keep the value returned by thresholding(): the previous code dropped it,
    # so the un-thresholded grayscale image was what actually got recognised.
    preprocessed = thresholding(get_grayscale(img))

    reader = easyocr.Reader(['th', 'en'])
    # The array is passed directly, so no shared 'temp_image.png' is written to
    # the working directory any more.
    # paragraph must be a real bool: the former string "False" is truthy and
    # therefore switched paragraph mode on.
    fragments = reader.readtext(preprocessed, paragraph=False, detail=0)
    # Space-separate the fragments so neighbouring words are not fused together.
    return ' '.join(fragments)

def generate_ocr(method, img_path):
    """Run the selected OCR engine on one image and save the text to a CSV file.

    The CSV has a single row with the columns ``Method`` and ``OCR Result`` and
    is written under a relative name built from the image's base name and the
    current timestamp.

    Args:
        method: Engine name: 'EasyOCR', 'KerasOCR' or 'PaddleOCR'.
        img_path: Path of the image to process.

    Returns:
        tuple: ``(text_output, csv_filename)``.

    Raises:
        ValueError: If ``method`` is not one of the supported engine names.
    """
    # Unknown engine names are rejected before any OCR work is attempted.
    if method not in ('EasyOCR', 'KerasOCR', 'PaddleOCR'):
        raise ValueError("Invalid OCR method")

    if method == 'PaddleOCR':
        text_output = ocr_with_paddle(img_path)
    elif method == 'KerasOCR':
        text_output = ocr_with_keras(img_path)
    else:
        # Only 'EasyOCR' can reach this branch after the guard above.
        text_output = ocr_with_easy(img_path)

    # File name: image base name (extension stripped) plus a second-resolution stamp.
    img_name = os.path.splitext(os.path.basename(img_path))[0]
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    csv_filename = f'ocr_result_{img_name}_{timestamp}.csv'

    row = {'Method': [method], 'OCR Result': [text_output]}
    pd.DataFrame(row).to_csv(csv_filename, index=False)

    return text_output, csv_filename

# Process multiple images
def process_images(method, img_paths):
    """Run ``generate_ocr`` on every image path and collect the successes.

    Any exception raised while handling an image is caught and reported with a
    printed message; that image is then left out of the returned list and the
    loop continues with the next path.

    Args:
        method: OCR engine name forwarded to ``generate_ocr``.
        img_paths: Iterable of image paths.

    Returns:
        list: ``(img_path, text, csv_file)`` tuples for the images that succeeded.
    """
    collected = []
    for path in img_paths:
        try:
            text, csv_path = generate_ocr(method, path)
            collected.append((path, text, csv_path))
            print(f"Processed {path}. Results saved to: {csv_path}")
        except Exception as exc:
            # Best effort: one failing image must not abort the whole batch.
            print(f"An error occurred while processing {path}: {exc}")
    return collected

# Simple command-line interface
if __name__ == "__main__":
    # Interactive entry point: ask for image paths and an OCR engine, run the
    # batch, then print a short summary for every image that succeeded.
    print("Welcome to the OCR Tool")
    img_paths = input("Enter the paths to your images (separated by commas): ").split(',')
    # Trim whitespace and drop blank entries (empty input, trailing or doubled
    # commas) so an empty path is never handed to the OCR engines.
    img_paths = [path.strip() for path in img_paths if path.strip()]

    print("\nChoose OCR method:")
    print("1. PaddleOCR")
    print("2. EasyOCR")
    print("3. KerasOCR")
    # Tolerate stray whitespace around the menu number.
    choice = input("Enter your choice (1-3): ").strip()

    method_map = {"1": "PaddleOCR", "2": "EasyOCR", "3": "KerasOCR"}
    method = method_map.get(choice)

    if not method:
        print("Invalid choice. Please run the script again and select a valid option.")
    elif not img_paths:
        print("No image paths were provided. Please run the script again and enter at least one path.")
    else:
        results = process_images(method, img_paths)
        print("\nProcessing complete. Summary:")
        for img_path, result, csv_file in results:
            print(f"\nImage: {img_path}")
            print(f"OCR Result: {result[:100]}...")  # Print first 100 characters
            print(f"Full results saved to: {csv_file}")

# 2-With Tesseract

In [11]:
# -y answers apt-get's confirmation prompt so the cell cannot block on input.
!apt-get install -y tesseract-ocr
# %pip (unlike !pip) installs into the environment of the running kernel.
%pip install paddleocr easyocr opencv-python-headless pandas pytesseract

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  tesseract-ocr-eng tesseract-ocr-osd
The following NEW packages will be installed:
  tesseract-ocr tesseract-ocr-eng tesseract-ocr-osd
0 upgraded, 3 newly installed, 0 to remove and 49 not upgraded.
Need to get 4,816 kB of archives.
After this operation, 15.6 MB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy/universe amd64 tesseract-ocr-eng all 1:4.00~git30-7274cfa-1.1 [1,591 kB]
Get:2 http://archive.ubuntu.com/ubuntu jammy/universe amd64 tesseract-ocr-osd all 1:4.00~git30-7274cfa-1.1 [2,990 kB]
Get:3 http://archive.ubuntu.com/ubuntu jammy/universe amd64 tesseract-ocr amd64 4.1.1-2.1build1 [236 kB]
Fetched 4,816 kB in 3s (1,778 kB/s)
Selecting previously unselected package tesseract-ocr-eng.
(Reading database ... 123599 files and directories currently installed.)
Preparing to unpack .../tesseract-ocr-

In [14]:


import cv2
import easyocr
from paddleocr import PaddleOCR
import pandas as pd
from datetime import datetime
import os
import pytesseract

def ocr_with_paddle(img_path):
    """Run PaddleOCR on an image and return the recognised text.

    A new ``PaddleOCR`` engine (English, angle classifier enabled) is created
    on every call.

    Args:
        img_path: Path of the image file, passed straight to ``PaddleOCR.ocr``.

    Returns:
        str: Every recognised text fragment, each preceded by a single space
        (so a non-empty result starts with a space). An empty string is
        returned when there is nothing to iterate over.
    """
    ocr = PaddleOCR(lang='en', use_angle_cls=True)
    result = ocr.ocr(img_path)

    # Only the first entry of the result (the single input image) is used.
    # NOTE(review): PaddleOCR 2.x is reported to return [None] when no text is
    # detected; the old ``len(result[0])`` crashed with TypeError in that case.
    detections = result[0] if result else None
    if not detections:
        return ''

    # The text of a detection is read from detection[1][0].
    return ''.join(' ' + detection[1][0] for detection in detections)

def ocr_with_tesseract(img_path):
    """Run Tesseract (through pytesseract) on an image and return its text.

    Args:
        img_path: Path of the image file to load with ``cv2.imread``.

    Returns:
        str: The text returned by ``pytesseract.image_to_string``.

    Raises:
        ValueError: If ``cv2.imread`` cannot load ``img_path``.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise ValueError(f"Could not read image: {img_path}")

    # OpenCV loads pixels in BGR order while pytesseract assumes RGB, so the
    # channels are swapped before recognition.
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    return pytesseract.image_to_string(rgb)

def get_grayscale(image):
    """Return ``image`` converted with OpenCV's ``COLOR_BGR2GRAY`` conversion."""
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return gray

def thresholding(src):
    """Apply ``cv2.threshold`` in ``THRESH_TOZERO`` mode (threshold 127, max 255).

    Only the image part of the ``cv2.threshold`` result is returned; the first
    element of the returned pair is discarded.
    """
    _retval, thresholded = cv2.threshold(src, 127, 255, cv2.THRESH_TOZERO)
    return thresholded

def ocr_with_easy(img_path):
    """Run EasyOCR (Thai + English) on a grayscale, thresholded copy of an image.

    A new ``easyocr.Reader`` is created on every call.

    Args:
        img_path: Path of the image file to load with ``cv2.imread``.

    Returns:
        str: The recognised text fragments joined with single spaces.

    Raises:
        ValueError: If ``cv2.imread`` cannot load ``img_path``.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise ValueError(f"Could not read image: {img_path}")

    # Keep the value returned by thresholding(): the previous code dropped it,
    # so the un-thresholded grayscale image was what actually got recognised.
    preprocessed = thresholding(get_grayscale(img))

    reader = easyocr.Reader(['th', 'en'])
    # The array is passed directly, so no shared 'temp_image.png' is written to
    # the working directory any more.
    # paragraph must be a real bool: the former string "False" is truthy and
    # therefore switched paragraph mode on.
    fragments = reader.readtext(preprocessed, paragraph=False, detail=0)
    # Space-separate the fragments so neighbouring words are not fused together.
    return ' '.join(fragments)

def generate_ocr(method, img_path):
    """Run the selected OCR engine on one image and save the text to a CSV file.

    The CSV has a single row with the columns ``Method`` and ``OCR Result`` and
    is written under a relative name built from the image's base name and the
    current timestamp.

    Args:
        method: Engine name: 'EasyOCR', 'Tesseract' or 'PaddleOCR'.
        img_path: Path of the image to process.

    Returns:
        tuple: ``(text_output, csv_filename)``.

    Raises:
        ValueError: If ``method`` is not one of the supported engine names.
    """
    # Unknown engine names are rejected before any OCR work is attempted.
    if method not in ('EasyOCR', 'Tesseract', 'PaddleOCR'):
        raise ValueError("Invalid OCR method")

    if method == 'PaddleOCR':
        text_output = ocr_with_paddle(img_path)
    elif method == 'Tesseract':
        text_output = ocr_with_tesseract(img_path)
    else:
        # Only 'EasyOCR' can reach this branch after the guard above.
        text_output = ocr_with_easy(img_path)

    # File name: image base name (extension stripped) plus a second-resolution stamp.
    img_name = os.path.splitext(os.path.basename(img_path))[0]
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    csv_filename = f'ocr_result_{img_name}_{timestamp}.csv'

    row = {'Method': [method], 'OCR Result': [text_output]}
    pd.DataFrame(row).to_csv(csv_filename, index=False)

    return text_output, csv_filename

# Process multiple images
def process_images(method, img_paths):
    """Run ``generate_ocr`` on every image path and collect the successes.

    Any exception raised while handling an image is caught and reported with a
    printed message; that image is then left out of the returned list and the
    loop continues with the next path.

    Args:
        method: OCR engine name forwarded to ``generate_ocr``.
        img_paths: Iterable of image paths.

    Returns:
        list: ``(img_path, text, csv_file)`` tuples for the images that succeeded.
    """
    collected = []
    for path in img_paths:
        try:
            text, csv_path = generate_ocr(method, path)
            collected.append((path, text, csv_path))
            print(f"Processed {path}. Results saved to: {csv_path}")
        except Exception as exc:
            # Best effort: one failing image must not abort the whole batch.
            print(f"An error occurred while processing {path}: {exc}")
    return collected

# Simple command-line interface
if __name__ == "__main__":
    # Interactive entry point: ask for image paths and an OCR engine, run the
    # batch, then print a short summary for every image that succeeded.
    print("Welcome to the OCR Tool")
    img_paths = input("Enter the paths to your images (separated by commas): ").split(',')
    # Trim whitespace and drop blank entries (empty input, trailing or doubled
    # commas) so an empty path is never handed to the OCR engines.
    img_paths = [path.strip() for path in img_paths if path.strip()]

    print("\nChoose OCR method:")
    print("1. PaddleOCR")
    print("2. EasyOCR")
    print("3. Tesseract")
    # Tolerate stray whitespace around the menu number.
    choice = input("Enter your choice (1-3): ").strip()

    method_map = {"1": "PaddleOCR", "2": "EasyOCR", "3": "Tesseract"}
    method = method_map.get(choice)

    if not method:
        print("Invalid choice. Please run the script again and select a valid option.")
    elif not img_paths:
        print("No image paths were provided. Please run the script again and enter at least one path.")
    else:
        results = process_images(method, img_paths)
        print("\nProcessing complete. Summary:")
        for img_path, result, csv_file in results:
            print(f"\nImage: {img_path}")
            print(f"OCR Result: {result[:100]}...")  # Print first 100 characters
            print(f"Full results saved to: {csv_file}")

Welcome to the OCR Tool
Enter the paths to your images (separated by commas): /content/MobPhoto_5.jpg

Choose OCR method:
1. PaddleOCR
2. EasyOCR
3. Tesseract
Enter your choice (1-3): 3
Processed /content/MobPhoto_5.jpg. Results saved to: ocr_result_MobPhoto_5_20240930_194307.csv

Processing complete. Summary:

Image: /content/MobPhoto_5.jpg
OCR Result:  

  

i

CS.

ABBYY Mobile OCR Engine 3.0

Compact Code OCR for Mobile Devices

What is the ABBYY M...
Full results saved to: ocr_result_MobPhoto_5_20240930_194307.csv


# 3- KAPALI DEVRE YOL HARİTASI

In [None]:
# Explicit %pip magic: a bare `pip ...` line only works while automagic is on.
%pip download paddleocr paddlepaddle easyocr opencv-python-headless pandas pytesseract

In [None]:
# Explicit %pip magic: a bare `pip ...` line only works while automagic is on.
%pip install --no-index --find-links /path/to/downloaded/packages paddleocr paddlepaddle easyocr opencv-python-headless pandas pytesseract

In [None]:
# Bu script, kapalı devre bir ortamda çalışmak üzere tasarlanmıştır.
# Kullanmadan önce aşağıdaki kütüphanelerin manuel olarak yüklenmiş olduğundan emin olun:
# - tesseract-ocr
# - paddleocr
# - paddlepaddle
# - easyocr
# - opencv-python-headless
# - pandas
# - pytesseract

import cv2
import easyocr
from paddleocr import PaddleOCR
import pandas as pd
from datetime import datetime
import os
import pytesseract

def ocr_with_paddle(img_path):
    """Run PaddleOCR on an image and return the recognised text.

    A new ``PaddleOCR`` engine (English, angle classifier enabled) is created
    on every call.

    Args:
        img_path: Path of the image file, passed straight to ``PaddleOCR.ocr``.

    Returns:
        str: Every recognised text fragment, each preceded by a single space
        (so a non-empty result starts with a space). An empty string is
        returned when there is nothing to iterate over.
    """
    ocr = PaddleOCR(lang='en', use_angle_cls=True)
    result = ocr.ocr(img_path)

    # Only the first entry of the result (the single input image) is used.
    # NOTE(review): PaddleOCR 2.x is reported to return [None] when no text is
    # detected; the old ``len(result[0])`` crashed with TypeError in that case.
    detections = result[0] if result else None
    if not detections:
        return ''

    # The text of a detection is read from detection[1][0].
    return ''.join(' ' + detection[1][0] for detection in detections)

def ocr_with_tesseract(img_path):
    """Run Tesseract (through pytesseract) on an image and return its text.

    Args:
        img_path: Path of the image file to load with ``cv2.imread``.

    Returns:
        str: The text returned by ``pytesseract.image_to_string``.

    Raises:
        ValueError: If ``cv2.imread`` cannot load ``img_path``.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise ValueError(f"Could not read image: {img_path}")

    # OpenCV loads pixels in BGR order while pytesseract assumes RGB, so the
    # channels are swapped before recognition.
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    return pytesseract.image_to_string(rgb)

def get_grayscale(image):
    """Return ``image`` converted with OpenCV's ``COLOR_BGR2GRAY`` conversion."""
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return gray

def thresholding(src):
    """Apply ``cv2.threshold`` in ``THRESH_TOZERO`` mode (threshold 127, max 255).

    Only the image part of the ``cv2.threshold`` result is returned; the first
    element of the returned pair is discarded.
    """
    _retval, thresholded = cv2.threshold(src, 127, 255, cv2.THRESH_TOZERO)
    return thresholded

def ocr_with_easy(img_path):
    """Run EasyOCR (Thai + English) on a grayscale, thresholded copy of an image.

    A new ``easyocr.Reader`` is created on every call.

    Args:
        img_path: Path of the image file to load with ``cv2.imread``.

    Returns:
        str: The recognised text fragments joined with single spaces.

    Raises:
        ValueError: If ``cv2.imread`` cannot load ``img_path``.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise ValueError(f"Could not read image: {img_path}")

    # Keep the value returned by thresholding(): the previous code dropped it,
    # so the un-thresholded grayscale image was what actually got recognised.
    preprocessed = thresholding(get_grayscale(img))

    reader = easyocr.Reader(['th', 'en'])
    # The array is passed directly, so no shared 'temp_image.png' is written to
    # the working directory any more.
    # paragraph must be a real bool: the former string "False" is truthy and
    # therefore switched paragraph mode on.
    fragments = reader.readtext(preprocessed, paragraph=False, detail=0)
    # Space-separate the fragments so neighbouring words are not fused together.
    return ' '.join(fragments)

def generate_ocr(method, img_path):
    """Run the selected OCR engine on one image and save the text to a CSV file.

    The CSV has a single row with the columns ``Method`` and ``OCR Result`` and
    is written under a relative name built from the image's base name and the
    current timestamp.

    Args:
        method: Engine name: 'EasyOCR', 'Tesseract' or 'PaddleOCR'.
        img_path: Path of the image to process.

    Returns:
        tuple: ``(text_output, csv_filename)``.

    Raises:
        ValueError: If ``method`` is not one of the supported engine names.
    """
    # Unknown engine names are rejected before any OCR work is attempted.
    if method not in ('EasyOCR', 'Tesseract', 'PaddleOCR'):
        raise ValueError("Invalid OCR method")

    if method == 'PaddleOCR':
        text_output = ocr_with_paddle(img_path)
    elif method == 'Tesseract':
        text_output = ocr_with_tesseract(img_path)
    else:
        # Only 'EasyOCR' can reach this branch after the guard above.
        text_output = ocr_with_easy(img_path)

    # File name: image base name (extension stripped) plus a second-resolution stamp.
    img_name = os.path.splitext(os.path.basename(img_path))[0]
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    csv_filename = f'ocr_result_{img_name}_{timestamp}.csv'

    row = {'Method': [method], 'OCR Result': [text_output]}
    pd.DataFrame(row).to_csv(csv_filename, index=False)

    return text_output, csv_filename

# Process multiple images
def process_images(method, img_paths):
    """Run ``generate_ocr`` on every image path and collect the successes.

    Any exception raised while handling an image is caught and reported with a
    printed message; that image is then left out of the returned list and the
    loop continues with the next path.

    Args:
        method: OCR engine name forwarded to ``generate_ocr``.
        img_paths: Iterable of image paths.

    Returns:
        list: ``(img_path, text, csv_file)`` tuples for the images that succeeded.
    """
    collected = []
    for path in img_paths:
        try:
            text, csv_path = generate_ocr(method, path)
            collected.append((path, text, csv_path))
            print(f"Processed {path}. Results saved to: {csv_path}")
        except Exception as exc:
            # Best effort: one failing image must not abort the whole batch.
            print(f"An error occurred while processing {path}: {exc}")
    return collected

# Simple command-line interface
if __name__ == "__main__":
    # Interactive entry point: ask for image paths and an OCR engine, run the
    # batch, then print a short summary for every image that succeeded.
    print("Welcome to the OCR Tool")
    img_paths = input("Enter the paths to your images (separated by commas): ").split(',')
    # Trim whitespace and drop blank entries (empty input, trailing or doubled
    # commas) so an empty path is never handed to the OCR engines.
    img_paths = [path.strip() for path in img_paths if path.strip()]

    print("\nChoose OCR method:")
    print("1. PaddleOCR")
    print("2. EasyOCR")
    print("3. Tesseract")
    # Tolerate stray whitespace around the menu number.
    choice = input("Enter your choice (1-3): ").strip()

    method_map = {"1": "PaddleOCR", "2": "EasyOCR", "3": "Tesseract"}
    method = method_map.get(choice)

    if not method:
        print("Invalid choice. Please run the script again and select a valid option.")
    elif not img_paths:
        print("No image paths were provided. Please run the script again and enter at least one path.")
    else:
        results = process_images(method, img_paths)
        print("\nProcessing complete. Summary:")
        for img_path, result, csv_file in results:
            print(f"\nImage: {img_path}")
            print(f"OCR Result: {result[:100]}...")  # Print first 100 characters
            print(f"Full results saved to: {csv_file}")

Öncelikle, internet bağlantısı olan bir bilgisayarda şu adımları izleyin:
a) Tesseract OCR'ı indirin:

Windows için: https://github.com/UB-Mannheim/tesseract/wiki
Linux için: Kaynak kodunu indirip derleyebilirsiniz veya dağıtımınıza özel paket yöneticisini kullanabilirsiniz.

b) Python paketlerini indirin:
`pip download paddleocr paddlepaddle easyocr opencv-python-headless pandas pytesseract`
Bu komut, tüm bağımlılıklarıyla birlikte paketleri indirecektir.
İndirilen tüm dosyaları (Tesseract kurulum dosyası ve .whl uzantılı Python paketleri) kapalı devre ortamınıza aktarın.
Kapalı devre ortamında:
a) Tesseract OCR'ı kurun.
b) Python paketlerini şu şekilde yükleyin:
`pip install --no-index --find-links /path/to/downloaded/packages paddleocr paddlepaddle easyocr opencv-python-headless pandas pytesseract`


Şimdi, güncellenmiş kodu görelim:
"OCR Script for Closed Network Environment" (kod, yukarıdaki kod hücresinde yer almaktadır).
Bu güncellenmiş kod, kapalı devre bir ortamda çalışmak üzere tasarlanmıştır. Önemli noktalar:

Kodun başında, manuel olarak yüklenmesi gereken kütüphanelerin bir listesi bulunmaktadır.
Otomatik yükleme komutları kaldırılmıştır, çünkü bunlar kapalı bir ağda çalışmayacaktır.
Kodun geri kalanı aynı şekilde çalışmaya devam edecektir.

Bu kodu kapalı devre ortamınızda kullanmak için:

Yukarıda belirtilen adımları izleyerek gerekli tüm paketleri ve Tesseract OCR'ı önceden indirin ve yükleyin.
Bu Python scriptini kapalı devre ortamınıza aktarın.
Scripti çalıştırın:
`python script_name.py`

İstendiğinde, işlemek istediğiniz görüntü dosyalarının yollarını girin ve OCR yöntemini seçin.

Bu yaklaşım, kapalı devre bir ortamda çalışmanıza olanak tanır. Ancak, bazı paketlerin (özellikle easyocr ve paddleocr) ek model dosyaları indirebileceğini unutmayın. Bu dosyaları da önceden indirip uygun konumlara yerleştirmeniz gerekebilir.