<a href="https://colab.research.google.com/github/HanifRidal/ocr_comparation/blob/main/compare_ocr_done.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install pytesseract
!apt-get install tesseract-ocr

In [None]:
# Install EasyOCR (no version is specified, so pip picks the current release).
!pip install easyocr

In [None]:
# Install Keras-OCR together with TensorFlow pinned to 2.15.1.
# NOTE(review): keras_ocr itself is unpinned; presumably the TensorFlow pin exists
# for compatibility with it — confirm before changing either version.
!pip install "tensorflow==2.15.1"
!pip install keras_ocr

In [None]:
!pip install paddleocr paddlepaddle

In [None]:
# !pip install --upgrade tensorflow
# !pip install --upgrade keras_ocr

In [None]:
import gc
import os
import re
import time
from collections import Counter

import cv2
import pytesseract
import easyocr
import keras_ocr
from paddleocr import PaddleOCR
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import psutil

In [None]:
# Initialise the OCR models
# Tesseract command-line options (--oem 3, --psm 6).
# NOTE(review): this string is not passed to any pytesseract call in the visible code — confirm intent.
tesseract_config = '--oem 3 --psm 6'
# EasyOCR reader created for the 'en' and 'id' language codes.
reader_easyocr = easyocr.Reader(['en', 'id'])
# Keras-OCR pipeline built with its default arguments.
pipeline_keras = keras_ocr.pipeline.Pipeline()
# PaddleOCR instance with the angle classifier enabled and lang='en'.
ocr_paddle = PaddleOCR(use_angle_cls=True, lang='en')

In [None]:
# Mount Google Drive at /content/drive; the dataset paths used below live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import os
from PIL import Image

# Preview every image in the dataset folder as a thumbnail grid.
dataset_path = '/content/drive/MyDrive/Skripsi/dataset/'

# Keep regular files with an image extension; sorted so the grid order is stable.
image_files = sorted(
    f for f in os.listdir(dataset_path)
    if os.path.isfile(os.path.join(dataset_path, f)) and f.endswith(('.png', '.jpg', '.jpeg'))
)

# Grid layout
num_images = len(image_files)
cols = 5  # Number of columns in grid
# Ceiling division: an exact multiple of `cols` must not add an empty extra row.
# max(1, ...) keeps the figure height positive when the folder has no images.
rows = max(1, -(-num_images // cols))

plt.figure(figsize=(15, 3 * rows))

for i, img_file in enumerate(image_files):
    img_path = os.path.join(dataset_path, img_file)

    plt.subplot(rows, cols, i + 1)
    # The context manager closes the file handle as soon as matplotlib has
    # copied the pixels, instead of leaving one open handle per image.
    with Image.open(img_path) as img:
        plt.imshow(img)
    plt.axis("off")
    plt.title(img_file[:15])  # Show partial filename

plt.tight_layout()
plt.show()

In [None]:
# Image directory used for the benchmark (a local alternative is "/content/images")
image_directory = '/content/drive/MyDrive/Skripsi/dataset/'
# Full paths of the dataset images.
# Sorted because os.listdir returns entries in arbitrary order, which would make the
# processing/plot order differ between runs; non-file entries are skipped so a folder
# with an image-like name is never handed to the OCR functions.
image_files = sorted(
    os.path.join(image_directory, f)
    for f in os.listdir(image_directory)
    if f.endswith(('.png', '.jpg', '.jpeg')) and os.path.isfile(os.path.join(image_directory, f))
)

In [None]:
# Ground truth untuk perhitungan akurasi
# ground_truth_dict = {
#     '/content/images/gambar1.png': """Once upon a time, There were a girl named Bawang Putih,
#     she lived With her step mother and her step sister named Bawang
#     Merah. Bawang Putih’s life was sad. Her step mother and her step
#     sister treated Bawang Putih badly and always asked her to do all the
#     household chores.

#     One morning, Bawang Putih was washing some clothes in a
#     river. Accidentlly, her mother’s clothes fell down to the rever. Finally,
#     she met an old woman. The old woman returned the clothes. She
#     also gave Bawang Putih a small pumpkin and a big pumpkin. At home,
#     Bawang Merah looked at the pumpkins, and she look a big pumpkin.
#     Bawang Putih opened the small pumpkin and found jewelleries inside
#     her pumpkin. Then, Bawang -

#     Finally both of them realized their mistakes. They apologized
#     to Bawang Putih and she forgave them.""",

#     '/content/images/gambar2.jpg': """membuat animasi
#     text writing""",

#     '/content/images/gambar3.jpg': """Best.
#     Summer.
#     Ever.""",
#     }

# Reference transcriptions keyed by full image path, used to score word accuracy.
# The processing loop looks images up with `.get(image_path, "")`, so a key that does
# not match the path string exactly falls back to an empty ground truth.
ground_truth_dict = {
    '/content/drive/MyDrive/Skripsi/dataset/gambar1.png': """Once upon a time, There were a girl named Bawang Putih,
    she lived With her step mother and her step sister named Bawang
    Merah. Bawang Putih’s life was sad. Her step mother and her step
    sister treated Bawang Putih badly and always asked her to do all the
    household chores.

    One morning, Bawang Putih was washing some clothes in a
    river. Accidentlly, her mother’s clothes fell down to the rever. Finally,
    she met an old woman. The old woman returned the clothes. She
    also gave Bawang Putih a small pumpkin and a big pumpkin. At home,
    Bawang Merah looked at the pumpkins, and she look a big pumpkin.
    Bawang Putih opened the small pumpkin and found jewelleries inside
    her pumpkin. Then, Bawang -

    Finally both of them realized their mistakes. They apologized
    to Bawang Putih and she forgave them.""",

    '/content/drive/MyDrive/Skripsi/dataset/gambar2.jpg': """membuat animasi
    text writing""",

    '/content/drive/MyDrive/Skripsi/dataset/gambar3.jpg': """Best.
    Summer.
    Ever.""",
    }

In [None]:
# Compute Word Accuracy (WA)
def word_accuracy(extracted_text, ground_truth):
    """Return the percentage of ground-truth words found in the OCR output.

    Both texts are lower-cased and split into ``\\w+`` tokens. Words are matched
    as a multiset: a word occurring k times in the ground truth can be credited
    at most k times, and only as often as it occurs in the extracted text. Word
    order is not considered.

    Args:
        extracted_text: Text produced by an OCR engine.
        ground_truth: Reference transcription.

    Returns:
        A float in [0, 100]; 0.0 when the ground truth contains no words.
    """
    extracted_counts = Counter(re.findall(r'\w+', extracted_text.lower()))
    truth_counts = Counter(re.findall(r'\w+', ground_truth.lower()))

    total_words = sum(truth_counts.values())
    if total_words == 0:
        return 0.0

    # Counter & Counter keeps the minimum count per word. A plain set intersection
    # would count each distinct word once while the denominator counts repeats,
    # so a perfect transcription with repeated words could never reach 100%.
    correct_words = sum((extracted_counts & truth_counts).values())
    return correct_words / total_words * 100

# Measure RAM usage
def get_ram_usage():
    """Return the resident set size (RSS) of the current process in MB."""
    rss_bytes = psutil.Process().memory_info().rss
    bytes_per_mb = 1024 * 1024
    return rss_bytes / bytes_per_mb

# Draw bounding boxes on an OCR result image
def draw_bounding_boxes(image, boxes):
    """Draw a green rectangle on ``image`` for every entry in ``boxes``.

    For each entry, ``entry[0][0]`` and ``entry[0][2]`` are used as the two opposite
    rectangle corners. The image is modified in place and also returned.
    """
    green = (0, 255, 0)
    thickness = 2
    for entry in boxes:
        corners = entry[0]
        left, top = (int(value) for value in corners[0])
        right, bottom = (int(value) for value in corners[2])
        cv2.rectangle(image, (left, top), (right, bottom), green, thickness)
    return image

# OCR with Tesseract, including preprocessing
def ocr_tesseract(image_path):
    """Run Tesseract on a binarised copy of the image and draw character boxes.

    Args:
        image_path: Path of the image file, read with OpenCV.

    Returns:
        Tuple ``(extracted_text, elapsed_time, img)``: the recognised text, the
        seconds spent in the text-recognition call, and the BGR image annotated
        with green character boxes.

    Raises:
        OSError: If OpenCV cannot load ``image_path``.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise OSError(f"Could not read image: {image_path}")

    # Preprocessing: grayscale, then a fixed global threshold at 150.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, img_bin = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY)

    # NOTE(review): the module-level `tesseract_config` is not passed here, so
    # Tesseract runs with its default engine/segmentation modes — confirm intent.
    start_time = time.perf_counter()
    extracted_text = pytesseract.image_to_string(img_bin, lang='eng+ind')
    # Stop the clock before the box pass: image_to_boxes is a second, separate
    # Tesseract run that exists only for the visualisation and would otherwise
    # inflate the reported execution time.
    elapsed_time = time.perf_counter() - start_time

    img_height = img.shape[0]
    # Same languages as the text pass so the boxes belong to the same recognition setup.
    boxes = pytesseract.image_to_boxes(img_bin, lang='eng+ind')
    for row in boxes.splitlines():
        fields = row.split()
        if len(fields) < 5:
            continue  # not a "<char> <left> <bottom> <right> <top> ..." row
        left, bottom, right, top = (int(value) for value in fields[1:5])
        # Box coordinates are measured from the bottom-left corner; OpenCV draws
        # from the top-left, so the y values are flipped against the image height.
        cv2.rectangle(img, (left, img_height - bottom), (right, img_height - top), (0, 255, 0), 2)

    return extracted_text, elapsed_time, img

# OCR with EasyOCR
def ocr_easyocr(image_path):
    """Run EasyOCR on the image and draw a box around every detected text region.

    Args:
        image_path: Path of the image file, read with OpenCV.

    Returns:
        Tuple ``(extracted_text, elapsed_time, img)``: detected texts joined with
        newlines, the seconds spent in ``readtext``, and the BGR image annotated
        with green boxes.

    Raises:
        OSError: If OpenCV cannot load ``image_path``.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise OSError(f"Could not read image: {image_path}")

    start_time = time.perf_counter()
    results = reader_easyocr.readtext(img)
    # Only recognition is timed; drawing the boxes is visualisation overhead.
    elapsed_time = time.perf_counter() - start_time

    extracted_text = "\n".join(text for _, text, _ in results)
    for bbox, _, _ in results:
        # bbox[0] and bbox[2] are used as the two opposite rectangle corners.
        cv2.rectangle(img,
                      tuple(map(int, bbox[0])),
                      tuple(map(int, bbox[2])),
                      (0, 255, 0), 2)

    return extracted_text, elapsed_time, img

# OCR with Keras-OCR
def ocr_keras(image_path):
    """Run the Keras-OCR pipeline on the image and draw a box around every word.

    Args:
        image_path: Path of the image file, read with ``keras_ocr.tools.read``.

    Returns:
        Tuple ``(extracted_text, elapsed_time, img)``: recognised words joined with
        newlines, the seconds spent in ``recognize``, and the annotated image in
        BGR channel order (the same order the other OCR functions return).
    """
    # keras_ocr.tools.read yields an RGB array, which is what the pipeline expects.
    img_rgb = keras_ocr.tools.read(image_path)

    start_time = time.perf_counter()
    results = pipeline_keras.recognize([img_rgb])[0]
    # Only recognition is timed; drawing the boxes is visualisation overhead.
    elapsed_time = time.perf_counter() - start_time

    extracted_text = "\n".join(text for text, _ in results)

    # The processing loop displays results with `processed_image[..., ::-1]`, i.e. it
    # assumes BGR. Returning the RGB array would show this method's image with red
    # and blue swapped, so annotate and return a BGR copy instead.
    img = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2BGR)
    for _, bbox in results:
        # bbox[0] and bbox[2] are used as the two opposite rectangle corners.
        cv2.rectangle(img,
                      tuple(map(int, bbox[0])),
                      tuple(map(int, bbox[2])),
                      (0, 255, 0), 2)

    return extracted_text, elapsed_time, img

# OCR with PaddleOCR
def ocr_paddleocr(image_path):
    """Run PaddleOCR on the image and draw a box around every detected text line.

    Args:
        image_path: Path of the image file; passed to PaddleOCR as a path and also
            read with OpenCV for the annotated copy.

    Returns:
        Tuple ``(extracted_text, elapsed_time, img)``: detected texts joined with
        newlines (empty string when nothing is detected), the seconds spent in
        ``ocr``, and the BGR image annotated with green boxes.

    Raises:
        OSError: If OpenCV cannot load ``image_path``.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise OSError(f"Could not read image: {image_path}")

    start_time = time.perf_counter()
    result = ocr_paddle.ocr(image_path, cls=True)
    # Only recognition is timed; drawing the boxes is visualisation overhead.
    elapsed_time = time.perf_counter() - start_time

    # `result` holds one entry per input image. An entry may be None or empty when
    # no text was found; iterating it unguarded would raise a TypeError.
    detections = [
        word_info
        for line in (result or [])
        if line
        for word_info in line
    ]

    extracted_text = "\n".join(word_info[1][0] for word_info in detections)
    for word_info in detections:
        # word_info[0][0] and word_info[0][2] are used as opposite rectangle corners.
        x_min, y_min = map(int, word_info[0][0])
        x_max, y_max = map(int, word_info[0][2])
        cv2.rectangle(img, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)

    return extracted_text, elapsed_time, img

In [None]:
# Containers for the results
# Column-oriented store: one list per metric, filled in parallel by the processing loop.
results_data = {
    column: []
    for column in (
        "Method",
        "Execution Time (s)",
        "Word Accuracy (%)",
        "RAM Usage (MB)",
    )
}

# Row-oriented store: one list per (method, image) run, later turned into a DataFrame.
results_list = list()

In [None]:
# Process every image with every OCR method

# Start from empty containers so re-running this cell does not append a second copy
# of every row (duplicate Method/Image pairs make the later DataFrame.pivot calls fail).
results_list.clear()
for column_values in results_data.values():
    column_values.clear()

# The method table does not depend on the image, so it is built once.
methods = {
    "Tesseract": ocr_tesseract,
    "EasyOCR": ocr_easyocr,
    "Keras-OCR": ocr_keras,
    "PaddleOCR": ocr_paddleocr
}

for image_path in image_files:
    print(f"\nMemproses: {image_path}")
    ground_truth = ground_truth_dict.get(image_path, "")
    if not ground_truth:
        # Without a reference text word_accuracy returns 0, which would silently
        # drag the method averages down; make that visible in the output.
        print("   ⚠️ Ground truth tidak ditemukan untuk gambar ini; Word Accuracy akan bernilai 0%.")

    # Read the untouched original once per image; reversing the last axis turns
    # OpenCV's BGR order into the RGB order matplotlib expects.
    original_image = cv2.imread(image_path)[..., ::-1]

    for method_name, method in methods.items():
        gc.collect()  # Free memory before the run so the RSS delta is less noisy
        ram_before = get_ram_usage()
        extracted_text, processing_time, processed_image = method(image_path)
        ram_after = get_ram_usage()
        ram_used = ram_after - ram_before
        wa = word_accuracy(extracted_text, ground_truth)

        # Row-oriented record for the DataFrame
        results_list.append([method_name, image_path, processing_time, wa, ram_used])

        # Column-oriented record for the bar charts
        results_data["Method"].append(method_name)
        results_data["Execution Time (s)"].append(processing_time)
        results_data["Word Accuracy (%)"].append(wa)
        results_data["RAM Usage (MB)"].append(ram_used)

        # Show the original image next to the OCR result with bounding boxes
        fig, ax = plt.subplots(1, 2, figsize=(12, 5))
        ax[0].imshow(original_image)
        ax[0].set_title("Gambar Asli")
        ax[0].axis("off")

        # The OCR functions are expected to hand back BGR images (OpenCV convention).
        ax[1].imshow(processed_image[..., ::-1])
        ax[1].set_title(f"{method_name} - Hasil OCR")
        ax[1].axis("off")

        plt.show()
        plt.close(fig)  # One figure per run; release it so they do not pile up in memory

        # Per-method report
        print(f"🔹 {method_name}")
        print(f"   - Hasil Teks:\n{extracted_text[:200]}...")  # Truncate text for display
        print(f"   - Waktu Eksekusi: {processing_time:.4f} detik")
        print(f"   - Word Accuracy: {wa:.2f}%")
        print(f"   - RAM Digunakan: {ram_used:.2f} MB\n")

In [None]:
# Results visualisation
# NOTE(review): this cell only creates an empty 12x5 figure; nothing is drawn on it here.
# Whether the following plotting cells draw onto this figure depends on the notebook's
# matplotlib backend — confirm, or merge this call into the cell that plots.
plt.figure(figsize=(12, 5))

In [None]:
# Execution-time chart
# results_data holds one entry per (image, method). Passing the raw lists to bar()
# repeats every method label once per image, and bars sharing a label are drawn on
# top of each other, so plot the mean per method instead.
time_per_method = (
    pd.DataFrame(results_data)
    .groupby("Method", sort=False)["Execution Time (s)"]
    .mean()
)

fig, ax = plt.subplots(figsize=(12, 5))
ax.bar(time_per_method.index, time_per_method.values, color=['red', 'blue', 'green', 'purple'])
ax.set_ylabel("Waktu Eksekusi (s)")
ax.set_title("Perbandingan Waktu Eksekusi OCR")
plt.show()

In [None]:
# Accuracy chart
# results_data holds one entry per (image, method). Passing the raw lists to bar()
# repeats every method label once per image, and bars sharing a label are drawn on
# top of each other, so plot the mean per method instead. The chart also gets its
# own full-size axes rather than slot 2 of a 2x1 grid whose first slot stays empty.
accuracy_per_method = (
    pd.DataFrame(results_data)
    .groupby("Method", sort=False)["Word Accuracy (%)"]
    .mean()
)

fig, ax = plt.subplots(figsize=(12, 5))
ax.bar(accuracy_per_method.index, accuracy_per_method.values, color=['red', 'blue', 'green', 'purple'])
ax.set_ylabel("Word Accuracy (%)")
ax.set_title("Perbandingan Akurasi OCR")
plt.show()

In [None]:
# RAM-usage chart
# results_data holds one entry per (image, method). Passing the raw lists to bar()
# repeats every method label once per image, and bars sharing a label are drawn on
# top of each other, so plot the mean per method instead. The chart also gets its
# own full-size axes rather than slot 3 of a 3x1 grid whose other slots stay empty.
ram_per_method = (
    pd.DataFrame(results_data)
    .groupby("Method", sort=False)["RAM Usage (MB)"]
    .mean()
)

fig, ax = plt.subplots(figsize=(12, 5))
ax.bar(ram_per_method.index, ram_per_method.values, color=['red', 'blue', 'green', 'purple'])
ax.set_ylabel("Penggunaan RAM (MB)")
ax.set_title("Perbandingan Penggunaan RAM OCR")

fig.tight_layout()
plt.show()

In [None]:
# Results visualisation: one panel per metric, mean per OCR method across all images.
# results_data holds one entry per (image, method); plotting the raw lists would
# repeat every method label once per image and stack those bars on top of each other.
summary_per_method = (
    pd.DataFrame(results_data)
    .groupby("Method", sort=False)
    .mean(numeric_only=True)
)

# (results column, y-axis label, panel title) for each panel, left to right
panels = [
    ("Execution Time (s)", "Waktu Eksekusi (s)", "Perbandingan Waktu Eksekusi OCR"),
    ("Word Accuracy (%)", "Word Accuracy (%)", "Perbandingan Akurasi OCR"),
    ("RAM Usage (MB)", "Penggunaan RAM (MB)", "Perbandingan Penggunaan RAM OCR"),
]

fig, axes = plt.subplots(1, len(panels), figsize=(12, 5))
for ax, (column, y_label, title) in zip(axes, panels):
    ax.bar(summary_per_method.index, summary_per_method[column],
           color=['red', 'blue', 'green', 'purple'])
    ax.set_ylabel(y_label)
    ax.set_title(title)

fig.tight_layout()
plt.show()

In [None]:
# Build a DataFrame from the collected rows (one row per method/image run)
result_columns = [
    "Method",
    "Image",
    "Execution Time (s)",
    "Word Accuracy (%)",
    "RAM Usage (MB)",
]
df_results = pd.DataFrame(data=results_list, columns=result_columns)

In [None]:
# Show the results table: heading first, then the DataFrame's text rendering
print("\n📊 **Tabel Performa OCR**", df_results, sep="\n")

In [None]:
# Word-accuracy heatmap: OCR methods as rows, images as columns
accuracy_fig, accuracy_ax = plt.subplots(figsize=(12, 6))
df_pivot = df_results.pivot(index="Method", columns="Image", values="Word Accuracy (%)")
sns.heatmap(df_pivot, annot=True, cmap="coolwarm", fmt=".2f", ax=accuracy_ax)
accuracy_ax.set_title("Heatmap Akurasi Word Accuracy (%) Per Metode OCR")
plt.show()

In [None]:
# Execution-time heatmap: OCR methods as rows, images as columns
time_fig, time_ax = plt.subplots(figsize=(12, 6))
df_pivot_time = df_results.pivot(index="Method", columns="Image", values="Execution Time (s)")
sns.heatmap(df_pivot_time, annot=True, cmap="coolwarm", fmt=".2f", ax=time_ax)
time_ax.set_title("Heatmap Execution Time (s) Per Metode OCR")
plt.show()

In [None]:

# RAM-usage heatmap: OCR methods as rows, images as columns
ram_fig, ram_ax = plt.subplots(figsize=(12, 6))
df_pivot_ram = df_results.pivot(index="Method", columns="Image", values="RAM Usage (MB)")
sns.heatmap(df_pivot_ram, annot=True, cmap="coolwarm", fmt=".2f", ax=ram_ax)
ram_ax.set_title("Heatmap RAM Usage (MB) Per Metode OCR")
plt.show()