# **Download Data**

In [None]:
import os
# Install the Kaggle API token: kaggle.json is copied from the current working directory.
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
# Mode 600 = read/write for the owner only, so the token is not readable by other users.
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the IAM handwritten forms dataset archive with the Kaggle CLI.
!kaggle datasets download -d naderabdalghani/iam-handwritten-forms-dataset


In [None]:
# Extract the downloaded archive into the iam_dataset directory.
!unzip iam-handwritten-forms-dataset.zip -d iam_dataset


In [None]:
!pip install opencv-python-headless

In [None]:
import glob
import cv2
import matplotlib.pyplot as plt
import random

# **Preprocessing Data**

Each form image is split into two parts: the printed (machine-typed) text and the handwritten text.

In [None]:
# Collect every PNG found one directory level below iam_dataset/data; sorting keeps indices stable.
images = sorted(glob.glob('iam_dataset/data/*/*.png'))

In [None]:
# Number of form images found by the glob above (0 means nothing matched the pattern).
len(images)

In [None]:
def crop_and_split(img, left_margin=200, split_row=670, bottom_row=2600):
    """Crop a form image and split it into its handwritten and printed parts.

    Args:
        img: Either a path (``str``), which is loaded with ``cv2.imread``, or
            an already-loaded image array indexed as ``[row, column, ...]``.
        left_margin: Number of leftmost pixel columns to discard.
        split_row: Row index separating the printed part (rows above it) from
            the handwritten part (rows from it downwards).
        bottom_row: Row index at which the handwritten part ends (exclusive).

    Returns:
        A ``(hand, prt)`` tuple: rows ``split_row:bottom_row`` and rows
        ``:split_row`` of the column-cropped image, in that order.

    Raises:
        AssertionError: If ``img`` is a path and the image cannot be read.
    """
    if isinstance(img, str):
        path = img
        img = cv2.imread(path)
        # cv2.imread reports an unreadable file by returning None, not by raising.
        assert img is not None, f"could not read image: {path}"

    # Drop the left margin first so both parts share the same column range.
    img = img[:, left_margin:]

    prt = img[:split_row]
    hand = img[split_row:bottom_row]
    return hand, prt

# Visual sanity check on the first form: handwritten crop on the left axis, printed crop on the right.
img = images[0]
hand, prt = crop_and_split(img)
_, (ax1, ax2) = plt.subplots(1, 2)
ax1.imshow(hand)
ax2.imshow(prt)
plt.show()
plt.close()

In [None]:
def save_cropped_images(original_image_path, output_hand_dir, output_prt_dir):
    """Split one form image and write both parts to disk as PNG files.

    The handwritten part is written to ``<output_hand_dir>/<name>_hand.png`` and
    the printed part to ``<output_prt_dir>/<name>_prt.png``, where ``<name>`` is
    the file name of ``original_image_path`` without its extension.

    Args:
        original_image_path: Path of the form image, passed to ``crop_and_split``.
        output_hand_dir: Directory for the handwritten crop (created if missing).
        output_prt_dir: Directory for the printed crop (created if missing).

    Raises:
        OSError: If either output image could not be written.
    """
    base_name = os.path.splitext(os.path.basename(original_image_path))[0]
    hand_img_array, prt_img_array = crop_and_split(original_image_path)

    # cv2.imwrite does not create missing directories, so make sure they exist.
    os.makedirs(output_hand_dir, exist_ok=True)
    os.makedirs(output_prt_dir, exist_ok=True)

    hand_output_path = os.path.join(output_hand_dir, f"{base_name}_hand.png")
    prt_output_path = os.path.join(output_prt_dir, f"{base_name}_prt.png")

    outputs = (
        (hand_output_path, hand_img_array),
        (prt_output_path, prt_img_array),
    )
    for path, array in outputs:
        # imwrite returns False on failure; surface it instead of continuing silently.
        if not cv2.imwrite(path, array):
            raise OSError(f"failed to write image: {path}")


In [None]:
import cv2
import matplotlib.pyplot as plt

# Output directories for the handwritten and printed crops.
# NOTE(review): `prt` rebinds the name that previously held the printed crop array.
handw ="/content/handw"
prt = "/content/prt"


# Full-dataset export, disabled by wrapping it in a bare string literal (it never executes).
'''for img_path in images:
    save_cropped_images(img_path,handw,prt)'''

# Only a single form (index 102 of `images`) is exported here.
save_cropped_images(images[102],handw,prt)

# **Transformers**

In [None]:
!pip install -U transformers

In [None]:
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from PIL import Image
import requests

In [None]:
# Checkpoint identifier passed to from_pretrained for both the processor and the model.
model_name = "microsoft/trocr-base-handwritten"

# `processor` and `model` are module-level names that decode_image reads directly.
processor = TrOCRProcessor.from_pretrained(model_name)
model = VisionEncoderDecoderModel.from_pretrained(model_name)

# **Cropped Lines**

In [None]:
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt

def trim_line_edges(line_img, pad=2, smooth_window=5, rel_thresh=0.02, min_width=5, verbose=False):
    """Crop the empty left and right margins from a single text-line image.

    The image is converted to grayscale and binarised (text becomes non-zero),
    the binary mask is summed per column, and the crop runs from the first to
    the last column whose (optionally smoothed) sum reaches the threshold.

    Args:
        line_img: A PIL image or an image array (2-D, or 3-D with channels last).
        pad: Number of columns kept on each side of the detected extent.
        smooth_window: Length of the moving-average window applied to the
            column sums; values of 1 or less (or a falsy value) disable smoothing.
        rel_thresh: Fraction of the maximum column sum a column must reach to
            count as text (the threshold is never lower than 1.0).
        min_width: Minimum accepted width of the crop, in columns.
        verbose: If true, print diagnostics and plot the intermediate images.

    Returns:
        The grayscale image cropped horizontally, or the uncropped grayscale
        image when no text is detected or the crop would be narrower than
        ``min_width``.
    """

    if isinstance(line_img, Image.Image):
        arr = np.array(line_img)
    else:
        arr = line_img

    # Normalise to uint8: floats are clipped to [0, 1] and scaled by 255,
    # any other dtype is cast directly with astype.
    if arr.dtype != np.uint8:
        if np.issubdtype(arr.dtype, np.floating):
            arr = (np.clip(arr, 0.0, 1.0) * 255).astype(np.uint8)
        else:
            arr = arr.astype(np.uint8)

    # Multi-channel input -> grayscale, converted as BGR first.
    if arr.ndim == 3:
        gray = cv2.cvtColor(arr, cv2.COLOR_BGR2GRAY)

        # If the result is almost entirely white (mean > 250), convert again
        # treating the channels as RGB.
        # NOTE(review): the intent of this heuristic is not evident from the code - confirm.
        if gray.mean() > 250 and arr.shape[2] >= 3:
            gray = cv2.cvtColor(arr, cv2.COLOR_RGB2GRAY)
    else:
        gray = arr.copy()

    h, w = gray.shape

    # Inverted Otsu binarisation; any exception falls back to an inverted
    # Gaussian adaptive threshold (block size 11, constant 2).
    try:
        _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    except Exception:
        thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                       cv2.THRESH_BINARY_INV, 11, 2)

    # Morphological opening with a 1x1 rectangular kernel.
    # NOTE(review): a 1x1 element presumably leaves the mask unchanged - confirm
    # whether a larger kernel was intended.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1,1))
    clean = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)

    # Vertical projection: one sum of mask values per column.
    col_sum = np.sum(clean, axis=0).astype(np.float32)

    # Optional moving average over the column sums.
    if smooth_window and smooth_window > 1:
        k = np.ones(smooth_window, dtype=np.float32) / smooth_window
        col_sum_smooth = np.convolve(col_sum, k, mode='same')
    else:
        col_sum_smooth = col_sum

    max_val = col_sum_smooth.max() if col_sum_smooth.size else 0.0

    # A column counts as text when its sum reaches rel_thresh * max (at least 1.0).
    if max_val > 0:
        thresh_val = max(1.0, rel_thresh * max_val)
        mask = col_sum_smooth >= thresh_val
    else:
        mask = col_sum_smooth > 0

    if verbose:
        print("trim_line_edges: width", w, "max_col_sum", max_val, "thresh_val", (thresh_val if max_val>0 else 0))

    if not np.any(mask):
        # No column passed the threshold: fall back to the bounding box of all
        # non-zero mask pixels, or give up and return the uncropped image.
        coords = cv2.findNonZero(clean)
        if coords is not None:
            bx, by, bw, bh = cv2.boundingRect(coords)
            left = bx
            right = bx + bw - 1
        else:
            if verbose: print("No text detected in line -> returning original")
            return gray
    else:
        # argmax on a boolean mask gives the first True index; applying it to
        # the reversed mask locates the last True index.
        left = int(np.argmax(mask))
        right = int(len(mask) - 1 - np.argmax(mask[::-1]))

    # Add the padding, clamped to the image bounds.
    left = max(0, left - pad)
    right = min(w - 1, right + pad)

    if right - left + 1 < min_width:
        if verbose: print("Cropped width too small -> returning original")
        return gray

    # `right` is inclusive, hence the +1 in the slice.
    cropped = gray[:, left:right+1]

    if verbose:
        print("Cropped from", (0, w-1), "to", (left, right), "-> new width", cropped.shape[1])
        plt.figure(figsize=(12,3))
        plt.subplot(1,3,1); plt.imshow(gray, cmap='gray'); plt.title('orig gray'); plt.axis('off')
        plt.subplot(1,3,2); plt.imshow(clean, cmap='gray'); plt.title('thresh cleaned'); plt.axis('off')
        plt.subplot(1,3,3); plt.imshow(cropped, cmap='gray'); plt.title('cropped'); plt.axis('off')
        plt.show()

    return cropped


In [None]:
def _text_row_runs(row_has_ink, min_line_height):
    """Return (start, end) spans of consecutive truthy rows at least min_line_height tall."""
    runs = []
    start = None
    for i, has_ink in enumerate(row_has_ink):
        if has_ink and start is None:
            start = i
        elif not has_ink and start is not None:
            if i - start >= min_line_height:
                runs.append((start, i))
            start = None
    # A run that reaches the last row is closed at the total number of rows.
    if start is not None and len(row_has_ink) - start >= min_line_height:
        runs.append((start, len(row_has_ink)))
    return runs


def _merge_close_runs(runs, min_gap):
    """Merge consecutive (start, end) spans separated by at most min_gap rows."""
    merged = []
    for start, end in runs:
        if merged and start - merged[-1][1] <= min_gap:
            merged[-1] = (merged[-1][0], end)
        else:
            merged.append((start, end))
    return merged


def segment_lines(img, debug=False,
                  min_line_height=8,
                  min_gap=8,
                  smooth_window=5,
                  pad_horiz=2,
                  pad_vert=2):
    """Split an image of text into one grayscale image per text line.

    Rows are classified with an inverted Otsu threshold; runs of rows that
    contain any foreground pixel become line candidates, candidates separated
    by a small gap are merged, and each resulting band is trimmed horizontally
    with ``trim_line_edges``.

    Args:
        img: Image array, either 2-D grayscale or 3-D (converted as BGR).
        debug: If true, plot the detected line boundaries, the row projection
            and every extracted line.
        min_line_height: Minimum height in rows for a run to count as a line.
        min_gap: Runs separated by at most this many rows are merged.
        smooth_window: Accepted for backward compatibility; it does not affect
            the result (horizontal trimming uses a fixed window of 7).
        pad_horiz: Columns of padding kept left and right of each line.
        pad_vert: Rows of padding added above and below each line.

    Returns:
        A list of 2-D grayscale arrays, one per detected line, top to bottom.
    """
    if len(img.shape) == 3:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    else:
        gray = img.copy()

    h_img = gray.shape[0]

    _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

    # Horizontal projection: sum of foreground mask values in each row.
    hist = np.sum(thresh, axis=1)

    candidate_runs = _text_row_runs(hist > 0, min_line_height)
    merged = _merge_close_runs(candidate_runs, min_gap)

    line_images = []
    for y1, y2 in merged:
        # Vertical padding, clamped to the image.
        yy1 = max(0, y1 - pad_vert)
        yy2 = min(h_img, y2 + pad_vert)
        tight = trim_line_edges(gray[yy1:yy2, :], pad=pad_horiz, smooth_window=7,
                                rel_thresh=0.02, min_width=5, verbose=False)
        line_images.append(tight)

    if debug:
        fig, axs = plt.subplots(2, 1, figsize=(12, 8))
        axs[0].imshow(gray, cmap='gray')
        for y1, y2 in merged:
            axs[0].axhline(y=y1, color='r')
            axs[0].axhline(y=y2, color='r')
        axs[0].set_title("Line (red)")

        axs[1].plot(hist, label='horiz_proj')
        axs[1].set_title("Horizontal Projection (hist)")
        axs[1].legend()
        plt.show()

        for i, li in enumerate(line_images):
            plt.figure(figsize=(10,2))
            plt.imshow(li, cmap='gray')
            plt.title(f"Line {i} (shape={li.shape})")
            plt.axis('off')
            plt.show()

    return line_images


In [None]:
def detect_lines(img_path):
    """Segment the image at ``img_path`` into text lines, showing debug plots.

    Args:
        img_path: Path of the image to read (``str`` or path-like).

    Returns:
        The list of line images produced by ``segment_lines``.

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    img = cv2.imread(str(img_path))
    # cv2.imread returns None for an unreadable file; fail here with a clear message.
    if img is None:
        raise FileNotFoundError(f"could not read image: {img_path}")
    lines = segment_lines(img, debug=True)
    print(f"{len(lines)} Lines detected.")
    return lines

In [None]:
import io
# Module-level accumulator: decode_image appends to it; callers read it and clear it.
decoded = []

def decode_image(img):
    """Recognise the handwritten text of one image, line by line.

    The image is split with ``segment_lines`` and every line is passed through
    the module-level ``processor`` and ``model``.

    Args:
        img: Path of the image to read (``str`` or path-like).

    Returns:
        The module-level ``decoded`` list, extended with one recognised string
        per detected line. The list is not reset here, so callers must clear
        it between images.

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    path = str(img)
    image = cv2.imread(path)
    # cv2.imread returns None for an unreadable file; fail here with a clear message.
    if image is None:
        raise FileNotFoundError(f"could not read image: {path}")

    lines = segment_lines(image, debug=False)
    print(f"{len(lines)} Lines detected.")
    for line in lines:
        # Convert the single-channel crop straight to RGB; no intermediate
        # PNG encode/decode is needed.
        rgb_line = Image.fromarray(line).convert("RGB")
        pixel_values = processor(images=rgb_line, return_tensors="pt").pixel_values
        generated_ids = model.generate(pixel_values)
        generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
        decoded.append(generated_text)
    return decoded


In [None]:
# Run line segmentation with debug plots on one handwritten crop.
# NOTE(review): this file must already exist under /content/handw - confirm it has been exported.
detect_lines("/content/handw/a02-017_hand.png")

In [None]:
!pip install pytesseract

In [None]:
import pytesseract
from PIL import Image

def ocr(img_path):
    """Run Tesseract OCR (language ``eng``) on an image file.

    Args:
        img_path: Path of the image to open.

    Returns:
        The text returned by ``pytesseract.image_to_string``.
    """
    # The context manager closes the underlying file once OCR has finished.
    with Image.open(img_path) as image:
        return pytesseract.image_to_string(image, lang="eng")

In [None]:
from pathlib import Path

handw_folder_path = Path("/content/handw")
prt_folder_path = Path("/content/prt")

# Pair files by form id instead of zipping two directory listings: iterdir()
# yields entries in arbitrary order, so positional pairing can mix up forms.
for handw_file_path in sorted(handw_folder_path.glob("*_hand.png")):
    form_id = handw_file_path.name[: -len("_hand.png")]
    prt_file_path = prt_folder_path / f"{form_id}_prt.png"
    if not prt_file_path.exists():
        print(f"Skipping {handw_file_path.name}: no printed counterpart at {prt_file_path}")
        continue

    # decode_image fills the shared `decoded` list; pass a str because
    # cv2.imread may not accept Path objects.
    decode_image(str(handw_file_path))

    # Join with a space so the last word of one line is not fused with the
    # first word of the next.
    pred = " ".join(decoded)

    print(pred)
    decoded.clear()

    ocr_result = ocr(prt_file_path)

    print(ocr_result)

# **Levenshtein Score**

In [None]:
!pip install python-Levenshtein

In [None]:
import Levenshtein

# NOTE: `pred` and `ocr_result` hold only the values from the last iteration of the
# evaluation loop, so a single form is scored; the Tesseract output of the printed
# part serves as the reference text.
predicted = pred
ground_truth = ocr_result

# Edit distance and similarity ratio between the two strings.
distance = Levenshtein.distance(predicted, ground_truth)
similarity = Levenshtein.ratio(predicted, ground_truth)

print("Levenshtein distance:", distance)
print("Similarity:", similarity)