In [1]:
!pip install -U git+https://github.com/faustomorales/keras-ocr.git
!pip install imgaug
!pip install -U opencv-python

!gdown --id 1BEOWSoIbzXgPnxtaKdCuP9qxYuOgCwSd
!gdown --id 12JOXOOHnK8LIbzwxKHTZsLVyZVAGs2kE
!gdown --id 1dhn30gRDRKWiF24Gi8dd4bhEMxvVXi3_
!gdown --id 1Yq2LgCgstCSD9S1Hvz2y5EviucbIOX9o
%tensorflow_version 2.x

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting git+https://github.com/faustomorales/keras-ocr.git
  Cloning https://github.com/faustomorales/keras-ocr.git to /tmp/pip-req-build-qiimra23
  Running command git clone -q https://github.com/faustomorales/keras-ocr.git /tmp/pip-req-build-qiimra23
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
    Preparing wheel metadata ... [?25l[?25hdone
Collecting validators
  Downloading validators-0.19.0.tar.gz (30 kB)
Collecting pyclipper
  Downloading pyclipper-1.3.0.post2-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl (603 kB)
[K     |████████████████████████████████| 603 kB 55.6 MB/s 
Collecting fonttools
  Downloading fonttools-4.33.3-py3-none-any.whl (930 kB)
[K     |████████████████████████████████| 930 kB 52.9 MB/s 
[?25hCollecting essential_generators
  Downloading essential_generators-1.0-py3-none-any.whl 

In [2]:
import os
import math
import imgaug
import numpy as np
import matplotlib.pyplot as plt
import sklearn.model_selection
import tensorflow as tf
import zipfile
import typing

import keras_ocr

dir = "."

In [13]:
def extract_train_dataset(main_dir="."):
  if not os.path.isdir(main_dir):
    os.mkdir(main_dir)
  train_image_dir = os.path.join(main_dir, "train_images")
  if not os.path.isdir(train_image_dir):
    os.mkdir(train_image_dir)
  training_zip_images_path = os.path.join(".", "Challenge2_Training_Task12_Images.zip")
  if len(os.listdir(train_image_dir)) != 229:
    with zipfile.ZipFile(training_zip_images_path, 'r') as zip_ref:
      zip_ref.extractall(train_image_dir)

  train_gt_dir = os.path.join(main_dir, "train_gt")
  if not os.path.isdir(train_gt_dir):
    os.mkdir(train_gt_dir)
  training_zip_gt_path = os.path.join(".", "Challenge2_Training_Task1_GT.zip")
  if len(os.listdir(train_gt_dir)) != 229:
      with zipfile.ZipFile(training_zip_gt_path) as zfile:
          zfile.extractall(train_gt_dir)

In [14]:
def extract_test_dataset(main_dir="."):
  if not os.path.isdir(main_dir):
    os.mkdir(main_dir)
  test_image_dir = os.path.join(main_dir, "test_images")
  if not os.path.isdir(test_image_dir):
    os.mkdir(test_image_dir)
  test_zip_images_path = os.path.join(".", "Challenge2_Test_Task12_Images.zip")
  if len(os.listdir(test_image_dir)) != 233:
    with zipfile.ZipFile(test_zip_images_path, 'r') as zip_ref:
      zip_ref.extractall(test_image_dir)

  test_gt_dir = os.path.join(main_dir, "test_gt")
  if not os.path.isdir(test_gt_dir):
    os.mkdir(test_gt_dir)
  test_zip_gt_path = os.path.join(".", "Challenge2_Test_Task1_GT.zip")
  if len(os.listdir(test_gt_dir)) != 233:
      with zipfile.ZipFile(test_zip_gt_path) as zfile:
          zfile.extractall(test_gt_dir)

In [115]:
def preprocess_train_dataset(dir="."):
  main_dir = os.path.join(dir, "icdar2013")
  extract_train_dataset(main_dir)
  dataset = []

  train_gt_dir = os.path.join(main_dir, "train_gt")
  train_image_dir = os.path.join(main_dir, "train_images")

  for filename in os.listdir(train_gt_dir):
    file_path = os.path.join(train_gt_dir, filename)
    image_id = filename.split("_")[1].split(".")[0]
    image_path = os.path.join(train_image_dir, image_id + ".jpg")
    lines = []
    with open(file_path, "r", encoding="utf8") as f:
      current_line: typing.List[typing.Tuple[np.ndarray, str]] = []
      for row in f.read().split("\n"):
        if row == "":
          lines.append(current_line)
          current_line = []
        else:
          line_row = row.split(" ")
          text = line_row[-1].strip("\"")
          x1, y1, x2, y2 = map(int, line_row[:4])
          current_line.append(
              (np.array([[x1, y1], [x2, y1], [x2, y2], [x1, y2]]), text)
          )
      lines = [line for line in lines if line]
      dataset.append((image_path, lines, 1)) 

  return dataset

In [114]:
def preprocess_test_dataset(dir="."):
  main_dir = os.path.join(dir, "icdar2013")
  extract_test_dataset(main_dir)
  dataset = []

  test_gt_dir = os.path.join(main_dir, "test_gt")
  test_image_dir = os.path.join(main_dir, "test_images")

  for filename in os.listdir(test_gt_dir):
    file_path = os.path.join(test_gt_dir, filename)
    image_id = filename.split("_")[2].split(".")[0]
    image_path = os.path.join(test_image_dir, "img_" + image_id + ".jpg")
    lines = []
    with open(file_path, "r", encoding="utf8") as f:
      current_line: typing.List[typing.Tuple[np.ndarray, str]] = []
      for row in f.read().split("\n"):
        if row == "":
          lines.append(current_line)
          current_line = []
        else:
          line_row = row.split(", ")
          text = line_row[-1].strip("\"")
          x1, y1, x2, y2 = map(int, line_row[:4])
          current_line.append(
              (np.array([[x1, y1], [x2, y1], [x2, y2], [x1, y2]]), text)
          )
      lines = [line for line in lines if line]
      dataset.append((image_path, lines, 1)) 

  return dataset

In [116]:
train_labels = preprocess_train_dataset(dir)
test_labels = preprocess_test_dataset(dir)

In [117]:
print(train_labels[0])

('./icdar2013/train_images/189.jpg', [[(array([[129,  74],
       [390,  74],
       [390, 114],
       [129, 114]]), 'WALLACE'), (array([[107, 588],
       [423, 588],
       [423, 629],
       [107, 629]]), 'MONUMENT')]], 1)


In [118]:
train_labels, validation_labels = sklearn.model_selection.train_test_split(train_labels, train_size=0.8, random_state=42)

augmenter = imgaug.augmenters.Sequential([
    imgaug.augmenters.Affine(
      scale=(1.0, 1.2),
      rotate=(-5, 5)
    ),
    imgaug.augmenters.GaussianBlur(sigma=(0, 0.5)),
    imgaug.augmenters.Multiply((0.8, 1.2), per_channel=0.2)
])

generator_kwargs = {'width': 640, 'height': 640}
training_image_generator = keras_ocr.datasets.get_detector_image_generator(
    labels=train_labels,
    augmenter=augmenter,
    **generator_kwargs
)
validation_image_generator = keras_ocr.datasets.get_detector_image_generator(
    labels=validation_labels,
    **generator_kwargs
)

test_image_generator = keras_ocr.datasets.get_detector_image_generator(
    labels=test_labels,
    **generator_kwargs
)

In [124]:
detector = keras_ocr.detection.Detector()

Looking for /root/.keras-ocr/craft_mlt_25k.h5


In [120]:
batch_size = 1
training_generator, validation_generator, test_generator = [
    detector.get_batch_generator(
        image_generator=image_generator, batch_size=batch_size
    ) for image_generator in
    [training_image_generator, validation_image_generator, test_image_generator]
]

In [125]:
detector.model.fit(
    x=training_generator,
    steps_per_epoch=math.ceil(len(train_labels) / batch_size),
    epochs=1000,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(restore_best_weights=True, patience=5),
        tf.keras.callbacks.ModelCheckpoint(filepath=os.path.join(dir, 'detector_icdar2013.h5'))
    ],
    validation_data=validation_generator,
    validation_steps=math.ceil(len(validation_labels) / batch_size),
    verbose=1
)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000


<keras.callbacks.History at 0x7fc82eb90290>

In [126]:
from google.colab import files
files.download('/content/detector_icdar2013.h5') 

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>