In [1]:
# Install the OCR stack: keras-ocr straight from its GitHub repository, plus imgaug and OpenCV.
!pip install -U git+https://github.com/faustomorales/keras-ocr.git
!pip install imgaug
!pip install -U opencv-python
# Select the TensorFlow 2.x runtime.
# NOTE(review): this magic looks Colab-specific — confirm before running in another environment.
%tensorflow_version 2.x

# Download two files from Google Drive by file ID into the working directory.
# Presumably these are the Challenge2_*_Task3_Images_GT.zip archives opened by the
# extract_* helpers further down — verify the IDs still resolve to those files.
!gdown --id 12WOvX4IDbmJUTDOdWbV3wWAc4lr5GGmO
!gdown --id 1xCeveu2CyYLNyTeXfsRRTABPwGRYpI82

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting git+https://github.com/faustomorales/keras-ocr.git
  Cloning https://github.com/faustomorales/keras-ocr.git to /tmp/pip-req-build-942k6k0i
  Running command git clone -q https://github.com/faustomorales/keras-ocr.git /tmp/pip-req-build-942k6k0i
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
    Preparing wheel metadata ... [?25l[?25hdone
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Downloading...
From: https://drive.google.com/uc?id=12WOvX4IDbmJUTDOdWbV3wWAc4lr5GGmO
To: /content/Challenge2_Training_Task3_Images_GT.zip
100% 83.9M/83.9M [00:00<00:00, 258MB/s]
Downloading...
From: https://drive.google.com/uc?id=1xCeveu2CyYLNyTeXfsRRTABPwGRYpI82
To: /content/Challen

In [2]:
import random
import string
import math
import itertools
import os
import zipfile
import typing

import numpy as np

import imgaug
import matplotlib.pyplot as plt
import tensorflow as tf
import sklearn.model_selection

import keras_ocr

# Base directory under which the "icdar2013" dataset folder is created.
# NOTE(review): this name shadows the builtin dir(); it is kept because later cells pass it by name.
dir = "."

In [3]:
def extract_train_dataset(main_dir="."):
  """Unpack the training archive into ``<main_dir>/train_images``.

  The archive ``Challenge2_Training_Task3_Images_GT.zip`` is read from the
  current working directory. Extraction is skipped when the target directory
  already holds exactly 849 entries, which is the count this notebook treats
  as a complete extraction.

  Args:
    main_dir: Dataset root. It and any missing parent directories are created.

  Raises:
    FileNotFoundError: If extraction is needed and the archive is missing.
  """
  train_image_dir = os.path.join(main_dir, "train_images")
  # makedirs creates the whole directory chain (os.mkdir fails when a parent is
  # missing), and exist_ok makes the call safe to repeat on every run.
  os.makedirs(train_image_dir, exist_ok=True)
  training_zip_images_path = os.path.join(".", "Challenge2_Training_Task3_Images_GT.zip")
  if len(os.listdir(train_image_dir)) != 849:
    with zipfile.ZipFile(training_zip_images_path, 'r') as zip_ref:
      zip_ref.extractall(train_image_dir)

In [4]:
def extract_test_dataset(main_dir="."):
  """Unpack the test archive into ``<main_dir>/test_images``.

  The archive ``Challenge2_Test_Task3_Images_GT.zip`` is read from the current
  working directory. Extraction is skipped when the target directory already
  holds exactly 1095 entries, which is the count this notebook treats as a
  complete extraction.

  Args:
    main_dir: Dataset root. It and any missing parent directories are created.

  Raises:
    FileNotFoundError: If extraction is needed and the archive is missing.
  """
  test_image_dir = os.path.join(main_dir, "test_images")
  # makedirs creates the whole directory chain (os.mkdir fails when a parent is
  # missing), and exist_ok makes the call safe to repeat on every run.
  os.makedirs(test_image_dir, exist_ok=True)
  test_zip_images_path = os.path.join(".", "Challenge2_Test_Task3_Images_GT.zip")
  if len(os.listdir(test_image_dir)) != 1095:
    with zipfile.ZipFile(test_zip_images_path, 'r') as zip_ref:
      zip_ref.extractall(test_image_dir)

In [5]:
def preprocess_train_dataset(dir="."):
  """Extract the training archive if needed and load its ground-truth labels.

  Reads ``<dir>/icdar2013/train_images/gt.txt``, where every non-blank line is
  expected to look like ``<image_id>.png, "<text>"``.

  Args:
    dir: Base directory that contains (or will contain) the ``icdar2013`` folder.

  Returns:
    A list of ``(image_path, None, text)`` tuples. ``text`` is lower-cased and
    stripped of surrounding double quotes, spaces and newlines; the middle
    element is always ``None``.

  Raises:
    ValueError: If a non-blank line does not contain ``".png,"``.
  """
  main_dir = os.path.join(dir, "icdar2013")
  extract_train_dataset(main_dir)

  train_image_dir = os.path.join(main_dir, "train_images")
  train_gt_path = os.path.join(train_image_dir, "gt.txt")

  dataset = []

  with open(train_gt_path, "r", encoding="utf8") as f:
    for line in f:
      # A blank (e.g. trailing) line carries no label and would break the unpack below.
      if not line.strip():
        continue
      # Split on the first separator only, so a label that itself contains
      # ".png," stays intact instead of raising "too many values to unpack".
      image_id, text = line.split(".png,", 1)
      text = text.strip("\"\n ").lower()
      image_path = os.path.join(train_image_dir, image_id + ".png")
      dataset.append((image_path, None, text))

  return dataset

In [6]:
def preprocess_test_dataset(dir="."):
  """Extract the test archive if needed and load its ground-truth labels.

  Reads ``<dir>/icdar2013/test_images/gt.txt``, where every non-blank line is
  expected to look like ``<image_id>.png, "<text>"``.

  Args:
    dir: Base directory that contains (or will contain) the ``icdar2013`` folder.

  Returns:
    A list of ``(image_path, None, text)`` tuples. ``text`` is lower-cased and
    stripped of surrounding double quotes, spaces and newlines; the middle
    element is always ``None``.

  Raises:
    ValueError: If a non-blank line does not contain ``".png,"``.
  """
  main_dir = os.path.join(dir, "icdar2013")
  extract_test_dataset(main_dir)

  test_image_dir = os.path.join(main_dir, "test_images")
  test_gt_path = os.path.join(test_image_dir, "gt.txt")

  dataset = []

  with open(test_gt_path, "r", encoding="utf8") as f:
    for line in f:
      # A blank (e.g. trailing) line carries no label and would break the unpack below.
      if not line.strip():
        continue
      # Split on the first separator only, so a label that itself contains
      # ".png," stays intact instead of raising "too many values to unpack".
      image_id, text = line.split(".png,", 1)
      text = text.strip("\"\n ").lower()
      image_path = os.path.join(test_image_dir, image_id + ".png")
      dataset.append((image_path, None, text))

  return dataset

In [7]:
# Load both ground-truth sets as (filepath, box, word) triples.
train_dataset = preprocess_train_dataset(dir)
test_dataset = preprocess_test_dataset(dir)

# Lower-case every word so the labels match the lower-cased recognizer alphabet.
train_labels = [(path, bbox, text.lower()) for path, bbox, text in train_dataset]
test = [(path, bbox, text.lower()) for path, bbox, text in test_dataset]

# Hold out 20% of the official test set for evaluation; the remaining 80% is
# appended to the training labels.
train_labels_extend, test_labels = sklearn.model_selection.train_test_split(
    test,
    test_size=0.2,
    random_state=42,
)
train_labels += train_labels_extend

In [8]:
# Report the sizes of the training and held-out label lists, one per line.
print(len(train_labels), len(test_labels), sep="\n")

1724
219


In [9]:
# Full character inventory: digits, both letter cases, and a fixed set of punctuation.
alphabet = "".join([string.digits, string.ascii_letters, '!?., /\\"\'@()<>-'])
# The recognizer works case-insensitively: fold to lower case, de-duplicate, and sort.
recognizer_alphabet = "".join(sorted({char.lower() for char in alphabet}))

In [10]:
# Architecture settings handed to keras_ocr.recognition.Recognizer as `build_params`.
BUILD_PARAMS = {
    # Input image size; together with color=False this matches the
    # (None, 31, 200, 1) input shown in the saved model summary.
    "height": 31,
    "width": 200,
    "color": False,
    # Presumably one filter count per convolutional layer (conv_1 in the saved
    # summary has 64 channels) — verify against the keras-ocr model builder.
    "filters": (64, 128, 256, 256, 512, 512, 512),
    # NOTE(review): the remaining keys are interpreted by keras-ocr; their exact
    # semantics are not visible in this notebook — confirm in the library docs.
    "rnn_units": (128, 128),
    "dropout": 0.25,
    "rnn_steps_to_discard": 2,
    "pool_size": 2,
    "stn": True,
}

In [11]:
# Pretrained text detector loaded with the 'clovaai_general' weights preset.
detector = keras_ocr.detection.Detector(weights='clovaai_general')
# Recognizer built on the 'kurapan' pretrained weights with this notebook's
# alphabet and architecture parameters.
recognizer = keras_ocr.recognition.Recognizer(
    alphabet=recognizer_alphabet,
    weights='kurapan',
    build_params=BUILD_PARAMS
)
# Freeze the backbone BEFORE compiling: Keras only guarantees that `trainable`
# values present at compile time are honoured, so flipping them after
# compile() requires another compile() call to take effect reliably.
for layer in recognizer.backbone.layers:
    layer.trainable = False
# The "acc" metric is kept because the evaluation code expects evaluate() to
# return a (loss, metric) pair.
recognizer.compile(metrics=["acc"])

Looking for /root/.keras-ocr/craft_mlt_25k.h5
Provided alphabet does not match pretrained alphabet. Using backbone weights only.
Looking for /root/.keras-ocr/crnn_kurapan_notop.h5


In [12]:
def make_generators(train_labels, validation_labels, augmenter, batch_size):
  """Build batch generators and per-epoch step counts for both splits.

  Uses the module-level ``recognizer`` for the input size, the alphabet and
  the batch generator. Only the training split is augmented; the validation
  split always gets ``augmenter=None``.

  Args:
    train_labels: Label triples for the training split.
    validation_labels: Label triples for the validation split.
    augmenter: Augmenter applied to training images only.
    batch_size: Number of samples per batch.

  Returns:
    ``(training_gen, validation_gen, training_steps, validation_steps)`` where
    each step count is ``len(labels) // batch_size``.
  """
  def build_batch_generator(labels, split_augmenter):
    # Per-image generator sized to the recognizer's input, wrapped into batches.
    image_generator = keras_ocr.datasets.get_recognizer_image_generator(
        labels=labels,
        height=recognizer.model.input_shape[1],
        width=recognizer.model.input_shape[2],
        alphabet=recognizer.alphabet,
        augmenter=split_augmenter
    )
    return recognizer.get_batch_generator(
        image_generator=image_generator,
        batch_size=batch_size
    )

  training_gen = build_batch_generator(train_labels, augmenter)
  validation_gen = build_batch_generator(validation_labels, None)
  training_steps = len(train_labels) // batch_size
  validation_steps = len(validation_labels) // batch_size
  return training_gen, validation_gen, training_steps, validation_steps

In [13]:
def train_recognizer_kfold(training_labels, augmenter, batch_size, callbacks=None, k=10, epochs=100, reset_weights=False):
  """Train the module-level ``recognizer`` with k-fold splits and return the mean validation loss.

  The labels are cut into ``k`` contiguous folds of ``len(training_labels) // k``
  samples. Each fold in turn is used for evaluation while the model is fitted
  on all remaining samples (any remainder samples are always in the training part).

  NOTE: with ``reset_weights=False`` (the default, matching the historical
  behaviour) the same model keeps training from fold to fold, so later folds
  are evaluated on samples the model has already been trained on and the
  returned mean is optimistic. Pass ``reset_weights=True`` to restore the
  starting weights before every fold for an unbiased estimate; the optimizer
  state is not reset, and the model then ends up trained on the last fold only.

  Args:
    training_labels: List of label triples to split into folds.
    augmenter: Augmenter applied to the training part of every fold.
    batch_size: Number of samples per batch.
    callbacks: Optional Keras callbacks forwarded to ``fit``.
    k: Number of folds; must be at least 2.
    epochs: Number of epochs to fit per fold.
    reset_weights: Restore the initial weights at the start of every fold.

  Returns:
    The mean of the per-fold validation losses.

  Raises:
    ValueError: If ``k`` is smaller than 2 or there are fewer samples than folds.
  """
  if k < 2:
    raise ValueError("k must be at least 2, got {}".format(k))
  num_val_samples = len(training_labels) // k
  if num_val_samples == 0:
    raise ValueError(
        "need at least k={} samples for k-fold training, got {}".format(k, len(training_labels))
    )

  # Snapshot taken once so that every fold can start from the same weights.
  initial_weights = recognizer.training_model.get_weights() if reset_weights else None

  all_scores = []
  for i in range(k):
    print("Processing fold #", i+1)
    if reset_weights:
      recognizer.training_model.set_weights(initial_weights)
    fold_start = i * num_val_samples
    fold_end = fold_start + num_val_samples
    val = training_labels[fold_start:fold_end]
    train = training_labels[:fold_start] + training_labels[fold_end:]
    training_gen, validation_gen, training_steps, validation_steps = make_generators(
        train, val, augmenter, batch_size
    )
    recognizer.training_model.fit(
          x=training_gen,
          steps_per_epoch=training_steps,
          callbacks=callbacks,
          epochs=epochs,
          verbose=1
    )
    scores = recognizer.training_model.evaluate(
        x=validation_gen,
        steps=validation_steps,
        verbose=1
    )
    # evaluate() returns a bare scalar when no metrics are compiled and a list
    # (loss first) otherwise; accept both instead of assuming exactly one metric.
    val_loss = scores[0] if isinstance(scores, (list, tuple)) else scores
    all_scores.append(val_loss)
  val_loss_mean = np.mean(all_scores)
  return val_loss_mean

In [14]:
batch_size = 8
# Single augmentation step: random gamma contrast with gamma drawn from (0.25, 3.0).
augmenter = imgaug.augmenters.Sequential(
    [imgaug.augmenters.GammaContrast(gamma=(0.25, 3.0))]
)
# 10-fold run with 10 epochs per fold; keeps the mean validation loss.
val_loss = train_recognizer_kfold(
    training_labels=train_labels,
    augmenter=augmenter,
    batch_size=batch_size,
    k=10,
    epochs=10,
)

Processing fold # 1
17 / 1552 instances have illegal characters.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Processing fold # 2
15 / 1552 instances have illegal characters.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
2 / 172 instances have illegal characters.
Processing fold # 3
17 / 1552 instances have illegal characters.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Processing fold # 4
14 / 1552 instances have illegal characters.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
3 / 172 instances have illegal characters.
Processing fold # 5
14 / 1552 instances have illegal characters.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
3 / 172 instances have illegal cha

In [15]:
# Persist the trained training_model to an HDF5 file in the working directory.
recognizer.training_model.save("recognizer_icdar2013.h5")

In [17]:
# Mean validation loss across folds, as returned by train_recognizer_kfold.
print(val_loss)

0.8834629029035568


In [35]:
# Load weights from the file written by the save cell into recognizer.model,
# then print the layer summary of the prediction model.
# NOTE(review): the file was produced by training_model.save(...) (a full model)
# while only weights are loaded into recognizer.model here — the saved output
# suggests this works, but confirm the two models share the same weighted layers.
recognizer.model.load_weights("./recognizer_icdar2013.h5")
recognizer.prediction_model.summary()

Model: "model_5"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_2 (InputLayer)           [(None, 31, 200, 1)  0           []                               
                                ]                                                                 
                                                                                                  
 permute (Permute)              (None, 200, 31, 1)   0           ['input_2[0][0]']                
                                                                                                  
 lambda (Lambda)                (None, 200, 31, 1)   0           ['permute[0][0]']                
                                                                                                  
 conv_1 (Conv2D)                (None, 200, 31, 64)  640         ['lambda[0][0]']           

In [16]:
# Colab-only helper import; this cell cannot run outside Google Colab.
from google.colab import files
# Trigger a browser download of the saved model file.
# NOTE(review): the absolute /content path assumes the save cell ran with
# /content as the working directory (it wrote the file via a relative path).
files.download('/content/recognizer_icdar2013.h5') 

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>