<a href="https://colab.research.google.com/github/GruAna/VU/blob/master/m_keras.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Keras-OCR

## Packages

These two versions of keras-ocr and matplotlib work together on Google Colab.

In [None]:
!pip install keras-ocr==0.8.9
!pip install matplotlib==3.3.0
!pip install imgaug==0.2.6      # because of incompatibility warning



Mount Google Drive.


In [None]:
# Mount the user's Google Drive at /content/drive; the image, label and helper
# paths used further down in this notebook all live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import matplotlib.pyplot as plt
import os
import cv2 as cv
import xml.etree.ElementTree as ET
import numpy as np
import pandas as pd

from tqdm import tqdm

import keras_ocr

In [None]:
!cp drive/MyDrive/Colab_Notebooks/VU/utils.py .
from utils import *

## Setup

In [None]:
# Target image width in pixels, used only when `resize` is enabled.
width = 300

# Whether the images are to be resized (shrunk) to `width` before processing.
resize = False

## Dataset loading

**CTW1500 dataset**

Get images. Update location of images **manually**.

In [None]:
# Directory with the input images; every file directly inside it is used.
imgs_dir = '/content/drive/MyDrive/Colab_Notebooks/VU/FewImages/images/'

# os.walk yields (dirpath, dirnames, filenames) triples; only the first one
# (the top-level directory) is consumed, so sub-directories are not visited.
# Sorting gives a stable order; later cells pair images and labels by position.
filenames = sorted(next(os.walk(imgs_dir))[2])

# Full path to every image file, in the same order as `filenames`.
list_img_paths = []
for img_name in filenames:
    list_img_paths.append(os.path.join(imgs_dir, img_name))

n_imgs = len(list_img_paths)

In [None]:
# Load every image with OpenCV.
original_images = [cv.imread(path) for path in list_img_paths]

# cv.imread does not raise when a file cannot be decoded; it returns None.
# Fail here with the offending paths instead of with an obscure error in a
# later cell (e.g. when `.shape` is read or the OCR pipeline is run).
unreadable = [
    path for path, img in zip(list_img_paths, original_images) if img is None
]
if unreadable:
    raise OSError(
        f"Could not read {len(unreadable)} image file(s): {unreadable}"
    )

# Shrink the images if requested; otherwise work on a (shallow) copy of the
# list so that `original_images` itself is left untouched.
if resize:
    images = shrink_all(original_images, width)
else:
    images = original_images.copy()

Get paths to files with labels. Update location **manually**.

In [None]:
# Directory with the XML label files; every file directly inside it is used.
labels_dir = '/content/drive/MyDrive/Colab_Notebooks/VU/FewImages/labelsxml/'

# Only the top-level directory listing is taken from os.walk. The names are
# sorted so that the order is stable between runs.
xml_files = sorted(next(os.walk(labels_dir))[2])

# Full path to every label file, in the same order as `xml_files`.
list_xml_paths = list(
    map(lambda xml_name: os.path.join(labels_dir, xml_name), xml_files)
)

Get ground truths from all XML files.

In [None]:
from google.colab.patches import cv2_imshow

ground_truth = []

for idx, xml_path in enumerate(list_xml_paths):
    if resize:
        # If the images were resized, the ground-truth coordinates have to be
        # scaled by the same factor. The factor is capped at 1 (no upscaling).
        scale = min(width / original_images[idx].shape[1], 1)
        labels = get_labels_xml(xml_path, scaling_ratio=scale)
    else:
        labels = get_labels_xml(xml_path)
    ground_truth.append(labels)

# ground_truth holds one entry per label file. According to the original note
# each entry is a list of tuples, where the first item is the ground-truth word
# and the second is an array of top-left and bottom-right coordinates.

## Prediction

Run OCR method.

In [None]:
# Create the keras-ocr pipeline object that is used for recognition below.
# keras-ocr will automatically download pretrained weights for the detector and recognizer.
pipeline = keras_ocr.pipeline.Pipeline()

Looking for /root/.keras-ocr/craft_mlt_25k.h5
Downloading /root/.keras-ocr/craft_mlt_25k.h5
Looking for /root/.keras-ocr/crnn_kurapan.h5
Downloading /root/.keras-ocr/crnn_kurapan.h5


Adjust batch size based on your GPU.

In [None]:
predictions = []

# Batch size. Use step = 1 for large images (at least one image has one
# dimension greater than 800px); increase it if the GPU has enough memory.
step = 1

# Number of batches needed to cover all images (ceiling division), so a final,
# possibly incomplete, batch is included and n_imgs < step still yields one batch.
number_of_batches = (n_imgs + step - 1) // step

for i in tqdm(range(number_of_batches)):
    # Slicing past the end of the list is safe: the last batch simply contains
    # the remaining images. Every image is processed exactly once.
    batch = images[i * step:(i + 1) * step]
    predictions += pipeline.recognize(batch)
    # each list of predictions appended here is a list of (word, box) tuples.


  0%|          | 0/20 [00:00<?, ?it/s]


ValueError: ignored

In [None]:
# # Plot the predictions
# fig, axs = plt.subplots(nrows=len(images), figsize=(200, 200))
# for ax, image, predicts in zip(axs, images, predictions):
#     keras_ocr.tools.drawAnnotations(image=image, predictions=predicts, ax=ax)

## Results postprocessing

Calculate bounding rectangles for detected words in an image.

For all images.

Replace polygon coordinates by these two rectangle coordinates.

In [None]:
# Collect the non-empty predictions of every image.
# `predicted` has one entry per image; each entry is a list of
# (word, bounding_rectangle) tuples, one per recognized word. Words that are
# empty or consist only of whitespace are dropped.
predicted = [
    [
        (word, bounding_rectangle(polygon))
        for word, polygon in predictions[img_idx]
        if word and not word.isspace()
    ]
    for img_idx in range(n_imgs)
]

Compare bounding boxes of prediction and ground truth.

Count Intersection over Union (IoU) metric for bounding boxes. Store for all images in a list `iou_images`.

Count Character Error Rate (CER) metric for characters in words. Store for all images in a list `cer_images`.

In [None]:
# Per-image results: iou_images[i] holds the grouped IoU text regions of image
# i, cer_images[i] the text comparison result of each of those regions.
iou_images = []
cer_images = []

# loop through images
for i in range(n_imgs):
    # Transpose the lists of (word, box) tuples into columns:
    # index 0 holds all words, index 1 all boxes.
    predicted_cols = list(zip(*predicted[i]))
    ground_truth_cols = list(zip(*ground_truth[i]))

    # take only the coordinate arrays for this image
    pred_boxes = predicted_cols[1]
    gt_boxes = ground_truth_cols[1]
    iou_from_image = iou_image(pred_boxes, gt_boxes)

    iou_text_regions = group_text(iou_from_image)

    # take only the labels for this image
    pred_labels = predicted_cols[0]
    gt_labels = ground_truth_cols[0]

    # Compare corresponding labels.
    # `comparison` collects the results of all text regions of this image.
    comparison = []
    for gt_ind, observation in enumerate(iou_text_regions):
        # observation[1] is used as the indices of the predicted words that
        # belong to this region (structure defined by group_text in utils).
        pred_ind = observation[1]
        # A separate index name keeps the image index `i` unambiguous.
        predicted_text = " ".join(pred_labels[k] for k in pred_ind)
        gt_pred_text = (gt_labels[gt_ind], predicted_text)

        # comparison for one text region (on one image)
        comparison.append(compare_text_cer(gt_pred_text))

    iou_images.append(iou_text_regions)
    cer_images.append(comparison)

### Metrics
Metrics for each image (average of values of all regions in one image).

IoU (detection) `iou_in_image`

CER (recognition) `cer_in_image`

In [None]:
# One averaged value per image for each metric.
iou_in_image = []
cer_in_image = []

for img_idx in range(n_imgs):
    # CER: average column 2 of every region's comparison, then average those
    # per-region means over the whole image.
    region_means = []
    for region in cer_images[img_idx]:
        region_columns = list(zip(*region))
        region_means.append(average(region_columns[2]))
    cer_in_image.append(average(region_means))

    # IoU: average column 0 of the image's text regions.
    iou_columns = list(zip(*iou_images[img_idx]))
    iou_in_image.append(average(iou_columns[0]))

## Overall results for dataset

In [None]:
# One row per image: file name and the image's mean IoU and CER.
rows = list(zip(filenames, iou_in_image, cer_in_image))
df_results = pd.DataFrame(rows, columns=['Filename', 'IoU', 'CER'])

# Dataset-level scores in percent, rounded to one decimal place.
iou_mean = df_results['IoU'].mean()
mean_iou = round(iou_mean * 100, 1)

# CER is an error rate, so it is reported as accuracy: (1 - CER).
cer_mean = df_results['CER'].mean()
mean_cer = round((1 - cer_mean) * 100, 1)

print(f"mean IoU accuracy = {mean_iou}%, mean CER accuracy = {mean_cer}%")

df_results

### Save results

In [None]:
# Set the output directory and create it if it doesn't exist.
# os.makedirs with exist_ok=True avoids the check-then-create race of
# os.path.exists + os.mkdir and also works if a nested path is configured.
output_dir = 'results'
os.makedirs(output_dir, exist_ok=True)

Specify unique filename and specification.

The specification is a list of two items: first the method used, second some useful information about the run.

In [None]:
# Choose one of the presets below or create your own; keep unused ones commented out.

# Preset: basic keras-OCR (original image size, case insensitive, only alphanumeric)
ocr_method = "keras-OCR"
run_details = "original image width, case insensitive, only alphanumeric"
file_name = "kerasOCR_basic"
specifications = [ocr_method, run_details]

# Preset: basic keras-OCR (300px image width, case insensitive, only alphanumeric)
# file_name = "kerasOcr_smallimgs"
# specifications = ["keras-OCR", "300px image width, case insensitive, only alphanumeric"]

In [None]:
# Write a small text report: the first line holds the specifications (method
# and other parameters) joined by ": ", followed by one line per metric.
# The values are written exactly as stored in mean_iou and mean_cer.
result_path = '%s/result_%s.txt' % (output_dir, file_name)
report_parts = [
    ": ".join(map(str, specifications)),
    "\n"+f"iou = {mean_iou}",
    "\n"+f"cer = {mean_cer}",
]

with open(result_path, 'w') as output_file:
    output_file.writelines(report_parts)

Visualize bounding rectangles and corresponding words.

In [None]:
# Index of the sample image to visualize. To process all images instead,
# replace the assignment with a loop over the code below:
# for i in range(len(images)):
i = 0

# plot_results is a helper from utils; its return value is used below through
# subplots_adjust, savefig and show.
im = plot_results(images[i], ground_truth[i], predicted[i])

# Remove the outer margins and the spacing between subplots.
layout = dict(left=0, bottom=0, right=1, top=1, wspace=0, hspace=0)
im.subplots_adjust(**layout)

# Save the figure next to the text report, then display it.
figure_path = '%s/result_%s.png' % (output_dir, file_name)
im.savefig(figure_path)
im.show()
