In [2]:
import h5py
from matplotlib import pyplot as plt
import cv2
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import seaborn as sn
import pandas as pd

import math
import random

%matplotlib inline

# Notify when finished training
%load_ext jupyternotify


print("Test")

<IPython.core.display.Javascript object>

Test


# Load constants

In [17]:
FONTS = ['Skylark', 'Ubuntu Mono', 'Sweet Puppy']
# Pre-calculated average width, height of all cropped train data
AVG_CHAR_WIDTH = 28
AVG_CHAR_HEIGHT = 49

train_filename = "font_recognition_train_set/SynthText.h5"
val_filename = "validation_set/SynthText_val.h5"

# Load model

In [3]:

model = keras.models.load_model("saved_model.h5")

model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_16 (Conv2D)           (None, 41, 20, 128)       10496     
_________________________________________________________________
max_pooling2d_12 (MaxPooling (None, 20, 10, 128)       0         
_________________________________________________________________
conv2d_17 (Conv2D)           (None, 18, 8, 256)        295168    
_________________________________________________________________
max_pooling2d_13 (MaxPooling (None, 9, 4, 256)         0         
_________________________________________________________________
conv2d_18 (Conv2D)           (None, 7, 2, 512)         1180160   
_________________________________________________________________
max_pooling2d_14 (MaxPooling (None, 3, 1, 512)         0         
_________________________________________________________________
flatten_4 (Flatten)          (None, 1536)             

# func: Create x,y sets from h5 file.

In [22]:
def populate(filename, X, Y):
    """
    filename - h5 file to read from
    X - array to populate
    Y - array to populate
    """
    
    # Read from db
    db = h5py.File(filename, "r")
    im_names = list(db["data"].keys())
    num_of_images = len(im_names)
    print(f"Number of images: {num_of_images}")
    
    for img_name in im_names:
        res = extract_data(db, img_name)
        for word in res["words"]:
            for char in word["chars"]:
                char_font = char["font"]
                char_crop = char["crop"]

                # There are some images with defect bounding boxes (image: hubble_22.jpg)
                if char_crop.shape[0] == 0 or char_crop.shape[1] == 0:
                    word_str = word["word"]
                    char_str = char["char"]
                    print(f"Invalid crop at image: {img_name}, word: {word_str}, char: {char_str}")
                else:
                    append_to_set (X, Y, char_crop, char_font, noisy=False)

# func: Crop the word with perfect angle - affine transformation

Tutorial that helped me:

https://docs.opencv.org/3.4/d4/d61/tutorial_warp_affine.html

In [12]:
def crop_affine(img, bb):
    """
    Crop image using affine transformation, around bounding box. Returns cropped image.
    """
    img_copy = img.copy()
    width = img_copy.shape[1]
    height = img_copy.shape[0]
        
    point1 = (bb[0][0], bb[1][0]) # Top-left
    point2 = (bb[0][1], bb[1][1]) # Top-right
    point3 = (bb[0][2], bb[1][2]) # Bottom-Right
    point4 = (bb[0][3], bb[1][3]) # Bottom-Left
    
    # Euclidian distance
    bb_width = int(np.linalg.norm(np.array(point1) - np.array(point2)))
    bb_height = int(np.linalg.norm(np.array(point1) - np.array(point3)))

    # Mapping srcPoints (list of points of size 3) to dstPoints (list of points of size 3)
    srcTri = np.array( [point1, point2, point4] ).astype(np.float32)
    dstTri = np.array( [[0, 0], [bb_width, 0], [0, bb_height]] ).astype(np.float32)
    
    # Apply transformation
    warp_mat = cv2.getAffineTransform(srcTri, dstTri)
    warp_dst = cv2.warpAffine(img_copy, warp_mat, (width, height))
    
    # Crop the 'warped' image
    crop = warp_dst[0:bb_height, 0:bb_width]
    
    return crop

# func: Extract data from image name return json

In [13]:
def extract_data(db, img_name:str):
    """
    Process the image and returned processed result.
    Parameter db is h5 database read from file.
    Return a json in the following structure (as an example):
    {
        "img": <ndarray>,
        "name": "test.png",
        "words": [
            {
                "word": "the",
                "font": "Ubuntu Mono",
                "chars": [
                    {
                        "char": "t",
                        "font": "Ubuntu Mono",
                        "crop": <ndarray>,
                        "bb": <ndarray>
                    }, ...
                ],
                "bb": <ndarray>
                "crop": <ndarray>
            }, ...
        ]
    }
    """
    img = db['data'][img_name][:]                 # The image.
    font = db['data'][img_name].attrs['font']     # Contains list of fonts.
    txt = db['data'][img_name].attrs['txt']       # Contains list of words.
    charBB = db['data'][img_name].attrs['charBB'] # Contains list of bb for words.
    wordBB = db['data'][img_name].attrs['wordBB'] # Contain list of bb for chars.


    words = []
    char_index_accumulator = 0
    word_index = 0 # Counter
    
    # Process word
    for word in txt:
        word_font = font[char_index_accumulator].decode() # Convert bytes to string
        chars = []

        word_bb = wordBB[:, :, word_index]
        word_crop = crop_affine(img, word_bb)

        # Process chars
        for char_index in range(len(word)):
            char = chr(word[char_index])
            char_font = font[char_index_accumulator].decode()
            char_bb = charBB[:, :, char_index_accumulator]
            
            #assert char_font == word_font # Double check that the pre-processed image is indeed 1 font per word, and each char is same font as word.
            
            crop_char = crop_affine(img, char_bb)
            
            chars.append({
                "char": char,
                "font": char_font,
                "crop": crop_char,
                "bb": char_bb
            })
            
            char_index_accumulator += 1

        words.append({
            "word": word.decode(),
            "font": word_font,
            "chars": chars,
            "bb": word_bb,
            "crop": word_crop,
        })
        word_index += 1
    
    # Return result
    return {
        "img": img,
        "name": img_name,
        "words": words,
    }

# func: Process image and label and append to training set

In [21]:
def append_to_set(X, Y, x, y, noisy=True):
    """
    Append (x,y) sample to (X,Y) arrays. Checking correct font (y) and shape of image (x).
    Set noisy to False if you don't want to convert image 'x' to noisy image and appending it (append 'x' without modification).
    """
    # Convert to gray
    try:
        if x.shape[2] != 1:
            x = cv2.cvtColor(x, cv2.COLOR_BGR2GRAY)
    except:
        pass
    # Resize
    if x.shape[0] != AVG_CHAR_HEIGHT or x.shape[1] != AVG_CHAR_WIDTH:
        x = cv2.resize(x, (AVG_CHAR_WIDTH, AVG_CHAR_HEIGHT))
    # Normalize
    x = normalize(x)
    
    # Map y string to float
    if type(y) == str:
        if y == "Ubuntu Mono":
            y = 0
        elif y == "Skylark":
            y = 1
        elif y == "Sweet Puppy":
            y = 2
        else:
            raise "Error font, no such font: " + str(y)
    
    if noisy:
        x = noisy(x)
    
    X.append(x)
    Y.append(y)

# func: Normalize function

![](images/normalize_formula.png)

In [19]:
def normalize(img, low=0, high=1):
    """
    Normalize image to range [low, high] from any range. Note: fast algorithm.
    """
    return np.interp(img, [np.min(img), np.max(img)], [low, high])

# Load validation set

In [23]:
%%time
x_val = [] #Images
y_val = [] #Labels

populate(val_filename, x_val, y_val)
print(f"x_val length: {len(x_val)} y_val length: {len(y_val)}")

Number of images: 520
x_val length: 8198 y_val length: 8198
CPU times: user 44.6 s, sys: 41.1 s, total: 1min 25s
Wall time: 9.35 s


In [26]:
X_val = np.array(x_val)
X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], X_val.shape[2], 1)
Y_val = np.array(y_val)

print("X val shape: ", X_val.shape)
print("Y val shape: ", Y_val.shape)

X val shape:  (8198, 49, 28, 1)
Y val shape:  (8198,)


# Evaluate

In [27]:
print("Evaluate on validation data")
results = model.evaluate(X_val, Y_val, batch_size=128)
print("val loss, val acc:", results)

Evaluate on validation data
val loss, val acc: [0.19643855094909668, 0.9523054361343384]


In [28]:
Y_val[0:10]

array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1])