#### Setup

In [3]:
! pip install wand

Collecting wand
  Downloading Wand-0.6.10-py2.py3-none-any.whl (142 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m142.8/142.8 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m MB/s[0m eta [36m0:00:01[0m
[?25hInstalling collected packages: wand
Successfully installed wand-0.6.10


In [3]:
! brew uninstall imagemagick
! brew install imagemagick@6
! brew unlink imagemagick
! brew link imagemagick@6 --force
! echo 'export MAGICK_HOME="/usr/local/opt/imagemagick@6/lib"' >> ~/.bash_profile
! echo 'export PATH="/usr/local/opt/imagemagick@6/bin:$PATH"' >> ~/.bash_profile

! source ~/.bash_profile

Uninstalling /opt/homebrew/Cellar/imagemagick/7.1.0-51... (805 files, 30.8MB)
[34m==>[0m [1mDownloading https://ghcr.io/v2/homebrew/core/imagemagick/6/manifests/6.9.12-[0m
######################################################################## 100.0%
[34m==>[0m [1mDownloading https://ghcr.io/v2/homebrew/core/imagemagick/6/blobs/sha256:3681[0m
[34m==>[0m [1mDownloading from https://pkg-containers.githubusercontent.com/ghcr1/blobs/sh[0m
######################################################################## 100.0%
[34m==>[0m [1mPouring imagemagick@6--6.9.12-66.arm64_monterey.bottle.tar.gz[0m
[34m==>[0m [1mCaveats[0m
imagemagick@6 is keg-only, which means it was not symlinked into /opt/homebrew,
because this is an alternate version of another formula.

If you need to have imagemagick@6 first in your PATH, run:
  echo 'export PATH="/opt/homebrew/opt/imagemagick@6/bin:$PATH"' >> ~/.zshrc

For compilers to find imagemagick@6 you may need to set:
  export LDFLAGS="-L/opt

In [None]:
! convert -list font

## Font Generation

In [239]:
import os
import numpy as np
import cv2
from tqdm import tqdm
from wand.color import Color
from wand.image import Image
from wand.drawing import Drawing
from wand.compat import nested
import shutil
import pandas as pd

In [240]:
# Charatcters 
language = 'abcdefghijklmnopqrstuvwxyz'
language_upper = language.upper()
language += language_upper
language += '0123456789'

assert len(language) == 26 * 2 + 10 
language = language_upper
language

'ABCDEFGHIJKLMNOPQRSTUVWXYZ'

In [298]:
# Fonts
handwritten_fonts = ['Herculanum', 'Annai MN', 'Bradley Hand', 'Brush Script MT', 'Chalkboard',
             'Comic Sans MS', 'Luminary', 'Noteworthy', 'Papyrus', 'Party LET', 'Savoye LET', 
             'Sign Painter', 'Skia', 'Snell Roundhand', 'Times New Roman', 'Trattatello', 'Zapfino']
typed_fonts = ['Arial', 'Arial Black', 'Arial Narrow', 'Arial Rounded MT Bold', 'Copperplate', 'Courier New', 'Helvetica',
              'Impact', 'Lucinda Grande', 'Microsoft Sans Serif', 'Tahoma', 'Verdana', 'Menlo', 'Didot', 'Copperplate', 'Avenir', 'Futura']

fonts = handwritten_fonts + typed_fonts 
# fonts = ['Chalkboard']

In [299]:
# resolution
width=200
height=200
font_size=100
background = Color('white')
foreground=Color('rgb(0, 0, 0)')

In [300]:
language

'ABCDEFGHIJKLMNOPQRSTUVWXYZ'

In [301]:
def delete(language, language_upper):
    for c in language:
        if c in language_upper:
            if os.path.exists(f'{c}{c}'):
                shutil.rmtree(f'{c}{c}')
        else:
            if os.path.exists(c):
                shutil.rmtree(f'{c}')

In [302]:
def generate(fonts, language, language_upper, width=200, height=200, font_size=100, background=Color('white'), foreground=Color('rgb(0,0,0)')):
    for f in tqdm(fonts):
        for c in language:
            if c in language_upper:
                if not os.path.exists(f'{c}{c}'):
                    os.mkdir(f'{c}{c}')
                filename = os.path.join(f'{c}{c}', f'{f}.png')
            else:
                if not os.path.exists(c):
                    os.mkdir(f'{c}')
                filename = os.path.join(f'{c}', f'{f}.png')

            with Drawing() as draw:
                with Image(width=width, height=height, background=background) as img:
                    draw.font_family = f
                    draw.font_size = font_size
                    draw.push()
                    draw.fill_color = foreground
                    draw.text(0,int(img.height/2), c)
                    draw.pop()
                    draw(img)
                    if os.path.exists(filename):
                        os.unlink(filename)
                    img.save(filename='tmp.png')

                    img = cv2.imread('tmp.png')

                    os.unlink('tmp.png')
                    ys, xs = np.where(np.all(img == (0, 0, 0), axis=-1))
                    
                    if len(ys) > 0 and len(xs) > 0:
                        cropped = img[min(ys):max(ys)+1, min(xs):max(xs)+1]
                        cv2.imwrite(filename, cropped)


In [303]:
generate(fonts, language, language_upper)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 34/34 [00:04<00:00,  7.07it/s]


#### Inference

##### Functions

In [322]:
a = (1,2)
*a, 3

(1, 2, 3)

In [613]:
import matplotlib.pyplot as plt
def show(*img):
    for i in img:
        plt.imshow(i, cmap='gray')
        plt.show()
        
def show_rgb(*img):
    for i in img:
        plt.imshow(i, cmap='gray')
        plt.show()

def read(path):
    return cv2.imread(path, cv2.IMREAD_GRAYSCALE)

def read_rgb(path):
    return cv2.imread(path, cv2.IMREAD_UNCHANGED)

def bin(img, bins=10, min_diff=10):
    count, division = np.histogram(img, bins)
    diff_series = pd.Series(count).diff()
    
    first_histogram_reduction_index = diff_series[(diff_series < 0) & (np.abs(diff_series) > min_diff)].index[0]
    threshold = division[first_histogram_reduction_index+1]
        
    _, img = cv2.threshold(img,threshold,255,cv2.THRESH_BINARY)
    return img

def bin2(img):
    return cv2.threshold(img, 0, 255, cv2.THRESH_BINARY +cv2.THRESH_OTSU)[1]

def neg(img):
    return 255 - img

def redraw_contours(img, thickness=16): 
    # Find contours and redraw
    contours, hierarchy = cv2.findContours(image=img, mode=cv2.RETR_TREE, method=cv2.CHAIN_APPROX_NONE)
                                      
    # draw contours on the original image
    image_copy = img.copy()
    cv2.drawContours(image=image_copy, contours=contours, contourIdx=-1, color=(255, 255, 255), thickness=thickness, lineType=cv2.LINE_AA)
    return image_copy

def components(img, min_area=800):
    # Connected components
    output = cv2.connectedComponentsWithStats(img, 4, cv2.CV_32S)
    num_letters = output[0]
    letters = output[1]
    stats = output[2]
    centroids = output[3]

    letter_imgs = []
    ## Sort letters + area check
    for l in range(1, num_letters):
        minx = stats[l, cv2.CC_STAT_LEFT]
        maxx = stats[l, cv2.CC_STAT_WIDTH] + minx
        miny = stats[l, cv2.CC_STAT_TOP] 
        maxy = stats[l, cv2.CC_STAT_HEIGHT] + miny
        area = stats[l, cv2.CC_STAT_AREA]
        if area < min_area:
            continue

        cropped_img = img[miny:maxy+1, minx: maxx+1].copy()
        
        # Add negatives directly 
        letter_imgs.append((neg(cropped_img), minx, maxx, miny, maxy))

    # Detect spaces and newlines
    sorted_imgs_y = sorted(letter_imgs, 
       key=lambda x: x[3])

    rectangles = sorted_imgs_y.copy()
    
    from collections import defaultdict
    rows = defaultdict(list)
    row_id = 0
    while len(rectangles) > 0:
        r = rectangles[0]
        rows[row_id].append(r)
        ctr_y = (r[3] + r[4])/2
        rectangles.remove(r)
        
        to_remove = []
        for (img, minx, maxx, miny, maxy) in rectangles:
            # if central y line goes through letter box
            if ctr_y >= miny and ctr_y <= maxy:
                rows[row_id].append((img, minx, maxx, miny, maxy))
                to_remove.append((img, minx, maxx, miny, maxy))
        
        for rem in to_remove:
            rectangles.remove(rem)
            
        row_id+=1
    
    final_result = []
    # for y in text
    for row in range(row_id):
        line = []
        # Search for spaces
        sorted_imgs_x = sorted(rows[row], 
           key=lambda x: x[1])
        
        # for char in line
        word = []
        for i in range(0, len(sorted_imgs_x)-1):
            xmax_first = sorted_imgs_x[i][2]
            xmin_second = sorted_imgs_x[i+1][1]

            prev_letter_width = sorted_imgs_x[i][2] - sorted_imgs_x[i][1]
            dist = (xmin_second - xmax_first)
            
            word.append(sorted_imgs_x[i])
            if dist > prev_letter_width:
                line.append(word)
                word = []
        
        # Assume no space at end of line
        word.append(sorted_imgs_x[-1])
        line.append(word)
        final_result.append(line)
    
    return final_result

def gray(img):
    return cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

def resize(img, size=(100, 100)):
    return cv2.resize(img, size, interpolation=cv2.INTER_NEAREST)

def erode(img, it=1):
    kernel = np.ones((3, 3), np.uint8)
    return cv2.erode(img, kernel, iterations=it)

def dilate(img, it=1):
    kernel = np.ones((3, 3), np.uint8)
    return cv2.dilate(img, kernel, iterations=it)

def blur(x):
    return cv2.GaussianBlur(x, (3, 3), cv2.BORDER_DEFAULT)

In [617]:
def pipeline_2(path, show_steps=False):
    x = read(path)
    y = cv2.Laplacian(x, cv2.CV_16S, ksize=3)

    # Binarize
    z = y.copy()
    z[z > 127] = 255
    z[z <= 127] = 0
    
    # Thicken letters
    q = dilate(z, it=3).astype(np.uint8)
    
    # Find components
    lines = components(q)

    if show_steps:
        show(x)
        show(y)
        show(z)
        show(q)
        for line in lines:
            for word in line:
                for c in word:
                    show(c[0])
            
    return lines

In [621]:
r = ocr_nn('multiline.png')
print(r)

FMI 
IHAGE 
PRQQESSLNG 



In [627]:
r = ocr_nn('total.png', show_steps=False)
print(r)

AL SHTEWS 
ESHTA BOB 
RALZFLLO 



### NN

In [307]:
import tensorflow as tf
from tensorflow.keras import datasets, layers, models

In [308]:
def augment(train_images, train_labels, depth=2):
    res_images = []
    res_labels = []
    for i in range(train_images.shape[0]):
        img = train_images[i]
        l = train_labels[i]
        for it in range(0, depth):
            img_x = erode(img, it=it)
            res_images.append(img_x)
            res_labels.append(l)
        for it in range(0, depth):
            img_x = dilate(img, it=it)
            res_images.append(img_x)
            res_labels.append(l)
    return np.stack(res_images),np.array(res_labels)  

In [601]:
tf.keras.utils.set_random_seed(0)

## Augment Data
size = 64
epochs = 20
batch_size = 128

model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(size, size, 1)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.Flatten())
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(len(language)))
model.summary()

import glob

image_paths = sorted(glob.glob("*/*.png"))
train_labels = np.array(list(range(26)))
classes = sorted(set([i[:i.index('/')] for i in image_paths]))
classes_fmap = dict(zip(classes, train_labels))
classes_imap = dict(zip(train_labels, classes))

train_images = [bin(resize(read(x), size=(size, size)))//255 for x in image_paths]
train_labels = np.stack([classes_fmap[i[:i.index('/')]] for i in image_paths])
train_images = np.stack(train_images)

# Augment images 
train_images_augmented, train_labels_augmented = augment(train_images, train_labels, depth=2)
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

history = model.fit(train_images_augmented, train_labels_augmented, 
                    epochs=epochs,
                    batch_size=batch_size)


Model: "sequential_12"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_36 (Conv2D)          (None, 62, 62, 32)        320       
                                                                 
 max_pooling2d_24 (MaxPoolin  (None, 31, 31, 32)       0         
 g2D)                                                            
                                                                 
 conv2d_37 (Conv2D)          (None, 29, 29, 64)        18496     
                                                                 
 max_pooling2d_25 (MaxPoolin  (None, 14, 14, 64)       0         
 g2D)                                                            
                                                                 
 conv2d_38 (Conv2D)          (None, 12, 12, 64)        36928     
                                                                 
 flatten_12 (Flatten)        (None, 9216)            

In [620]:
def classify_nn_augmented(letter_img, inference_size=(size, size), _filter='.png'):
    # Add dilation to inference
    letter_imgs_augmented = [dilate(letter_img, it=i) for i in range(3)]
    letter_imgs_augmented += [erode(letter_img, it=i) for i in range(3)]
    
    characters = []
    for i, letter_img in enumerate(letter_imgs_augmented):
        letter_img_res = bin(resize(letter_img, inference_size))//255
        letter_img_tensor = np.stack([letter_img_res])

        results = model.predict(letter_img_tensor, verbose=False)
        char_idx = np.argmax(results)
        character = classes_imap[char_idx][0]
        characters.append({'char': character, 'dilate_it': i, 'result': np.max(results)})
        
    characters = pd.DataFrame(characters)
    return characters['char'].mode().values[0]

def classify_nn(letter_img, inference_size=(size, size), _filter='.png'):
    letter_img_res = bin(resize(letter_img, inference_size))//255
    letter_img_tensor = np.stack([letter_img_res])
    
    results = model.predict(letter_img_tensor, verbose=False)
    char_idx = np.argmax(results)
    character = classes_imap[char_idx][0]
    return character, results

def ocr(path, show_steps, class_fn):
    lines = pipeline_2(path, show_steps=show_steps)
    text = ''
    for line in lines:
        for word in line:
            for c, *_ in word:
                char = class_fn(c)[0]
                text += char
            text += ' '
        text += '\n'
    return text

def ocr_nn(path, show_steps=False):
    return ocr(path, show_steps, classify_nn)
    
    
def ocr_nn_augmented(path, show_steps=False):
    return ocr(path, show_steps, classify_nn_augmented)

#### Tests NN

In [582]:
ocr_nn('test.png')

[[('C',
   array([[-7.9650264e+00, -1.3834951e+00,  1.2596487e+01,  5.6434002e+00,
            6.2750216e+00, -1.0346209e+01, -3.0045459e+00, -6.4410105e+00,
            6.4317684e+00,  5.4019656e+00, -6.6936736e+00, -1.5903983e-02,
           -5.3574519e+00,  4.6499548e+00,  1.2555685e+01, -3.0026367e+00,
            1.1905471e+00, -5.1748452e+00,  2.7976406e+00,  3.0307505e+00,
           -4.7817626e+00, -1.6235720e+01, -1.7238410e+00,  3.0715261e+00,
           -1.3852016e+00, -1.3062192e+00]], dtype=float32)),
  ('L',
   array([[ -8.455614 ,  -0.8418528,  14.129976 ,   6.329447 ,   5.1831527,
           -18.671293 ,   3.747546 ,  -5.011556 ,  10.150172 ,   7.6068316,
             7.6509523,  23.275553 ,  -7.660453 ,  -7.0412903,   6.889268 ,
            -2.4809484,  14.086274 ,  -6.427495 ,   1.5482782,  -9.092689 ,
            -3.589717 , -11.280183 , -12.544739 ,   2.9061317,   1.3964843,
            13.260579 ]], dtype=float32)),
  ('F',
   array([[  2.9253075,  -5.151885 , -21.

In [297]:
results = {}
# fns = [ocr, ocr_nn, ocr_nn_augmented]
fns = [ocr_nn, ocr_nn_augmented]
tests = [x for x in os.listdir('.') if x.endswith('.png') or x.endswith('.jpg')]
for fn in fns:
    num_correct = 0
    num_total = 0
    
    for t in tests:
        label = t[:t.index('.')].upper()
        num_total += len(label)

        result = ''.join(fn(t))
        for i in range(min(len(label), len(result))):
            if label[i] == result[i]:
                num_correct+=1
        print(result)
        
    results[fn] = num_correct/num_total
results

TEAN
FRI
DIGLTAL
TACTICNL
OCR
TEST
PROCESSLNN
TEAN
FMI
DIGLTAL
TACTICNL
OCR
TEST
PROCESSINN


{<function __main__.ocr_nn(path, show_steps=False, return_dfs=False)>: 0.8461538461538461,
 <function __main__.ocr_nn_augmented(path, show_steps=False, return_dfs=False)>: 0.8974358974358975}

In [273]:
! ls -R /Library/Fonts | grep ttf

Arial Unicode.ttf


### Tests

In [406]:
num_correct = 0
num_total = 0

for t in ['test.png', 'ocr.png', 'tactical.png', 'digital.png', 'team.png', 'processing.png']:
    label = t[:t.index('.')].upper()
    num_total += len(label)
    
    result = ''.join(ocr(t))    
    for i in range(len(label)):
        if label[i] == result[i]:
            num_correct+=1
        
    print(ocr(t))
print('Accuracy:', num_correct/num_total)

['T', 'F', 'S', 'T']
['O', 'C', 'R']
['T', 'A', 'C', 'T', 'T', 'C', 'A', 'L']
['Q', 'T', 'C', 'T', 'T', 'A', 'L']
['T', 'E', 'A', 'A']
['P', 'X', 'O', 'C', 'C', 'S', 'S', 'X', 'X', 'A']
Accuracy: 0.6666666666666666


In [378]:
ocr('test.png')

['T', 'F', 'S', 'T']

In [379]:
ocr('tactical.png', show_steps=False)

['T', 'A', 'C', 'T', 'T', 'C', 'A', 'L']

In [86]:
ocr('digital.png', show_steps=False)

['D', 'I', 'C', 'I', 'T', 'A', 'L']

In [87]:
ocr('ocr.png', show_steps=False)

['O', 'C', 'R']

In [88]:
ocr('team.png', show_steps=False)

['T', 'E', 'A', 'A']

In [89]:
ocr('processing.png', show_steps=False)

['F', 'R', 'D', 'C', 'F', 'S', 'S', 'I', 'X', 'A']