# Signature recognition using YOLOv5s and Tesseract (LSTM)

This workflow represents the final pipeline proposal.

# Setup

Enable GPU acceleration in Google Colab!

In [None]:
# Clone the roboflow-ai/yolov5 repository, switch into it and install its Python requirements.
!git clone https://github.com/roboflow-ai/yolov5
%cd yolov5
!pip install -qr requirements.txt
# Install Tesseract and its build dependencies from the alex-p PPA, then the tesserocr Python bindings.
!sudo add-apt-repository -y ppa:alex-p/tesseract-ocr
!sudo apt-get update
!sudo apt-get install tesseract-ocr libtesseract-dev libleptonica-dev pkg-config
!pip install tesserocr

# Download the English model from tessdata_fast (tag 4.1.0) and move it into Tesseract's tessdata directory.
!curl -O https://raw.githubusercontent.com/tesseract-ocr/tessdata_fast/4.1.0/eng.traineddata
!mv eng.traineddata /usr/share/tesseract-ocr/4.00/tessdata/

import torch

from IPython.display import Image, clear_output  # to display images

# Remove the installation log from the cell output before printing the summary.
clear_output()

# Report the torch version and either the properties of CUDA device 0 or 'CPU' when no GPU is available.
print('Setup complete. Using torch %s %s' % (torch.__version__, torch.cuda.get_device_properties(0) if torch.cuda.is_available() else 'CPU'))

## Run once to load the model

In [None]:
import argparse
import time
from pathlib import Path
from google.colab.patches import cv2_imshow

import cv2
import torch
import torch.backends.cudnn as cudnn
from numpy import random
import numpy as np
import matplotlib

from models.experimental import attempt_load
from utils.datasets import LoadStreams, LoadImages
from utils.general import check_img_size, non_max_suppression, apply_classifier, scale_coords, xyxy2xywh, \
    strip_optimizer, set_logging, increment_path
from utils.plots import plot_one_box
from utils.torch_utils import select_device, load_classifier, time_synchronized

# variables
path = 'path-to-image'  # path to image for initial run
weights = 'path-to-weight-file'  # path to weight file
imgsz = 416  # image size: every input is resized to imgsz x imgsz before inference
conf_thres = 0.7  # confidence threshold passed to non_max_suppression
iou_thres = 0.25  # IoU threshold passed to non_max_suppression
classes = None  # filter by class, e.g. [0] or [0, 2, 3]; None applies no class filter
agnostic_nms = False  # class-agnostic NMS
# Use the first GPU when CUDA is available and fall back to the CPU otherwise,
# so the notebook also runs on a runtime without GPU acceleration.
device = select_device('0' if torch.cuda.is_available() else 'cpu')  # 'cpu' or '0'
half = device.type != 'cpu'  # half precision only supported on CUDA

def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True,
              stride=32):
    """Resize ``img`` to fit ``new_shape`` and pad the remainder with ``color``.

    See https://github.com/ultralytics/yolov3/issues/232 for background.

    Args:
        img: image array whose first two dimensions are (height, width).
        new_shape: target (height, width); a single int means a square target.
        color: border colour used for the padding.
        auto: when True, pad only up to the next multiple of ``stride``
            (minimum rectangle) instead of the full ``new_shape``.
        scaleFill: when True (and ``auto`` is False), stretch the image to
            ``new_shape`` without padding. ``auto`` takes precedence.
        scaleup: when False, the image is only ever scaled down, never up.
        stride: multiple that the padding is reduced to when ``auto`` is True.
            Defaults to 32, the value that was previously hard-coded.

    Returns:
        Tuple ``(img, ratio, (dw, dh))``: the resized and padded image, the
        (width, height) scale ratios, and the padding applied to each side
        along the width and height.
    """
    shape = img.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Scale ratio (new / old)
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    if not scaleup:  # only scale down, do not scale up (for better test mAP)
        r = min(r, 1.0)

    # Compute padding
    ratio = r, r  # width, height ratios
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
    if auto:  # minimum rectangle
        dw, dh = np.mod(dw, stride), np.mod(dh, stride)  # wh padding
    elif scaleFill:  # stretch
        dw, dh = 0.0, 0.0
        new_unpad = (new_shape[1], new_shape[0])
        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios

    dw /= 2  # divide padding into 2 sides
    dh /= 2

    if shape[::-1] != new_unpad:  # resize
        img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
    # The -0.1 / +0.1 offsets split an odd total padding so that the two sides
    # together still add up to the full amount after rounding.
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return img, ratio, (dw, dh)

# load model - weights = path to weight file
model = attempt_load(weights, map_location=device)
imgsz = check_img_size(imgsz, s=model.stride.max())  # check imgsz against the model's largest stride
if half:
    model.half()  # to FP16

# Get names and colors (one random colour per class name, used when drawing boxes)
names = model.module.names if hasattr(model, 'module') else model.names
colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

# Warm-up: one forward pass on an all-zero image (skipped on CPU).
# torch.no_grad() stops autograd from recording the pass; this cell only does inference.
img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
with torch.no_grad():
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once

# cv2.imread returns None instead of raising when the file cannot be read,
# so fail here with a clear message rather than inside cv2.resize.
img1 = cv2.imread(path)
if img1 is None:
    raise FileNotFoundError('Could not read image: %s' % path)
img0 = cv2.resize(img1, (imgsz, imgsz))
img = letterbox(img0, new_shape=imgsz)[0]
img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
img = np.ascontiguousarray(img)

# scaling factors for x and y respective to original input size and chosen imgsz for prediction
scaley = img1.shape[0] / imgsz
scalex = img1.shape[1] / imgsz

img = torch.from_numpy(img).to(device)
img = img.half() if half else img.float()  # uint8 to fp16/32
img /= 255.0  # 0 - 255 to 0.0 - 1.0
if img.ndimension() == 3:
    img = img.unsqueeze(0)  # add the batch dimension

# Inference (no test-time augmentation)
with torch.no_grad():
    pred = model(img, augment=False)[0]

# Apply NMS
pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic=agnostic_nms)

# draw boxes
# NOTE(review): some YOLOv5 revisions appear to return None instead of an empty
# tensor when nothing is detected; treat that as "no boxes".
detections = pred[0] if pred[0] is not None else []
for *xyxy, conf, cls in reversed(detections):
    label = '%s %.2f' % (names[int(cls)], conf)
    plot_one_box(xyxy, img0, label=label, color=colors[int(cls)], line_thickness=3)

cv2_imshow(img0)

In [None]:
def detect(path):
    """Run the loaded YOLOv5 model on a single image file.

    The image is resized to ``imgsz`` x ``imgsz``, converted to a normalised
    RGB tensor on ``device`` and passed through ``model``; the raw prediction
    is then filtered with ``non_max_suppression`` using the notebook-level
    ``conf_thres``, ``iou_thres``, ``classes`` and ``agnostic_nms`` settings.

    Args:
        path: path of the image file to read with ``cv2.imread``.

    Returns:
        Tuple ``(pred, img0, img1, scalex, scaley)``: the NMS output, the
        resized image, the original image, and the factors that map x / y
        coordinates from the resized image back to the original image.

    Raises:
        FileNotFoundError: if the image cannot be read.
    """
    # cv2.imread returns None instead of raising when the file cannot be read.
    img1 = cv2.imread(path)
    if img1 is None:
        raise FileNotFoundError('Could not read image: %s' % path)
    img0 = cv2.resize(img1, (imgsz, imgsz))
    img = letterbox(img0, new_shape=imgsz)[0]
    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
    img = np.ascontiguousarray(img)

    # scaling factors for x and y respective to original input size and chosen imgsz for prediction
    scaley = img1.shape[0] / imgsz
    scalex = img1.shape[1] / imgsz

    img = torch.from_numpy(img).to(device)
    img = img.half() if half else img.float()  # uint8 to fp16/32
    img /= 255.0  # 0 - 255 to 0.0 - 1.0
    if img.ndimension() == 3:
        img = img.unsqueeze(0)  # add the batch dimension

    # Inference (no test-time augmentation); no_grad avoids recording autograd
    # state, which is not needed for prediction.
    with torch.no_grad():
        pred = model(img, augment=False)[0]

    # Apply NMS
    pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic=agnostic_nms)

    return pred, img0, img1, scalex, scaley

## Preprocessing steps

Run this cell once to define the preprocessing functions.

In [None]:
import cv2
import numpy as np

# get grayscale image
def get_grayscale(image):
    """Return a single-channel grayscale version of a BGR image.

    A 2-D (already single-channel) image is returned unchanged, because
    ``cv2.cvtColor`` with ``COLOR_BGR2GRAY`` rejects such input.
    """
    if image.ndim == 2:
        return image
    return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# noise removal - blur
def blur(image, ksize=5):
    """Remove noise from ``image`` with a median filter.

    Args:
        image: image to filter.
        ksize: aperture size handed to ``cv2.medianBlur``; defaults to 5,
            the value that was previously hard-coded.

    Returns:
        The median-filtered image.
    """
    return cv2.medianBlur(image, ksize)
 
# thresholding
def thresholding(image):
    """Binarise ``image`` with mean-based adaptive thresholding."""
    max_value = 255  # value assigned to pixels that pass the threshold
    block_size = 21  # size of the neighbourhood used for the local mean
    offset = 15      # constant subtracted from the local mean
    return cv2.adaptiveThreshold(
        image,
        max_value,
        cv2.ADAPTIVE_THRESH_MEAN_C,
        cv2.THRESH_BINARY,
        block_size,
        offset,
    )

# dilation
def dilate(image):
    """Dilate ``image`` once with a 5x5 all-ones kernel."""
    structuring_element = np.ones((5, 5), dtype=np.uint8)
    dilated = cv2.dilate(image, structuring_element, iterations=1)
    return dilated
    
# erosion
def erode(image):
    """Erode ``image`` once with a 5x5 all-ones kernel."""
    structuring_element = np.ones((5, 5), dtype=np.uint8)
    eroded = cv2.erode(image, structuring_element, iterations=1)
    return eroded

# opening - erosion followed by dilation
def opening(image):
    """Apply a morphological opening (erosion, then dilation) with a 5x5 all-ones kernel."""
    size = (5, 5)
    structuring_element = np.ones(size, dtype=np.uint8)
    opened = cv2.morphologyEx(image, cv2.MORPH_OPEN, structuring_element)
    return opened

# closing - dilation followed by erosion
def closing(image):
    """Apply a morphological closing (dilation, then erosion) with a 1x1 all-ones kernel."""
    # NOTE(review): a 1x1 kernel should leave the image unchanged; the other
    # morphological helpers use 5x5 - confirm that this size is intended.
    size = (1, 1)
    structuring_element = np.ones(size, dtype=np.uint8)
    closed = cv2.morphologyEx(image, cv2.MORPH_CLOSE, structuring_element)
    return closed

# Inference on folder

The text recognized in every signature detected in every image of the directory is appended to the list `texts`.

In [None]:
import os
import tesserocr
from PIL import Image

directory = 'path-to-inference-folder'  # folder with the .jpg / .png images to process

scale = 1  # NOTE(review): not read anywhere in this cell; kept in case another cell uses it
texts = []  # recognised text of every signature crop, across all images

# A single Tesseract API instance (LSTM engine only, page segmentation mode
# SINGLE_BLOCK) is reused for every crop.
with tesserocr.PyTessBaseAPI(oem=tesserocr.OEM.LSTM_ONLY, psm=tesserocr.PSM.SINGLE_BLOCK) as api:
    api.SetVariable('tessedit_char_whitelist', 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+().,')
    api.SetVariable('tessedit_char_blacklist', '!?@#$%&*<>_=/:;\'"')

    # os.listdir returns entries in arbitrary order; sorting makes the order
    # of `texts` reproducible between runs.
    for filename in sorted(os.listdir(directory)):

        if not filename.endswith(('.jpg', '.png')):
            continue

        path = os.path.join(directory, filename)

        pred, img_resized, img_orig, scalex, scaley = detect(path)

        height, width = img_orig.shape[:2]
        # NOTE(review): some YOLOv5 revisions appear to return None instead of
        # an empty tensor when nothing is detected; treat that as "no boxes".
        detections = pred[0] if pred[0] is not None else []

        cutouts = []

        for *xyxy, _conf, _cls in reversed(detections):
            # Map the box from the resized image back onto the original image.
            x1, y1, x2, y2 = (
                int(coord.item() * factor)
                for coord, factor in zip(xyxy, (scalex, scaley, scalex, scaley))
            )
            # Clamp to the image bounds: a negative index would otherwise be
            # interpreted by numpy as counting from the opposite edge.
            x1, x2 = min(max(x1, 0), width), min(max(x2, 0), width)
            y1, y2 = min(max(y1, 0), height), min(max(y2, 0), height)
            crop = img_orig[y1:y2, x1:x2]
            if crop.size:  # skip degenerate boxes; cv2 cannot process empty images
                cutouts.append(crop)

        for c in cutouts:
            # Preprocess the crop, then OCR it.
            gray = get_grayscale(c)
            denoised = blur(gray)
            binary = thresholding(denoised)
            cleaned = closing(opening(binary))
            api.SetImage(Image.fromarray(cleaned))
            texts.append(api.GetUTF8Text().replace('\n', ' '))

        # Show the image that was just processed (previously this displayed the
        # stale global `img` tensor left over from the model-loading cell).
        cv2_imshow(img_resized)