# Python OpenCV

## Required modules

In [54]:
# System dependency: the Tesseract OCR engine used through pytesseract.
# `apt-get ... -y` keeps the install non-interactive so Run All cannot stall or
# abort on a confirmation prompt.
!sudo apt-get install -y tesseract-ocr

Reading package lists... Done
Building dependency tree       
Reading state information... Done
tesseract-ocr is already the newest version (4.00~git2288-10f4998a-2).
0 upgraded, 0 newly installed, 0 to remove and 14 not upgraded.


In [55]:
# Python dependencies. `%pip` (rather than `!pip`) installs into the environment
# of the running kernel.
%pip install opencv-python pytesseract



In [204]:
# matcher to get similarity
from difflib import SequenceMatcher
# results
from pprint import pprint

# image processing opencv
import cv2
# array maths used by the binarization helpers below
import numpy as np
import pytesseract
from google.colab.patches import cv2_imshow

## Image example importation

In [57]:
!curl -L -o "img1.jpg" "https://pbs.twimg.com/media/CZl5VNKWQAAQUfO?format=jpg&name=large"

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  368k  100  368k    0     0   428k      0 --:--:-- --:--:-- --:--:--  428k


In [58]:
!curl -L -o "img2.jpg" "https://papagayonews.com/wp-content/uploads/2019/11/DzoMHdAW0AM4nTB.pnglarge-720%C3%97463-Google-Chrome.jpg"

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 62079  100 62079    0     0   230k      0 --:--:-- --:--:-- --:--:--  230k


In [59]:
!curl -L -o "img3.jpg" "https://scontent.fscl11-2.fna.fbcdn.net/v/t1.0-9/41292282_1812427158834835_5101710936150900736_o.jpg?_nc_cat=107&ccb=2&_nc_sid=110474&_nc_ohc=-ERJ_aU6KqsAX8XR7Hz&_nc_ht=scontent.fscl11-2.fna&oh=ca9ab9c1509a5396447369092ac30c3a&oe=5FFE0294"

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  203k  100  203k    0     0   215k      0 --:--:-- --:--:-- --:--:--  215k


In [170]:
!curl -L -o "img4.jpg" "https://upload.wikimedia.org/wikipedia/commons/f/fe/El_ejemplo_de_Cedula_identidad_Chile_2013.jpg"

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  196k  100  196k    0     0   488k      0 --:--:-- --:--:-- --:--:--  487k


In [162]:
!curl -L -o "img5.jpg" "https://pbs.twimg.com/media/Ce9vyi8WQAAVD5s?format=jpg&name=large"

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  336k  100  336k    0     0  1741k      0 --:--:-- --:--:-- --:--:-- 1741k


In [163]:
!curl -L -o "img6.jpg" "https://ligaseis.leagueapp.cl/logos/89714b760d43988d85bb04aec3801ceb38654cc7"

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  101k  100  101k    0     0   182k      0 --:--:-- --:--:-- --:--:--  182k


## Function definitions

### Image binarization

In [60]:
def similar(a, b):
    """Return the difflib similarity ratio between `a` and `b`.

    The ratio is a float in [0, 1]; 1.0 means the two sequences are identical.
    """
    matcher = SequenceMatcher(a=a, b=b)
    return matcher.ratio()

def similarity_tester(expected, processed, threshold=0.8):
    """Find which expected strings show up (approximately) in OCR output.

    expected: iterable of str -> the strings we hope to find
    processed: str -> raw OCR text, one candidate per line
    threshold: float -> minimum `similar` ratio for a pair to count as a match

    Returns a list of `(expected_string, ocr_line, ratio)` tuples, one for every
    pair whose ratio reaches `threshold`. Pairs are not de-duplicated, so one
    expected string can appear several times if several lines resemble it.
    """
    # Drop empty lines, then partially clean what is left: remove apostrophes
    # and surrounding whitespace.
    cleaned_lines = [
        line.replace("'", '').strip()
        for line in processed.split('\n')
        if line != ''
    ]

    matches = []
    # Every line is compared with every expected string because the order of
    # the received text is unknown.
    for ocr_line in cleaned_lines:
        for wanted in expected:
            score = similar(wanted, ocr_line)
            if score >= threshold:
                matches.append((wanted, ocr_line, score))

    return matches

In [156]:
def process_black_white(img_name, threshold=100, contrast=1, brightness=0, show=False):
    '''
    Binarize an image with one global threshold and run OCR on the result.

    The image is read with OpenCV, contrast/brightness adjusted, converted to
    grayscale and thresholded; the binary image is then handed to Tesseract.

    @params

    img_name: str -> path of the image file to read
    threshold: int (0-255) -> grayscale cut-off; pixels brighter than this
        become white (255), all others become black (0)
    contrast: float (suggested 0-3) -> gain applied to every pixel (1 is no change)
    brightness: float (suggested 0-100) -> offset added to every pixel (0 is no change)
    show: bool -> also print the perceived text and display the binary image


    @returns

    str: perceived read text


    @raises

    FileNotFoundError: when OpenCV cannot read `img_name`
    '''
    img = cv2.imread(img_name)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with a clear message rather than deep inside OpenCV.
        raise FileNotFoundError(
            'could not read image {!r} (missing file or undecodable image)'.format(img_name))

    img = cv2.convertScaleAbs(img, alpha=contrast, beta=brightness)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    _, new_img = cv2.threshold(img, threshold, 255, cv2.THRESH_BINARY)

    # OCR is the expensive step: run it once and reuse the text for both the
    # optional preview and the return value.
    text = pytesseract.image_to_string(new_img)

    if show:
        print(text)
        cv2_imshow(new_img)

    return text

### Robust Locally-Adaptive Soft Binarization

Ref: https://stackoverflow.com/a/57103789

In [112]:
def adjust_gamma(image, gamma=1.2):
    """Apply gamma correction to an image through a 256-entry lookup table.

    image: array accepted by `cv2.LUT` -> the image to correct
    gamma: float > 0 -> each level `i` is mapped to `255 * (i / 255) ** (1 / gamma)`;
        1 leaves the levels essentially unchanged

    Returns the corrected image produced by `cv2.LUT`.

    Raises ValueError when `gamma` is not strictly positive: zero would divide
    by zero and a negative value would not yield a usable table.
    """
    if not gamma > 0:
        raise ValueError('gamma must be > 0, got {!r}'.format(gamma))

    # build a lookup table mapping the pixel values [0, 255] to
    # their adjusted gamma values (the uint8 cast drops the fractional part)
    invGamma = 1.0 / gamma
    table = np.array([((i / 255.0) ** invGamma) * 255
        for i in np.arange(0, 256)]).astype("uint8")

    # apply gamma correction using the lookup table
    return cv2.LUT(image, table)

In [139]:
# These are probably the only important parameters in the
# whole pipeline (steps 0 through 3).
# NOTE(review): neither constant is referenced by the code visible in this
# notebook. `process_image` and `process_locall_adjusted` repeat the same
# values (40 / 25) as literal defaults, and the results cells pass 80 / 50.
BLOCK_SIZE = 40
DELTA = 25

# Do the necessary noise cleaning and other stuffs.
# I just do a simple blurring here but you can optionally
# add more stuffs.
def preprocess(image):
    """Median-blur the image (aperture 3) and return its inverse, `255 - blurred`."""
    blurred = cv2.medianBlur(image, 3)
    inverted = 255 - blurred
    return inverted

# Again, this step is fully optional and you can even keep
# the body empty. I just did some opening. The algorithm is
# pretty robust, so this stuff won't affect much.
def postprocess(image):
    """Return the morphological opening of `image` with a 3x3 all-ones kernel."""
    opening_kernel = np.ones((3, 3), np.uint8)
    return cv2.morphologyEx(image, cv2.MORPH_OPEN, opening_kernel)

# Just a helper function that generates box coordinates
def get_block_index(image_shape, yx, block_size):
    """Build the fancy index of the window around `yx`, clipped to the image.

    image_shape: sequence -> (height, width, ...) of the image being indexed
    yx: (row, col) -> centre of the window
    block_size: int -> the window spans `block_size` pixels on each side of the
        centre, i.e. rows [row - block_size, row + block_size) and likewise
        for columns, limited to the image bounds

    Returns a 2-tuple `(row_grid, col_grid)` of integer arrays suitable for
    `image[idx]`. Because `np.meshgrid` uses 'xy' indexing by default, the grids
    have shape (n_cols, n_rows), so `image[idx]` is the window transposed;
    reading and writing with the same index stays consistent.

    A tuple is returned on purpose: NumPy treats a tuple of index arrays as one
    index per axis, whereas a list of arrays is deprecated as a multidimensional
    index and is no longer interpreted that way by recent NumPy versions.
    """
    row_start = max(0, yx[0] - block_size)
    row_stop = min(image_shape[0], yx[0] + block_size)
    col_start = max(0, yx[1] - block_size)
    col_stop = min(image_shape[1], yx[1] + block_size)

    y = np.arange(row_start, row_stop)
    x = np.arange(col_start, col_stop)
    return tuple(np.meshgrid(y, x))

# Here is where the trick begins. We perform binarization from the 
# median value locally (the img_in is actually a slice of the image). 
# Here, following assumptions are held:
#   1.  The majority of pixels in the slice is background
#   2.  The median value of the intensity histogram probably
#       belongs to the background. We allow a soft margin DELTA
#       to account for any irregularities.
#   3.  We need to keep everything other than the background.
#
# We also do simple morphological operations here. It was just
# something that I empirically found to be "useful", but I assume
# this is pretty robust across different datasets.
def adaptive_median_threshold(img_in, delta):
    """Binarize one image block relative to its own median intensity.

    Pixels whose value minus the block median is below `delta` are treated as
    background and come out as 255. The remaining pixels are foreground (0);
    the foreground is grown by two dilation passes with a 3x3 kernel.
    """
    near_background = (img_in - np.median(img_in)) < delta

    # Build the foreground map directly (255 = foreground) so it can be
    # dilated, then flip it back to the "background is white" convention.
    foreground = np.full_like(img_in, 255)
    foreground[near_background] = 0

    grow_kernel = np.ones((3, 3), np.uint8)
    grown = cv2.dilate(foreground, grow_kernel, iterations=2)
    return 255 - grown

# This function just divides the image into local regions (blocks),
# and applies the `adaptive_median_threshold(...)` function to each
# of the regions.
def block_image_process(image, block_size, delta):
    """Threshold `image` window by window with `adaptive_median_threshold`.

    Window centres lie on a grid with step `block_size`; each window comes from
    `get_block_index`, so neighbouring windows overlap and a later window
    overwrites the overlap written by an earlier one.
    """
    n_rows, n_cols = image.shape[0], image.shape[1]
    out_image = np.zeros_like(image)

    centres = [
        (row, col)
        for row in range(0, n_rows, block_size)
        for col in range(0, n_cols, block_size)
    ]
    for centre in centres:
        window = get_block_index(image.shape, centre, block_size)
        out_image[window] = adaptive_median_threshold(image[window], delta)

    return out_image

# This function invokes the whole pipeline of Step 2.
def process_image(img, block_size=40, delta=25):
    """Run the mask pipeline on a BGR image.

    Steps: grayscale conversion -> `preprocess` -> `block_image_process` with
    the given `block_size` / `delta` -> `postprocess`. Returns the result.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    cleaned = preprocess(gray)
    thresholded = block_image_process(cleaned, block_size, delta)
    return postprocess(thresholded)

In [140]:
# This is the function used for composing
def sigmoid(x, orig, rad):
    """Logistic curve centred at `orig`: returns e^z / (e^z + 1) with z = 5 * (x - orig) / rad.

    Works element-wise on arrays; the value is 0.5 where `x == orig`.
    """
    exponent = (x - orig) * 5 / rad
    grown = np.exp(exponent)
    return grown / (grown + 1.)

# Here, we combine the local blocks. A bit lengthy, so please
# follow the local comments.
def combine_block(img_in, mask):
    """Soft-binarize one grayscale block using its binary mask.

    img_in: 2-D array -> grayscale block (passed to Otsu thresholding, so it is
        expected to be 8-bit)
    mask: array of the same shape -> 255 marks background, 0 marks foreground

    Returns an array like `img_in`: background pixels are 255, foreground
    pixels get a sigmoid-shaped intensity in [0, 255]. Pixels whose mask value
    is neither 0 nor 255 stay 0.
    """
    # First, pre-fill the masked region of img_out to white (i.e. background).
    img_out = np.zeros_like(img_in)
    img_out[mask == 255] = 255

    # Foreground (letters written with ink) positions. If there are none the
    # block is pure background and we are done.
    idx = np.where(mask == 0)
    if idx[0].size == 0:
        return img_out

    # Work only on the foreground pixels: keep the original 8-bit values for
    # Otsu and a float copy for the arithmetic.
    fg_values = img_in[idx]
    fg_float = fg_values.astype(np.float32)

    # Local intensity range of the foreground pixels.
    lo = fg_float.min()
    hi = fg_float.max()
    v = fg_float - lo
    r = hi - lo

    # Otsu gives a rough split of these pixels into a dark and a bright class.
    _, th3 = cv2.threshold(fg_values, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # The darkest pixel of the bright class is the transition point. If Otsu
    # put nothing in the bright class, taking the minimum of an empty selection
    # would raise, so fall back to the brightest foreground value.
    bright = fg_values[th3.ravel() == 255]
    bound_value = bright.min() if bright.size else hi

    # Normalize to the local range (1e-5 guards against a zero range) and blend
    # with a sigmoid centred slightly above the transition point.
    bound_value = (bound_value - lo) / (r + 1e-5)
    f = (v / (r + 1e-5))
    f = sigmoid(f, bound_value + 0.05, 0.2)

    # Finally, re-normalize the result to the range [0..255]
    img_out[idx] = (255. * f).astype(np.uint8)
    return img_out

# We do the combination routine on local blocks, so that the scaling
# parameters of Sigmoid function can be adjusted to local setting
def combine_block_image_process(image, mask, block_size):
    """Apply `combine_block` window by window over `image` and `mask`.

    Window centres lie on a grid with step `block_size`; each window comes from
    `get_block_index`, so neighbouring windows overlap and a later window
    overwrites the overlap written by an earlier one.
    """
    n_rows, n_cols = image.shape[0], image.shape[1]
    out_image = np.zeros_like(image)

    centres = [
        (row, col)
        for row in range(0, n_rows, block_size)
        for col in range(0, n_cols, block_size)
    ]
    for centre in centres:
        window = get_block_index(image.shape, centre, block_size)
        out_image[window] = combine_block(image[window], mask[window])

    return out_image

# Postprocessing. The result should be robust even without it, but I
# recommend you play around a bit and find what works best for your data.
# I just left it blank.
def combine_postprocess(image):
    """Placeholder post-processing hook: returns `image` itself, untouched."""
    untouched = image
    return untouched

# The main function of this section. Executes the whole pipeline.
def combine_process(img, mask, block_size=20):
    """Run the combination stage: soft-binarize `img` guided by `mask`.

    img: BGR image -> converted to grayscale before processing
    mask: binary mask for the same image (255 = background, 0 = foreground)
    block_size: int -> grid step / window half-size handed to
        `combine_block_image_process`; defaults to 20, the value that used to
        be hard-coded here

    Returns the image produced by `combine_block_image_process` after it has
    passed through `combine_postprocess`.
    """
    image_in = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    image_out = combine_block_image_process(image_in, mask, block_size)
    image_out = combine_postprocess(image_out)
    return image_out

In [141]:
# pipelining previous code to return the final image with text

def process_locall_adjusted(img_name, gamma=1, block_size=40, delta=25, show=False):
    '''
    Binarize an image with the locally-adaptive pipeline and run OCR on it.

    A mask is built from a gamma-corrected copy of the image (`adjust_gamma`
    followed by `process_image`); `combine_process` then uses that mask on the
    original image to produce the picture handed to Tesseract.

    @params

    img_name: str -> path of the image file to read
    gamma: float -> gamma passed to `adjust_gamma` (affects the mask only)
    block_size: int -> block size passed to `process_image`
    delta: int -> soft margin passed to `process_image`
    show: bool -> also print the perceived text and display the processed image


    @returns

    str: perceived read text


    @raises

    FileNotFoundError: when OpenCV cannot read `img_name`
    '''
    img = cv2.imread(img_name)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with a clear message rather than deep inside the pipeline.
        raise FileNotFoundError(
            'could not read image {!r} (missing file or undecodable image)'.format(img_name))

    mask = adjust_gamma(img, gamma=gamma)
    mask = process_image(mask, block_size=block_size, delta=delta)
    new_img = combine_process(img, mask)

    # OCR is the expensive step: run it once and reuse the text for both the
    # optional preview and the return value.
    text = pytesseract.image_to_string(new_img)

    if show:
        print(text)
        cv2_imshow(new_img)

    return text

## Results

**Disclaimer**: all images were found publicly through Google Images search and are referenced at the beginning of this notebook.

In [215]:
# Ground-truth text fields for the sample images: the results cells compare
# the OCR output of 'img{i}.jpg' against expected[i - 1], so the order of the
# inner lists must follow the download order of the images.
expected = [
  ['RODRIGUEZ', 'ZEPEDA', 'CLAUDIO ANTONIO', 'CHILENA M', '13 OCT 1965 102.814.050', '05 AGO 2014 13 OCT 2019', 'RUN 9.932.656-5'],
  ['RODRIGUEZ', 'APONTE', 'MARIA TERESA', 'EXTRANJERO', 'VEN F', '15 MAR 1991', '06 SEPT 2018 31 AGO 2019'],
  ['SANHUEZA', 'HARRIS', 'OLGA ESTER', 'CHILENA M', '25 SEPT 1964', '23 JUL 2014'],
  ['FRERDEZ', 'VIDAL', 'MARCELA CAROLINA', 'CHILENA F', '21 FEB 1982 100000001', '1 SEP 2013 10 AGO 2023', 'RUN 12.749.625-K'],
  ['MALDONADO', 'JEREZ', 'JUANN DANIEL', '15 MAR 1948 102.773.350', '31 JUL 2014 15 MAR 2020', 'RUN 5.632.605-7'],
  ['LEIVA', 'SEREY', 'RONY ORLANDO', 'CHILENA M', '31 DIC 1997 511.408.104', '13 MAR 2017 31 DIC 2020', 'RUN 19.711.416-9']
]

### Simple B&W

In [230]:
import warnings
# NOTE: added to hide a Colab warning, but this call silences *every* warning
# for the rest of the session.
warnings.filterwarnings(action='ignore')


def _detection_rate(fields, ocr_text):
    """Percentage of `fields` that have at least one sufficiently similar OCR line.

    `similarity_tester` returns one tuple per (field, OCR line) pair above its
    threshold, so a field matched by several lines appears several times.
    Counting each field at most once keeps the rate within 0-100% and stops a
    repeated match from hiding a field that was missed.
    """
    found = {field for field, _, _ in similarity_tester(fields, ocr_text)}
    hits = sum(1 for field in fields if field in found)
    return (hits / len(fields)) * 100


print('Simple B&W\n')
for i, fields in enumerate(expected, start=1):
    text = process_black_white('img{}.jpg'.format(i), threshold=100, show=False)
    print('detected {}%'.format(_detection_rate(fields, text)))

print('\nAdaptive\n')
for i, fields in enumerate(expected, start=1):
    text = process_locall_adjusted('img{}.jpg'.format(i), block_size=80, delta=50, show=False)
    print('detected {}%'.format(_detection_rate(fields, text)))

Simple B&W

detected 100.0%
detected 100.0%
detected 100.0%
detected 42.857142857142854%
detected 33.33333333333333%
detected 100.0%

Adaptive

detected 100.0%
detected 100.0%
detected 83.33333333333334%
detected 100.0%
detected 100.0%
detected 100.0%


## Performance

In [231]:
# Time both pipelines end to end on every sample image. Each timed call reads
# the file with cv2.imread and runs Tesseract, so the figures include image
# loading and OCR, not only the binarization step.
print('Simple B&W\n')
for i in range(1, 7):
    %timeit process_black_white('img{}.jpg'.format(i), threshold=100, show=False)

# Same measurement for the locally-adaptive pipeline, with the parameters used
# in the results cell (block_size=80, delta=50).
print('\nAdaptive\n')
for i in range(1, 7):
    %timeit process_locall_adjusted('img{}.jpg'.format(i), block_size=80, delta=50, show=False)

Simple B&W

1 loop, best of 3: 1.24 s per loop
1 loop, best of 3: 955 ms per loop
1 loop, best of 3: 927 ms per loop
1 loop, best of 3: 588 ms per loop
1 loop, best of 3: 1 s per loop
1 loop, best of 3: 1.34 s per loop

Adaptive

1 loop, best of 3: 3.08 s per loop
1 loop, best of 3: 1.08 s per loop
1 loop, best of 3: 1.96 s per loop
1 loop, best of 3: 924 ms per loop
1 loop, best of 3: 3.11 s per loop
1 loop, best of 3: 1.82 s per loop
