In [3]:
import numpy as np
import matplotlib.pyplot as plt
import cv2
import math
import os

In [4]:
# While searching for thresholding / binarization algorithms expected to cope with images at this noise level (basic, well-known methods were clearly not going to be successful for this challenge), I found the binarization algorithm by Su et al. (http://doi.acm.org/10.1145/1815330.1815351), with code available at: https://gist.github.com/pebbie/2c17620e60c662950b02c4949b3010f2#file-su-py.
# The method filters out the background by estimating the contrast from the local maxima and minima. The algorithm was originally applied to images of historical documents, which makes it an excellent candidate solution. The algorithm has been adapted for use here.


nfns = [
        lambda x: np.roll(x, -1, axis=0),
        lambda x: np.roll(np.roll(x, 1, axis=1), -1, axis=0),
        lambda x: np.roll(x, 1, axis=1),
        lambda x: np.roll(np.roll(x, 1, axis=1), 1, axis=0),
        lambda x: np.roll(x, 1, axis=0),
        lambda x: np.roll(np.roll(x, -1, axis=1), 1, axis=0),
        lambda x: np.roll(x, -1, axis=1),
        lambda x: np.roll(np.roll(x, -1, axis=1), -1, axis=0)
        ]

def localminmax(img, fns):
    mi = img.astype(np.float64)
    ma = img.astype(np.float64)
    for i in range(len(fns)):
        rolled = fns[i](img)
        mi = np.minimum(mi, rolled)
        ma = np.maximum(ma, rolled)
    result = (ma-mi)/(mi+ma+1e-16)
    return result

def numnb(bi, fns):
    nb = bi.astype(np.float64)
    i = np.zeros(bi.shape, nb.dtype)
    i[bi==bi.max()] = 1
    i[bi==bi.min()] = 0
    for fn in fns:
        nb += fn(i)
    return nb

def rescale(r,maxvalue=255):
    mi = r.min()
    return maxvalue*(r-mi)/(r.max()-mi)

def binarize_Su_et_al(img):
    gfn = nfns
    N_MIN = 4

    
    g = img
    I = g.astype(np.float64)


    cimg = localminmax(I, gfn)
    _, ocimg = cv2.threshold(rescale(cimg).astype(g.dtype), 0, 1, cv2.THRESH_BINARY+cv2.THRESH_OTSU)
    E = ocimg.astype(np.float64)


    N_e = numnb(ocimg, gfn)
    nbmask = N_e>0

    E_mean = np.zeros(I.shape, dtype=np.float64)
    for fn in gfn:
        E_mean += fn(I)*fn(E)

    E_mean[nbmask] /= N_e[nbmask]

    E_var = np.zeros(I.shape, dtype=np.float64)
    for fn in gfn:
        tmp = (fn(I)-E_mean)*fn(E)
        E_var += tmp*tmp

    E_var[nbmask] /= N_e[nbmask]
    E_std = np.sqrt(E_var)*.5

    R = np.ones(I.shape)*255
    R[(I<=E_mean+E_std)&(N_e>=N_MIN)] = 0

    return R.astype(np.uint8)




In [5]:
# The solution has three stages.
# The first removes the background from the image (algorithm by Su et al.), and the other two perform the transformations needed for alignment.
# The first alignment step is the horizontal alignment. The idea came from detecting lines with the probabilistic Hough transform. This transform receives the image after three other filters have been applied: a blur filter (cv2.blur), a sharpen filter to highlight edges, and finally a Canny filter.
# Once the lines are detected, one of them is used to check the horizontal alignment between its two end points. For the alignment, warpAffine is applied, rotating the image by 1 degree clockwise or counter-clockwise until the two end points are aligned on the y axis. More specifically, if vertex A is less than vertex B the algorithm rotates the image clockwise, and vice versa, until the two align.
# For the second alignment step, the lines detected by this same process are used again. After the rigid transformation done by warpAffine, a non-rigid transformation is applied to the image through warpPerspective. The idea is that, given two lines aligned in y and detected by the Hough algorithm, the coordinates of their ends should also be aligned in x for a good alignment. Tests showed that a workable, though not optimal, value is 5 pixels of movement for each pixel of difference between the coordinates of the two lines. An example of execution: if the top line is one pixel ahead of the bottom line, the algorithm shifts the top corners 5 pixels to the left and the bottom corners 5 pixels to the right in the final image, and vice versa, until the two lines line up.
# The downside of these transformation steps is that due to inaccurate line detection in some cases, the estimate is affected, making it impossible to achieve a better aligned result.



#Edge enhance
def sharpen_img(img):
    kernel = np.array([[-1,-1,-1,-1,-1],
                    [-1,2,2,2,-1],
                    [-1,2,8,2,-1],
                    [-2,2,2,2,-1],
                    [-1,-1,-1,-1,-1]])/8.0
    result=cv2.filter2D(img,-1,kernel)
    return result

def euclidian_distance(l):
  result = math.sqrt((l[2] - l[0])**2+(l[3] - l[1])**2)
  return result

def execute_rotation(img,angle):
  h, w = img.shape
  center = w // 2, h // 2
  matrix = cv2.getRotationMatrix2D(center, angle, 1)
  rotated = cv2.warpAffine(img, matrix, (w, h), flags=cv2.INTER_NEAREST, borderValue=255)
  return rotated

def execute_transformation(img,aux,top_steps,bottom_steps):
  #Taking the height and width of source and target image
  height_src_img,width_src_img = img.shape
  height_tgt_img,width_tgt_img = aux.shape

  #Taking the corners of the images: top-left, bottom-left, bottom-right, top-right
  array_corners_source_img = np.float32([[0,0],[0,height_src_img],[width_src_img,height_src_img],[width_src_img,0]])
  array_corners_target_img = np.float32([[top_steps,0],[0+bottom_steps,height_tgt_img],[width_tgt_img+bottom_steps,height_tgt_img],[width_tgt_img+top_steps,0]])
     
  # Apply Perspective Transform Algorithm 
  matrix = cv2.getPerspectiveTransform(array_corners_source_img, array_corners_target_img) 
  result = cv2.warpPerspective(img, matrix, (width_tgt_img,height_tgt_img),flags=cv2.INTER_NEAREST, borderValue=255)
  return result

def evaluate_warpingAffine(img):
  ksize = (10, 10) 

  final_angle = 0
  angle = 0
  execute = True

  aux = cv2.blur(img, ksize)
  aux = sharpen_img(aux)


  edges = cv2.Canny(aux, 50, 200, None, 3)
  hough_lines = cv2.HoughLinesP(edges, 1, np.pi / 180, 50, None, 50, 10)


  hough_distances = []

  if hough_lines is not None:
      for i in range(0, len(hough_lines)):
          hough_distances.append((euclidian_distance(hough_lines[i][0]),i))


  hough_distances.sort()
  hough_distances.reverse()

  l = hough_lines[hough_distances[0][1]][0]

  while(execute):

    hough_distances = []
    #Verify if aligned on horizontal
    if l[1] > l[3]:
      angle = -1
      final_angle += angle
    elif l[1] < l[3]:
      angle = 1
      final_angle += angle
    else:
      execute = False
    edges = execute_rotation(edges,angle)
    hough_lines = cv2.HoughLinesP(edges, 1, np.pi / 180, 50, None, 50, 10)
    if hough_lines is not None:
      for i in range(0, len(hough_lines)):
          hough_distances.append((euclidian_distance(hough_lines[i][0]),i))
    hough_distances.sort()
    hough_distances.reverse()
    l = hough_lines[hough_distances[0][1]][0]

  return execute_rotation(img,final_angle),hough_distances,hough_lines,edges

def evaluate_warpingPerspective(img,hough_distances,hough_lines,edges):
   

  final_top_steps = 0
  final_bottom_steps = 0
  
  top_steps = 0
  bottom_steps = 0
  execute = True
  aux = np.zeros((edges.shape[0]+80,edges.shape[1]+80), np.uint8)

  l = hough_lines[hough_distances[0][1]][0]
  l1 = hough_lines[hough_distances[1][1]][0]

  while(execute):
    hough_distances = []
    #Verify if aligned on Vertical and horizontal
    #also, condition execute transformation just if distance in from A to B in x is greater than 3 
    if (l[0] > l1[0]) and ((l[0] - l1[0]) > 3) and (l[1] < l[3]):
      top_steps  += 5
      bottom_steps -= 5
      final_top_steps = top_steps
      final_bottom_steps = bottom_steps
    elif (l[0] < l1[0]) and  ((l1[0] - l[0]) > 3) and (l[1] > l[3]):
      top_steps -= 5
      bottom_steps += 5
      final_top_steps = top_steps
      final_bottom_steps = bottom_steps
    else:
      execute = False
    
    edges = execute_transformation(edges,aux,top_steps,bottom_steps) 
    hough_lines = cv2.HoughLinesP(edges, 1, np.pi / 180, 50, None, 50, 10)
    
    if hough_lines is not None:
      for i in range(0, len(hough_lines)):
          hough_distances.append((euclidian_distance(hough_lines[i][0]),i))
    
    hough_distances.sort()
    hough_distances.reverse()
    
    l = hough_lines[hough_distances[0][1]][0]
    l1 = hough_lines[hough_distances[1][1]][0]


  return execute_transformation(img,aux,final_top_steps,final_bottom_steps)
  
def execute_ALL(image_directory):

  pathImages = os.listdir(image_directory)
  pathImages.sort()

  for index in range(len(pathImages)):
    img = cv2.imread(image_directory+"/"+pathImages[index])
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    aux = binarize_Su_et_al(gray)

    rotated_img,hough_distances,hough_lines,edges = evaluate_warpingAffine(aux)
    rotated_img = evaluate_warpingPerspective(rotated_img,hough_distances,hough_lines,edges)
    


    name, ext = os.path.splitext(pathImages[index])
    cv2.imwrite("denoised_data/"+name+"_denoised_"+ext, rotated_img)




In [6]:
# Run the whole pipeline on every image found in the "noisy_data" directory.
execute_ALL("noisy_data")