Input Image

In [1]:
import cv2
import numpy as np
import math

Image Preprocessing

In [2]:
def image_preprocessing(img, enhancements):
    """Denoise, enhance and binarise an image ahead of edge detection.

    Parameters
    ----------
    img : image array
        Colour image in BGR channel order (it is converted with
        ``cv2.COLOR_BGR2GRAY``).
    enhancements : int
        1 - only contrast enhancement (histogram equalisation),
        2 - only edge enhancement (unsharp masking),
        any other value (3 by convention) - contrast enhancement followed by
        edge enhancement.

    Returns
    -------
    The binary (0 / 255) image produced by Otsu thresholding of the enhanced
    grayscale image.
    """
    # image preprocessing: light denoising, grayscale conversion, box blur.
    # The third positional argument of GaussianBlur is sigmaX, so the border
    # mode must be passed by keyword; sigmaX=0 lets OpenCV derive the sigma
    # from the kernel size.
    gaussian = cv2.GaussianBlur(img, (3, 3), 0, borderType=cv2.BORDER_DEFAULT)
    gray = cv2.cvtColor(gaussian, cv2.COLOR_BGR2GRAY)
    avg = cv2.blur(gray, (3, 3))

    # enhancements: 1 - only contrast enhancement, 2 - only edge enhancement,
    # 3 - both contrast and edge enhancement
    if enhancements == 1:
        # contrast enhancement
        sharp = cv2.equalizeHist(avg)
    else:
        # mode 2 sharpens the blurred image directly; every other mode
        # equalises the histogram first
        base = avg if enhancements == 2 else cv2.equalizeHist(avg)

        # edge enhancement (unsharp mask): 2 * base - blurred(base)
        gauss = cv2.GaussianBlur(base, (7, 7), 0)
        sharp = cv2.addWeighted(base, 2, gauss, -1, 0)

    # otsu binarisation (the threshold value chosen by Otsu is not needed)
    _, image_result = cv2.threshold(sharp, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    return image_result

Edge Detection

In [3]:
def edge_detection(img):
    """Return a Canny edge map of ``img`` with small gaps closed.

    The edge map is computed with hysteresis thresholds 100 / 200 and then
    passed through a morphological closing with a 5x5 all-ones kernel.
    """
    # (i) edge detection
    canny_map = cv2.Canny(image=img, threshold1=100, threshold2=200)

    # (ii) closing joins gaps between neighbouring edge fragments
    closing_kernel = np.ones((5, 5), dtype=np.uint8)
    return cv2.morphologyEx(canny_map, cv2.MORPH_CLOSE, closing_kernel)

Contour Detection from Edge

In [4]:
def contour_detection(img, edges):
    """Locate the largest contour in ``edges`` and return four corner points.

    The contour with the largest area is approximated by a polygon
    (tolerance: 1% of its perimeter).

    * Case 1 - the polygon has exactly four vertices: the contour is drawn on
      ``img`` and the four vertices are returned.
    * Case 2 - any other vertex count: the axis-aligned bounding rectangle of
      the contour is drawn on ``img`` and its four corners are returned.

    In both cases each returned corner is marked on ``img`` with a circle of
    radius 5. ``img`` is modified in place.

    Parameters
    ----------
    img : image array that receives the annotations.
    edges : single-channel edge map passed to ``cv2.findContours``.

    Returns
    -------
    numpy array of shape (4, 2) holding ``[x, y]`` corner coordinates
    (not yet in any guaranteed order).

    Raises
    ------
    ValueError
        If no contour is found in ``edges``.
    """
    # contour detection
    contours, _ = cv2.findContours(edges, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    if len(contours) == 0:
        # without this guard max() fails with an opaque "empty sequence" error
        raise ValueError("contour_detection: no contours found in the edge map")

    c = max(contours, key=cv2.contourArea)
    approx = cv2.approxPolyDP(c, 0.01 * cv2.arcLength(c, True), True)

    if len(approx) == 4:
        # Case 1: Rectangular Object - draw contour of object
        cv2.drawContours(img, [c], -1, (0, 255, 0), 1)
        corners = approx.reshape(-1, 2)
    else:
        # Case 2: Irregularly Shaped Object - draw its bounding rectangle
        (x, y, w, h) = cv2.boundingRect(c)
        cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 1)
        corners = np.array([
            [x, y],
            [x, y + h],
            [x + w, y],
            [x + w, y + h],
        ])

    # circle corners; plain int tuples are accepted as points by every
    # OpenCV binding version, unlike raw ndarray rows
    for corner_x, corner_y in corners:
        cv2.circle(img, (int(corner_x), int(corner_y)), 5, (0, 0, 255), 1)

    return corners

Floor Detection

In [5]:
def floor_detection(img, coords):
    """Estimate the floor region of ``img`` and return its bounding box.

    The most frequent colour among the pixels lying outside the axis-aligned
    bounding box of ``coords`` is assumed to be the floor colour. Every pixel
    of ``img`` within +/-10 of that colour (per channel) is masked, and the
    union of the bounding rectangles of the mask's external contours is taken
    as the floor area. That rectangle is drawn on ``img`` in place with colour
    (255, 0, 0) and thickness 2.

    Parameters
    ----------
    img : 3-dimensional image array (rows, columns, channels).
    coords : iterable of ``[x, y]`` points describing the detected object.

    Returns
    -------
    tuple ``(min_x, min_y, max_x, max_y)`` of the floor rectangle.

    Raises
    ------
    ValueError
        If the object's bounding box covers the whole image, so no background
        pixel is left to sample the floor colour from.
    """
    height, width, _ = img.shape

    # bounding box of the detected object, clamped to the image
    x_coords = [int(item[0]) for item in coords]
    y_coords = [int(item[1]) for item in coords]
    x_start = min(max(min(x_coords), 0), width)
    x_stop = min(max(max(x_coords) + 1, 0), width)
    y_start = min(max(min(y_coords), 0), height)
    y_stop = min(max(max(y_coords) + 1, 0), height)

    # get pixels that do not belong to the detected object; a boolean mask
    # replaces the per-pixel Python loop
    outside = np.ones((height, width), dtype=bool)
    outside[y_start:y_stop, x_start:x_stop] = False
    bg = img[outside]
    if bg.shape[0] == 0:
        raise ValueError("floor_detection: object covers the whole image, no background pixels left")

    # get the number of pixels for each pixel value to determine the colour
    # with the biggest area in the image, which is assumed to be the floor
    unique, counts = np.unique(bg, axis=0, return_counts=True)
    floor_colour = unique[np.argmax(counts)]

    # construct a mask for the floor; for integer images the +/-10 tolerance
    # is computed in a wider type and clipped, because e.g. uint8 250 + 10
    # would otherwise wrap around to 4
    if np.issubdtype(img.dtype, np.integer):
        limits = np.iinfo(img.dtype)
        wide = floor_colour.astype(np.int64)
        lower = np.clip(wide - 10, limits.min, limits.max).astype(img.dtype)
        upper = np.clip(wide + 10, limits.min, limits.max).astype(img.dtype)
    else:
        lower = floor_colour - 10
        upper = floor_colour + 10

    mask = cv2.inRange(img, lower, upper)

    # find contours of the floor
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # union of the contours' bounding boxes, assumed to be the floor area
    min_x, min_y = width, height
    max_x = max_y = 0

    for contour in contours:
        (x, y, w, h) = cv2.boundingRect(contour)
        min_x, max_x = min(x, min_x), max(x + w, max_x)
        min_y, max_y = min(y, min_y), max(y + h, max_y)

    cv2.rectangle(img, (min_x, min_y), (max_x, max_y), (255, 0, 0), 2)

    return (min_x, min_y, max_x, max_y)

Determine Input Image View

In [6]:
def image_view(floor_coords, img):
    """Classify the camera view from how much of the image the floor box covers.

    ``floor_coords`` is ``(min_x, min_y, max_x, max_y)`` and ``img`` is a
    3-dimensional image array. Returns 1 (top down view) when the floor box
    covers more than 90% of the image area, otherwise 2 (side view).
    """
    left, top, right, bottom = floor_coords
    height, width, _ = img.shape

    floor_area = (right - left) * (bottom - top)
    coverage_pct = (floor_area / (height * width)) * 100

    # img_view: 1 - top down, 2 - side view
    return 1 if coverage_pct > 90 else 2

Calculate Transform Coordinates

In [7]:
# rearrange the order of the coordinates to [top left, top right, bottom right, bottom left]
def order_points(pts):
    """Return four points of ``pts`` ordered TL, TR, BR, BL as float32 (4, 2).

    ``pts`` is a 2-D numpy array of ``[x, y]`` rows. The top-left point has
    the smallest ``x + y`` and the bottom-right the largest; the top-right
    point has the smallest ``y - x`` and the bottom-left the largest.
    """
    coord_sum = pts.sum(axis=1)
    coord_diff = np.diff(pts, axis=1)

    # row index in `pts` for each output slot: TL, TR, BR, BL
    picks = (
        np.argmin(coord_sum),
        np.argmin(coord_diff),
        np.argmax(coord_sum),
        np.argmax(coord_diff),
    )

    ordered = np.zeros((4, 2), dtype="float32")
    for slot, row in enumerate(picks):
        ordered[slot] = pts[row]

    return ordered

In [8]:
def _rotate_about(pivot, point, angle_deg):
    """Rotate ``point`` about ``pivot`` by ``angle_deg`` degrees; return ``[x, y]``.

    Uses x' = px + dx*cos + dy*sin and y' = py - dx*sin + dy*cos, where
    (dx, dy) = point - pivot.
    """
    theta = angle_deg * math.pi / 180
    cos_t = math.cos(theta)
    sin_t = math.sin(theta)
    dx = point[0] - pivot[0]
    dy = point[1] - pivot[1]
    return [
        pivot[0] + (dx * cos_t) + (dy * sin_t),
        pivot[1] - (dx * sin_t) + (dy * cos_t),
    ]


# top_axis: 1 - top, 2 - bottom; side_axis: 1 - left, 2 - right
def output_coords(in_coords, top_angle, top_axis, side_angle, side_axis):
    """Compute target corner coordinates by rotating two corners about the other two.

    The input corners are first ordered as top left, top right, bottom right,
    bottom left. Then, in this order:

    1. If ``side_angle`` is non-zero, the corners of one vertical side are
       rotated by ``side_angle`` degrees about the corners of the opposite
       side: ``side_axis == 1`` keeps the left corners fixed and moves the
       right ones, any other value keeps the right corners fixed.
    2. If ``top_angle`` is non-zero, the same is done vertically using the
       already updated corners: ``top_axis == 1`` keeps the top corners fixed
       and moves the bottom ones, any other value keeps the bottom corners
       fixed.

    Parameters
    ----------
    in_coords : four ``[x, y]`` points (anything ``np.array`` accepts).
    top_angle, side_angle : rotation angles in degrees; 0 disables the step.
    top_axis : 1 - top, otherwise bottom.
    side_axis : 1 - left, otherwise right.

    Returns
    -------
    ``[[top_left, top_right, bottom_right, bottom_left]]`` - a list holding
    one list of four points. Unmoved points are rows of the ordered array,
    moved points are two-element lists.
    """
    top_left, top_right, bottom_right, bottom_left = order_points(np.array(in_coords))

    # rotation of the image with respect to the specified axis / side, rotates the horizontal view
    if side_angle != 0:
        if side_axis == 1:
            top_right = _rotate_about(top_left, top_right, side_angle)
            bottom_right = _rotate_about(bottom_left, bottom_right, side_angle)
        else:
            top_left = _rotate_about(top_right, top_left, side_angle)
            bottom_left = _rotate_about(bottom_right, bottom_left, side_angle)

    # rotation of the image with respect to the specified axis / side, rotates the vertical view
    if top_angle != 0:
        if top_axis == 1:
            bottom_left = _rotate_about(top_left, bottom_left, top_angle)
            bottom_right = _rotate_about(top_right, bottom_right, top_angle)
        else:
            top_left = _rotate_about(bottom_left, top_left, top_angle)
            top_right = _rotate_about(bottom_right, top_right, top_angle)

    return [[top_left, top_right, bottom_right, bottom_left]]

In [9]:
# get coordinates to unwarp image into centered view
def dest_coords(shape):
    """Return the corners of an image of the given ``shape`` as float32 (4, 2).

    ``shape`` is indexed as ``(height, width, ...)``. The corners are ordered
    top left, top right, bottom right, bottom left, i.e.
    ``[0, 0], [width, 0], [width, height], [0, height]``.
    """
    height, width = shape[0], shape[1]
    return np.array(
        [
            [0, 0],
            [width, 0],
            [width, height],
            [0, height],
        ],
        dtype="float32",
    )

Perspective Transform

In [20]:
# get perspective transformed image according to defined angles
# int_flag: 0 - nearest, 1 - linear, 2 - cubic
def perspectiveTransform(img, in_coords, out_coords, int_flag):
    """Warp ``img`` with the homography defined by two sets of four points.

    The matrix returned by ``cv2.getPerspectiveTransform(in_coords, out_coords)``
    is applied with ``cv2.WARP_INVERSE_MAP``, i.e. it is used as the mapping
    from output pixel coordinates to input pixel coordinates.

    Parameters
    ----------
    img : image array to warp.
    in_coords, out_coords : four ``[x, y]`` points each.
    int_flag : int
        Interpolation: 0 - nearest, 1 - linear, 2 - cubic. Any other value
        falls back to linear.

    Returns
    -------
    The warped image. Its size is the larger of the input image size and the
    extent of ``out_coords``, rounded up.
    """
    # honour the requested interpolation (previously always linear)
    interpolation = {
        0: cv2.INTER_NEAREST,
        1: cv2.INTER_LINEAR,
        2: cv2.INTER_CUBIC,
    }.get(int_flag, cv2.INTER_LINEAR)

    query_pts = np.float32(in_coords)
    train_pts = np.float32(out_coords)

    matrix = cv2.getPerspectiveTransform(query_pts, train_pts)

    x_coords = [item[0] for item in out_coords]
    y_coords = [item[1] for item in out_coords]

    # (width, height): large enough for the source image and the target corners
    output_size = (
        math.ceil(max(img.shape[1], max(x_coords))),
        math.ceil(max(img.shape[0], max(y_coords))),
    )

    dst = cv2.warpPerspective(img, matrix, output_size, flags=interpolation | cv2.WARP_INVERSE_MAP)

    return dst

Overall Flow

In [25]:
import time

# input image
img = cv2.imread('image_126.jpg')
if img is None:
    # cv2.imread returns None instead of raising when the file cannot be read
    raise FileNotFoundError("could not read 'image_126.jpg'")
img_copy = img.copy()  # untouched copy: the detection steps below draw on `img`

preprocessed_img = image_preprocessing(img, 3)
edges = edge_detection(preprocessed_img)
obj_coords = contour_detection(img, edges)
floor_coords = floor_detection(img, obj_coords)
im_view = image_view(floor_coords, img)
in_coords = order_points(obj_coords)
# for case with rotation angle
# out_coords = np.array(output_coords(in_coords, 20, 1, 20, 1)).reshape(-1, 2)
# for case to straighten image
out_coords = dest_coords(img.shape)

# perf_counter is monotonic and has the resolution needed for a
# millisecond-scale measurement
start_time = time.perf_counter()
transformed_image = perspectiveTransform(img_copy, in_coords, out_coords, 2)
end_time = time.perf_counter()

print(end_time - start_time)

# cv2.imwrite reports failure (e.g. missing output directory) via its return value
for out_path, out_image in (('Image 4/be.png', img), ('OpenCV/img4.png', transformed_image)):
    if not cv2.imwrite(out_path, out_image):
        print('warning: could not write', out_path)

cv2.imshow('img', img)
cv2.imshow('pt', transformed_image)
cv2.waitKey(0)
cv2.destroyAllWindows()

0.0019998550415039062


Pillow

In [28]:
from PIL import Image
import numpy
import time

In [29]:
def find_coeffs(pa, pb):
    """Solve for the 8 perspective coefficients that map ``pa`` onto ``pb``.

    For every pair ``(x, y)`` in ``pa`` and ``(u, v)`` in ``pb`` the returned
    coefficients ``(a, b, c, d, e, f, g, h)`` satisfy

        u = (a*x + b*y + c) / (g*x + h*y + 1)
        v = (d*x + e*y + f) / (g*x + h*y + 1)

    Parameters
    ----------
    pa, pb : sequences of ``[x, y]`` points of equal length (four points
        determine the coefficients exactly).

    Returns
    -------
    numpy float64 array of shape (8,).

    Raises
    ------
    numpy.linalg.LinAlgError
        If the point configuration is degenerate (the system has rank < 8).
    """
    # work in float64 throughout: the system contains products of pixel
    # coordinates, which float32 cannot represent accurately enough
    src = numpy.asarray(pa, dtype=numpy.float64)
    dst = numpy.asarray(pb, dtype=numpy.float64)

    rows = []
    for p1, p2 in zip(src, dst):
        rows.append([p1[0], p1[1], 1, 0, 0, 0, -p2[0] * p1[0], -p2[0] * p1[1]])
        rows.append([0, 0, 0, p1[0], p1[1], 1, -p2[1] * p1[0], -p2[1] * p1[1]])

    A = numpy.asarray(rows, dtype=numpy.float64)
    B = dst.reshape(-1)

    # least-squares solve instead of forming inv(A.T @ A): same solution,
    # without squaring the condition number of the system
    coeffs, _, rank, _ = numpy.linalg.lstsq(A, B, rcond=None)
    if rank < 8:
        raise numpy.linalg.LinAlgError("find_coeffs: degenerate point configuration")

    return numpy.array(coeffs).reshape(8)

In [60]:
img = cv2.imread('image_126.jpg')
if img is None:
    # cv2.imread returns None instead of raising when the file cannot be read
    raise FileNotFoundError("could not read 'image_126.jpg'")
# Pillow expects RGB channel order. `img` itself stays in BGR because
# image_preprocessing converts it with cv2.COLOR_BGR2GRAY; the copy is taken
# before the detection steps below draw on `img`.
img_copy = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

preprocessed_img = image_preprocessing(img, 3)
edges = edge_detection(preprocessed_img)
obj_coords = contour_detection(img, edges)
floor_coords = floor_detection(img, obj_coords)
im_view = image_view(floor_coords, img)
in_coords = order_points(obj_coords)
# for case with rotation angle
# out_coords = np.array(output_coords(in_coords, 20, 1, 20, 1)).reshape(-1, 2)
# for case to straighten image
out_coords = dest_coords(img.shape)

x_coords = [item[0] for item in out_coords]
y_coords = [item[1] for item in out_coords]

# perf_counter is monotonic and has the resolution needed for a
# millisecond-scale measurement; the timed section covers coefficient
# estimation and the warp itself
start_time = time.perf_counter()
coeffs = find_coeffs(np.array(in_coords), np.array(out_coords).reshape(-1, 2))
output_size = (math.ceil(max(img.shape[1], max(x_coords))), math.ceil(max(img.shape[0], max(y_coords))))
transformed_img = img_copy.transform(output_size, Image.PERSPECTIVE, coeffs)
end_time = time.perf_counter()

print(end_time - start_time)

0.007999420166015625


Scikit Image

In [23]:
import skimage
import time

In [27]:
img = cv2.imread('image_126.jpg')
if img is None:
    # cv2.imread returns None instead of raising when the file cannot be read
    raise FileNotFoundError("could not read 'image_126.jpg'")
img_copy = img.copy()  # untouched copy: the detection steps below draw on `img`

preprocessed_img = image_preprocessing(img, 3)
edges = edge_detection(preprocessed_img)
obj_coords = contour_detection(img, edges)
floor_coords = floor_detection(img, obj_coords)
im_view = image_view(floor_coords, img)
in_coords = order_points(obj_coords)
# for case with rotation angle
# out_coords = np.array(output_coords(in_coords, 20, 1, 20, 1)).reshape(-1, 2)
# for case to straighten image
out_coords = dest_coords(img.shape)

x_coords = [item[0] for item in out_coords]
y_coords = [item[1] for item in out_coords]

# perf_counter is monotonic and has the resolution needed for a short
# measurement; the timed section covers transform estimation and the warp
start_time = time.perf_counter()
tform3 = skimage.transform.ProjectiveTransform()
tform3.estimate(np.array(in_coords), np.array(out_coords).reshape(-1, 2))
output_size = (math.ceil(max(img.shape[1], max(x_coords))), math.ceil(max(img.shape[0], max(y_coords))))
warped = skimage.transform.warp(img_copy, tform3, output_shape=(output_size[1], output_size[0])) # (rows = height, columns = width)
end_time = time.perf_counter()

print(end_time - start_time)

# img_as_ubyte already yields a uint8 image, so it must not be scaled by 255
# again (that wraps around in uint8). The channel order is still the BGR
# order of cv2.imread, which is what cv2.imwrite / cv2.imshow expect, so no
# colour conversion is needed.
warped = skimage.img_as_ubyte(warped)
if not cv2.imwrite('skimage/img4.png', warped):
    # cv2.imwrite reports failure (e.g. missing output directory) via its return value
    print('warning: could not write', 'skimage/img4.png')
cv2.imshow('img', img)
cv2.imshow('pt', warped)
cv2.waitKey(0)
cv2.destroyAllWindows()

0.2084660530090332
