In [1]:
import numpy as np
import cv2
from matplotlib import pyplot as plt
import os
import math
from sklearn.cluster import KMeans
%matplotlib inline
import sys

In [2]:
# Add the parent directory to the module search path so the project-local
# `framework` package can be imported (presumably it lives one level above
# the notebook's working directory -- confirm against the repository layout).
sys.path.append("..")
from framework import perspective_transform
from framework import visualize

In [3]:
# Directory holding the sample photos. The default is the original hard-coded
# location; set the SAMPLES_DATA_PATH environment variable to run the notebook
# on another machine without editing this cell.
data_path = os.environ.get(
    "SAMPLES_DATA_PATH", "/home/avk/study/1C/Diplom/data/NewData/samples/"
)

In [4]:
# Position (in os.listdir order) of the sample image to process.
# NOTE: os.listdir returns entries in arbitrary order, so the same index may
# select a different file on another machine or filesystem.
SAMPLE_FILE_INDEX = 5

sample_filenames = os.listdir(data_path)
if not sample_filenames:
    # Fail here with a clear message instead of a NameError further down.
    raise FileNotFoundError("No sample files found in {!r}".format(data_path))

# When the directory has fewer entries than requested, fall back to the last
# one (this is what the original enumerate/break loop did).
file_num = min(SAMPLE_FILE_INDEX, len(sample_filenames) - 1)
filename = sample_filenames[file_num]
full_file_path = os.path.join(data_path, filename)
file_num

5

In [5]:
initial_image = cv2.imread(full_file_path)
# cv2.imread does not raise on a missing or undecodable file; it returns None.
# Check explicitly so the failure points at the offending path.
if initial_image is None:
    raise IOError("Could not read image: {!r}".format(full_file_path))
visualize.visualize_image(initial_image)
initial_image.shape

(4032, 3016, 3)

# Уменьшение изображения

In [6]:
# Target length, in pixels, of the shorter image side after downscaling.
SHORT_SIDE_SMALL_LEN = 300

# Shorter of (height, width). The ratio is kept because corner coordinates
# found on the small image are multiplied by it later to get back to
# full-resolution pixels.
initial_small_side_len = min(initial_image.shape[:2])
resizing_ratio = initial_small_side_len / SHORT_SIDE_SMALL_LEN

# Same scale factor on both axes keeps the aspect ratio.
shrink_factor = 1 / resizing_ratio
resized_image = cv2.resize(
    initial_image,
    None,
    fx=shrink_factor,
    fy=shrink_factor,
    interpolation=cv2.INTER_CUBIC,
)

visualize.visualize_image(resized_image)
resized_image.shape

(401, 300, 3)

# Размытие текста

In [7]:
def blur_image(image, blur, kernel_size, times=1):
    """Apply a blur filter to an image a given number of times.

    Each pass first calls ``blur(image, kernel_size)``. If that call raises,
    the pass is retried as ``blur(image, (kernel_size, kernel_size), 0)``,
    which suits filters that take a tuple kernel size plus one extra
    positional argument (e.g. ``cv2.GaussianBlur``; ``cv2.medianBlur`` takes
    the scalar form).

    Args:
        image: Input image; it is passed to ``blur`` and not modified here.
        blur: Callable implementing the filter.
        kernel_size: Scalar kernel size.
        times: Number of consecutive passes. With ``times <= 0`` the input
            object is returned unchanged.

    Returns:
        The result of the last pass.

    Raises:
        Whatever the fallback call raises when both call forms fail.
    """
    resulted_image = image
    for _ in range(times):
        try:
            resulted_image = blur(resulted_image, kernel_size)
        except Exception:
            # Only ordinary errors trigger the fallback signature; a bare
            # ``except`` would also swallow KeyboardInterrupt / SystemExit.
            resulted_image = blur(resulted_image, (kernel_size, kernel_size), 0)
    return resulted_image

In [8]:
# for times in range(1, 8):
#     smoothed_image = blur_image(resized_image, cv2.medianBlur, 5, times)
#     visualize.visualize_image(smoothed_image, str(times))

In [9]:
# Three passes of a median filter with kernel size 5 on the downscaled image,
# applied before edge detection.
smoothed_image = blur_image(
    resized_image, cv2.medianBlur, kernel_size=5, times=3
)
visualize.visualize_image(smoothed_image)
# Alternative tried earlier: a further three passes with kernel size 3.
# smoothed_image = blur_image(smoothed_image, cv2.medianBlur, 3, 3)
# visualize.visualize_image(smoothed_image)

# Выделение границ

In [10]:
# Lower / upper hysteresis thresholds for the Canny edge detector.
CANNY_LOW_THRESHOLD = 10
CANNY_HIGH_THRESHOLD = 50

edges = cv2.Canny(
    smoothed_image, CANNY_LOW_THRESHOLD, CANNY_HIGH_THRESHOLD, apertureSize=3
)
visualize.visualize_image(edges)

In [11]:
HOUGH_RHO_STEP = 1              # distance resolution of the accumulator, pixels
HOUGH_THETA_STEP = np.pi / 180  # angle resolution of the accumulator, radians
HOUGH_MIN_VOTES = 100           # accumulator threshold for accepting a line

# Standard Hough transform on the edge map; each result row is [[rho, theta]].
hough_lines = cv2.HoughLines(
    edges, HOUGH_RHO_STEP, HOUGH_THETA_STEP, HOUGH_MIN_VOTES, None, 0, 0
)
len(hough_lines)

4

In [12]:
# Convert the single-channel edge map to 3-channel BGR so that coloured
# overlays (lines, points) can be drawn on it in the following cells.
cdst = cv2.cvtColor(edges, cv2.COLOR_GRAY2BGR)

In [13]:
# visualize.visualize_image(cdst)

In [14]:
# Overlay every detected Hough line on `cdst` (drawn in place, colour (0, 0, 255)).
# Nothing is drawn when no lines were detected (hough_lines is None).
for entry in ([] if hough_lines is None else hough_lines):
    rho, theta = entry[0][0], entry[0][1]
    cos_t = math.cos(theta)
    sin_t = math.sin(theta)
    # Point on the line closest to the origin (Hesse normal form).
    foot_x = cos_t * rho
    foot_y = sin_t * rho
    # Walk 1000 px along the line direction (-sin, cos) in both directions so
    # the drawn segment spans the whole image.
    start = (int(foot_x - 1000 * sin_t), int(foot_y + 1000 * cos_t))
    end = (int(foot_x + 1000 * sin_t), int(foot_y - 1000 * cos_t))
    cv2.line(cdst, start, end, (0, 0, 255), 1, cv2.LINE_AA)

In [15]:
# Show the edge image with the detected lines drawn on top.
visualize.visualize_image(cdst)

In [16]:
from collections import defaultdict
def segment_by_angle_kmeans(lines, k=2, **kwargs):
    """Group lines by orientation using k-means.

    Every line angle ``theta`` is doubled and mapped to the point
    ``(cos(2*theta), sin(2*theta))`` on the unit circle before clustering.
    That mapping has period pi, so angles just above 0 and just below pi
    (nearly the same orientation) end up next to each other.

    Args:
        lines: Sequence of lines where ``line[0]`` is ``(rho, theta)`` with
            ``theta`` in radians (the layout of the ``cv2.HoughLines``
            result). May be ``None`` or empty.
        k: Number of orientation groups to look for.
        **kwargs: Optional overrides forwarded to ``cv2.kmeans``:
            ``criteria`` (default: EPS + MAX_ITER, 10 iterations, eps 1.0),
            ``flags`` (default: ``cv2.KMEANS_RANDOM_CENTERS``) and
            ``attempts`` (default: 10).

    Returns:
        A list of groups, each a list of the input lines that share a k-means
        label, ordered by first appearance of the label. Labels that received
        no line produce no group. An empty list when ``lines`` is ``None`` or
        empty.
    """
    # cv2.HoughLines yields None when nothing is detected; treat that (and an
    # empty input) as "no groups" instead of failing inside the clustering.
    if lines is None or len(lines) == 0:
        return []

    # Termination criteria = (type, max_iter, epsilon)
    default_criteria_type = cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER
    criteria = kwargs.get('criteria', (default_criteria_type, 10, 1.0))
    flags = kwargs.get('flags', cv2.KMEANS_RANDOM_CENTERS)
    attempts = kwargs.get('attempts', 10)

    # Line angles in radians, then their doubled-angle unit-circle coordinates.
    angles = np.array([line[0][1] for line in lines])
    doubled = 2 * angles
    pts = np.column_stack((np.cos(doubled), np.sin(doubled))).astype(np.float32)

    # cv2.kmeans returns (compactness, labels, centers); only labels are used.
    labels = cv2.kmeans(pts, k, None, criteria, attempts, flags)[1]
    labels = labels.reshape(-1)  # flatten the (N, 1) label column

    # Bucket the lines by their cluster label.
    segmented = defaultdict(list)
    for label, line in zip(labels, lines):
        segmented[label].append(line)
    return list(segmented.values())

In [17]:
# Split the detected lines into orientation groups (default k=2).
segmented = segment_by_angle_kmeans(hough_lines)

In [18]:
# Inspect the grouped lines; each entry is a [[rho, theta]] array.
segmented

[[array([[-232.       ,    3.0892327]], dtype=float32),
  array([[5.900000e+01, 5.235988e-02]], dtype=float32)],
 [array([[70.       ,  1.5707964]], dtype=float32),
  array([[344.       ,   1.5882496]], dtype=float32)]]

In [19]:
def intersection(line1, line2):
    """Find the intersection of two lines given in Hesse normal form.

    Each line satisfies ``x*cos(theta) + y*sin(theta) = rho``; the two
    equations are solved as a 2x2 linear system.
    See https://stackoverflow.com/a/383527/5087436

    Args:
        line1: Line where ``line1[0]`` is ``(rho, theta)``, theta in radians.
        line2: Second line, same layout.

    Returns:
        ``[[x, y]]`` with the intersection rounded to the closest integer
        pixel location (built-in ``int`` values).

    Raises:
        numpy.linalg.LinAlgError: If the lines are parallel, i.e. the system
            is singular.
    """
    rho1, theta1 = line1[0]
    rho2, theta2 = line2[0]
    A = np.array([
        [np.cos(theta1), np.sin(theta1)],
        [np.cos(theta2), np.sin(theta2)]
    ])
    # Use a 1-D right-hand side so that solve() returns shape (2,) and x0 / y0
    # are scalars. A (2, 1) column vector would yield 1-element arrays, and
    # converting those with int() is deprecated since NumPy 1.25.
    b = np.array([rho1, rho2])
    x0, y0 = np.linalg.solve(A, b)
    x0, y0 = int(np.round(x0)), int(np.round(y0))
    return [[x0, y0]]


def segmented_intersections(lines):
    """Intersect every line with every line belonging to a later group.

    Args:
        lines: List of line groups (each group is a list of lines accepted by
            ``intersection``).

    Returns:
        List of ``intersection`` results, ordered by group pair and then by
        the position of the lines inside their groups. Lines of the same
        group are never intersected with each other.
    """
    group_count = len(lines)
    return [
        intersection(first_line, second_line)
        for earlier in range(group_count - 1)
        for later in range(earlier + 1, group_count)
        for first_line in lines[earlier]
        for second_line in lines[later]
    ]

In [20]:
# Intersect the lines of different orientation groups.
intersections = segmented_intersections(segmented)

In [21]:
# Number of intersection points found.
len(intersections)

4

In [22]:
# Unwrap each [[x, y]] intersection into an (x, y) tuple, then mark every
# point on `cdst` with a filled circle of radius 3 in colour (0, 255, 0).
points = [tuple(crossing[0]) for crossing in intersections]
for xy in points:
    cv2.circle(cdst, xy, 3, (0, 255, 0), -1)
visualize.visualize_image(cdst)

In [23]:
# Intersection points as an (N, 2) array of x, y pixel coordinates.
np.array(points)

array([[236,  70],
       [251, 348],
       [ 55,  70],
       [ 41, 345]])

In [24]:
# Reduce the intersection points to four corner estimates (cluster centres).
# random_state is fixed so that the corners and their order are reproducible
# between runs; n_init is pinned to scikit-learn's long-standing default of 10
# so the result does not depend on the installed version's default.
corners = (
    KMeans(n_clusters=4, n_init=10, random_state=0)
    .fit(np.array(points))
    .cluster_centers_
)

In [25]:
# Mark each estimated corner on `cdst` with a filled circle of radius 3 in
# colour (255, 0, 0) and print its pixel coordinates.
for corner in corners:
    # Truncate to built-in ints: NumPy integer scalars print as np.int64(...)
    # under NumPy 2 and may not be accepted as point coordinates by every
    # OpenCV build.
    corner = tuple(int(coordinate) for coordinate in corner)
    print(corner)
    cv2.circle(cdst, corner, 3, (255, 0, 0), -1)
visualize.visualize_image(cdst)

(55, 70)
(251, 348)
(41, 345)
(236, 70)


In [26]:
# Scale the corners from the downscaled image back to pixel coordinates of
# the original image (fractions are truncated).
# NOTE: this rebinds `corners`, so re-running only this cell scales the
# already-scaled values again.
corners = (corners * resizing_ratio).astype(int)
corners

array([[ 552,  703],
       [2523, 3498],
       [ 412, 3468],
       [2372,  703]])

In [27]:
# Earlier experiment: warp onto an explicitly specified target quadrilateral.
# suggestion = np.array([[0,0],[299,280],[0,280],[299,0]], np.float32) * 10
# warped = perspective_transform.transform_four_points_to_four_points(initial_image, corners, suggestion)
# Pass the full-resolution image and the four rescaled corners to the project
# helper (presumably it rectifies the quadrilateral they span -- see
# framework.perspective_transform), then show the result and its shape.
warped = perspective_transform.remove_perspective_distortion(initial_image, corners)
visualize.visualize_image(warped)
warped.shape