In [2]:
import xml.etree.ElementTree as ET
import numpy as np
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon
from skimage.util import img_as_float
from skimage import io, color
from scipy import misc
from skimage.segmentation import slic, mark_boundaries
import cv2
import matplotlib.pyplot as plt
import pickle

In [472]:
'''
Given XML file, gets the xy positions for text block
'''
def get_xy_coordinates(filename):    
    # Give location of XML document
    tree = ET.parse(filename)
#     tree = ET.parse('d-006_kai.chen@unifr.ch.xml')
    root = tree.getroot()

    # Get the page
    for child in root:
        if child.tag == 'Page':
            page = child

    # Identify all (x,y) corresponding to text blok
    coordinates = []
    for region in page.findall('TextRegion'):
        coords = region.find('Coords')
        label = region.get('type')
        if label=='text':
#             labels.append(label)
            xy = []
            for point in coords.findall('Point'):
                x = int(point.get('x'))
                y = int(point.get('y'))
                x = x*0.125
                y = y*0.125
                xy.append((x,y))
            coordinates.extend(xy)
    # return
    return coordinates

In [4]:
'''
Reads the image and scales it by a factor of 2^-3
'''
def read_image(img_file):
    # Read input image
    image = img_as_float(io.imread(img_file))
    image = color.rgb2gray(io.imread(img_file))
    image = misc.imresize(image, 0.125, interp='nearest', mode=None)
    # plt.imshow(image, cmap='gray')
    # plt.show()
    return image

In [5]:
'''
Creat image segments using SLIC.
Returns: mask and the pixel intensities for every region.
'''
def SLIC(num):
    masks = []
    Regions = []
    segments = slic(image, n_segments = num)
    
    for (i, segVal) in enumerate(np.unique(segments)):
        # construct a mask for the segment
        mask = np.zeros(image.shape[:2], dtype = "uint8")
        mask[segments == segVal] = 255
        seg = cv2.bitwise_and(image, image, mask = mask)
        r = cv2.bitwise_and(image, image, mask = mask)
        masks.append(mask)
        Regions.append(r)
    masks = np.array(masks)
    Regions = np.array(Regions)
    return masks, Regions, segments

In [6]:
def view_segments(image, segments):
    fig = plt.figure("Superpixels -- %d segments" % (1000))
    ax = fig.add_subplot(1, 1, 1)
    ax.imshow(mark_boundaries(image, segments))
    plt.axis("off")

    # show the plots
    plt.show()

In [416]:
'''
Get input images (kxk) for the CNN and their borders
'''
def get_images_borders(Regions, total_rows, k, total_cols):
    input_images = []
    all_coordinates = []
#     for i in range(len(Regions)):
    i = 0
    for r in range(0,total_rows-k+1,k):
        for c in range(0,total_cols-k+1,k):
            input_images.append(Regions[i][r:r+k,c:c+k])
            all_coordinates.append([(r,c),(r,c+k-1),(r+k-1,c+k-1),(r+k-1,c)])
            i += 1
                
#     all_coordinates = np.array(all_coordinates)
    input_images = np.array(input_images)
    
    input_images_f = []
    all_coordinates_f = []
    # Remove all zero images
    for j in range(len(input_images)):
        if not (input_images[j]==0).all():
            input_images_f.append(input_images[j])
            all_coordinates_f.append(all_coordinates[j])
    return input_images_f, all_coordinates_f

In [428]:
pickle_file = open('input_images.pkl', 'wb')
pickle.dump(input_images, pickle_file)
pickle_file.close()
# input_images = pickle.load(open('input_images.pkl','r'))
# all_coordinates = pickle.load(open('all_coordinates.pkl','r'))

In [478]:
def make_training_data(input_images, all_coordinates, dataset_xy):
    training_data = [] # col 0 : Image, col 1 : Label;  Label = 1 => Text
    for i in range(len(all_coordinates)):
        data = []
        coordinate = all_coordinates[i]
        img = input_images[i]
        data.append(img)
        label = 0
        for xy in dataset_xy:
#             print xy, i
            point = Point(xy)
            polygon = Polygon(coordinate)
            if polygon.contains(point):
                label = 1
                break
        data.append(label)
        training_data.append(data)
    training_data = np.array(training_data)
    return training_data

In [319]:
point = Point(0.5, 0.5)
polygon = Polygon([(0, 0), (0, 1), (1, 1), (1, 0)])
# polygon = Polygon([all_coordinates[0]])
print(polygon.contains(point))

True


In [473]:
'''
Calling Functions
'''
image = read_image("d-006.jpg") # Read Image

masks, Regions, segments = SLIC(1000) # Perform SLIC into 1000 segments

# view_segments(image, segments) # To Plot segments

dataset_xy = get_xy_coordinates('d-006.xml') # Get (x,y) from the xml file

k = np.count_nonzero(masks[0][0]) # Get dimension of input image (kxk)

total_rows = Regions.shape[1]

total_columns = Regions.shape[2]

In [475]:
input_images, all_coordinates = get_images_borders(Regions, total_rows, k, total_cols)

In [479]:
training_data = make_training_data(input_images, all_coordinates, dataset_xy)

In [481]:
np.count_nonzero(training_data[:,1])

124

In [456]:
dataset_xy

[[(57.625, 27.75),
  (64.0, 27.0),
  (76.875, 26.375),
  (108.375, 26.5),
  (109.25, 31.125),
  (106.0, 32.25),
  (106.0, 37.25),
  (103.375, 45.625),
  (110.125, 46.0),
  (114.0, 51.625),
  (117.625, 52.0),
  (118.0, 56.625),
  (117.625, 57.5),
  (108.625, 58.25),
  (108.0, 61.25),
  (116.5, 61.25),
  (116.875, 71.0),
  (119.375, 71.375),
  (122.625, 77.25),
  (126.5, 82.25),
  (124.875, 84.125),
  (117.75, 84.125),
  (117.75, 88.125),
  (128.875, 88.625),
  (127.25, 92.875),
  (112.75, 94.875),
  (109.875, 100.375),
  (111.375, 108.5),
  (118.5, 109.5),
  (119.625, 111.5),
  (119.25, 119.25),
  (112.625, 121.375),
  (113.5, 124.375),
  (116.5, 129.375),
  (119.875, 131.375),
  (119.75, 137.25),
  (108.875, 138.0),
  (107.125, 151.0),
  (116.125, 151.875),
  (115.5, 158.25),
  (105.875, 158.25),
  (106.0, 162.375),
  (115.875, 164.25),
  (115.125, 171.875),
  (119.375, 173.0),
  (119.0, 178.625),
  (117.25, 180.625),
  (104.375, 180.5),
  (102.375, 181.125),
  (102.625, 184.5),
  (113

In [474]:
dataset_xy

[(57.625, 27.75),
 (64.0, 27.0),
 (76.875, 26.375),
 (108.375, 26.5),
 (109.25, 31.125),
 (106.0, 32.25),
 (106.0, 37.25),
 (103.375, 45.625),
 (110.125, 46.0),
 (114.0, 51.625),
 (117.625, 52.0),
 (118.0, 56.625),
 (117.625, 57.5),
 (108.625, 58.25),
 (108.0, 61.25),
 (116.5, 61.25),
 (116.875, 71.0),
 (119.375, 71.375),
 (122.625, 77.25),
 (126.5, 82.25),
 (124.875, 84.125),
 (117.75, 84.125),
 (117.75, 88.125),
 (128.875, 88.625),
 (127.25, 92.875),
 (112.75, 94.875),
 (109.875, 100.375),
 (111.375, 108.5),
 (118.5, 109.5),
 (119.625, 111.5),
 (119.25, 119.25),
 (112.625, 121.375),
 (113.5, 124.375),
 (116.5, 129.375),
 (119.875, 131.375),
 (119.75, 137.25),
 (108.875, 138.0),
 (107.125, 151.0),
 (116.125, 151.875),
 (115.5, 158.25),
 (105.875, 158.25),
 (106.0, 162.375),
 (115.875, 164.25),
 (115.125, 171.875),
 (119.375, 173.0),
 (119.0, 178.625),
 (117.25, 180.625),
 (104.375, 180.5),
 (102.375, 181.125),
 (102.625, 184.5),
 (113.375, 184.75),
 (113.125, 189.625),
 (116.0, 190.0)

In [460]:
# (50, 20), (50, 29), (59, 29), (59, 20)
point = Point((57.625, 27.75))
polygon = Polygon([(50, 20), (50, 29), (59, 29), (59, 20)])
# polygon = Polygon([all_coordinates[0]])
print(polygon.contains(point))

True


In [357]:
for r in Regions[900]:
    if r.any() != 0:
        print r

[ 31  36  27  25  51 195 138 186 205 211   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   

In [480]:
np.count_nonzero(training_data[:,1])

124

In [471]:
c = []
a = [1]
b = [2]
c.extend(a)
c.extend(b)
c

[1, 2]