In [299]:
import xml.etree.ElementTree as ET
import numpy as np
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon
from skimage.util import img_as_float
from skimage import io, color
from scipy import misc
from skimage.segmentation import slic, mark_boundaries
import cv2
import matplotlib.pyplot as plt
import pickle

In [300]:
def get_xy_coordinates(filename, scale=0.125):
    '''
    Given an XML file, get the (x, y) positions of every text block.

    Parameters
    ----------
    filename : str or file object
        Location of the XML document; handed straight to ``ET.parse``.
    scale : float, optional
        Factor every x and y value is multiplied by (default 0.125).

    Returns
    -------
    list of list of tuple
        One list of scaled ``(x, y)`` tuples per ``TextRegion`` whose
        ``type`` attribute equals ``'text'``, in document order.

    Raises
    ------
    ValueError
        If the document root has no direct ``Page`` child.
    '''
    root = ET.parse(filename).getroot()

    # Get the page. If several children are tagged 'Page' the last one wins.
    page = None
    for child in root:
        if child.tag == 'Page':
            page = child
    if page is None:
        raise ValueError("no 'Page' element found in %r" % (filename,))

    # Identify all (x, y) corresponding to text blocks
    coordinates = []
    for region in page.findall('TextRegion'):
        if region.get('type') != 'text':
            continue
        coords = region.find('Coords')
        if coords is None:
            # A text region without a Coords child has no outline to report.
            continue
        xy = [(int(point.get('x')) * scale, int(point.get('y')) * scale)
              for point in coords.findall('Point')]
        coordinates.append(xy)
    return coordinates

In [301]:
def read_image(img_file, scale=0.125):
    '''
    Read an image, convert it to grayscale and scale it (by 2^-3 by default).

    Parameters
    ----------
    img_file : str
        Path of the image, passed to ``skimage.io.imread``.
    scale : float, optional
        Resize factor handed to ``misc.imresize`` (default 0.125).

    Returns
    -------
    The resized grayscale image as returned by ``misc.imresize``.
    '''
    # Decode the file once. The earlier version read it twice and threw the
    # first (img_as_float) result away.
    image = color.rgb2gray(io.imread(img_file))
    # NOTE(review): scipy.misc.imresize was deprecated in SciPy 1.0 and removed
    # in SciPy 1.3, so this call needs an older SciPy (with Pillow). It is kept
    # as-is because swapping the resize routine could change pixel values.
    image = misc.imresize(image, scale, interp='nearest', mode=None)
    return image

In [302]:
def SLIC(num, img=None):
    '''
    Create image segments using SLIC.

    Parameters
    ----------
    num : int
        Passed to ``slic`` as ``n_segments``.
    img : array, optional
        Image to segment. When omitted, the notebook-level variable ``image``
        is used, which is what the original one-argument call relied on.

    Returns
    -------
    masks : numpy array
        One uint8 mask per segment label: 255 inside the segment, 0 elsewhere.
    Regions : numpy array
        One copy of the image per segment label, masked with that segment's
        mask via ``cv2.bitwise_and``.
    segments : array
        The label image returned by ``slic``.
    '''
    if img is None:
        img = image  # fall back to the notebook-level image
    segments = slic(img, n_segments=num)

    masks = []
    Regions = []
    for segVal in np.unique(segments):
        # construct a mask for the segment
        mask = np.zeros(img.shape[:2], dtype="uint8")
        mask[segments == segVal] = 255
        masks.append(mask)
        # The masked image is computed once per segment; the earlier version
        # computed it twice and discarded one copy.
        Regions.append(cv2.bitwise_and(img, img, mask=mask))
    return np.array(masks), np.array(Regions), segments

In [303]:
def view_segments(image, segments, num_segments=None):
    '''
    Show the image with the segment boundaries drawn on top.

    Parameters
    ----------
    image : array
        Image passed to ``mark_boundaries``.
    segments : array
        Segment label image passed to ``mark_boundaries``.
    num_segments : int, optional
        Number shown in the figure title. Defaults to the number of distinct
        labels in ``segments``; the title used to say 1000 unconditionally.
    '''
    if num_segments is None:
        num_segments = len(np.unique(segments))
    fig = plt.figure("Superpixels -- %d segments" % num_segments)
    ax = fig.add_subplot(1, 1, 1)
    ax.imshow(mark_boundaries(image, segments))
    ax.axis("off")

    # show the plots
    plt.show()

In [309]:
def get_images_borders(Regions, patch_size=10):
    '''
    Get input images for the CNN and their borders.

    Every row of a region is scanned for its first non-zero pixel. That pixel
    and the ones following it (at most ``patch_size`` in total, fewer if the
    row ends first) become one row of the region's image. Rows without any
    non-zero pixel are skipped.

    Parameters
    ----------
    Regions : sequence of 2-D arrays
        One masked image per segment.
    patch_size : int, optional
        Maximum number of pixels taken per row, and the index of the second
        row whose coordinates are recorded (default 10).

    Returns
    -------
    input_images : list
        For each region that produced pixels, a list of pixel rows.
    all_coordinates : list
        For each region that produced pixels, the ``(row, col)`` of the first
        and last pixel taken from its 1st and ``patch_size``-th non-empty row.
        The last-pixel entry is omitted when the row held fewer than
        ``patch_size`` pixels.
    '''
    input_images = []
    all_coordinates = []
    for region in Regions:
        pix_img = []
        coordinates = []
        # Number of rows of this region that contained a pixel so far. The
        # earlier version never initialised this counter (nor its per-row
        # state flag) and raised UnboundLocalError.
        row_counter = 0
        for i in range(len(region)):
            row = region[i]
            nonzero = np.flatnonzero(row)
            if nonzero.size == 0:
                continue
            start = int(nonzero[0])
            pix_row = list(row[start:start + patch_size])
            row_counter += 1
            if row_counter == 1 or row_counter == patch_size:
                coordinates.append((i, start))
                if len(pix_row) == patch_size:
                    coordinates.append((i, start + patch_size - 1))
            pix_img.append(pix_row)
        if pix_img:
            input_images.append(pix_img)
        if coordinates:
            all_coordinates.append(coordinates)
    return input_images, all_coordinates

In [289]:
# To (re)create the cache files:
# with open('input_images.pkl', 'wb') as pickle_file:
#     pickle.dump(input_images, pickle_file)

# NOTE: pickle.load can execute arbitrary code; only load files you created.
# Pickles are binary data, so open them with 'rb', and use `with` so the
# handles are closed again.
with open('input_images.pkl', 'rb') as pickle_file:
    input_images = pickle.load(pickle_file)
with open('all_coordinates.pkl', 'rb') as pickle_file:
    all_coordinates = pickle.load(pickle_file)

In [295]:
# Scaled (x, y) outlines of the regions typed 'text' in d-006.xml.
dataset_xy = get_xy_coordinates('d-006.xml')

In [310]:
# Build the CNN input patches and their coordinates from the SLIC regions.
# This rebinds the same two names the pickle-loading cell assigns.
# NOTE(review): `Regions` is only created in the "Calling Functions" cell
# further down, so this fails on a fresh top-to-bottom run.
input_images, all_coordinates = get_images_borders(Regions)

In [318]:
# Same extraction with the older implementation, kept for comparison.
# NOTE(review): get_images_borders_old is defined in a later cell, so this
# fails on a fresh top-to-bottom run.
input_images_old, all_coordinates_old = get_images_borders_old(Regions)

In [None]:
# Plan for labelling (not implemented yet):
#   - iterate over each image (950)
#   - iterate over each (x, y) point from the XML
#   - check whether (x, y) lies inside the image's coordinates
#   - class label starts at 0 (default); if the point lies inside, set it to 1 and break
# TODO: the loop below only looks up the coordinates of image i; it assigns no label yet.

for i in range(len(input_images)):
    for xy in dataset_xy:
        coordinate = all_coordinates[i]

In [317]:
# Display the per-region coordinate lists (the recorded output below is cut off).
all_coordinates


[[(0, 0), (0, 9), (9, 0), (9, 9)],
 [(0, 10), (0, 19), (9, 10), (9, 19)],
 [(0, 20), (0, 29), (9, 20), (9, 29)],
 [(0, 30), (0, 39), (9, 30), (9, 39)],
 [(0, 40), (0, 49), (9, 40), (9, 49)],
 [(0, 50), (0, 59), (9, 50), (9, 59)],
 [(0, 60), (0, 69), (9, 60), (9, 69)],
 [(0, 70), (0, 79), (9, 70), (9, 79)],
 [(0, 80), (0, 89), (9, 80), (9, 89)],
 [(0, 90), (0, 99), (9, 90), (9, 99)],
 [(0, 100), (0, 109), (9, 100), (9, 109)],
 [(0, 110), (0, 119), (9, 110), (9, 119)],
 [(0, 120), (0, 129), (9, 120), (9, 129)],
 [(0, 130), (0, 139), (9, 130), (9, 139)],
 [(0, 140), (0, 149), (9, 140), (9, 149)],
 [(0, 150), (0, 159), (9, 150), (9, 159)],
 [(0, 160), (0, 169), (9, 160), (9, 169)],
 [(0, 170), (0, 179), (9, 170), (9, 179)],
 [(0, 180), (0, 189), (9, 180), (9, 189)],
 [(0, 190), (0, 199), (9, 190), (9, 199)],
 [(0, 200), (0, 209), (9, 200), (9, 209)],
 [(0, 210), (0, 219), (9, 210), (9, 219)],
 [(0, 220), (0, 229), (9, 220), (9, 229)],
 [(0, 230), (0, 239), (9, 230), (9, 239)],
 [(0, 240), 

In [325]:
# Display the coordinates from the older implementation (the recorded output below is cut off).
all_coordinates_old

[[(0, 0),
  (0, 9),
  (1, 0),
  (1, 9),
  (2, 0),
  (2, 9),
  (3, 0),
  (3, 9),
  (4, 0),
  (4, 9),
  (5, 0),
  (5, 9),
  (6, 0),
  (6, 9),
  (7, 0),
  (7, 9),
  (8, 0),
  (8, 9),
  (9, 0),
  (9, 9)],
 [(0, 10),
  (0, 19),
  (1, 10),
  (1, 19),
  (2, 10),
  (2, 19),
  (3, 10),
  (3, 19),
  (4, 10),
  (4, 19),
  (5, 10),
  (5, 19),
  (6, 10),
  (6, 19),
  (7, 10),
  (7, 19),
  (8, 10),
  (8, 19),
  (9, 10),
  (9, 19)],
 [(0, 20),
  (0, 29),
  (1, 20),
  (1, 29),
  (2, 20),
  (2, 29),
  (3, 20),
  (3, 29),
  (4, 20),
  (4, 29),
  (5, 20),
  (5, 29),
  (6, 20),
  (6, 29),
  (7, 20),
  (7, 29),
  (8, 20),
  (8, 29),
  (9, 20),
  (9, 29)],
 [(0, 30),
  (0, 39),
  (1, 30),
  (1, 39),
  (2, 30),
  (2, 39),
  (3, 30),
  (3, 39),
  (4, 30),
  (4, 39),
  (5, 30),
  (5, 39),
  (6, 30),
  (6, 39),
  (7, 30),
  (7, 39),
  (8, 30),
  (8, 39),
  (9, 30),
  (9, 39)],
 [(0, 40),
  (0, 49),
  (1, 40),
  (1, 49),
  (2, 40),
  (2, 49),
  (3, 40),
  (3, 49),
  (4, 40),
  (4, 49),
  (5, 40),
  (5, 49),
  (6

In [322]:
# Inspect row 159 of region 350; in the recorded output every pixel of that row is 0.
Regions[350][159]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=uint8)

In [319]:
# Sanity check of shapely's point-in-polygon test: the centre of the unit
# square must be reported as inside it.
# TODO: build the polygon from an entry of all_coordinates instead.
polygon = Polygon([(0, 0), (0, 1), (1, 1), (1, 0)])
point = Point(0.5, 0.5)
print(polygon.contains(point))

True


In [235]:
'''
Calling Functions
'''
# Load the page image (grayscale, downscaled by read_image).
image = read_image("d-006.jpg")

# Segment it. SLIC takes no image argument: it reads the module-level `image`
# bound on the line above, so this order matters.
masks, Regions, segments = SLIC(1000)

# Draw the segment boundaries on top of the image.
view_segments(image, segments)

In [161]:
segments = np.array(segments)
# Not the last expression of the cell, so this value is never displayed.
segments.shape
# Length of the second row of `image`; this is the value the cell displays.
len(image[1])

250

In [153]:
# Print the (n_points, 2) shape of every coordinate list.
# The outer list is deliberately not converted with np.array any more: the
# inner lists have different lengths (162 and 164 in the recorded output), and
# current NumPy refuses to build an array from ragged input.
# NOTE(review): `coordinates` is not assigned at notebook level in any visible
# cell; it must come from earlier kernel state.
for c in coordinates:
    print(np.array(c).shape)

(162, 2)
(164, 2)


In [14]:
from matplotlib.path import Path

# Vertices handed to matplotlib's Path, in the order listed here.
tupVerts = [
    (86, 52), (85, 52), (81, 53), (80, 52), (79, 48), (81, 49), (86, 53),
    (85, 51), (82, 54), (84, 54), (83, 49), (81, 52), (80, 50), (81, 48),
    (85, 50), (86, 54), (85, 54), (80, 48), (79, 50), (85, 49), (80, 51),
    (85, 53), (82, 49), (83, 54), (82, 53), (84, 49), (79, 49),
]

# Coordinates of every pixel on a 300 x 300 canvas, flattened to 1-D.
x, y = (axis.flatten() for axis in np.meshgrid(np.arange(300), np.arange(300)))
# One (x, y) pair per row.
points = np.vstack((x, y)).T

# Build the polygon and test every canvas point against it.
p = Path(tupVerts)
grid = p.contains_points(points)
# Back to canvas shape: True where the point lies inside the polygon.
mask = grid.reshape(300, 300)

In [154]:
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon

# Unit square and its centre point; contains() is expected to print True.
polygon = Polygon([(0, 0), (0, 1), (1, 1), (1, 0)])
point = Point(0.5, 0.5)
print(polygon.contains(point))

True


In [316]:
def get_images_borders_old(Regions, patch_size=10):
    '''
    Get input images for the CNN and their borders (older variant).

    Every row of a region is scanned for its first non-zero pixel. That pixel
    and the ones following it (at most ``patch_size`` in total, fewer if the
    row ends first) become one row of the region's image. This variant records
    the border coordinates of every such row.

    Parameters
    ----------
    Regions : sequence of 2-D arrays
        One masked image per segment.
    patch_size : int, optional
        Maximum number of pixels taken per row (default 10).

    Returns
    -------
    input_images : list
        For each region that produced pixels, a list of pixel rows.
    all_coordinates : list
        For each region that produced pixels, the ``(row, col)`` of the first
        and last pixel taken from every non-empty row. The last-pixel entry
        is omitted when the row held fewer than ``patch_size`` pixels.
    '''
    input_images = []
    all_coordinates = []
    for region in Regions:
        pix_img = []
        coordinates = []
        for i in range(len(region)):
            # Every row is handled on its own. The earlier version kept a
            # before/inside/after flag across rows and regions, so a patch
            # ending on the last column swallowed the next row and a row cut
            # short by the image edge blocked all later rows.
            row = region[i]
            nonzero = np.flatnonzero(row)
            if nonzero.size == 0:
                continue
            start = int(nonzero[0])
            pix_row = list(row[start:start + patch_size])
            coordinates.append((i, start))  # first pixel of the patch row
            if len(pix_row) == patch_size:
                coordinates.append((i, start + patch_size - 1))  # last pixel
            pix_img.append(pix_row)
        if pix_img:
            input_images.append(pix_img)
        if coordinates:
            all_coordinates.append(coordinates)
    return input_images, all_coordinates

In [170]:
# TODO: the body of this per-pixel loop was never written. `pass` keeps the
# cell syntactically valid until it is filled in or the cell is deleted.
for i in range(len(image)):
    for j in range(len(image[0])):
        pass

240

In [223]:
def get_images_borders(Regions, patch_size=10):
    '''
    Get input images for the CNN and their borders.

    Every row of a region is scanned for its first non-zero pixel. That pixel
    and the ones following it (at most ``patch_size`` in total, fewer if the
    row ends first) become one row of the region's image. Rows without any
    non-zero pixel are skipped.

    NOTE(review): this notebook defines ``get_images_borders`` in more than
    one cell and whichever cell runs last wins; consider keeping only one.

    Parameters
    ----------
    Regions : sequence of 2-D arrays
        One masked image per segment.
    patch_size : int, optional
        Maximum number of pixels taken per row, and the index of the second
        row whose coordinates are recorded (default 10).

    Returns
    -------
    input_images : list
        For each region that produced pixels, a list of pixel rows.
    all_coordinates : list
        For each region that produced pixels, the ``(row, col)`` of the first
        and last pixel taken from its 1st and ``patch_size``-th non-empty row.
        The last-pixel entry is omitted when the row held fewer than
        ``patch_size`` pixels.
    '''
    input_images = []
    all_coordinates = []
    for region in Regions:
        pix_img = []
        coordinates = []
        row_counter = 0  # rows of this region that contained a pixel so far
        # Every row is processed. The earlier version only looked at row 350
        # (a debugging leftover) and then read `pix_row` before it was bound,
        # so it raised UnboundLocalError on the first row.
        for i in range(len(region)):
            row = region[i]
            nonzero = np.flatnonzero(row)
            if nonzero.size == 0:
                continue
            start = int(nonzero[0])
            pix_row = list(row[start:start + patch_size])
            row_counter += 1
            if row_counter == 1 or row_counter == patch_size:
                coordinates.append((i, start))
                if len(pix_row) == patch_size:
                    coordinates.append((i, start + patch_size - 1))
            pix_img.append(pix_row)
        if pix_img:
            input_images.append(pix_img)
        if coordinates:
            all_coordinates.append(coordinates)
    return input_images, all_coordinates


In [204]:
# NOTE(review): `a` is not assigned in any cell visible here; scratch display - TODO remove.
a


[(1, 2), (3, 2)]