In [1]:
from IPython.display import Image, SVG
import matplotlib.pyplot as plt

%matplotlib inline

import numpy as np
import keras
from keras.datasets import mnist
from keras.models import Model, Sequential
from keras.layers import Input, Dense, Conv2D, MaxPooling2D, UpSampling2D, Flatten, Reshape
from keras.layers.merge import concatenate
from keras.utils import plot_model, Sequence
from keras.preprocessing import image

import os.path 
from ast import literal_eval # for loading the list

Using TensorFlow backend.


# Load image and its bounding boxes

In [15]:
def load_boxes(number, folder="data"):
    try:
        f = open(folder + "/{0:010d}.txt".format(number))
        boxes = literal_eval(f.read())
        return boxes
    except Exception as e: 
        print(e)
        
def load_page(number, folder="data"):
    img = image.load_img(folder + "/{0:010d}.png".format(number), color_mode="grayscale")
    img = image.img_to_array(img) / 255
    img = img.reshape((-1, 640, 480, 1))
    boxes = load_boxes(number)
    return (img, boxes)

# Sequence class

In [20]:
class PageSequence(Sequence):
    
    def __init__(self, folder="data"):
        id = 0
        self.current = 0
        
        self.list = [] # avoid using numpy arrays and such in the sequence because i /want/ pass-by-reference
                       # otherwise i'd need like 40gb of ram to work with the whole sequence this way - or would
                       # need some complex machinations to figure out a way to dynamically load things from disk
                       # which would be good once i have many more samples, but not right now.
        
        while os.path.isfile(folder + "/{0:010d}.txt".format(id)):
            img, boxes = load_page(id, folder)
            
            # 1. the first thing in a sequence would be (0, 0, 0, 0)
            self.list.append(([img, np.array((0,0,0,0)).reshape((-1,4,))], np.array(boxes[0]).reshape((-1,4,))))
            
            # 2. append all the bounding boxes except for last
            for i in range(len(boxes) - 2): # stop before last element
                self.list.append(([img, np.array(boxes[i]).reshape((-1,4,))], np.array(boxes[i+1]).reshape((-1,4,))))
            
            # 3. the last thing in a sequence would be (-100, -100, -100, -100)
            # (this essentially functions as a terminator, meaning that no more elements are coming)
            self.list.append(([img, np.array(boxes[-1]).reshape((-1,4,))], np.array((-100, -100, -100, -100)).reshape((-1,4,))))
            self.list.append(([img, np.array((-100, -100, -100, -100)).reshape((-1,4,))], np.array((-100, -100, -100, -100)).reshape((-1,4,))))
                     
            
            id += 1
    
    def __len__(self):
        return len(self.list)
    
    def __getitem__(self, index):
        item = self.list[index] 
        return item[0], item[1]

data = PageSequence()
validation = PageSequence("data-validate")

print(len(data.list), len(validation.list))

46524 22438


In [32]:
# need two inputs, so using functional api

input_boxes = Input(shape=(4,)) # input bounding box of last word

input_image = Input(shape=(640,480,1)) # input image
conv1 = Conv2D(32, (3, 3), activation='relu')(input_image)
pool1 = MaxPooling2D((2,2))(conv1)
conv2 = Conv2D(16, (3, 3), activation='relu')(pool1)
pool2 = MaxPooling2D((2,2))(conv2)
conv3 = Conv2D(8, (3, 3), activation='relu')(pool2)
pool3 = MaxPooling2D((3,3))(conv3)
flatten1 = Flatten()(pool3)
#dense1 = Dense(128, activation='relu')(flatten1)

merge = concatenate([input_boxes, flatten1])
dense2 = Dense(512, activation='relu')(merge)
output = Dense(4, activation='linear')(dense2)

model = Model(inputs=[input_image, input_boxes], outputs=output)
model.compile(loss='mean_squared_error',
              optimizer='rmsprop',
              metrics=['mean_absolute_error'])
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_30 (InputLayer)           (None, 640, 480, 1)  0                                            
__________________________________________________________________________________________________
conv2d_48 (Conv2D)              (None, 638, 478, 32) 320         input_30[0][0]                   
__________________________________________________________________________________________________
max_pooling2d_43 (MaxPooling2D) (None, 319, 239, 32) 0           conv2d_48[0][0]                  
__________________________________________________________________________________________________
conv2d_49 (Conv2D)              (None, 317, 237, 16) 4624        max_pooling2d_43[0][0]           
__________________________________________________________________________________________________
max_poolin

In [33]:
hist = model.fit_generator(generator=data, use_multiprocessing=True, epochs=5, validation_data=validation)

Epoch 1/5

Process ForkPoolWorker-12:
Process ForkPoolWorker-11:
Traceback (most recent call last):
Traceback (most recent call last):
  File "/Users/ezhik/anaconda3/envs/tf/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/Users/ezhik/anaconda3/envs/tf/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/Users/ezhik/anaconda3/envs/tf/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/ezhik/anaconda3/envs/tf/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/ezhik/anaconda3/envs/tf/lib/python3.6/multiprocessing/pool.py", line 108, in worker
    task = get()
  File "/Users/ezhik/anaconda3/envs/tf/lib/python3.6/multiprocessing/pool.py", line 108, in worker
    task = get()
  File "/Users/ezhik/anaconda3/envs/tf/lib/python3.6/multiprocessing/queues.py", line 335, in get
    res = self._

Epoch 1/5


KeyboardInterrupt: 

In [14]:
img, boxes = load_page(100)

FileNotFoundError: [Errno 2] No such file or directory: 'data-smaller/0000000100.png'

In [None]:
boxes[0]

In [None]:
model.predict([img, np.array((0,0,0,0)).reshape((1,4))])