Sequence are a safer way to do multiprocessing. This structure guarantees that the network will only train once on each sample per epoch which is not the case with generators.

In [99]:
input_img_size = (512,512)
network_input_img_size = (256,400)
network_output_imag_size = (384,432)

In [130]:
from keras.models import Model
from keras.layers.convolutional import Conv3D, Conv2D
from keras.layers.convolutional_recurrent import ConvLSTM2D
from keras.layers.normalization import BatchNormalization
from keras.layers import Input, TimeDistributed

input_shape = (None, 256, 400, 1)
input = Input(input_shape, name='input')

x = ConvLSTM2D(filters=14, kernel_size=(3, 3),
                   input_shape=input_shape,
                   padding='same', return_sequences=True)(input)
x = BatchNormalization()(x)
x = ConvLSTM2D(filters=14, kernel_size=(3, 3),
                   padding='same', return_sequences=True)(x)
x = BatchNormalization()(x)
x = TimeDistributed(Conv2D(filters=2, kernel_size=(3,3), padding='same',activation='relu'))(x)
output = TimeDistributed(Conv2D(filters=1, kernel_size=(1,1), padding='same', activation='sigmoid'), name='output')(x)
# output = Conv3D(filters=1, kernel_size=(3, 3, 3),
#                    activation='sigmoid',
#                    padding='same', data_format='channels_last')(x)


model = Model(inputs = [input], output=[output])
model.compile(loss='binary_crossentropy', optimizer='adadelta')
model.summary()



_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input (InputLayer)           (None, None, 256, 400, 1) 0         
_________________________________________________________________
conv_lst_m2d_24 (ConvLSTM2D) (None, None, 256, 400, 14 7616      
_________________________________________________________________
batch_normalization_24 (Batc (None, None, 256, 400, 14 56        
_________________________________________________________________
conv_lst_m2d_25 (ConvLSTM2D) (None, None, 256, 400, 14 14168     
_________________________________________________________________
batch_normalization_25 (Batc (None, None, 256, 400, 14 56        
_________________________________________________________________
time_distributed_10 (TimeDis (None, None, 256, 400, 2) 254       
_________________________________________________________________
output (TimeDistributed)     (None, None, 256, 400, 1) 3         
Total para

In [165]:
from distutils.version import LooseVersion
from collections import OrderedDict
import cv2

class DataLoader():
    def __init__(self, seq_length = None,shuffle=True, augment=None):
        self.seq_length = seq_length
        self.step = 1
        self.Xs = []
        self.ys = []
        self.augment = augment
        self.frames = []
        self.labels = []
        #print("Number of elements: \n")
        #print(self.nb_elements)
    
    def _add_frame(self, i, frame):
        frame = self._resizer(frame,(256,400))
        self.frames.append(frame)
            
    def _add_labels(self, i, cls):
        _cls = scipy.misc.imresize(cls,(256,400))
        #print("size after" + str(_cls.shape) + "\n")
        #self.labels_tag[256] = _cls.shape[1] #this is supposed to assign a new shape but no change in shape occured
        #self.labels_tag[257] = _cls.shape[0]
        self.labels.append(_cls)

    def _resizer(self, data, dimso):
        data = cv2.resize(data, dimso)
        return data


    def load_data(self,file_name):
         # Store sample
        input_name, ext = os.path.splitext(file_name)
        if ext not in ('.avi', '.mp4', '.tif'):
            raise IOError('Format %s not supported' % (ext))
        # Read video
        if ext in ('.avi', '.mp4'):
            print("Processing an avi file...")
            video = cv2.VideoCapture(file_name)
            if LooseVersion(cv2.__version__) < LooseVersion('3'):
                num_frames = int(video.get(cv2.cv.CV_CAP_PROP_FRAME_COUNT))
            else:
                num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
            print("Number of frames in that sample " + str(num_frames))
            
            for i in range(num_frames):
                is_valid, img = video.read()
                if not is_valid:
                    print('Cannot read frame: %d of %s' % (i, file_name))
                    num_frames = i + 1
                    break
                img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                self._add_frame(i, img)
        else:  # TIFF
            seq = Image.open(file_name)
            num_frames = seq.n_frames
            print(n_frames)
            for i in range(start, seq.n_frames, step):
                seq.seek(i)
                a = np.array(seq.convert('L'))
                self._add_frame(i, a)
            #X[i,] = np.load( ID )

            # Store class
            self.label_file_fullpath = os.path.join(input_dir, label_file)
            self.label_file = label_file
            self.labels_tag = deepcopy(labels.tag)
            self.nb_classes = 0
            for i in range(start, num_frames, step):
                labels.seek(i)
                cls = np.array(labels)
                self.nb_classes = max(self.nb_classes, np.max(cls) + 1)
                # Read 'verified' tag in first frame
                if i == 0:
                    try:
                        self.verified = read_verified_tag(labels.tag)
                    except:
                        pass
                    # if len(self.verified) == 0:
                    #     raise IOError('Could not read verified tag.')
                self._add_labels(i, cls)
            #y[i] = self.labels[ID]
        X = self.frames
        y = self.labels
            
        return X, y

In [172]:
import numpy as np
import keras
import cv2

class DataGenerator(keras.utils.Sequence):
    'Generates data for Keras'
    def __init__(self, list_IDs, labels, batch_size=1, dim=(500,512,512,1), n_channels=1,
                 n_classes=10, shuffle=True):
        'Initialization'
        self.dim = dim
        self.batch_size = batch_size
        self.labels = labels
        self.list_IDs = list_IDs
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.on_epoch_end()
        print("Generator Initiated")

    def __len__(self):
        'number of iterations per epoch. value (rounded up) obtained by dividing the number of samples by the batch size'
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        'Generate one batch of data'
        # The batch size determines how many IDs get fed into here
        # Generate indexes of the batch
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        # Find list of IDs
        list_IDs_temp = [self.list_IDs[k] for k in indexes]
        print("Number of IDs " + str(len(list_IDs_temp)))
        
        # Generate data
        for i, ID in enumerate(list_IDs_temp):
            print("Id being processed " + ID)
            data_loader = DataLoader()
            X_out, y_out = data_loader.load_data(ID)
            #print(X.shape)
            #print(y.shape)
      
        #X, y = self.__data_generation(list_IDs_temp)
        #X, y = data_loader.load_data(list_IDs_temp)
        X = np.array(X_out)
        X = np.expand_dims(X, axis = -1)
        y = np.array(y_out)
        y = np.expand_dims(y, axis = -1)
        return X, y

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle == True:
            np.random.shuffle(self.indexes)

#     def __data_generation(self, list_IDs_temp):
#         'Generates data containing batch_size samples can also do preprocessing here' # X : (n_samples, *dim, n_channels)
#         # Initialization
#         X = np.empty((self.batch_size, *self.dim, self.n_channels))
#         y = np.empty((self.batch_size, *self.dim, self.n_channels))

#         # Generate data
#         for i, ID in enumerate(list_IDs_temp):
#             X,y_ = load_data (list_IDs_temp)

#         return X, y
    

In [173]:
import numpy as np
import os

from keras.models import Sequential

# # Parameters
params = {'dim': (500,512,512,1),
          'batch_size': 1,
          'n_channels': 1,
          'n_classes': 0,
          'shuffle': True}


input_dir = os.path.expanduser('~/Documents/ConvLSTM/Databinary/')


# Datasets
partition = {}
labels = {}

partition['train'] = [input_dir + 'substack500_7fps_00.avi',input_dir + 'substack500_7fps_01.avi']
labels['train'] = [input_dir + 'substack500_7fps_00_label.tif',input_dir + 'substack500_7fps_01_label.tif']
partition['validation'] = [input_dir + 'substack500_7fps_02.avi']
labels['validation'] = [input_dir + 'substack500_7fps_02_label.tif']

# Generators
training_generator = DataGenerator(partition['train'], labels, **params)
validation_generator = DataGenerator(partition['validation'], labels, **params)

"""
steps_per_epoch: Integer. Total number of steps (batches of samples) to 
yield from generator before declaring one epoch finished and starting the 
next epoch. It should typically be equal to the number of samples of your 
dataset divided by the batch size. Optional for  Sequence: if unspecified, 
will use the len(generator) as a number of steps."""


Generator Initiated
Generator Initiated


'\nsteps_per_epoch: Integer. Total number of steps (batches of samples) to \nyield from generator before declaring one epoch finished and starting the \nnext epoch. It should typically be equal to the number of samples of your \ndataset divided by the batch size. Optional for  Sequence: if unspecified, \nwill use the len(generator) as a number of steps.'

In [174]:
# Train model on dataset
model.fit_generator(generator=training_generator,
                    validation_data=validation_generator,
                    steps_per_epoch = 1,
                    validation_steps = 1,
                    use_multiprocessing=True,
                    workers=1)
# When there are many workers, the do everything in parallel doing many samples at the same time

Number of IDs 1
Id being processed /home/pelonomi/Documents/ConvLSTM/Databinary/substack500_7fps_01.avi
Processing an avi file...
Number of frames in that sample 500
Epoch 1/1
Number of IDs 1
Id being processed /home/pelonomi/Documents/ConvLSTM/Databinary/substack500_7fps_00.avi
Processing an avi file...
Number of frames in that sample 500
Number of IDs 1
Id being processed /home/pelonomi/Documents/ConvLSTM/Databinary/substack500_7fps_01.avi
Processing an avi file...
Number of frames in that sample 500
Number of IDs 1
Id being processed /home/pelonomi/Documents/ConvLSTM/Databinary/substack500_7fps_00.avi
Processing an avi file...
Number of frames in that sample 500
Number of IDs 1
Id being processed /home/pelonomi/Documents/ConvLSTM/Databinary/substack500_7fps_01.avi
Processing an avi file...
Number of frames in that sample 500
Number of IDs 1
Id being processed /home/pelonomi/Documents/ConvLSTM/Databinary/substack500_7fps_00.avi
Processing an avi file...
Number of frames in that sampl

ValueError: Error when checking input: expected input to have 5 dimensions, but got array with shape (500, 400, 256)