In [1]:
import os
import random
import time

import keras

import PIL as pil
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Root folder of the UCSDped1 data. Set the UCSD_PED1_PATH environment variable
# to point the notebook at another checkout; the fallback is the original
# hard-coded location, so existing runs are unaffected.
DATA_PATH = os.path.join(
    os.environ.get('UCSD_PED1_PATH', '/Users/gil-arnaudcoche/Documents/ijoutaku/data/UCSDped1/'),
    '',  # joining with '' appends a trailing separator only when one is missing
)
# The sub-folder paths are built by plain concatenation, hence the trailing
# separator enforced above.
TRAIN_PATH = f'{DATA_PATH}Train/'
TEST_PATH = f'{DATA_PATH}Test/'

In [3]:
class DataGenerator(keras.utils.Sequence):
    """Batch generator of (previous frames, next frame) pairs read from image folders.

    Every sub-folder of ``data_path`` is treated as one clip. Inside a clip the
    files ending in ``_TIF_EXTENSION`` are sorted by name and cut into sliding
    windows: ``_LOOKBACK`` consecutive files are the predictors and the file
    right after them is the target. Images are only read from disk when a
    batch is requested through ``__getitem__``.

    Args:
        data_path: Folder containing one sub-folder per clip. A trailing
            separator is optional.
        shuffle_at_start: If true, the samples are shuffled once before the
            first batches are built. Regardless of this flag they are
            reshuffled by every call to ``on_epoch_end``.
    """

    _IMAGE_WIDTH = 128
    _IMAGE_HEIGHT = 88
    _TIF_EXTENSION = '.tif'
    _LOOKBACK = 5
    _BATCH_SIZE = 3
    _SEQUENCE_SIZE = 200  # not read anywhere in this class

    def __init__(self, data_path, shuffle_at_start=False):
        # Let the Keras base class initialise its own state.
        super().__init__()
        self._data_path = data_path
        self._shuffle_at_start = shuffle_at_start
        self._sequences = list()
        self._batches = list()
        self._len = 0
        self.__load__()

    def __make_batches__(self, ):
        """Slice ``self._sequences`` into chunks of at most ``_BATCH_SIZE`` samples."""
        self._batches = [
            self._sequences[start:start + self._BATCH_SIZE]
            for start in range(0, len(self._sequences), self._BATCH_SIZE)
        ]
        self._len = len(self._batches)

    def __load__(self, ):
        """Scan ``data_path`` and rebuild the list of (predictor files, target file) samples."""
        sequences = list()
        for folder_name in sorted(os.listdir(self._data_path)):
            # os.path.join works whether or not data_path ends with a separator.
            folder = os.path.join(self._data_path, folder_name)
            if not os.path.isdir(folder):
                continue
            # Match on the real extension so names such as 'x.tif.bak' are skipped.
            image_files = sorted(
                os.path.join(folder, image_file)
                for image_file in os.listdir(folder)
                if image_file.endswith(self._TIF_EXTENSION)
            )
            # The range is empty when a folder holds _LOOKBACK files or fewer.
            for n in range(len(image_files) - self._LOOKBACK):
                predictors = image_files[n:n + self._LOOKBACK]
                predicted = image_files[n + self._LOOKBACK]
                sequences.append((predictors, predicted))
        # Assign instead of append so that calling __load__ again does not
        # duplicate samples.
        self._sequences = sequences
        if self._shuffle_at_start:
            random.shuffle(self._sequences)
        self.__make_batches__()

    def __len__(self, ):
        """Return the number of batches per epoch."""
        return self._len

    def _read_frame(self, image_file):
        """Load one image, resize it to the model resolution and scale it by 1/256.

        Returns a float16 array. NOTE(review): the callers assign the result to
        a (height, width) slice, so the files are assumed to be single-channel
        -- confirm for other datasets.
        """
        # The context manager releases the file handle; resize() returns an
        # independent in-memory image, so it stays valid after the close.
        with pil.Image.open(image_file) as image:
            resized = image.resize((self._IMAGE_WIDTH, self._IMAGE_HEIGHT))
        return np.array(resized, dtype=np.float16) / 256

    def __getitem__(self, index):
        """Return batch ``index`` as ``(X, y)`` float16 arrays.

        ``X`` has shape (samples, _LOOKBACK, _IMAGE_HEIGHT, _IMAGE_WIDTH, 1) and
        ``y`` has shape (samples, _IMAGE_HEIGHT, _IMAGE_WIDTH, 1).

        ``samples`` is the real size of the batch: the final batch may hold
        fewer than ``_BATCH_SIZE`` samples, and allocating the full size would
        pad it with all-zero input/target pairs that the model would train on.
        """
        batch = self._batches[index]
        samples = len(batch)
        X = np.zeros((samples, self._LOOKBACK, self._IMAGE_HEIGHT, self._IMAGE_WIDTH, 1), dtype=np.float16)
        y = np.zeros((samples, self._IMAGE_HEIGHT, self._IMAGE_WIDTH, 1), dtype=np.float16)
        for b, (predictors, predicted) in enumerate(batch):
            for step, predictor in enumerate(predictors):
                X[b, step, :, :, 0] = self._read_frame(predictor)
            y[b, :, :, 0] = self._read_frame(predicted)
        return X, y

    def on_epoch_end(self, ):
        """Reshuffle the samples and rebuild the batches."""
        random.shuffle(self._sequences)
        self.__make_batches__()

In [4]:
# Training generator over TRAIN_PATH; samples are shuffled once up front.
train_set = DataGenerator(data_path=TRAIN_PATH, shuffle_at_start=True)

In [5]:
# --- Training schedule ---
EPOCHS = 3
LOSS = 'mse'

# --- Optimizer settings ---
LEARNING_RATE = 1e-4
EPSILON = 1e-6

# --- Convolution / transposed-convolution layers ---
CONV_FILTERS = 32
CONV_KERNEL = (4, 4)
STRIDES = 2
PADDING = 'same'

# --- ConvLSTM layer ---
LSTM_FILTERS = 16
LSTM_KERNEL = (3, 3)

# --- Activation names ---
RELU = 'relu'
SIGMOID = 'sigmoid'

# Next-frame predictor: encode each input frame, summarise the clip with a
# ConvLSTM, then decode the summary back to a full-resolution frame.
model = keras.models.Sequential()

# Input: a clip of _LOOKBACK single-channel frames.
model.add(keras.layers.InputLayer(input_shape=(DataGenerator._LOOKBACK, DataGenerator._IMAGE_HEIGHT, DataGenerator._IMAGE_WIDTH, 1)))
# Per-frame strided convolution; with STRIDES = 2 it halves height and width.
model.add(keras.layers.TimeDistributed(keras.layers.Conv2D(CONV_FILTERS, CONV_KERNEL, activation=RELU, strides=STRIDES, padding=PADDING)))
model.add(keras.layers.LayerNormalization())
# return_sequences=False because we want only the last frame representation.
model.add(keras.layers.ConvLSTM2D(LSTM_FILTERS, LSTM_KERNEL, padding=PADDING, return_sequences=False))
model.add(keras.layers.LayerNormalization())

# Transposed convolution with the same stride restores the input resolution.
model.add(keras.layers.Conv2DTranspose(CONV_FILTERS, CONV_KERNEL, activation=RELU, strides=STRIDES, padding=PADDING))
model.add(keras.layers.LayerNormalization())
# Single-channel output; the sigmoid keeps pixel values in [0, 1].
model.add(keras.layers.Conv2D(1, CONV_KERNEL, activation=SIGMOID, padding=PADDING))

# summary() prints the table itself and returns None, so it must not be wrapped
# in print() (that appended a stray "None" to the output).
model.summary()
model.compile(loss=LOSS, optimizer=keras.optimizers.legacy.Adam(learning_rate=LEARNING_RATE, epsilon=EPSILON))

start = time.time()
# train_set is a keras.utils.Sequence that already yields whole batches, so
# batch_size is not passed to fit(). shuffle=False leaves the ordering to the
# generator, which reshuffles its samples in on_epoch_end().
model.fit(train_set, epochs=EPOCHS, shuffle=False)
end = time.time()

duration_seconds = int(end - start)
duration_minutes, leftover_seconds = divmod(duration_seconds, 60)
print(f'Training took {duration_minutes} min {leftover_seconds} s')


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 time_distributed (TimeDist  (None, 5, 44, 64, 32)     544       
 ributed)                                                        
                                                                 
 layer_normalization (Layer  (None, 5, 44, 64, 32)     64        
 Normalization)                                                  
                                                                 
 conv_lstm2d (ConvLSTM2D)    (None, 44, 64, 16)        27712     
                                                                 
 layer_normalization_1 (Lay  (None, 44, 64, 16)        32        
 erNormalization)                                                
                                                                 
 conv2d_transpose (Conv2DTr  (None, 88, 128, 32)       8224      
 anspose)                                               