In [None]:
import warnings


def warn(*args, **kwargs):
    """No-op stand-in for ``warnings.warn``: accepts any arguments and ignores them."""
    return None


# Replace the library function so that every later ``warnings.warn(...)`` call
# in this session is silently discarded.
warnings.warn = warn

In [None]:
import numpy as np
import os
from time import time
import cv2

### Build Data Loader

In [None]:
from keras.utils import Sequence
from keras.utils import np_utils

class DataGenerator(Sequence):
    """Data Generator inherited from keras.utils.Sequence
    Args:
        directory: the path of data set, and each sub-folder will be assigned to one class
        batch_size: the number of data points in each batch
        shuffle: whether to re-shuffle the data at the end of each epoch
            (the sample order is always shuffled once at construction)
        data_augmentation: stored as ``self.data_aug``; no method of this class
            reads it at the moment
    Note:
        If you want to load file with other data format, please fix the method of "load_data" as you want
    """
    def __init__(self, directory, batch_size=1, shuffle=True, data_augmentation=True):
        # Initialize the params
        self.batch_size = batch_size
        self.directory = directory
        self.shuffle = shuffle
        self.data_aug = data_augmentation
        # Collect every file path and build the "file path -> one-hot label" dictionary
        self.X_path, self.Y_dict = self.search_data()
        # Print basic statistics information (also creates the shuffled index array)
        self.print_stats()

    def search_data(self):
        """Scan ``self.directory`` and label every file by its class sub-folder.

        Returns:
            (X_path, Y_dict): the list of file paths, and a dict mapping each
            path to the one-hot label of the sub-folder that contains it.
        Raises:
            ValueError: if ``self.directory`` contains no sub-folder.
        """
        X_path = []
        Y_dict = {}
        # Each sub-folder is one class; plain files in the root (e.g. ".DS_Store")
        # are skipped instead of crashing the os.listdir call below.
        self.dirs = sorted(
            entry for entry in os.listdir(self.directory)
            if os.path.isdir(os.path.join(self.directory, entry))
        )
        if not self.dirs:
            raise ValueError(
                "No class sub-folders found in {!r}".format(self.directory))
        # Row i of the identity matrix is the one-hot label of class i
        one_hots = np.eye(len(self.dirs), dtype=np.float32)
        for i, folder in enumerate(self.dirs):
            folder_path = os.path.join(self.directory, folder)
            for file in os.listdir(folder_path):
                file_path = os.path.join(folder_path, file)
                # remember the file path together with its label
                X_path.append(file_path)
                Y_dict[file_path] = one_hots[i]
        return X_path, Y_dict

    def print_stats(self):
        """Compute dataset statistics, create the shuffled index array and print a summary."""
        self.n_files = len(self.X_path)
        self.n_classes = len(self.dirs)
        # Sample order used by __getitem__; shuffled once regardless of self.shuffle
        self.indexes = np.arange(len(self.X_path))
        np.random.shuffle(self.indexes)
        # Output the number of files/classes and the class-name -> index mapping
        print("Found {} files belonging to {} classes.".format(self.n_files,self.n_classes))
        for i, label in enumerate(self.dirs):
            print('%10s : '%(label),i)

    def __len__(self):
        """Return the number of batches per epoch (the last batch may be smaller)."""
        steps_per_epoch = np.ceil(len(self.X_path) / float(self.batch_size))
        return int(steps_per_epoch)

    def __getitem__(self, index):
        """Get the data of each batch

        Args:
            index: position of the batch within the epoch
        Returns:
            (batch_x, batch_y): arrays with the loaded samples and their one-hot labels
        """
        # indexes of the samples that belong to this batch
        batch_indexs = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
        # translate the indexes into file paths
        batch_path = [self.X_path[k] for k in batch_indexs]
        # load the data for these paths
        batch_x, batch_y = self.data_generation(batch_path)
        return batch_x, batch_y

    def on_epoch_end(self):
        """Re-shuffle the sample order after each epoch when shuffling is enabled."""
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def data_generation(self, batch_path):
        """Load every file in ``batch_path`` and stack samples and labels into arrays."""
        # load data into memory, you can change load_data to any method you want
        batch_x = [self.load_data(x) for x in batch_path]
        batch_y = [self.Y_dict[x] for x in batch_path]
        # labels stored in Y_dict are already one-hot encoded
        batch_x = np.array(batch_x)
        batch_y = np.array(batch_y)
        return batch_x, batch_y

    def normalize(self, data):
        """Standardize ``data`` to zero mean and unit variance.

        A constant input (std == 0) is only mean-centred, because dividing by
        zero would fill the sample with NaN/inf.
        """
        mean = np.mean(data)
        std = np.std(data)
        if std == 0:
            return data - mean
        return (data - mean) / std

    def uniform_sampling(self, video, target_frames=64):
        """Pick ``target_frames`` frames from ``video`` at a regular interval.

        Args:
            video: indexable sequence of frames (first axis is the frame index)
            target_frames: number of frames in the returned clip
        Returns:
            float32 array holding exactly ``target_frames`` frames
        Raises:
            ValueError: if ``video`` has no frames
        """
        len_frames = int(len(video))
        if len_frames == 0:
            raise ValueError("cannot sample frames from an empty video")
        # the step is rounded up, so at most target_frames frames are taken
        interval = int(np.ceil(len_frames / target_frames))
        sampled_video = [video[i] for i in range(0, len_frames, interval)]
        # pad a short result with the last num_pad frames of the video; when the
        # video has fewer frames than that, the first frame is used instead
        num_pad = target_frames - len(sampled_video)
        if num_pad > 0:
            sampled_video += [
                video[i] if -i <= len_frames else video[0]
                for i in range(-num_pad, 0)
            ]
        return np.array(sampled_video, dtype=np.float32)

    def load_data(self, path):
        """Load one ``.npy`` sample, resample it to 64 frames and normalize it."""
        # keep only the channels from index 3 onward, i.e. everything after the
        # first three channels (presumably the optical-flow channels stored
        # after RGB -- confirm against the dataset layout)
        data = np.load(path, mmap_mode='r')[...,3:]
        data = np.float32(data)
        # sampling 64 frames uniformly from the entire video
        data = self.uniform_sampling(video=data, target_frames=64)
        # normalize the data into mean=0, variance=1
        data = self.normalize(data)
        return data
    

### Build Model

In [None]:
from base_model import *

In [None]:
# Build the I3D network with its classification head and two output classes.
# No pretrained weights are requested (the ``weights`` argument is disabled).
model = Inception_Inflated3d(
    include_top=True,
    # weights='rgb_imagenet_and_kinetics',
    input_tensor=None,
    # presumably (frames, height, width, channels) -- TODO confirm in base_model
    input_shape=(64, 224, 224, 2),
    dropout_prob=0.5,
    endpoint_logit=False,
    classes=2,
)

model.summary()

### Set the GPUs and make it parallel

In [None]:
# Restrict this process to the devices with ids 2 and 3.
# NOTE(review): this is set after keras was imported and the model was built;
# confirm the backend has not initialised its devices before this point.
os.environ["CUDA_VISIBLE_DEVICES"] = "2,3"

from keras.utils import multi_gpu_model

# Wrap the single model so that it runs on the two selected GPUs.
parallel_model = multi_gpu_model(model, gpus=2)

### Model Compiling

In [None]:
from keras.optimizers import Adam, SGD

# SGD with Nesterov momentum and a small per-update learning-rate decay.
sgd = SGD(lr=0.01, momentum=0.9, decay=1e-6, nesterov=True)

# Compile the multi-GPU wrapper: it is the model that gets trained below.
parallel_model.compile(
    loss='categorical_crossentropy',
    optimizer=sgd,
    metrics=['accuracy'],
)

### Set Callbacks

- Learning Rate scheduler

In [None]:
import keras.backend as K
from keras.callbacks import LearningRateScheduler

def scheduler(epoch):
    """Return the learning rate to use for ``epoch``.

    On every 10th epoch (10, 20, ...; never epoch 0) the optimizer's learning
    rate is first multiplied by 0.8 in place; the current value of the
    optimizer's learning rate is then returned.
    """
    optimizer_lr = parallel_model.optimizer.lr
    is_decay_epoch = epoch != 0 and epoch % 10 == 0
    if is_decay_epoch:
        current_lr = K.get_value(optimizer_lr)
        K.set_value(optimizer_lr, current_lr * 0.8)
    return K.get_value(optimizer_lr)


# Callback that hands ``scheduler`` to Keras as the learning-rate schedule.
reduce_lr = LearningRateScheduler(scheduler)

- Saving the model after every epoch (the checkpoint file is overwritten each time) and logging the training metrics

In [None]:
from keras.callbacks import ModelCheckpoint, CSVLogger

class ParallelModelCheckpoint(ModelCheckpoint):
    """ModelCheckpoint that always checkpoints the model given at construction.

    Args:
        model: the model whose state should be written to ``filepath``
        filepath, monitor, verbose, save_best_only, save_weights_only, mode,
        period: forwarded unchanged to ``ModelCheckpoint``
    """

    def __init__(self, model, filepath, monitor='val_loss', verbose=0,
                 save_best_only=False, save_weights_only=False,
                 mode='auto', period=1):
        # keep a handle on the model that must be saved
        self.single_model = model
        super().__init__(filepath, monitor, verbose, save_best_only,
                         save_weights_only, mode, period)

    def set_model(self, model):
        # Ignore the model supplied by the training loop and register the
        # stored one instead, so that it is the one written to disk.
        super().set_model(self.single_model)

# The checkpoint directory must exist before the first save at the end of epoch 1.
os.makedirs('Logs', exist_ok=True)
# Checkpoint the template `model`, not the multi-GPU wrapper: Keras documents
# that a multi_gpu_model should be saved through the model that was passed to
# multi_gpu_model. Passing `parallel_model` here would make the callback save
# the wrapper, which is what ParallelModelCheckpoint exists to avoid.
check_point = ParallelModelCheckpoint(model, 'Logs/I3D-flow.hd5')



# Per-epoch metrics are appended to this CSV file (append=True keeps earlier runs).
filename = 'Logs/I3D-flow_log.csv'
# Create the log directory up front so that training does not fail when the
# logger opens the file.
os.makedirs(os.path.dirname(filename), exist_ok=True)
csv_logger = CSVLogger(filename, separator=',', append=True)

In [None]:
# Callbacks passed to training, in this order: checkpointing, CSV logging,
# learning-rate schedule.
callbacks_list = [
    check_point,
    csv_logger,
    reduce_lr,
]

### Model Training

- set essential params

In [None]:
# Training hyper-parameters consumed by the data generators and the fit call.
num_epochs = 50   # number of epochs to train for
num_workers = 16  # value passed as ``workers`` to fit_generator
batch_size = 8    # number of samples per batch

- init data generator

In [None]:
# Name of the dataset folder under ../Datasets; it holds "train" and "val" splits.
dataset = 'RWF2000-opt'

# Training data: re-shuffled every epoch (generator default), augmentation requested.
train_generator = DataGenerator(directory='../Datasets/{}/train'.format(dataset),
                                batch_size=batch_size,
                                data_augmentation=True)

# Validation data must be evaluated as-is: no augmentation and no per-epoch
# re-shuffling, so the validation metrics are comparable between epochs.
val_generator = DataGenerator(directory='../Datasets/{}/val'.format(dataset),
                              batch_size=batch_size,
                              shuffle=False,
                              data_augmentation=False)

- start to train

In [None]:
# Train the multi-GPU model. One epoch covers every batch of the training
# generator, and validation covers every batch of the validation generator.
hist = parallel_model.fit_generator(
    generator=train_generator,
    steps_per_epoch=len(train_generator),
    epochs=num_epochs,
    verbose=1,
    callbacks=callbacks_list,
    validation_data=val_generator,
    validation_steps=len(val_generator),
    max_queue_size=4,
    workers=num_workers,
)