In [1]:
import os
import random
import sys
from os.path import join

import numpy as np
from keras import layers
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, LSTM, Bidirectional, TimeDistributed, GRU, CuDNNGRU, Conv2D, MaxPooling2D, Flatten
from keras.utils import to_categorical

from keras_utils import set_keras_session
from inception_generators import frames_generator_rnn,load_whole_dataset
from plot_utils import plot_history

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Project helper from keras_utils; presumably configures the Keras session - confirm there.
set_keras_session()

# The class count is read from the digits that follow the 'UCF' prefix, e.g. 'UCF11' -> 11.
dataset = 'UCF11'
nb_classes = int(dataset[len('UCF'):])

In [3]:
from inception_generators import get_dataset_split_structure, get_class_to_idx_dict

def dataset_loader(dataset_dir, split_key):
    """Load every saved array of one dataset split into memory.

    Args:
        dataset_dir: Root directory of the feature dataset.
        split_key: Name of the split sub-directory (this notebook uses
            'train' and 'valid').

    Returns:
        The mapping returned by ``get_dataset_split_structure`` for
        ``<dataset_dir>/<split_key>``, with each class entry replaced by a
        dict ``{video file name: array}``. Each array is whatever ``np.load``
        reads from ``<dataset_dir>/<split_key>/<class>/<video file name>``.
    """
    base_dir = join(dataset_dir, split_key)
    dataset_structure = get_dataset_split_structure(base_dir)

    # Iterate over a snapshot of the class names: the entries are reassigned
    # inside the loop, so the live key view is not used.
    for cl in list(dataset_structure):
        dataset_structure[cl] = {
            video: np.load(join(base_dir, cl, video))
            for video in dataset_structure[cl]
        }

    return dataset_structure
        
# Both splits are read from the same feature directory; only the split name differs.
inception_features_dir = join('datasets', dataset, 'separate_frames_50_h_240_w_320_inception')
inception_train, inception_valid = (
    dataset_loader(inception_features_dir, split_key) for split_key in ('train', 'valid')
)

In [4]:
# Parameters that identify the frame dataset directory on disk.
frame_number, height, width, channels = 50, 240, 320, 3
padding = None
batch_size = 16

dataset_name = 'separate_frames_{}_h_{}_w_{}_yolo_padding_False'.format(frame_number, height, width)
if padding is not None:
    dataset_name = dataset_name + '_padding_{}'.format(padding)
dataset_dir = join('datasets', dataset, dataset_name)

train_generator = frames_generator_rnn(dataset_dir, 'train', batch_size, inception_train)
valid_generator = frames_generator_rnn(dataset_dir, 'valid', batch_size, inception_valid)

# The first item pulled from each generator is kept as that split's step count
# (train_model passes these values as steps_per_epoch / validation_steps).
num_train = next(train_generator)
num_valid = next(valid_generator)

# Peek at one training batch to read the per-sample shapes of the two model inputs.
X_batch = next(train_generator)[0]
inception_shape, yolo_shape = X_batch[0].shape[1:], X_batch[1].shape[1:]

print(inception_shape, yolo_shape)

(50, 2048) (50, 19, 19, 425)


In [5]:
from keras.applications.inception_v3 import InceptionV3
from keras.applications.densenet import DenseNet121
from keras.models import Model
from keras import backend as K
from keras import regularizers
from keras.layers import Lambda, Reshape
from keras import layers

# Branch 1: the Inception feature sequence, projected to 128 units.
inception_input = layers.Input(inception_shape)
inception_net = Dense(128, activation='relu')(inception_input)

# Branch 2: the YOLO feature-map sequence, passed through a small CNN that is
# wrapped in TimeDistributed layer by layer.
yolo_input = layers.Input(yolo_shape)
per_frame_layers = [
    Conv2D(32, (5, 5), activation='relu', padding='same'),
    Conv2D(64, (3, 3), activation='relu', padding='same'),
    MaxPooling2D(),
    Dropout(0.2),
    Conv2D(128, (3, 3), activation='relu', padding='same'),
    MaxPooling2D(),
    Flatten(),
    Dense(128, activation='relu'),
]
x = yolo_input
for per_frame_layer in per_frame_layers:
    x = TimeDistributed(per_frame_layer)(x)

# Merge both branches, then classify the merged sequence with a single LSTM.
x = layers.concatenate([inception_net, x])
x = Dropout(0.5)(x)
x = LSTM(256)(x)
x = Dense(nb_classes, activation='softmax')(x)

model = Model(inputs=[inception_input, yolo_input], outputs=[x])
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            (None, 50, 19, 19, 4 0                                            
__________________________________________________________________________________________________
time_distributed_1 (TimeDistrib (None, 50, 19, 19, 3 340032      input_2[0][0]                    
__________________________________________________________________________________________________
time_distributed_2 (TimeDistrib (None, 50, 19, 19, 6 18496       time_distributed_1[0][0]         
__________________________________________________________________________________________________
time_distributed_3 (TimeDistrib (None, 50, 9, 9, 64) 0           time_distributed_2[0][0]         
__________________________________________________________________________________________________
time_distr

In [6]:
from keras import metrics, callbacks, optimizers
from functools import partial

# Top-3 accuracy metric. functools.partial objects carry no __name__, so one is
# assigned explicitly here.
top_3_k_categorical_accuracy = partial(metrics.top_k_categorical_accuracy, k=3)
top_3_k_categorical_accuracy.__name__ = 'top_3'

# Callbacks are only created here; the compile call below does not use them.
early_stopper = callbacks.EarlyStopping(patience=5)
reduce_lr = callbacks.ReduceLROnPlateau(patience=5, factor=0.75)

# Plain-momentum SGD is defined for later use; this cell compiles with Adam.
sgd = optimizers.SGD(momentum=0.9, lr=0.001)
adam = optimizers.Adam(lr=0.00005)

model.compile(optimizer=adam, loss='categorical_crossentropy',
              metrics=['accuracy'])

In [7]:
def train_model(**kwargs):
    """Fit the global ``model`` on ``train_generator`` and return the Keras history.

    Validation runs on ``valid_generator``; the step counts come from the
    globals ``num_train`` and ``num_valid``.

    Keyword Args:
        epochs: Number of epochs to train for (default 50).
        callbacks: Optional list of Keras callbacks forwarded to
            ``fit_generator`` (default None, i.e. no extra callbacks).

    Any other keyword argument is ignored.

    Returns:
        The object returned by ``model.fit_generator``.
    """
    return model.fit_generator(
        train_generator,
        steps_per_epoch=num_train,
        validation_data=valid_generator,
        validation_steps=num_valid,
        epochs=kwargs.get('epochs', 50),
        callbacks=kwargs.get('callbacks'),
    )

In [None]:
# Train with train_model's default of 50 epochs and keep the returned history object.
history = train_model()

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50

KeyboardInterrupt: 

In [None]:
# Recompile the same model with momentum SGD and the extra top-3 metric, then
# train on in-memory arrays with the ReduceLROnPlateau callback.
# NOTE(review): X_train, Y_train, X_valid and Y_valid are not defined in any cell
# visible in this notebook, and the model is built with two inputs - confirm
# where these arrays come from before running this cell.
sgd = optimizers.SGD(momentum=0.9, lr=0.0005)

model.compile(optimizer=sgd, loss='categorical_crossentropy', 
              metrics=['accuracy', top_3_k_categorical_accuracy])

history = model.fit(X_train, Y_train, validation_data=(X_valid, Y_valid), 
          batch_size=64, epochs=50, callbacks=[reduce_lr])

In [None]:
# Print arrays in full. A NaN threshold is rejected by current NumPy releases
# with a ValueError; sys.maxsize is the supported way to disable summarization.
np.set_printoptions(threshold=sys.maxsize)
# Predicted class per sample = index of the largest score along axis 1.
# NOTE(review): X_valid is not defined in any cell visible here - confirm its source.
Y_predict = model.predict(X_valid)
class_predict = Y_predict.argmax(axis=1)
print(class_predict)

In [None]:
# True class per sample, taken as the argmax along axis 1 of Y_valid
# (presumably one-hot labels - confirm).
# NOTE(review): Y_valid is not defined in any cell visible here.
class_true = np.argmax(Y_valid, axis=1)
print(class_true)

In [None]:
# Report every sample whose predicted class differs from its true class,
# followed by the full prediction vector for that sample.
for idx, true_label in enumerate(class_true):
    predicted_label = class_predict[idx]
    if true_label == predicted_label:
        continue
    print(idx, 'true:', true_label, '  predict:', predicted_label, '  prop_true:', Y_predict[idx][true_label])
    print(Y_predict[idx])
    print()

In [None]:
from count_num_frames_on_dataset import get_number_frames_from_video
from inception_generators import get_dataset_split_structure, get_class_to_idx_dict

def dataset_loader_with_frames(dataset_dir, split_key):
    """Yield one record per saved feature file of a split, with its video's frame count.

    The original video is looked up under
    ``datasets/<dataset>/video/<class>/<video subfolder>/<name>.mpg``, where
    ``dataset`` is the notebook-level global and the subfolder is the file
    name up to its last underscore.

    Args:
        dataset_dir: Root directory of the feature dataset.
        split_key: Name of the split sub-directory (e.g. 'train' or 'valid').

    Yields:
        Tuples ``(features, class_idx, number_frames, video_name)``:
        the array loaded with ``np.load``, the class index from
        ``get_class_to_idx_dict``, the value returned by
        ``get_number_frames_from_video`` for the original video, and the
        feature file name without its extension.
    """
    base_dir = join(dataset_dir, split_key)

    dataset_structure = get_dataset_split_structure(base_dir)
    all_classes = dataset_structure.keys()
    class_to_idx_dict = get_class_to_idx_dict(all_classes)

    for cl in all_classes:
        class_idx = class_to_idx_dict[cl]

        for video in dataset_structure[cl]:
            # ex: video == v_shooting_22_05.npy   or  video == v_walk_dog_10_01.npy
            # splitext drops the extension including its dot, so the yielded
            # name has no trailing '.' and no extension length is assumed.
            video_name = os.path.splitext(video)[0]
            video_subfolder = video[:video.rfind('_')]
            video_mpg = video_name + '.mpg'

            original_video = join('datasets', dataset, 'video', cl, video_subfolder, video_mpg)
            number_frames = get_number_frames_from_video(original_video)

            inception_features = np.load(join(base_dir, cl, video))

            yield inception_features, class_idx, number_frames, video_name
                

def load_whole_dataset_with_frames(dataset_dir, split_key):
    """Collect a whole split into arrays and yield them as one tuple.

    This is a generator that yields exactly once: ``(X, Y, frames, filenames)``.
    Each element is a NumPy array built from the corresponding field of the
    records produced by ``dataset_loader_with_frames``, and ``Y`` is passed
    through ``to_categorical``.
    """
    records = list(dataset_loader_with_frames(dataset_dir, split_key))

    # Transpose the list of records into one array per field.
    columns = [np.array(column) for column in zip(*records)]
    X, Y, frames, filenames = columns

    yield X, to_categorical(Y), frames, filenames

In [None]:
# The loader is a generator that yields a single tuple, so this list holds
# exactly one (X, Y, frames, filenames) entry for the 'valid' split.
# NOTE(review): dataset_dir is built from the '..._yolo_padding_False' dataset
# name, while the loader names its arrays inception features - confirm this is
# the intended directory.
d = list(load_whole_dataset_with_frames(dataset_dir, 'valid'))

In [None]:
# Transpose the one-element list: each name is bound to a 1-tuple wrapping the
# corresponding array.
X_valid_2, Y_valid_2, frames, filenames = zip(*d)

In [None]:
# Unwrap the 1-tuples produced by zip(*d) so each name refers to the array itself.
X_valid_2, Y_valid_2, frames, filenames = X_valid_2[0], Y_valid_2[0], frames[0], filenames[0]

In [None]:
# Predicted class per reloaded validation sample = index of the largest score along axis 1.
Y_predict = model.predict(X_valid_2)
class_predict = Y_predict.argmax(axis=1)
print(class_predict)

In [None]:
# True class per sample: argmax along axis 1 of the to_categorical labels
# produced by the loader.
class_true = np.argmax(Y_valid_2, axis=1)
print(class_true)

In [None]:
# Frame counts and true classes of the misclassified samples, collected for the
# histogram cells further down.
frames_incorrect = []
class_incorrect = []

for idx, true_label in enumerate(class_true):
    predicted_label = class_predict[idx]
    if true_label == predicted_label:
        continue

    print(idx, 'true:', true_label, '  predict:', predicted_label, '  prop_true:', Y_predict[idx][true_label])
    print('num_frames:', frames[idx], 'filename:', filenames[idx])
    print(Y_predict[idx])
    print()

    frames_incorrect.append(frames[idx])
    class_incorrect.append(true_label)

In [None]:
# Class index per training sample (argmax along axis 1 of Y_train).
# NOTE(review): Y_train is not defined in any cell visible here - confirm its source.
class_train = np.argmax(Y_train, axis=1)

In [None]:
from collections import Counter

# Number of training samples per class index; displayed as the cell output.
Counter(class_train)

In [None]:
import matplotlib.pyplot as plt

# Side-by-side histograms: frame counts of all validation videos (left) and of
# the misclassified ones (right).
fig, (ax_all, ax_incorrect) = plt.subplots(1, 2, figsize=(10, 6))

ax_all.set_title('Number of frames inside validation set')
ax_all.hist(frames)

ax_incorrect.set_title('Number of frames of incorrect predicted validation videos')
ax_incorrect.hist(frames_incorrect)

In [None]:
# Side-by-side histograms: class indices of all validation samples (left) and
# true classes of the misclassified ones (right).
# NOTE(review): Y_valid is not defined in any cell visible here.
fig, (ax_all, ax_incorrect) = plt.subplots(1, 2, figsize=(10, 6))

ax_all.hist(np.argmax(Y_valid, axis=1))

ax_incorrect.hist(class_incorrect)

In [None]:
# Histogram of class indices in the training labels.
# NOTE(review): Y_train is not defined in any cell visible here.
plt.hist(np.argmax(Y_train, axis=1))

In [None]:
# Same loading pattern as for 'valid': a one-element list holding the
# (X, Y, frames, filenames) tuple of the 'train' split. Rebinds `d`.
d = list(load_whole_dataset_with_frames(dataset_dir, 'train'))

In [None]:
# Transpose the one-element list; each name is a 1-tuple, so the arrays are
# reached with [0].
X_train_2, Y_train_2, frames_train, filenames_train = zip(*d)

In [None]:
# Histogram of training frame counts, restricted to values below 600.
train_frame_counts = frames_train[0]
plt.hist(train_frame_counts[train_frame_counts < 600])

In [None]:
# Pick three training samples for the spread comparisons in the cells below.
# Note the numbering: frames_2 is the sample at index 1 and frames_1 is the last sample.
# NOTE(review): X_train is not defined in any cell visible here.
frames_0 = X_train[0]
frames_2 = X_train[1]
frames_1 = X_train[-1]

In [None]:
# Sum of the standard deviations taken along axis 0 over the first 50 entries of
# frames_0 (one sample on its own).
np.sum(np.std(frames_0[0:50], axis=0))

In [None]:
# Same spread measure for a mix of two samples: the first 25 entries of frames_2
# followed by the first 25 entries of frames_1.
np.sum(np.std(np.concatenate([frames_2[0:25], frames_1[:25]]), axis=0))

In [None]:
# Same spread measure for the first 25 entries of frames_0 followed by the first
# 25 entries of frames_2.
np.sum(np.std(np.concatenate([frames_0[0:25], frames_2[:25]]), axis=0))