In [1]:
import numpy as np
import os
import random
from os.path import join
from keras import layers
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, LSTM, Bidirectional, TimeDistributed, GRU, CuDNNGRU, Conv2D, MaxPooling2D, Flatten
from keras.utils import to_categorical
from keras_utils import set_keras_session
from plot_utils import plot_history
from tqdm import tqdm
from utils import take

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
from inception_generators import train_generator_single_images, valid_generator_single_images

In [3]:
# Project helper from keras_utils; presumably configures the TF/Keras session — confirm.
set_keras_session()

# Dataset directory name under 'datasets/'.
dataset = 'UCF11'
# The number of classes is the numeric suffix that follows the 'UCF' prefix.
nb_classes = int(dataset[len('UCF'):])

In [4]:
from inception_generators import get_dataset_split_structure, get_class_to_idx_dict

def dataset_loader(dataset_dir, split_key):
    """Load the saved per-video arrays for one split of a dataset.

    Args:
        dataset_dir: Directory that contains one sub-directory per split.
        split_key: Name of the split sub-directory to read (this notebook
            uses 'train' and 'valid').

    Returns:
        A dict mapping each class name to a dict of
        ``{video file name: object returned by np.load}``.
    """
    base_dir = join(dataset_dir, split_key)

    # Assumes the helper returns {class name: iterable of file names} — TODO confirm.
    dataset_structure = get_dataset_split_structure(base_dir)

    # Build a fresh mapping instead of overwriting the helper's dict while
    # iterating over it; the class-index lookup was never used and is gone.
    return {
        cl: {video: np.load(join(base_dir, cl, video)) for video in videos}
        for cl, videos in dataset_structure.items()
    }
        
# Both splits live under the same pre-extracted feature directory.
inception_features_dir = join('datasets', dataset, 'separate_frames_30_h_240_w_320_inception')

inception_train = dataset_loader(inception_features_dir, 'train')
inception_valid = dataset_loader(inception_features_dir, 'valid')

In [5]:
# Parameters that identify the pre-processed dataset directory.
frame_number = 30
width = 320
height = 240
channels = 3  # not referenced by any code visible in this notebook
padding = None

# NOTE(review): the base name already ends in '_padding_False'; setting `padding`
# appends a second '_padding_<value>' suffix. Confirm this matches the on-disk layout.
dataset_name= 'separate_frames_{}_h_{}_w_{}_yolo_padding_False'.format(frame_number, height, width)
if padding is not None:
    dataset_name += '_padding_{}'.format(padding)


dataset_dir = join('datasets', dataset, dataset_name)

batch_size = 64
# The loaded per-video arrays are handed to the generators as `additional_data`.
train_generator = train_generator_single_images(dataset_dir, batch_size, additional_data=inception_train)
valid_generator = valid_generator_single_images(dataset_dir, additional_data=inception_valid)

# The first item pulled from each generator is used as a size, not as a batch:
# num_train becomes steps_per_epoch and num_valid becomes validation_steps.
# NOTE(review): what the train generator's first value counts (videos? batches?) is
# not visible here — confirm that multiplying by frame_number gives steps per epoch.
num_train = next(train_generator) * frame_number
num_valid = next(valid_generator)
# Pull one real training batch (this consumes it) only to read the input shapes.
X_batch = next(train_generator)[0]

# Drop the leading batch axis; the order matches the model's two inputs.
inception_shape = X_batch[0].shape[1:]
yolo_shape = X_batch[1].shape[1:]

print(inception_shape, yolo_shape)

(2048,) (19, 19, 425)


In [6]:
from keras.applications.inception_v3 import InceptionV3
from keras.applications.densenet import DenseNet121
from keras.models import Model
from keras import backend as K
from keras import regularizers
from keras.layers import Lambda, Reshape
from keras import layers

# Two-input classifier: the first input is passed through unchanged, the second
# goes through a small CNN, and both are concatenated before the dense head.
# NOTE(review): the summary stored below this cell shows 32 filters for conv2d_1,
# while the code requests 128 — the saved output predates this code; re-run the cell.
inception_input = layers.Input(inception_shape)
inception_net = inception_input

yolo_input = layers.Input(yolo_shape)

# Convolutional branch over the second input.
x = yolo_input
x = Conv2D(128, (5,5), activation='relu', padding='same')(x)
x = Conv2D(64, (3,3), activation='relu', padding='same')(x)
x = MaxPooling2D()(x)
x = Dropout(0.2)(x)
x = Conv2D(128, (3,3), activation='relu', padding='same')(x)
x = MaxPooling2D()(x)  
x = Flatten()(x)
x = Dense(512, activation='relu')(x)
                    
# Merge the pass-through features with the CNN embedding.
x = layers.concatenate([inception_net, x])
# Disabled: x = Flatten()(x) — the Dense output above is already flat.
x = Dropout(0.5)(x)
x = Dense(512, activation='relu')(x)
x = Dropout(0.25)(x)
# One softmax output per class.
x = layers.Dense(nb_classes, activation='softmax')(x)
                    
model = Model(inputs=[inception_input, yolo_input], outputs=[x])
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            (None, 19, 19, 425)  0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 19, 19, 32)   340032      input_2[0][0]                    
__________________________________________________________________________________________________
conv2d_2 (Conv2D)               (None, 19, 19, 64)   18496       conv2d_1[0][0]                   
__________________________________________________________________________________________________
max_pooling2d_1 (MaxPooling2D)  (None, 9, 9, 64)     0           conv2d_2[0][0]                   
__________________________________________________________________________________________________
dropout_1 

In [7]:
from keras import metrics, callbacks, optimizers
from functools import partial

# Top-3 accuracy metric. A functools.partial has no __name__, so one is assigned;
# the training log reports the metric under this name ('top_3').
top_3_k_categorical_accuracy = partial(metrics.top_k_categorical_accuracy, k=3)
top_3_k_categorical_accuracy.__name__ = 'top_3'

# Early stopping has to wait longer than the LR schedule: with equal patience the
# run stops in the same epoch the learning rate would first be lowered, so the
# reduction could never take effect.
# Both callbacks only act once they are passed via `callbacks=` to the fit call.
early_stopper = callbacks.EarlyStopping(patience=10)
reduce_lr = callbacks.ReduceLROnPlateau(patience=5, factor=0.75)

sgd = optimizers.SGD(momentum=0.9, nesterov=True, lr=0.001)
# Alternative optimizer kept for experiments; the compile call below uses `sgd`.
adam = optimizers.Adam(lr=0.00005)

model.compile(optimizer=sgd, loss='categorical_crossentropy',
              metrics=['accuracy', top_3_k_categorical_accuracy])

In [8]:
def train_model(**kwargs):
    """Train the global ``model`` on ``train_generator`` and return the History.

    Keyword Args:
        epochs: Number of epochs to run (default 50).
        callbacks: Keras callbacks to use while training. Defaults to
            ``[early_stopper, reduce_lr]``; pass ``[]`` to train without any.

    Returns:
        The object returned by ``model.fit_generator``.
    """
    # Hand the notebook's early-stopping and LR-reduction callbacks to Keras;
    # without the `callbacks` argument they are never invoked.
    training_callbacks = kwargs.get('callbacks', [early_stopper, reduce_lr])

    return model.fit_generator(train_generator,
                               steps_per_epoch=num_train,
                               validation_data=valid_generator,
                               validation_steps=num_valid,
                               epochs=kwargs.get('epochs', 50),
                               callbacks=training_callbacks)

In [9]:
# Train with train_model's defaults and keep the returned History object.
# NOTE(review): the stored output ends in KeyboardInterrupt, so in that session
# `history` was never assigned and the cells below were not executed.
history = train_model()

Epoch 1/50
 131/2250 [>.............................] - ETA: 2:58:33 - loss: 4.6467 - acc: 0.0292 - top_3: 0.0681

KeyboardInterrupt: 

In [None]:
def collect_statistics_on_videos():
    """Print video-level validation accuracy of the global ``model``.

    Pulls ``num_valid`` items from ``valid_generator``. Each item is treated as
    the frames of one video plus their labels (presumably one-hot — confirm).
    Per-frame predictions are combined per video in two ways:

    * mean: class scores are averaged over the frames (top-1 and top-3);
    * highest count: majority vote over each frame's argmax (top-1).

    The three accuracies are printed; nothing is returned. When no video is
    produced, a notice is printed instead of dividing by zero.
    """
    from collections import Counter

    count_videos = 0
    count_top_1 = 0
    count_top_3 = 0
    count_top_1_argmax = 0

    for frames, labels in tqdm(take(valid_generator, num_valid), total=num_valid):

        # The first frame's label stands for the whole video
        # (assumes every frame of a video carries the same label).
        true_label_idx = np.argmax(labels[0])
        predicted_labels = model.predict(frames)

        ### Approach 1 (mean): average the scores over frames, then rank classes

        predicted_labels_mean = np.mean(predicted_labels, axis=0)
        idx_sorted_top_3 = np.argsort(predicted_labels_mean)[-3:]

        if true_label_idx in idx_sorted_top_3:
            count_top_3 += 1

        if true_label_idx == np.argmax(predicted_labels_mean):
            count_top_1 += 1

        ### Approach 2 (highest count): each frame votes for its argmax class

        votes = Counter(np.argmax(predicted_labels, axis=1))

        if votes.most_common(1)[0][0] == true_label_idx:
            count_top_1_argmax += 1

        count_videos += 1

    # Guard the ratios below: an empty validation run would divide by zero.
    if count_videos == 0:
        print('No validation videos were evaluated.')
        return

    print('Top 1 accuracy (using mean):', count_top_1 / count_videos)
    print('Top 3 accuracy (using mean):', count_top_3 / count_videos)
    print('Top 1 accuracy (using highest count):', count_top_1_argmax / count_videos)

In [None]:
# Print video-level accuracy on the validation generator using the current
# weights of the global `model`.
collect_statistics_on_videos()