In [22]:
import os
import matplotlib
import cv2
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
import sklearn.preprocessing
import os
import keras
import scipy.io as sio
from keras import Sequential
from keras.layers import Dense, Dropout
from keras.regularizers import l2
import tensorflow as tf
import tensorflow.keras.backend as K
import numpy as np
import scipy
from keras.utils.data_utils import get_file
from keras.models import Model


In [23]:
# Remote locations of the pretrained C3D assets. Both are handed to get_file()
# by the functions in this cell.
# Mean array that c3d_preprocess_input() subtracts from the resized frames.
C3D_MEAN_PATH = 'https://github.com/adamcasson/c3d/releases/download/v0.1/c3d_mean.npy'
# Weights file that C3D() loads when called with weights='sports1M'.
WEIGHTS_PATH = 'https://github.com/adamcasson/c3d/releases/download/v0.1/sports1M_weights_tf.h5'


def c3d_preprocess_input(video):
    """Turn a raw clip into a single-sample batch for the C3D network.

    Sixteen frames are sampled evenly across the clip, each one is resized
    to 128 rows by 171 columns, the stored C3D mean is subtracted, and the
    result is cropped to a 112x112 window.

    :param video: Clip to process, indexed as (frame, row, column, channel)
    :returns: Preprocessed clip with a leading sample axis of length 1
    :rtype: np.ndarray
    """
    # 16 evenly spaced frame positions, from the first frame to the last one
    sample_idx = np.ceil(np.linspace(0, video.shape[0] - 1, 16)).astype(int)
    sampled = video[sample_idx]

    # Resize each sampled frame into a float buffer (cv2 takes dsize as width, height)
    resized = np.zeros((sampled.shape[0], 128, 171, sampled.shape[3]))
    for pos in range(sampled.shape[0]):
        resized[pos, :, :, :] = cv2.resize(src=sampled[pos], dsize=(171, 128),
                                           interpolation=cv2.INTER_CUBIC)

    # The mean file is obtained through get_file (stored under the 'models'
    # cache subdirectory) and subtracted in place
    mean = np.load(get_file('c3d_mean.npy',
                            C3D_MEAN_PATH,
                            cache_subdir='models',
                            md5_hash='08a07d9761e76097985124d9e8b2fe34'))
    resized -= mean

    # Crop to 112x112, then prepend the sample axis
    return np.expand_dims(resized[:, 8:120, 30:142, :], axis=0)


def C3D(weights='sports1M'):
    """Build the complete C3D network.

    :param weights: 'sports1M' to load the pretrained weights, or None to
    leave the network with its initial (random) weights.
    :returns: The assembled network
    :rtype: tf.keras.models.Sequential
    :raises ValueError: If weights is neither 'sports1M' nor None
    """
    if weights not in {'sports1M', None}:
        raise ValueError('weights should be either be sports1M or None')

    # Input layout follows the backend's configured image data format
    channels_last = K.image_data_format() == 'channels_last'
    shape = (16, 112, 112, 3) if channels_last else (3, 16, 112, 112)

    layers = tf.keras.layers

    def conv(filters, name, **extra):
        # Every convolution is 3x3x3, ReLU-activated and 'same'-padded
        return layers.Conv3D(filters, 3, activation='relu', padding='same',
                             name=name, **extra)

    def pool(window, name, padding='valid'):
        # Pooling window and stride are always identical in this network
        return layers.MaxPooling3D(pool_size=window, strides=window,
                                   padding=padding, name=name)

    model = tf.keras.models.Sequential([
        conv(64, 'conv1', input_shape=shape),
        pool((1, 2, 2), 'pool1', padding='same'),

        conv(128, 'conv2'),
        pool((2, 2, 2), 'pool2'),

        conv(256, 'conv3a'),
        conv(256, 'conv3b'),
        pool((2, 2, 2), 'pool3'),

        conv(512, 'conv4a'),
        conv(512, 'conv4b'),
        pool((2, 2, 2), 'pool4'),

        conv(512, 'conv5a'),
        conv(512, 'conv5b'),
        layers.ZeroPadding3D(padding=(0, 1, 1)),
        pool((2, 2, 2), 'pool5'),

        layers.Flatten(),

        layers.Dense(4096, activation='relu', name='fc6'),
        layers.Dropout(0.5),
        layers.Dense(4096, activation='relu', name='fc7'),
        layers.Dropout(0.5),
        layers.Dense(487, activation='softmax', name='fc8'),
    ])

    if weights == 'sports1M':
        # Weights are fetched through get_file and then loaded into the model
        model.load_weights(get_file('sports1M_weights_tf.h5',
                                    WEIGHTS_PATH,
                                    cache_subdir='models',
                                    md5_hash='b7a93b2f9156ccbebe3ca24b41fc5402'))

    return model


def c3d_feature_extractor():
    """Creation of the feature extraction architecture. This network is
    formed by a subset of the original C3D architecture (from the
    beginning to fc6 layer)
    :returns: Feature extraction model
    :rtype: tf.keras.models.Model
    """
    base_model = C3D(weights='sports1M')
    fc6_output = base_model.get_layer('fc6').output
    # C3D() is assembled from tf.keras layers, so the truncated model is built
    # with tf.keras as well instead of mixing in the standalone keras Model.
    return tf.keras.models.Model(inputs=base_model.input, outputs=fc6_output)

In [24]:
def classifier_model():
    """Assemble the fully connected classifier that scores feature vectors.

    The stack is Dense(512, relu) -> Dropout -> Dense(32, no activation)
    -> Dropout -> Dense(1, sigmoid), taking 4096-dimensional inputs.

    :returns: Classifier model
    :rtype: keras.Model
    """
    def dense(units, **extra):
        # All dense layers share the same initializer and L2 penalty; a fresh
        # regularizer object is created for each layer.
        return Dense(units, kernel_initializer='glorot_normal',
                     kernel_regularizer=l2(0.001), **extra)

    return Sequential([
        dense(512, input_dim=4096, activation='relu'),
        Dropout(0.6),
        dense(32),
        Dropout(0.6),
        dense(1, activation='sigmoid'),
    ])

In [25]:
def conv_dict(dict2):
    """Prepare the dictionary of weights to be loaded by the network

    Only the keys '0', '1', ... up to len(dict2) - 1 are considered; any
    other key in dict2 is ignored. An entry of shape (0, 0) is copied as
    is. For every other entry, the arrays in its first row are collected
    into a list, and each array of shape (1, x) with x < 5000 is replaced
    by its single row so that it becomes one-dimensional.

    :param dict2: Dictionary to format
    :returns: The dictionary properly formatted
    :rtype: dict
    """
    def is_row_vector(shape):
        # Same condition as membership in [(1, x) for x in range(0, 5000)],
        # without building that list for every weight.
        return len(shape) == 2 and shape[0] == 1 and shape[1] < 5000

    converted = {}
    for i in range(len(dict2)):
        key = str(i)
        if key not in dict2:
            continue
        entry = dict2[key]
        if entry.shape == (0, 0):
            converted[key] = entry
        else:
            converted[key] = [
                weight[0] if is_row_vector(weight.shape) else weight
                for weight in entry[0]
            ]
    return converted

In [26]:
def load_weights(model, weights_file):
    """Copy pretrained weights from a .mat file into a model, layer by layer.

    The file is read with scipy's loadmat and reformatted by conv_dict; the
    layer at position n of model.layers receives the entry stored under the
    key str(n).

    :param model: keras model of the network
    :param weights_file: Path to the weights file
    :returns: The same model object, with its weights set
    :rtype: keras.model
    """
    layer_weights = conv_dict(sio.loadmat(weights_file))
    for position, layer in enumerate(model.layers):
        layer.set_weights(layer_weights[str(position)])
    return model

In [12]:
# Classifier construction plus pretrained weight loading, in one step
def create_classifier_model():
    """Build the classifier and fill it with the weights in './weights_L1L2.mat'.

    :returns: Classifier model with its weights loaded
    :rtype: keras.Model
    """
    return load_weights(classifier_model(), './weights_L1L2.mat')

## PARAMS

In [27]:
# Frame geometry settings. NOTE(review): these three names are not referenced
# by any function visible in this notebook - confirm whether they are still needed.
frame_height = 240
frame_width = 320
channels = 3

# Frames per clip: get_video_clips() uses it as both window size and stride,
# and run_demo() skips any clip shorter than this.
frame_count = 16

# Number of feature vectors run_demo() asks interpolate() to produce per video.
features_per_bag = 32

In [34]:
# Directory for generated output (presumably the saved GIF - confirm against run_demo()).
output_dir = './'
# Video that run_demo() reads, scores and visualizes.
sample_video_path = 'Anomaly-Videos-Part-4/Stealing/Stealing003_x264.mp4'

In [29]:
def visualize_clip(clip, convert_bgr=False, save_gif=False, file_path=None):
    """Animate the frames of a clip, optionally saving the animation as a GIF.

    :param clip: Sequence of frames (image arrays) to animate
    :param convert_bgr: If True, each frame is converted from BGR to RGB
    before it is drawn
    :param save_gif: If True, the animation is written to file_path;
    otherwise plt.show() is called
    :param file_path: Destination file, only used when save_gif is True
    """
    num_frames = len(clip)
    fig, ax = plt.subplots()
    fig.set_tight_layout(True)

    def update(i):
        frame = cv2.cvtColor(clip[i], cv2.COLOR_BGR2RGB) if convert_bgr else clip[i]
        # Draw on this figure's own axes rather than on whichever axes pyplot
        # considers current, and drop the previous frame first so that image
        # artists do not pile up over the course of the animation.
        ax.clear()
        image = ax.imshow(frame)
        return [image]

    # FuncAnimation calls 'update' once for every frame index of the clip,
    # with a nominal delay of 1 ms between frames.
    anim = FuncAnimation(fig, update, frames=np.arange(0, num_frames), interval=1)
    if save_gif:
        anim.save(file_path, dpi=80, writer='imagemagick')
    else:
        plt.show()


def visualize_predictions(video_path, predictions, save_path):
    """Animate a video above a growing plot of its per-frame predictions.

    :param video_path: Path to the video whose frames are shown
    :param predictions: One prediction per video frame; its length must
    equal the number of frames read from video_path
    :param save_path: Destination file for the animation. If falsy, the
    animation is passed to plt.show() instead of being saved
    """
    frames = get_video_frames(video_path)
    assert len(frames) == len(predictions)

    # Both panels are created in a single call. Creating a full-figure axes
    # first and then adding plt.subplot(2, 1, k) panels on top of it leaves
    # the first axes behind on Matplotlib versions that no longer remove
    # overlapping axes automatically.
    fig, (fig_frame, fig_prediction) = plt.subplots(2, 1, figsize=(5, 5))
    fig.set_tight_layout(True)

    # Top panel: the current video frame
    img = fig_frame.imshow(frames[0])

    # Bottom panel: prediction curve, with fixed axes so it does not rescale
    line = matplotlib.lines.Line2D([], [])
    fig_prediction.set_xlim(0, len(frames))
    fig_prediction.set_ylim(0, 1.15)
    fig_prediction.add_line(line)

    def update(i):
        # Show frame i together with all predictions that precede it
        line.set_data(range(0, i), predictions[0:i])
        img.set_data(frames[i])
        return [line, img]

    # FuncAnimation calls 'update' for every 10th frame index, with a nominal
    # delay of 1 ms between steps, and does not repeat.
    anim = FuncAnimation(fig, update, frames=np.arange(0, len(frames), 10), interval=1, repeat=False)

    if save_path:
        anim.save(save_path, dpi=200, writer='imagemagick')
    else:
        plt.show()

In [30]:
def sliding_window(arr, size, stride):
    """Apply sliding window to an array, getting chunks
    of specified size using the specified stride

    The last chunk may be shorter than size when the input does not divide
    evenly. In that case the result is a one-dimensional object array whose
    elements are the individual chunks; otherwise the chunks are stacked
    into a regular array.

    :param arr: Array to be divided
    :param size: Size of the chunks
    :param stride: Number of frames to skip for the next chunk
    :returns: Tensor with the resulting chunks
    :rtype: np.ndarray
    """
    num_chunks = int((len(arr) - size) / stride) + 2
    chunks = []
    for start in range(0, num_chunks * stride, stride):
        chunk = arr[start:start + size]
        if len(chunk) > 0:
            chunks.append(chunk)

    if len({len(chunk) for chunk in chunks}) > 1:
        # Chunks of different lengths cannot be stacked; recent NumPy versions
        # raise ValueError on np.array() for such input, so the object array
        # is built explicitly, one chunk per element.
        ragged = np.empty(len(chunks), dtype=object)
        for pos, chunk in enumerate(chunks):
            ragged[pos] = chunk
        return ragged
    return np.array(chunks)


def interpolate(features, features_per_bag):
    """Transform a bag with an arbitrary number of features into a bag
    with a fixed amount, using interpolation of consecutive features

    The rows of features are split into features_per_bag consecutive
    segments; each segment is averaged and scaled to unit L2 norm. A
    segment whose average has zero norm is reported with a printed
    "Error" and stored unscaled (all zeros) instead of being divided by
    zero, which would fill the row with NaN.

    :param features: Bag of features to pad, one feature vector per row
    (array or nested sequence)
    :param features_per_bag: Number of features to obtain
    :returns: Interpolated features, shape (features_per_bag, feature size)
    :rtype: np.ndarray
    """
    # Accept nested lists as well as arrays; the row indexing below needs an array
    features = np.asarray(features)
    feature_size = features.shape[1]
    interpolated_features = np.zeros((features_per_bag, feature_size))

    # features_per_bag + 1 boundaries delimit features_per_bag segments
    boundaries = np.round(
        np.linspace(0, len(features) - 1, num=features_per_bag + 1)).astype(int)

    for slot, (start, end) in enumerate(zip(boundaries[:-1], boundaries[1:])):
        assert end >= start

        if start == end:
            temp_vect = features[start, :]
        else:
            # Both boundary rows are included in the average
            temp_vect = np.mean(features[start:end + 1, :], axis=0)

        # The norm is checked before dividing; checking afterwards can never
        # detect a zero vector because the division has already produced NaN
        norm = np.linalg.norm(temp_vect)
        if norm == 0:
            print("Error")
        else:
            temp_vect = temp_vect / norm

        interpolated_features[slot, :] = temp_vect

    return interpolated_features


def extrapolate(outputs, num_frames):
    """Stretch a short sequence of outputs to a given length.

    num_frames positions are spread evenly over the index range of
    outputs and rounded; the output found at each rounded position is
    repeated in the result.

    :param outputs: Array of predicted outputs
    :param num_frames: Expected size of the output array
    :returns: Array with num_frames entries taken from outputs
    :rtype: np.ndarray
    """
    positions = np.round(np.linspace(0, len(outputs) - 1, num=num_frames))
    return np.array([outputs[int(pos)] for pos in positions])


In [17]:
def get_video_frames(video_path):
    """Reads the video given a file path
    :param video_path: Path to the video
    :returns: Frames of the video in reading order, each converted from
    BGR to RGB. The list is empty when no frame could be read.
    :rtype: list
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # No further frame could be read
                break
            frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    finally:
        # Release the capture even if reading or converting a frame fails
        cap.release()
    return frames

def get_video_clips(video_path):
    """Read a video and cut it into consecutive, non-overlapping clips.

    Window size and stride are both frame_count, so clips do not overlap.

    :param video_path: Path to the video
    :returns: Tuple of (clips produced by sliding_window, total number of
    frames read from the video)
    :rtype: tuple
    """
    frames = get_video_frames(video_path)
    return sliding_window(frames, frame_count, frame_count), len(frames)


In [None]:
def run_demo():
    """Score the sample video and save an animation of the predictions.

    The video at sample_video_path is split into clips of frame_count
    frames, C3D fc6 features are extracted for every full-length clip, the
    features are resampled to features_per_bag vectors, scored by the
    classifier, stretched back to one score per frame and rendered to
    '<video name>.gif' inside output_dir.

    :raises ValueError: If the video yields no clip of frame_count frames
    """
    # File name without its extension (dots inside the stem are kept)
    video_name = os.path.splitext(os.path.basename(sample_video_path))[0]

    # read video
    video_clips, num_frames = get_video_clips(sample_video_path)

    print("Number of clips in the video : ", len(video_clips))

    # build models
    feature_extractor = c3d_feature_extractor()
    classifier = create_classifier_model()

    print("Models initialized")

    # extract features
    rgb_features = []
    for i, clip in enumerate(video_clips):
        clip = np.array(clip)
        if len(clip) < frame_count:
            # Trailing clip with too few frames for the feature extractor
            continue

        clip = c3d_preprocess_input(clip)
        rgb_features.append(feature_extractor.predict(clip)[0])

        print("Processed clip : ", i)

    if not rgb_features:
        # Without this check the failure surfaces later as an obscure IndexError
        raise ValueError('No clip with at least %d frames could be read from %s'
                         % (frame_count, sample_video_path))

    rgb_features = np.array(rgb_features)
    rgb_feature_bag = interpolate(rgb_features, features_per_bag)

    # classify using the trained classifier model
    predictions = classifier.predict(rgb_feature_bag)
    predictions = np.array(predictions).squeeze()
    predictions = extrapolate(predictions, num_frames)

    # visualize predictions
    save_path = os.path.join(output_dir, video_name + '.gif')
    visualize_predictions(sample_video_path, predictions, save_path)
    # Success is reported only once the animation has actually been written
    print('Executed Successfully - '+video_name + '.gif saved')


# Run the demo only when this code executes as the main module, not on import.
if __name__ == '__main__':
    run_demo()

  from ipykernel import kernelapp as app


Number of clips in the video :  224
Models initialized
Processed clip :  0
Processed clip :  1
Processed clip :  2
Processed clip :  3
Processed clip :  4
Processed clip :  5
Processed clip :  6
Processed clip :  7
Processed clip :  8
Processed clip :  9
Processed clip :  10
Processed clip :  11
Processed clip :  12
Processed clip :  13
Processed clip :  14
Processed clip :  15
Processed clip :  16
Processed clip :  17
Processed clip :  18
Processed clip :  19
Processed clip :  20
Processed clip :  21
Processed clip :  22
Processed clip :  23
Processed clip :  24
Processed clip :  25
Processed clip :  26
Processed clip :  27
Processed clip :  28
Processed clip :  29
Processed clip :  30
Processed clip :  31
Processed clip :  32
Processed clip :  33
Processed clip :  34
Processed clip :  35
Processed clip :  36
Processed clip :  37
Processed clip :  38
Processed clip :  39
Processed clip :  40
Processed clip :  41
Processed clip :  42
Processed clip :  43
Processed clip :  44
Processed 

MovieWriter imagemagick unavailable; using Pillow instead.
