In [1]:
"""
Process an image that we can pass to our networks.
"""
from keras.preprocessing.image import img_to_array, load_img
import numpy as np

def process_image(image, target_shape):
    """Given an image, process it and return the array."""
    # Load the image.
    h, w, _ = target_shape
    image = load_img(image, target_size=(h, w))

    # Turn it into numpy, normalize and return.
    img_arr = img_to_array(image)
    x = (img_arr / 255.).astype(np.float32)

    return x

Using TensorFlow backend.


In [2]:
"""
Class for managing our data.
"""
import csv
import numpy as np
import random
import glob
import os.path
import pandas as pd
import sys
import operator
from keras.utils import np_utils

class DataSet():

    def __init__(self, seq_length=40, class_limit=None, image_shape=(224, 224, 3)):
        """Constructor.
        seq_length = (int) the number of frames to consider
        class_limit = (int) number of classes to limit the data to.
            None = no limit.
        """
        self.seq_length = seq_length
        self.class_limit = class_limit
        self.sequence_path = './data/sequences/'
        self.max_frames = 300  # max number of frames a video can have for us to use it

        # Get the data.
        self.data = self.get_data()

        # Get the classes.
        self.classes = self.get_classes()

        # Now do some minor data cleaning.
        self.data = self.clean_data()

        self.image_shape = image_shape

    @staticmethod
    def get_data():
        """Load our data from file."""
        with open('./data/data_file.csv', 'r') as fin:
            reader = csv.reader(fin)
            data = list(reader)

        return data

    def clean_data(self):
        """Limit samples to greater than the sequence length and fewer
        than N frames. Also limit it to classes we want to use."""
        data_clean = []
        for item in self.data:
            if int(item[3]) >= self.seq_length and int(item[3]) <= self.max_frames \
                    and item[1] in self.classes:
                data_clean.append(item)

        return data_clean

    def get_classes(self):
        """Extract the classes from our data. If we want to limit them,
        only return the classes we need."""
        classes = []
        for item in self.data:
            if item[1] not in classes:
                classes.append(item[1])

        # Sort them.
        classes = sorted(classes)

        # Return.
        if self.class_limit is not None:
            return classes[:self.class_limit]
        else:
            return classes

    def get_class_one_hot(self, class_str):
        """Given a class as a string, return its number in the classes
        list. This lets us encode and one-hot it for training."""
        # Encode it first.
        label_encoded = self.classes.index(class_str)

        # Now one-hot it.
        label_hot = np_utils.to_categorical(label_encoded, len(self.classes))
        label_hot = label_hot[0]  # just get a single row

        return label_hot

    def split_train_test(self):
        """Split the data into train and test groups."""
        train = []
        test = []
        for item in self.data:
            if item[0] == 'train':
                train.append(item)
            else:
                test.append(item)
        return train, test

    def get_all_sequences_in_memory(self, batch_Size, train_test, data_type, concat=False):
        """
        This is a mirror of our generator, but attempts to load everything into
        memory so we can train way faster.
        """
        # Get the right dataset.
        train, test = self.split_train_test()
        data = train if train_test == 'train' else test

        print("Getting %s data with %d samples." % (train_test, len(data)))

        X, y = [], []
        for row in data:

            sequence = self.get_extracted_sequence(data_type, row)

            if sequence is None:
                print("Can't find sequence. Did you generate them?")
                raise

            if concat:
                # We want to pass the sequence back as a single array. This
                # is used to pass into a CNN or MLP, rather than an RNN.
                sequence = np.concatenate(sequence).ravel()

            X.append(sequence)
            y.append(self.get_class_one_hot(row[1]))

        return np.array(X), np.array(y)

    def frame_generator(self, batch_size, train_test, data_type, concat=False):
        """Return a generator that we can use to train on. There are
        a couple different things we can return:
        data_type: 'features', 'images'
        """
        # Get the right dataset for the generator.
        train, test = self.split_train_test()
        data = train if train_test == 'train' else test

        print("Creating %s generator with %d samples." % (train_test, len(data)))

        while 1:
            X, y = [], []

            # Generate batch_size samples.
            for _ in range(batch_size):
                # Reset to be safe.
                sequence = None

                # Get a random sample.
                sample = random.choice(data)

                # Check to see if we've already saved this sequence.
                if data_type is "images":
                    # Get and resample frames.
                    frames = self.get_frames_for_sample(sample)
                    frames = self.rescale_list(frames, self.seq_length)

                    # Build the image sequence
                    sequence = self.build_image_sequence(frames)
                else:
                    # Get the sequence from disk.
                    sequence = self.get_extracted_sequence(data_type, sample)

                if sequence is None:
                    print("Can't find sequence. Did you generate them?")
                    sys.exit()  # TODO this should raise

                if concat:
                    # We want to pass the sequence back as a single array. This
                    # is used to pass into an MLP rather than an RNN.
                    sequence = np.concatenate(sequence).ravel()

                X.append(sequence)
                y.append(self.get_class_one_hot(sample[1]))

            yield np.array(X), np.array(y)

    def build_image_sequence(self, frames):
        """Given a set of frames (filenames), build our sequence."""
        return [process_image(x, self.image_shape) for x in frames]

    def get_extracted_sequence(self, data_type, sample):
        """Get the saved extracted features."""
        filename = sample[2]
        path = self.sequence_path + filename + '-' + str(self.seq_length) + \
            '-' + data_type + '.txt'
        if os.path.isfile(path):
            # Use a dataframe/read_csv for speed increase over numpy.
            features = pd.read_csv(path, sep=" ", header=None)
            return features.values
        else:
            return None

    @staticmethod
    def get_frames_for_sample(sample):
        """Given a sample row from the data file, get all the corresponding frame
        filenames."""
        path = './data/' + sample[0] + '/' + sample[1] + '/'
        filename = sample[2]
        images = sorted(glob.glob(path + filename + '*jpg'))
        return images

    @staticmethod
    def get_filename_from_image(filename):
        parts = filename.split('/')
        return parts[-1].replace('.jpg', '')

    @staticmethod
    def rescale_list(input_list, size):
        """Given a list and a size, return a rescaled/samples list. For example,
        if we want a list of size 5 and we have a list of size 25, return a new
        list of size five which is every 5th element of the origina list."""
        assert len(input_list) >= size

        # Get the number to skip between iterations.
        skip = len(input_list) // size

        # Build our new output.
        output = [input_list[i] for i in range(0, len(input_list), skip)]

        # Cut off the last one if needed.
        return output[:size]

In [3]:
"""
Train on images split into directories. This assumes we've split
our videos into frames and moved them to their respective folders.
Use keras 2+ and tensorflow 1+
Based on:
https://keras.io/preprocessing/image/
and
https://keras.io/applications/
"""
from keras.applications.inception_v3 import InceptionV3
from keras.optimizers import SGD
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
from keras.callbacks import ModelCheckpoint, TensorBoard, EarlyStopping


data = DataSet()

# Helper: Save the min val_loss model in each epoch.
checkpointer = ModelCheckpoint(
    filepath='./data/checkpoint/inception.{epoch:03d}-{val_loss:.2f}.hdf5',
    verbose=1,
    save_best_only=True)

# Helper: Stop when we stop learning.
# patience: number of epochs with no improvement after which training will be stopped.
early_stopper = EarlyStopping(patience=10)

# Helper: TensorBoard
tensorboard = TensorBoard(log_dir='./data/logs/')

def get_generators():
    train_datagen = ImageDataGenerator(
        rescale=1./255,
        shear_range=0.2,
        horizontal_flip=True,
        rotation_range=10.,
        width_shift_range=0.2,
        height_shift_range=0.2)

    test_datagen = ImageDataGenerator(rescale=1./255)

    train_generator = train_datagen.flow_from_directory(
        './data/train/',
        target_size=(299, 299),
        batch_size=32,
        classes=data.classes,
        class_mode='categorical')

    validation_generator = test_datagen.flow_from_directory(
        './data/test/',
        target_size=(299, 299),
        batch_size=32,
        classes=data.classes,
        class_mode='categorical')

    return train_generator, validation_generator

def get_model(weights='imagenet'):
    # create the base pre-trained model
    base_model = InceptionV3(weights=weights, include_top=False)

    # add a global spatial average pooling layer
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    # let's add a fully-connected layer
    x = Dense(1024, activation='relu')(x)
    # and a logistic layer -- let's say we have 2 classes
    predictions = Dense(len(data.classes), activation='softmax')(x)

    # this is the model we will train
    model = Model(inputs=base_model.input, outputs=predictions)

    for layer in base_model.layers:
        layer.trainable = False

    # compile the model (should be done *after* setting layers to non-trainable)
    model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

    return model

def fine_tune_inception_layer(model):
    """After we fine-tune the dense layers, train deeper."""
    # we chose to train the top 2 inception blocks, i.e. we will freeze
    # the first 172 layers and unfreeze the rest:
    for layer in model.layers[:172]:
        layer.trainable = False
    for layer in model.layers[172:]:
        layer.trainable = True

    # we need to recompile the model for these modifications to take effect
    # we use SGD with a low learning rate
    model.compile(
        optimizer=SGD(lr=0.0001, momentum=0.9),
        loss='categorical_crossentropy',
        metrics=['accuracy', 'top_k_categorical_accuracy'])

    return model

def train_model(model, nb_epoch, generators, callbacks=[]):
    train_generator, validation_generator = generators
    model.fit_generator(
        train_generator,
        steps_per_epoch=100,
        validation_data=validation_generator,
        validation_steps=10,
        epochs=nb_epoch,
        callbacks=callbacks)
    return model

def main(weights_file):

    model = get_model()
    generators = get_generators()

    if weights_file is None:
        print("Training Top layers.")
        model = train_model(model, 10, generators)
    else:
        print("Loading saved model: %s." % weights_file)
        model.load_weights(weights_file)

    # Get and train the mid layers.
    model = fine_tune_inception_layer(model)
    model = train_model(model, 1000, generators,
                        [checkpointer, early_stopper, tensorboard])

if __name__ == '__main__':
    weights_file = None
    main(weights_file)

W0320 10:21:22.876157 140420457068288 deprecation_wrapper.py:119] From /usr/local/miniconda3/envs/dl/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0320 10:21:22.878096 140420457068288 deprecation_wrapper.py:119] From /usr/local/miniconda3/envs/dl/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0320 10:21:22.881305 140420457068288 deprecation_wrapper.py:119] From /usr/local/miniconda3/envs/dl/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0320 10:21:22.984184 140420457068288 deprecation_wrapper.py:119] From /usr/local/miniconda3/envs/dl/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:174: The name tf.get_default_session is deprecated. Please use

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.5/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5


W0320 10:23:16.581518 140420457068288 deprecation_wrapper.py:119] From /usr/local/miniconda3/envs/dl/lib/python3.6/site-packages/keras/optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.



Found 1788425 images belonging to 101 classes.
Found 697865 images belonging to 101 classes.


W0320 10:25:52.954219 140420457068288 deprecation.py:323] From /usr/local/miniconda3/envs/dl/lib/python3.6/site-packages/tensorflow/python/ops/math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Training Top layers.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


W0320 10:45:50.528054 140420457068288 deprecation_wrapper.py:119] From /usr/local/miniconda3/envs/dl/lib/python3.6/site-packages/keras/callbacks.py:850: The name tf.summary.merge_all is deprecated. Please use tf.compat.v1.summary.merge_all instead.

W0320 10:45:50.528777 140420457068288 deprecation_wrapper.py:119] From /usr/local/miniconda3/envs/dl/lib/python3.6/site-packages/keras/callbacks.py:853: The name tf.summary.FileWriter is deprecated. Please use tf.compat.v1.summary.FileWriter instead.



Epoch 1/1000

Epoch 00002: val_loss improved from 1.93799 to 1.90033, saving model to ./data/checkpoint/inception.002-1.90.hdf5
Epoch 3/1000

Epoch 00003: val_loss improved from 1.90033 to 1.73900, saving model to ./data/checkpoint/inception.003-1.74.hdf5
Epoch 4/1000

Epoch 00004: val_loss improved from 1.73900 to 1.70393, saving model to ./data/checkpoint/inception.004-1.70.hdf5
Epoch 5/1000

Epoch 00005: val_loss did not improve from 1.70393
Epoch 6/1000

Epoch 00006: val_loss did not improve from 1.70393
Epoch 7/1000

Epoch 00007: val_loss did not improve from 1.70393
Epoch 8/1000

Epoch 00008: val_loss improved from 1.70393 to 1.61399, saving model to ./data/checkpoint/inception.008-1.61.hdf5
Epoch 9/1000

Epoch 00009: val_loss did not improve from 1.61399
Epoch 10/1000

Epoch 00010: val_loss improved from 1.61399 to 1.58097, saving model to ./data/checkpoint/inception.010-1.58.hdf5
Epoch 11/1000

Epoch 00011: val_loss did not improve from 1.58097
Epoch 12/1000

Epoch 00012: val_l

In [6]:
"""
Classify test images set through our CNN.
Use keras 2+ and tensorflow 1+
It takes a long time for hours.
"""
import numpy as np
import operator
import random
import glob
from keras.models import load_model
from keras.preprocessing.image import ImageDataGenerator

data = DataSet()
def main(nb_images=5):
    # CNN model evaluate

    test_data_gen = ImageDataGenerator(rescale=1. / 255)
    test_data_num = 697865 #the number of test images
    batch_size = 32
    test_generator = test_data_gen.flow_from_directory('./data/test/', target_size=(299, 299),
                                                       batch_size=batch_size, classes=data.classes,
                                                       class_mode='categorical')
    # load the trained model that has been saved in CNN_train_UCF101.py, your model name maybe is not the same as follow
    model = load_model('data/checkpoint/inception.012-1.45.hdf5')
    results = model.evaluate_generator(generator=test_generator, steps=test_data_num // batch_size)
    print(results)
    print(model.metrics)


if __name__ == '__main__':
    main()

Found 697865 images belonging to 101 classes.
[1.6648375314092558, 0.5594262999816582, 0.8258093360234776]
['accuracy', 'top_k_categorical_accuracy']


In [8]:
"""
Classify a few images through our CNN.
"""
import numpy as np
import operator
import random
import glob
from keras.models import load_model

def main(nb_images=5):
    """Spot-check `nb_images` images."""
    data = DataSet()
    model = load_model('data/checkpoint/inception.012-1.45.hdf5') #replaced by your model name

    # Get all our test images.
    images = glob.glob('./data/test/**/*.jpg')

    for _ in range(nb_images):
        print('-'*80)
        # Get a random row.
        sample = random.randint(0, len(images) - 1)
        image = images[sample]

        # Turn the image into an array.
        print(image)
        image_arr = process_image(image, (299, 299, 3))
        image_arr = np.expand_dims(image_arr, axis=0)

        # Predict.
        predictions = model.predict(image_arr)

        # Show how much we think it's each one.
        label_predictions = {}
        for i, label in enumerate(data.classes):
            label_predictions[label] = predictions[0][i]

        sorted_lps = sorted(label_predictions.items(), key=operator.itemgetter(1), reverse=True)
        
        for i, class_prediction in enumerate(sorted_lps):
            # Just get the top five.
            if i > 4:
                break
            print("%s: %.2f" % (class_prediction[0], class_prediction[1]))
            i += 1

if __name__ == '__main__':
    main()

--------------------------------------------------------------------------------
./data/test/BandMarching/v_BandMarching_g02_c07-0008.jpg
BandMarching: 0.95
Drumming: 0.01
BenchPress: 0.01
PlayingDaf: 0.00
Lunges: 0.00
--------------------------------------------------------------------------------
./data/test/SalsaSpin/v_SalsaSpin_g03_c02-0112.jpg
HulaHoop: 0.28
Nunchucks: 0.13
TennisSwing: 0.08
GolfSwing: 0.06
SalsaSpin: 0.05
--------------------------------------------------------------------------------
./data/test/WallPushups/v_WallPushups_g02_c02-0055.jpg
Nunchucks: 0.30
MoppingFloor: 0.14
WallPushups: 0.09
JumpingJack: 0.07
JugglingBalls: 0.05
--------------------------------------------------------------------------------
./data/test/PlayingDaf/v_PlayingDaf_g01_c02-0066.jpg
PlayingDaf: 0.28
YoYo: 0.21
PlayingFlute: 0.08
JugglingBalls: 0.07
PlayingGuitar: 0.07
--------------------------------------------------------------------------------
./data/test/Rowing/v_Rowing_g01_c01-030