# Extract Features from MSR-VTT dataset

In [49]:
video_dir = './videos'
output_dir = './DATA/frames'
features_out = './DATA/features'
jobs = 16

### Extract frames from videos

In [2]:
import csv
import glob
import os
import os.path
from subprocess import call
from joblib import Parallel,delayed

data_file = []

def core_func(video_path):
    global data_file
    video_parts = get_video_parts(video_path)
    filename_no_ext, filename = video_parts

    # Only extract if we haven't done it yet. Otherwise, just get
    # the info.
    if not check_already_extracted(video_parts):
        # Now extract it.
        src = os.path.join(video_dir,filename)
        dest = os.path.join(output_dir,filename_no_ext + '-%04d.jpg')
        #print('in', src, dest)
        call(["ffmpeg", "-i", src,"-r", "4", dest])
    # Now get how many frames it is.
    nb_frames = get_nb_frames_for_video(video_parts)
    #print('written: ',nb_frames)
    data_file.append([filename_no_ext, nb_frames])



def get_nb_frames_for_video(video_parts):
    """Given video parts of an (assumed) already extracted video, return
    the number of frames that were extracted."""
    filename_no_ext, _ = video_parts
    generated_files = glob.glob(os.path.join(output_dir, filename_no_ext + '*.jpg'))
    return len(generated_files)

def get_video_parts(video_path):
    """Given a full path to a video, return its parts."""
    parts = video_path.split(os.path.sep)
    #print(parts)
    filename = parts[2]
    filename_no_ext = filename.split('.')[0]
    return filename_no_ext, filename

def check_already_extracted(video_parts):
    """Check to see if we created the -0001 frame of this file."""
    filename_no_ext, _ = video_parts
    return bool(os.path.exists(os.path.join(output_dir,
                               filename_no_ext + '-0001.jpg')))



vfiles = glob.glob(os.path.join(video_dir, '*.mp4'))
results = Parallel(n_jobs=jobs)(delayed(core_func)(video_path) for video_path in vfiles)               

with open('data_file.csv', 'w') as fout:
    writer = csv.writer(fout)
    writer.writerows(data_file)

print("Extracted and wrote %d video files." % (len(data_file)))

Extracted and wrote 0 video files.


In [3]:
# data_file above did not work in parallel processing need to write a code to count them manually:
from tqdm import tqdm
pbar = tqdm(total=len(vfiles))

data_file = []
for video_path in vfiles:
    video_parts = get_video_parts(video_path)
    filename_no_ext, filename = video_parts
    generated_files = glob.glob(os.path.join(output_dir, filename_no_ext + '*.jpg'))
    data_file.append([filename_no_ext, len(generated_files)])
    pbar.update(1)
pbar.close()
with open('data_file.csv', 'w') as fout:
    writer = csv.writer(fout)
    writer.writerows(data_file)

print("Extracted and wrote %d video files." % (len(data_file)))
    

100%|██████████| 7351/7351 [1:08:43<00:00,  1.62it/s]

Extracted and wrote 7351 video files.





In [72]:
!ls DATA/videos

video0.mp4


### Extract CNN Features

In [8]:
"""
Class for managing our data.
"""
import csv
import numpy as np
import random
import glob
import os.path
import sys
import operator
import threading
from keras.utils import np_utils

class threadsafe_iterator:
    def __init__(self, iterator):
        self.iterator = iterator
        self.lock = threading.Lock()

    def __iter__(self):
        return self

    def __next__(self):
        with self.lock:
            return next(self.iterator)

def threadsafe_generator(func):
    """Decorator"""
    def gen(*a, **kw):
        return threadsafe_iterator(func(*a, **kw))
    return gen

class DataSet():

    def __init__(self, seq_length=40, class_limit=None, image_shape=(224, 224, 3)):
        """Constructor.
        seq_length = (int) the number of frames to consider
        class_limit = (int) number of classes to limit the data to.
            None = no limit.
        """
        self.seq_length = seq_length
        self.class_limit = class_limit
        self.sequence_path = os.path.join('data', 'sequences')
        self.max_frames = 1000  # max number of frames a video can have for us to use it

        # Get the data.
        self.data = self.get_data()

        # Now do some minor data cleaning.
        self.data = self.clean_data()

        self.image_shape = image_shape

    @staticmethod
    def get_data():
        """Load our data from file."""
        with open(os.path.join('data_file.csv'), 'r') as fin:
            reader = csv.reader(fin)
            data = list(reader)
            #print(len(data))

        return data

    def clean_data(self):
        """Limit samples to greater than the sequence length and fewer
        than N frames. Also limit it to classes we want to use."""
        data_clean = []
        for item in self.data:
            if int(item[1]) >= self.seq_length and int(item[1]) <= self.max_frames:
                data_clean.append(item)

        return data_clean


    def build_image_sequence(self, frames):
        """Given a set of frames (filenames), build our sequence."""
        return [process_image(x, self.image_shape) for x in frames]

    def get_extracted_sequence(self, data_type, sample):
        """Get the saved extracted features."""
        filename = sample[2]
        path = os.path.join(self.sequence_path, filename + '-' + str(self.seq_length) + \
            '-' + data_type + '.npy')
        if os.path.isfile(path):
            return np.load(path)
        else:
            return None

    @staticmethod
    def get_frames_for_sample(sample):
        """Given a sample row from the data file, get all the corresponding frame
        filenames."""
        filename = sample[0]
        images = sorted(glob.glob(os.path.join(output_dir, filename + '*jpg')))
        return images

    @staticmethod
    def get_filename_from_image(filename):
        parts = filename.split(os.path.sep)
        return parts[-1].replace('.jpg', '')

    @staticmethod
    def rescale_list(input_list, size):
        """Given a list and a size, return a rescaled/samples list. For example,
        if we want a list of size 5 and we have a list of size 25, return a new
        list of size five which is every 5th element of the origina list."""
        assert len(input_list) >= size

        # Get the number to skip between iterations.
        skip = len(input_list) // size

        # Build our new output.
        output = [input_list[i] for i in range(0, len(input_list), skip)]

        # Cut off the last one if needed.
        return output[:size]

In [9]:
from keras.preprocessing.image import img_to_array, load_img
import numpy as np

def process_image(image, target_shape):
    """Given an image, process it and return the array."""
    # Load the image.
    h, w, _ = target_shape
    image = load_img(image, target_size=(h, w))

    # Turn it into numpy, normalize and return.
    img_arr = img_to_array(image)
    x = (img_arr / 255.).astype(np.float32)

    return x

In [10]:
from keras.preprocessing import image
from keras.applications.inception_v3 import InceptionV3, preprocess_input
from keras.models import Model, load_model
from keras.layers import Input
import numpy as np

class Extractor():
    def __init__(self, weights=None):
        """Either load pretrained from imagenet, or load our saved
        weights from our own training."""

        self.weights = weights  # so we can check elsewhere which model

        if weights is None:
            # Get model with pretrained weights.
            base_model = InceptionV3(
                weights='imagenet',
                include_top=True
            )

            # We'll extract features at the final pool layer.
            self.model = Model(
                inputs=base_model.input,
                outputs=base_model.get_layer('avg_pool').output
            )

        else:
            # Load the model first.
            self.model = load_model(weights)

            # Then remove the top so we get features not predictions.
            # From: https://github.com/fchollet/keras/issues/2371
            self.model.layers.pop()
            self.model.layers.pop()  # two pops to get to pool layer
            self.model.outputs = [self.model.layers[-1].output]
            self.model.output_layers = [self.model.layers[-1]]
            self.model.layers[-1].outbound_nodes = []

    def extract(self, image_path):
        img = image.load_img(image_path, target_size=(299, 299))
        x = image.img_to_array(img)
        x = np.expand_dims(x, axis=0)
        x = preprocess_input(x)

        # Get the prediction.
        features = self.model.predict(x)

        if self.weights is None:
            # For imagenet/default network:
            features = features[0]
        else:
            # For loaded network:
            features = features[0]

        return features

In [10]:
import numpy as np
import os.path
from tqdm import tqdm

# Set defaults.
seq_length = 30
class_limit = None  # Number of classes to extract. Can be 1-101 or None for all.

# Get the dataset.
data = DataSet(seq_length=seq_length, class_limit=class_limit)

# get the model.
model = Extractor()

print(len(data.data))
# Loop through data.
pbar = tqdm(total=len(data.data))
for video in data.data:

    # Get the path to the sequence for this video.
    path = os.path.join(features_out, video[0] + '-' + str(seq_length) + \
        '-features')  # numpy will auto-append .npy

    # Check if we already have it.
    if os.path.isfile(path):
        pbar.update(1)
        continue

    # Get the frames for this video.
    frames = data.get_frames_for_sample(video)

    # Now downsample to just the ones we need.
    frames = data.rescale_list(frames, seq_length)

    # Now loop through and extract features to build the sequence.
    sequence = []
    for frame in frames:
        features = model.extract(frame)
        sequence.append(features)

    # Save the sequence.
    np.save(path, sequence)
    
    pbar.update(1)

pbar.close()


  0%|          | 0/7273 [00:00<?, ?it/s][A

7273



100%|██████████| 7273/7273 [4:23:07<00:00,  2.11s/it]  


## Extract 3D CNN features:

In [54]:
from keras.models import model_from_json
import os

class Extractor_3d():
    
    def __init__(self):
        self.model_dir = '/home/narain.adithya/c3d-keras/models'
        self.model_weight_filename = os.path.join(model_dir, 'sports1M_weights_tf.h5')
        self.model_json_filename = os.path.join(model_dir, 'sports1M_weights_tf.json')

        self.model = model_from_json(open(model_json_filename, 'r').read())
        self.model.layers.pop()
        self.model.layers.pop()
        self.model.layers.pop()
        self.model.layers.pop()
        self.model.outputs = [self.model.layers[-1].output]
        self.model.output_layers = [self.model.layers[-1]]
        self.model.layers[-1].outbound_nodes = []
        #self.model.summary()
        
    def reshape(self, image_path):
        img = image.load_img(image_path, target_size=(112, 112))
        x = image.img_to_array(img)
        x = np.expand_dims(x, axis=0)
        x = preprocess_input(x)
        return x
    
    def extract(self,x):
        
        # Get the prediction.
        features = self.model.predict(x)

        if self.weights is None:
            # For imagenet/default network:
            features = features[0]
        else:
            # For loaded network:
            features = features[0]

        return features


In [None]:
import numpy as np
import os.path
from tqdm import tqdm

# Set defaults.
seq_length = 16
class_limit = None  # Number of classes to extract. Can be 1-101 or None for all.

# Get the dataset.
data = DataSet(seq_length=seq_length, class_limit=class_limit)

# get the model.
model = Extractor_3d()

print(len(data.data))
# Loop through data.
pbar = tqdm(total=len(data.data))
for video in data.data:

    # Get the path to the sequence for this video.
    path = os.path.join(features_out, video[0] + '-' + str(seq_length) + \
        '-3dfeatures')  # numpy will auto-append .npy

    # Check if we already have it.
    if os.path.isfile(path):
        pbar.update(1)
        continue

    # Get the frames for this video.
    frames = data.get_frames_for_sample(video)
    print(len(frames))

    # Now downsample to just the ones we need.
    frames = data.rescale_list(frames, seq_length)

    # Now loop through and extract features to build the sequence.
    imgs = []
    for frame in frames:
        x = model.reshape(frame)
        x = x[0]
        imgs.append(x)
        
    x = []
    x.append(imgs)
    x = np.array(x)
    print(x.shape)
    feature = model.extract(x)
    # Save the sequence.
    np.save(path, feature)
    
    pbar.update(1)

pbar.close()


  0%|          | 0/7273 [00:00<?, ?it/s][A

7273





68
(1, 16, 112, 112, 3)
