In [2]:
pip install mxnet

Collecting mxnet
[?25l  Downloading https://files.pythonhosted.org/packages/64/20/76af36cad6754a15f39d3bff19e09921dec72b85261e455d4edc50ebffa8/mxnet-1.7.0.post2-py2.py3-none-manylinux2014_x86_64.whl (54.7MB)
[K     |████████████████████████████████| 54.7MB 58kB/s 
Collecting graphviz<0.9.0,>=0.8.1
  Downloading https://files.pythonhosted.org/packages/53/39/4ab213673844e0c004bed8a0781a0721a3f6bb23eb8854ee75c236428892/graphviz-0.8.4-py2.py3-none-any.whl
Installing collected packages: graphviz, mxnet
  Found existing installation: graphviz 0.10.1
    Uninstalling graphviz-0.10.1:
      Successfully uninstalled graphviz-0.10.1
Successfully installed graphviz-0.8.4 mxnet-1.7.0.post2


In [3]:
pip install gluoncv

Collecting gluoncv
[?25l  Downloading https://files.pythonhosted.org/packages/7f/5e/98807b36462ad88bf39ed83e392ad2dcc6c81f30e22e5aea1168f65185cb/gluoncv-0.9.4.post1-py3-none-any.whl (1.1MB)
[K     |████████████████████████████████| 1.1MB 6.4MB/s 
Collecting autocfg
  Downloading https://files.pythonhosted.org/packages/95/f9/74e0a42cbc6d871c92288806e7812c7d2628c2a06557930dbab0a17438d2/autocfg-0.0.8-py3-none-any.whl
Collecting tensorboardx
[?25l  Downloading https://files.pythonhosted.org/packages/af/0c/4f41bcd45db376e6fe5c619c01100e9b7531c55791b7244815bac6eac32c/tensorboardX-2.1-py2.py3-none-any.whl (308kB)
[K     |████████████████████████████████| 317kB 47.4MB/s 
Collecting portalocker
  Downloading https://files.pythonhosted.org/packages/82/22/e684c9e2e59b561dbe36538852e81849122c666c423448e3a5c99362c228/portalocker-2.2.1-py2.py3-none-any.whl
Collecting autogluon.core
[?25l  Downloading https://files.pythonhosted.org/packages/0e/74/b3739c06941673f6f01a2140ff39d04db4a173006e4c1e3dc

In [19]:
import os
import sys
import time
import argparse
import logging
import math
import gc
import json

import numpy as np
import mxnet as mx
from mxnet import nd
from mxnet.gluon.data.vision import transforms
from gluoncv.data.transforms import video
from gluoncv.model_zoo import get_model
from gluoncv.data import VideoClsCustom
from gluoncv.utils.filesystem import try_import_decord


class parseArgs:

  def __init__(self,datalist,model,save_dir):
    self.data_dir=''
    self.need_root=True
    self.data_list=datalist
    self.dtype='float32'
    self.gpu_id=-1
    self.mode=''
    self.model=model
    self.input_size=224
    self.use_pretrained=True
    self.hashtag=''
    self.resume_params=''
    self.log_interval=10
    self.new_height=256
    self.new_width=340
    self.new_length=32
    self.new_step=1
    self.num_classes=400
    self.ten_crop=True
    self.three_crop=True
    self.video_loader=True
    self.use_decord=True
    self.slowfast=True
    self.slow_temporal_stride=16
    self.fast_temporal_stride=2
    self.save_dir=save_dir
    self.num_segments=1
    self.data_aug='v1'
    self.num_crop=1




def parse_args():

    
    parser.add_argument('--three-crop', action='store_true',
                        help='whether to use three crop evaluation.')
    parser.add_argument('--video-loader', action='store_true', default=True,
                        help='if set to True, read videos directly instead of reading frames.')
    parser.add_argument('--use-decord', action='store_true', default=True,
                        help='if set to True, use Decord video loader to load data.')
    parser.add_argument('--slowfast', action='store_true',
                        help='if set to True, use data loader designed for SlowFast network.')
    parser.add_argument('--slow-temporal-stride', type=int, default=16,
                        help='the temporal stride for sparse sampling of video frames for slow branch in SlowFast network.')
    parser.add_argument('--fast-temporal-stride', type=int, default=2,
                        help='the temporal stride for sparse sampling of video frames for fast branch in SlowFast network.')
    parser.add_argument('--num-crop', type=int, default=1,
                        help='number of crops for each image. default is 1')
    parser.add_argument('--data-aug', type=str, default='v1',
                        help='different types of data augmentation pipelines. Supports v1, v2, v3 and v4.')
    parser.add_argument('--num-segments', type=int, default=1,
                        help='number of segments to evenly split the video.')
    parser.add_argument('--save-dir', type=str, default='./',
                        help='directory of saved results')
    opt = parser.parse_args()
    return opt

def read_data(opt, video_name, transform, video_utils):

    decord = try_import_decord()
    decord_vr = decord.VideoReader(video_name, width=opt.new_width, height=opt.new_height)
    duration = len(decord_vr)

    opt.skip_length = opt.new_length * opt.new_step
    segment_indices, skip_offsets = video_utils._sample_test_indices(duration)

    if opt.video_loader:
        if opt.slowfast:
            clip_input = video_utils._video_TSN_decord_slowfast_loader(video_name, decord_vr, duration, segment_indices, skip_offsets)
        else:
            clip_input = video_utils._video_TSN_decord_batch_loader(video_name, decord_vr, duration, segment_indices, skip_offsets)
    else:
        raise RuntimeError('We only support video-based inference.')

    clip_input = transform(clip_input)

    if opt.slowfast:
        sparse_sampels = len(clip_input) // (opt.num_segments * opt.num_crop)
        clip_input = np.stack(clip_input, axis=0)
        clip_input = clip_input.reshape((-1,) + (sparse_sampels, 3, opt.input_size, opt.input_size))
        clip_input = np.transpose(clip_input, (0, 2, 1, 3, 4))
    else:
        clip_input = np.stack(clip_input, axis=0)
        clip_input = clip_input.reshape((-1,) + (opt.new_length, 3, opt.input_size, opt.input_size))
        clip_input = np.transpose(clip_input, (0, 2, 1, 3, 4))

    if opt.new_length == 1:
        clip_input = np.squeeze(clip_input, axis=2)    # this is for 2D input case

    return nd.array(clip_input)

def main(logger):
    # opt = parse_args()
    opt=parseArgs('videos.txt','i3d_resnet50_v1_kinetics400','./features')
    logger.info(opt)
    gc.set_threshold(100, 5, 5)

    if not os.path.exists(opt.save_dir):
        os.makedirs(opt.save_dir)

    # set env
    if opt.gpu_id == -1:
        context = mx.cpu()
    else:
        gpu_id = opt.gpu_id
        context = mx.gpu(gpu_id)

    # get data preprocess
    image_norm_mean = [0.485, 0.456, 0.406]
    image_norm_std = [0.229, 0.224, 0.225]
    if opt.ten_crop:
        transform_test = transforms.Compose([
            video.VideoTenCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize(image_norm_mean, image_norm_std)
        ])
        opt.num_crop = 10
    elif opt.three_crop:
        transform_test = transforms.Compose([
            video.VideoThreeCrop(opt.input_size),
            video.VideoToTensor(),
            video.VideoNormalize(image_norm_mean, image_norm_std)
        ])
        opt.num_crop = 3
    else:
        transform_test = video.VideoGroupValTransform(size=opt.input_size, mean=image_norm_mean, std=image_norm_std)
        opt.num_crop = 1

    # get model
    if opt.use_pretrained and len(opt.hashtag) > 0:
        opt.use_pretrained = opt.hashtag
    classes = opt.num_classes
    model_name = opt.model
    net = get_model(name=model_name, nclass=classes, pretrained=opt.use_pretrained,
                    feat_ext=True, num_segments=opt.num_segments, num_crop=opt.num_crop)
    net.cast(opt.dtype)
    net.collect_params().reset_ctx(context)
    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=True)
    if opt.resume_params != '' and not opt.use_pretrained:
        net.load_parameters(opt.resume_params, ctx=context)
        logger.info('Pre-trained model %s is successfully loaded.' % (opt.resume_params))
    else:
        logger.info('Pre-trained model is successfully loaded from the model zoo.')
    logger.info("Successfully built model {}".format(model_name))

    # get data
    anno_file = opt.data_list
    f = open(anno_file, 'r')
    data_list = f.readlines()
    logger.info('Load %d video samples.' % len(data_list))

    # build a pseudo dataset instance to use its children class methods
    video_utils = VideoClsCustom(root=opt.data_dir,
                                 setting=opt.data_list,
                                 num_segments=opt.num_segments,
                                 num_crop=opt.num_crop,
                                 new_length=opt.new_length,
                                 new_step=opt.new_step,
                                 new_width=opt.new_width,
                                 new_height=opt.new_height,
                                 video_loader=opt.video_loader,
                                 use_decord=opt.use_decord,
                                 slowfast=opt.slowfast,
                                 slow_temporal_stride=opt.slow_temporal_stride,
                                 fast_temporal_stride=opt.fast_temporal_stride,
                                 data_aug=opt.data_aug,
                                 lazy_init=True)

    start_time = time.time()
    arr=np.empty((1,2048))
    for vid, vline in enumerate(data_list):
        video_path = vline.split()[0]
        video_name = video_path.split('/')[-1]
        if opt.need_root:
            video_path = os.path.join(opt.data_dir, video_path)
        video_data = read_data(opt, video_path, transform_test, video_utils)
        video_input = video_data.as_in_context(context)
        video_feat = net(video_input.astype(opt.dtype, copy=False))

        feat_file = '%s_%s_feat.npy' % (model_name, video_name)
        # np.save(os.path.join(opt.save_dir, feat_file), video_feat.asnumpy())
        np.append(arr,video_feat.asnumpy(),axis=)
        print(arr.shape)
        if vid > 0 and vid % opt.log_interval == 0:
            logger.info('%04d/%04d is done' % (vid, len(data_list)))
    np.save(os.path.join(opt.save_dir, 'file'), arr)
    end_time = time.time()
    logger.info('Total feature extraction time is %4.2f minutes' % ((end_time - start_time) / 60))

if __name__ == '__main__':
    logging.basicConfig()
    logger = logging.getLogger('logger')
    logger.setLevel(logging.INFO)

    main(logger)

INFO:logger:<__main__.parseArgs object at 0x7fca89303ad0>
INFO:logger:Pre-trained model is successfully loaded from the model zoo.
INFO:logger:Successfully built model i3d_resnet50_v1_kinetics400
INFO:logger:Load 3 video samples.


(1, 2048)
(1, 2048)


INFO:logger:Total feature extraction time is 0.41 minutes


(1, 2048)


In [12]:
import numpy as np
data=np.load('features/video1.npy')
print(data.shape)
data2=np.load('features/video2.npy')
print(data2.shape)
data3=np.load('features/video3.npy')
print(data3.shape)


(1, 2048)
(1, 2048)
(1, 2048)


In [17]:
import numpy as np
data=np.load('features/file.npy')
print(data)
print(data.shape)

[[ 4.68322845e-310 -2.24670436e-209 -1.00299167e-267 ...  3.40382941e-015
   1.35907912e-013  9.12023447e-016]]
(1, 2048)
