In [41]:
from __future__ import division

import argparse, time, logging, os, sys, math

import numpy as np
import mxnet as mx
import gluoncv as gcv
from mxnet import gluon, nd, init, context
from mxnet import autograd as ag
from mxnet.gluon import nn
from mxnet.gluon.data.vision import transforms

from gluoncv.data.transforms import video
from gluoncv.data import VideoClsCustom
from gluoncv.model_zoo import get_model
from gluoncv.utils import makedirs, LRSequential, LRScheduler, split_and_load, TrainingHistory

In [70]:
label_map={}
def generate_train_txt(path):
    i=0
    labels = os.listdir(path)
    videos = []
    with open('train.txt','w', encoding='utf-8') as f:
        for label in labels:
            j=0
            i+=1
            videos_per_label = os.listdir(f'{path}/{label}')
            for video in videos_per_label:
                os.rename(f'./dataset/train/{label}/{video}',f'./dataset/train/{label}/{label}_{j}.mp4')
                f.write(f'{label}/{label}_{j} 40 {i}\n')
                label_map[i] =label 
                j+=1


In [71]:
generate_train_txt('./dataset/train')

In [72]:
num_gpus = 1
ctx = [mx.gpu(i) for i in range(num_gpus)]
transform_train = video.VideoGroupTrainTransform(size=(224, 224), scale_ratios=[1.0, 0.8], mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
per_device_batch_size = 5
num_workers = 0
batch_size = per_device_batch_size * num_gpus


train_dataset = VideoClsCustom(root=os.path.expanduser('./dataset/train'),
                               setting=os.path.expanduser('./train.txt'),
                               video_loader=True,
                               video_ext='mp4',
                               train=True,
                               new_length=32,
                               transform=transform_train)
print('Load %d training samples.' % len(train_dataset))
train_data = gluon.data.DataLoader(train_dataset, batch_size=batch_size,
                                   shuffle=True, num_workers=num_workers)

Load 77 training samples.


In [101]:
model_name= 'i3d_inceptionv1_kinetics400'
net = get_model(name=model_name, nclass=9)
net.collect_params().reset_ctx(ctx)
print(net)

Downloading /home/dhia/.mxnet/models/googlenet-c7c89366.zip from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/googlenet-c7c89366.zip...


100%|██████████| 13680/13680 [00:17<00:00, 801.46KB/s] 


i3d_inceptionv10_conv0_weight is done with shape:  (64, 3, 7, 7, 7)
i3d_inceptionv10_batchnorm0_gamma is done with shape:  (64,)
i3d_inceptionv10_batchnorm0_beta is done with shape:  (64,)
i3d_inceptionv10_batchnorm0_running_mean is done with shape:  (64,)
i3d_inceptionv10_batchnorm0_running_var is done with shape:  (64,)
i3d_inceptionv10_conv1_weight is done with shape:  (64, 64, 1, 1, 1)
i3d_inceptionv10_batchnorm1_gamma is done with shape:  (64,)
i3d_inceptionv10_batchnorm1_beta is done with shape:  (64,)
i3d_inceptionv10_batchnorm1_running_mean is done with shape:  (64,)
i3d_inceptionv10_batchnorm1_running_var is done with shape:  (64,)
i3d_inceptionv10_conv2_weight is done with shape:  (192, 64, 3, 3, 3)
i3d_inceptionv10_batchnorm2_gamma is done with shape:  (192,)
i3d_inceptionv10_batchnorm2_beta is done with shape:  (192,)
i3d_inceptionv10_batchnorm2_running_mean is done with shape:  (192,)
i3d_inceptionv10_batchnorm2_running_var is done with shape:  (192,)
i3d_inceptionv10_Mixe

In [102]:
# Learning rate decay factor
lr_decay = 0.1
# Epochs where learning rate decays
lr_decay_epoch = [40, 80, 100]

# Stochastic gradient descent
optimizer = 'sgd'
# Set parameters
optimizer_params = {'learning_rate': 0.001, 'wd': 0.0001, 'momentum': 0.9}

# Define our trainer for net
trainer = gluon.Trainer(net.collect_params(), optimizer, optimizer_params)

In [103]:
loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()

In [104]:
train_metric = mx.metric.Accuracy()
train_history = TrainingHistory(['training-acc'])

In [115]:
epochs = 250
lr_decay_count = 2

for epoch in range(epochs):
    tic = time.time()
    train_metric.reset()
    train_loss = 0

    # Learning rate decay
    if epoch == lr_decay_epoch[lr_decay_count]:
        trainer.set_learning_rate(trainer.learning_rate*lr_decay)
        lr_decay_count += 1

    # Loop through each batch of training data
    for i, batch in enumerate(train_data):
        # Extract data and label
        data = split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
        label = split_and_load(batch[1], ctx_list=ctx, batch_axis=0)

        # AutoGrad
        with ag.record():
            output = []
            for _, X in enumerate(data):
                X = X.reshape((-1,) + X.shape[2:])
                pred = net(X)
                output.append(pred)
            loss = [loss_fn(yhat, y) for yhat, y in zip(output, label)]

        # Backpropagation
        for l in loss:
            l.backward()

        # Optimize
        trainer.step(batch_size)

        # Update metrics
        train_loss += sum([l.mean().asscalar() for l in loss])
        train_metric.update(label, output)

        if i == 100:
            break

    name, acc = train_metric.get()

    # Update history and print metrics
    train_history.update([acc])
    print('[Epoch %d] train=%f loss=%f time: %f' %
        (epoch, acc, train_loss / (i+1), time.time()-tic))

# We can plot the metric scores with:
train_history.plot()

[Epoch 0] train=0.337662 loss=1.250979 time: 47.223980
[Epoch 1] train=0.428571 loss=1.144966 time: 41.943938
[Epoch 2] train=0.454545 loss=1.187397 time: 41.329581
[Epoch 3] train=0.506494 loss=0.972262 time: 42.408585
[Epoch 4] train=0.493506 loss=0.875509 time: 40.235074
[Epoch 5] train=0.506494 loss=0.867895 time: 40.003082
[Epoch 6] train=0.506494 loss=0.897169 time: 42.179425
[Epoch 7] train=0.519481 loss=0.966040 time: 40.391616
[Epoch 8] train=0.649351 loss=0.664604 time: 39.975979
[Epoch 9] train=0.506494 loss=0.826028 time: 39.560357
[Epoch 10] train=0.662338 loss=0.515671 time: 39.333470
[Epoch 11] train=0.623377 loss=0.627985 time: 38.985063
[Epoch 12] train=0.623377 loss=0.658354 time: 39.092310
[Epoch 13] train=0.623377 loss=0.543455 time: 39.413028


In [106]:
net.save_parameters('./models/kinetics_i3D_default')

In [114]:
from gluoncv.utils.filesystem import try_import_decord
decord = try_import_decord()

video_fname = 'dataset/test/sourd.mp4'
vr = decord.VideoReader(video_fname)
frame_id_list = range(0, 64, 2)
video_data = vr.get_batch(frame_id_list).asnumpy()
clip_input = [video_data[vid, :, :, :] for vid, _ in enumerate(frame_id_list)]

transform_fn = video.VideoGroupValTransform(size=(224, 224), mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
clip_input = transform_fn(clip_input)
clip_input = np.stack(clip_input, axis=0)
clip_input = clip_input.reshape((-1,) + (32, 3, 224, 224))
clip_input = np.transpose(clip_input, (0, 2, 1, 3, 4))
print('Video data preprocessed.')

# Running the prediction
pred = net(nd.array(clip_input,  ctx = mx.gpu(0)))
topK = 9
ind = nd.topk(pred, k=topK)[0].astype('int')
print('The input video clip is classified to be')
for i in range(topK):
    print('\t[%s]: %.3f'%
          (label_map[ind[i].asscalar()+1], nd.softmax(pred)[0][ind[i]].asscalar()))

Video data preprocessed.
The input video clip is classified to be
	[Sourd]: 0.504
	[Douleur]: 0.189
	[Rendez-vous]: 0.150
	[Analyse]: 0.085
	[Epaule]: 0.026
	[Coeur]: 0.024
	[Vaccin]: 0.016
	[diarrhee]: 0.005
	[salut_cava]: 0.002
