# Action Recognition with 3D ResNet

In [22]:
import os
import sys
import json
import subprocess
import numpy as np
import torch
from torch import nn

from opts import parse_opts
#from model import generate_model
from mean import get_mean
from classify import classify_video
import torch
from torch import nn

from models import resnet, pre_act_resnet, wide_resnet, resnext, densenet

In [5]:
import argparse
opt = argparse.ArgumentParser()

In [195]:
opt.input = 'input'
#set video_root to your video files' root absolute path
opt.video_root = '/home/lin/Surfing/'
#set model to your pytorch model file absolute path
opt.model = '/home/lin/resnext-101-kinetics-ucf101_split1.pth'
opt.mode = 'score'
opt.batch_size = 32
opt.n_threads = 4
opt.model_name = 'resnext'
opt.model_depth = 101
opt.resnet_shortcut = 'B'
opt.wide_resnet_k = 2
opt.resnext_cardinality = 32
opt.no_cuda = False
opt.verbose = True
#result will be write to output file
opt.output = 'output.json'

In [198]:
with open('input','w') as f:
    for item in os.listdir(opt.video_root):
        f.write(item+'\n')

In [196]:
opt.mean = get_mean()
opt.arch = '{}-{}'.format(opt.model_name, opt.model_depth)
opt.sample_size = 112
opt.sample_duration = 16
opt.n_classes = 101

In [136]:
def generate_model(opt):
    assert opt.mode in ['score', 'feature']
    if opt.mode == 'score':
        last_fc = True
    elif opt.mode == 'feature':
        last_fc = False

    assert opt.model_name in ['resnet', 'preresnet', 'wideresnet', 'resnext', 'densenet']

    if opt.model_name == 'resnet':
        assert opt.model_depth in [10, 18, 34, 50, 101, 152, 200]

        if opt.model_depth == 10:
            model = resnet.resnet10(num_classes=opt.n_classes, shortcut_type=opt.resnet_shortcut,
                                    sample_size=opt.sample_size, sample_duration=opt.sample_duration,
                                    last_fc=last_fc)
        elif opt.model_depth == 18:
            model = resnet.resnet18(num_classes=opt.n_classes, shortcut_type=opt.resnet_shortcut,
                                    sample_size=opt.sample_size, sample_duration=opt.sample_duration,
                                    last_fc=last_fc)
        elif opt.model_depth == 34:
            model = resnet.resnet34(num_classes=opt.n_classes, shortcut_type=opt.resnet_shortcut,
                                    sample_size=opt.sample_size, sample_duration=opt.sample_duration,
                                    last_fc=last_fc)
        elif opt.model_depth == 50:
            model = resnet.resnet50(num_classes=opt.n_classes, shortcut_type=opt.resnet_shortcut,
                                    sample_size=opt.sample_size, sample_duration=opt.sample_duration,
                                    last_fc=last_fc)
        elif opt.model_depth == 101:
            model = resnet.resnet101(num_classes=opt.n_classes, shortcut_type=opt.resnet_shortcut,
                                     sample_size=opt.sample_size, sample_duration=opt.sample_duration,
                                     last_fc=last_fc)
        elif opt.model_depth == 152:
            model = resnet.resnet152(num_classes=opt.n_classes, shortcut_type=opt.resnet_shortcut,
                                     sample_size=opt.sample_size, sample_duration=opt.sample_duration,
                                     last_fc=last_fc)
        elif opt.model_depth == 200:
            model = resnet.resnet200(num_classes=opt.n_classes, shortcut_type=opt.resnet_shortcut,
                                     sample_size=opt.sample_size, sample_duration=opt.sample_duration,
                                     last_fc=last_fc)
    elif opt.model_name == 'wideresnet':
        assert opt.model_depth in [50]

        if opt.model_depth == 50:
            model = wide_resnet.resnet50(num_classes=opt.n_classes, shortcut_type=opt.resnet_shortcut, k=opt.wide_resnet_k,
                                         sample_size=opt.sample_size, sample_duration=opt.sample_duration,
                                         last_fc=last_fc)
    elif opt.model_name == 'resnext':
        assert opt.model_depth in [50, 101, 152]

        if opt.model_depth == 50:
            model = resnext.resnet50(num_classes=opt.n_classes, shortcut_type=opt.resnet_shortcut, cardinality=opt.resnext_cardinality,
                                     sample_size=opt.sample_size, sample_duration=opt.sample_duration,
                                     last_fc=last_fc)
        elif opt.model_depth == 101:
            model = resnext.resnet101(num_classes=opt.n_classes, shortcut_type=opt.resnet_shortcut, cardinality=opt.resnext_cardinality,
                                      sample_size=opt.sample_size, sample_duration=opt.sample_duration,
                                      last_fc=last_fc)
        elif opt.model_depth == 152:
            model = resnext.resnet152(num_classes=opt.n_classes, shortcut_type=opt.resnet_shortcut, cardinality=opt.resnext_cardinality,
                                      sample_size=opt.sample_size, sample_duration=opt.sample_duration,
                                      last_fc=last_fc)
    elif opt.model_name == 'preresnet':
        assert opt.model_depth in [18, 34, 50, 101, 152, 200]

        if opt.model_depth == 18:
            model = pre_act_resnet.resnet18(num_classes=opt.n_classes, shortcut_type=opt.resnet_shortcut,
                                            sample_size=opt.sample_size, sample_duration=opt.sample_duration,
                                            last_fc=last_fc)
        elif opt.model_depth == 34:
            model = pre_act_resnet.resnet34(num_classes=opt.n_classes, shortcut_type=opt.resnet_shortcut,
                                            sample_size=opt.sample_size, sample_duration=opt.sample_duration,
                                            last_fc=last_fc)
        elif opt.model_depth == 50:
            model = pre_act_resnet.resnet50(num_classes=opt.n_classes, shortcut_type=opt.resnet_shortcut,
                                            sample_size=opt.sample_size, sample_duration=opt.sample_duration,
                                            last_fc=last_fc)
        elif opt.model_depth == 101:
            model = pre_act_resnet.resnet101(num_classes=opt.n_classes, shortcut_type=opt.resnet_shortcut,
                                             sample_size=opt.sample_size, sample_duration=opt.sample_duration,
                                             last_fc=last_fc)
        elif opt.model_depth == 152:
            model = pre_act_resnet.resnet152(num_classes=opt.n_classes, shortcut_type=opt.resnet_shortcut,
                                             sample_size=opt.sample_size, sample_duration=opt.sample_duration,
                                             last_fc=last_fc)
        elif opt.model_depth == 200:
            model = pre_act_resnet.resnet200(num_classes=opt.n_classes, shortcut_type=opt.resnet_shortcut,
                                             sample_size=opt.sample_size, sample_duration=opt.sample_duration,
                                             last_fc=last_fc)
    elif opt.model_name == 'densenet':
        assert opt.model_depth in [121, 169, 201, 264]

        if opt.model_depth == 121:
            model = densenet.densenet121(num_classes=opt.n_classes,
                                         sample_size=opt.sample_size, sample_duration=opt.sample_duration,
                                         last_fc=last_fc)
        elif opt.model_depth == 169:
            model = densenet.densenet169(num_classes=opt.n_classes,
                                         sample_size=opt.sample_size, sample_duration=opt.sample_duration,
                                         last_fc=last_fc)
        elif opt.model_depth == 201:
            model = densenet.densenet201(num_classes=opt.n_classes,
                                         sample_size=opt.sample_size, sample_duration=opt.sample_duration,
                                         last_fc=last_fc)
        elif opt.model_depth == 264:
            model = densenet.densenet264(num_classes=opt.n_classes,
                                         sample_size=opt.sample_size, sample_duration=opt.sample_duration,
                                         last_fc=last_fc)

    if not opt.no_cuda:
        model = model.cuda()
        model = nn.DataParallel(model, device_ids=None)

    return model

In [137]:
model = generate_model(opt)
print('loading model {}'.format(opt.model))
model_data = torch.load(opt.model)
print(opt.arch == model_data['arch'])

loading model /home/lin/resnext-101-kinetics-ucf101_split1.pth
True


In [142]:
#from collections import OrderedDict
#new_state_dict = OrderedDict()
#for k,v in 

In [143]:
model.load_state_dict(model_data['state_dict'])

In [144]:
model.eval()
if opt.verbose:
    print(model)

DataParallel(
  (module): ResNeXt(
    (conv1): Conv3d(3, 64, kernel_size=(7, 7, 7), stride=(1, 2, 2), padding=(3, 3, 3), bias=False)
    (bn1): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace)
    (maxpool): MaxPool3d(kernel_size=(3, 3, 3), stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): ResNeXtBottleneck(
        (conv1): Conv3d(64, 128, kernel_size=(1, 1, 1), stride=(1, 1, 1), bias=False)
        (bn1): BatchNorm3d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv3d(128, 128, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1), groups=32, bias=False)
        (bn2): BatchNorm3d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv3d(128, 256, kernel_size=(1, 1, 1), stride=(1, 1, 1), bias=False)
        (bn3): BatchNorm3d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): R

In [199]:
input_files = []
with open(opt.input, 'r') as f:
    for row in f:
        input_files.append(row[:-1])

### Display videos in your folder

In [200]:
input_files

['v_Surfing_g02_c04.avi',
 'v_Surfing_g01_c05.avi',
 'v_Surfing_g07_c02.avi',
 'v_Surfing_g04_c04.avi',
 'v_Surfing_g08_c02.avi',
 'v_Surfing_g02_c05.avi',
 'v_Surfing_g07_c03.avi',
 'v_Surfing_g05_c03.avi',
 'v_Surfing_g03_c03.avi',
 'v_Surfing_g02_c06.avi',
 'v_Surfing_g05_c04.avi',
 'v_Surfing_g01_c03.avi',
 'v_Surfing_g06_c01.avi',
 'v_Surfing_g04_c03.avi',
 'v_Surfing_g09_c02.avi',
 'v_Surfing_g09_c04.avi',
 'v_Surfing_g05_c01.avi',
 'v_Surfing_g06_c04.avi',
 'v_Surfing_g06_c02.avi',
 'v_Surfing_g01_c04.avi',
 'v_Surfing_g03_c01.avi',
 'v_Surfing_g09_c03.avi',
 'v_Surfing_g09_c01.avi',
 'v_Surfing_g02_c01.avi',
 'v_Surfing_g01_c07.avi',
 'v_Surfing_g01_c02.avi',
 'v_Surfing_g03_c04.avi',
 'v_Surfing_g08_c06.avi',
 'v_Surfing_g08_c01.avi',
 'v_Surfing_g04_c01.avi',
 'v_Surfing_g01_c01.avi',
 'v_Surfing_g02_c02.avi',
 'v_Surfing_g08_c03.avi',
 'v_Surfing_g04_c02.avi',
 'v_Surfing_g06_c03.avi',
 'v_Surfing_g01_c06.avi',
 'v_Surfing_g08_c05.avi',
 'v_Surfing_g02_c03.avi',
 'v_Surfing_

In [201]:
ucf_dict = {1: 'ApplyEyeMakeup', 2: 'ApplyLipstick', 3: 'Archery', 4: 'BabyCrawling', 5: 'BalanceBeam', 6: 'BandMarching', 7: 'BaseballPitch', 8: 'Basketball', 9: 'BasketballDunk', 10: 'BenchPress', 11: 'Biking', 12: 'Billiards', 13: 'BlowDryHair', 14: 'BlowingCandles', 15: 'BodyWeightSquats', 16: 'Bowling', 17: 'BoxingPunchingBag', 18: 'BoxingSpeedBag', 19: 'BreastStroke', 20: 'BrushingTeeth', 21: 'CleanAndJerk', 22: 'CliffDiving', 23: 'CricketBowling', 24: 'CricketShot', 25: 'CuttingInKitchen', 26: 'Diving', 27: 'Drumming', 28: 'Fencing', 29: 'FieldHockeyPenalty', 30: 'FloorGymnastics', 31: 'FrisbeeCatch', 32: 'FrontCrawl', 33: 'GolfSwing', 34: 'Haircut', 35: 'Hammering', 36: 'HammerThrow', 37: 'HandstandPushups', 38: 'HandstandWalking', 39: 'HeadMassage', 40: 'HighJump', 41: 'HorseRace', 42: 'HorseRiding', 43: 'HulaHoop', 44: 'IceDancing', 45: 'JavelinThrow', 46: 'JugglingBalls', 47: 'JumpingJack', 48: 'JumpRope', 49: 'Kayaking', 50: 'Knitting', 51: 'LongJump', 52: 'Lunges', 53: 'MilitaryParade', 54: 'Mixing', 55: 'MoppingFloor', 56: 'Nunchucks', 57: 'ParallelBars', 58: 'PizzaTossing', 59: 'PlayingCello', 60: 'PlayingDaf', 61: 'PlayingDhol', 62: 'PlayingFlute', 63: 'PlayingGuitar', 64: 'PlayingPiano', 65: 'PlayingSitar', 66: 'PlayingTabla', 67: 'PlayingViolin', 68: 'PoleVault', 69: 'PommelHorse', 70: 'PullUps', 71: 'Punch', 72: 'PushUps', 73: 'Rafting', 74: 'RockClimbingIndoor', 75: 'RopeClimbing', 76: 'Rowing', 77: 'SalsaSpin', 78: 'ShavingBeard', 79: 'Shotput', 80: 'SkateBoarding', 81: 'Skiing', 82: 'Skijet', 83: 'SkyDiving', 84: 'SoccerJuggling', 85: 'SoccerPenalty', 86: 'StillRings', 87: 'SumoWrestling', 88: 'Surfing', 89: 'Swing', 90: 'TableTennisShot', 91: 'TaiChi', 92: 'TennisSwing', 93: 'ThrowDiscus', 94: 'TrampolineJumping', 95: 'Typing', 96: 'UnevenBars', 97: 'VolleyballSpiking', 98: 'WalkingWithDog', 99: 'WallPushups', 100: 'WritingOnBoard', 101: 'YoYo'}

In [202]:
class_names = [ucf_dict[k] for k in ucf_dict.keys()]

In [203]:
ffmpeg_loglevel = 'quiet'
if opt.verbose:
    ffmpeg_loglevel = 'info'
if os.path.exists('tmp'):
    subprocess.call('rm -rf tmp', shell=True)

In [204]:
outputs = []
for input_file in input_files:
    video_path = os.path.join(opt.video_root, input_file)
    if os.path.exists(video_path):
        print(video_path)
        subprocess.call('mkdir tmp', shell=True)
        subprocess.call('ffmpeg -i {} tmp/image_%05d.jpg'.format(video_path),
                        shell=True)
        result = classify_video('tmp', input_file, class_names, model, opt)
        outputs.append(result)
        subprocess.call('rm -rf tmp', shell=True)
    else:
        print('{} does not exist'.format(input_file))
if os.path.exists('tmp'):
    subprocess.call('rm -rf tmp', shell=True)
with open(opt.output, 'w') as f:
    json.dump(outputs, f)

/home/lin/Surfing/v_Surfing_g02_c04.avi


  inputs = Variable(inputs, volatile=True)


/home/lin/Surfing/v_Surfing_g01_c05.avi
/home/lin/Surfing/v_Surfing_g07_c02.avi
/home/lin/Surfing/v_Surfing_g04_c04.avi
/home/lin/Surfing/v_Surfing_g08_c02.avi
/home/lin/Surfing/v_Surfing_g02_c05.avi
/home/lin/Surfing/v_Surfing_g07_c03.avi
/home/lin/Surfing/v_Surfing_g05_c03.avi
/home/lin/Surfing/v_Surfing_g03_c03.avi
/home/lin/Surfing/v_Surfing_g02_c06.avi
/home/lin/Surfing/v_Surfing_g05_c04.avi
/home/lin/Surfing/v_Surfing_g01_c03.avi
/home/lin/Surfing/v_Surfing_g06_c01.avi
/home/lin/Surfing/v_Surfing_g04_c03.avi
/home/lin/Surfing/v_Surfing_g09_c02.avi
/home/lin/Surfing/v_Surfing_g09_c04.avi
/home/lin/Surfing/v_Surfing_g05_c01.avi
/home/lin/Surfing/v_Surfing_g06_c04.avi
/home/lin/Surfing/v_Surfing_g06_c02.avi
/home/lin/Surfing/v_Surfing_g01_c04.avi
/home/lin/Surfing/v_Surfing_g03_c01.avi
/home/lin/Surfing/v_Surfing_g09_c03.avi
/home/lin/Surfing/v_Surfing_g09_c01.avi
/home/lin/Surfing/v_Surfing_g02_c01.avi
/home/lin/Surfing/v_Surfing_g01_c07.avi
/home/lin/Surfing/v_Surfing_g01_c02.avi


## Print Results

In [205]:
for item in outputs:
    print(item['video'])
    for subitem in item['clips']:
        print('\t','Segment:',subitem['segment'],'Predict: ',subitem['label'])

v_Surfing_g02_c04.avi
	 Segment: [1, 16] Predict:  Surfing
	 Segment: [17, 32] Predict:  Surfing
	 Segment: [33, 48] Predict:  Surfing
	 Segment: [49, 64] Predict:  Surfing
	 Segment: [65, 80] Predict:  Surfing
	 Segment: [81, 96] Predict:  Surfing
	 Segment: [97, 112] Predict:  Surfing
	 Segment: [113, 128] Predict:  Surfing
	 Segment: [129, 144] Predict:  Surfing
	 Segment: [145, 160] Predict:  Surfing
	 Segment: [161, 176] Predict:  Surfing
	 Segment: [177, 192] Predict:  Surfing
	 Segment: [193, 208] Predict:  Surfing
	 Segment: [209, 224] Predict:  Surfing
	 Segment: [225, 240] Predict:  Surfing
v_Surfing_g01_c05.avi
	 Segment: [1, 16] Predict:  Surfing
	 Segment: [17, 32] Predict:  Surfing
	 Segment: [33, 48] Predict:  Surfing
	 Segment: [49, 64] Predict:  Surfing
	 Segment: [65, 80] Predict:  Surfing
	 Segment: [81, 96] Predict:  Surfing
	 Segment: [97, 112] Predict:  Surfing
v_Surfing_g07_c02.avi
	 Segment: [1, 16] Predict:  Surfing
	 Segment: [17, 32] Predict:  Surfing
	 Segme

	 Segment: [209, 224] Predict:  Surfing
	 Segment: [225, 240] Predict:  Surfing
	 Segment: [241, 256] Predict:  Surfing
v_Surfing_g02_c03.avi
	 Segment: [1, 16] Predict:  Surfing
	 Segment: [17, 32] Predict:  Surfing
	 Segment: [33, 48] Predict:  Surfing
	 Segment: [49, 64] Predict:  Surfing
	 Segment: [65, 80] Predict:  Surfing
	 Segment: [81, 96] Predict:  Surfing
	 Segment: [97, 112] Predict:  Surfing
	 Segment: [113, 128] Predict:  Surfing
	 Segment: [129, 144] Predict:  Skijet
	 Segment: [145, 160] Predict:  Surfing
	 Segment: [161, 176] Predict:  Surfing
	 Segment: [177, 192] Predict:  Surfing
	 Segment: [193, 208] Predict:  Surfing
	 Segment: [209, 224] Predict:  Surfing
	 Segment: [225, 240] Predict:  Surfing
	 Segment: [241, 256] Predict:  Surfing
	 Segment: [257, 272] Predict:  Surfing
v_Surfing_g03_c02.avi
	 Segment: [1, 16] Predict:  Surfing
	 Segment: [17, 32] Predict:  Surfing
	 Segment: [33, 48] Predict:  Surfing
	 Segment: [49, 64] Predict:  Surfing
	 Segment: [65, 80] 