# two-stream-action-recognition

In [1]:
import numpy as np
import pickle
import os
from PIL import Image
import time
from tqdm import tqdm
import shutil
from random import randint
import argparse
import json

import torchvision.transforms as transforms
import torchvision.models as models
import torch.nn as nn
import torch
import torch.backends.cudnn as cudnn
from torch.autograd import Variable
from torch.optim.lr_scheduler import ReduceLROnPlateau

import dataloader
from utils import *
from network import *

Specify your GPUs

In [5]:
# Expose only GPUs 0 and 1 to this process.
# NOTE(review): presumably this must run before the first CUDA call to take effect — confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"

Set the **path** and **ucf_list** arguments below to the locations of the extracted frames and the UCF split lists on your machine

In [3]:
# Build the spatial-stream data loader wrapper for UCF split '01'.
# `path` and `ucf_list` are machine-specific absolute paths and must be edited
# before running (presumably the extracted-frame directory and the directory
# holding the split lists — verify against the `dataloader` module).
# NOTE(review): BATCH_SIZE repeats the value of `batch_size` in the settings cell; keep them in sync.
data_loader = dataloader.spatial_dataloader(
                        BATCH_SIZE=32,
                        num_workers=8,
                        path='/mnt/act/jpegs_256',
                        ucf_list ='/home/lin/two-stream-action-recognition/UCF_list/',
                        ucf_split ='01', 
                        )

==> (Training video, Validation video):( 9537 3783 )


In [4]:
# run() yields the training loader, the validation loader and `test_video`.
# `test_video` is later indexed by video name to get that video's label
# (which is decremented by one before use as a class index).
train_loader, test_loader, test_video = data_loader.run()

==> sampling testing frames
==> Training data : 9537 frames
torch.Size([3, 224, 224])
==> Validation data : 71877 frames
torch.Size([3, 224, 224])


  "please use transforms.Resize instead.")


Set **resume** to the file path of the pre-trained model checkpoint

In [5]:
# Run settings for this notebook.
nb_epochs = 0        # total-epoch count shown in the validation log line
lr = 5e-4            # learning rate handed to the SGD optimizer
batch_size = 32      # NOTE(review): not read by any cell shown here; the loader is given BATCH_SIZE directly
resume = '/home/lin/model_best.pth.tar'   # checkpoint to restore; machine-specific, edit before running
start_epoch = 0      # replaced by the checkpoint's epoch when `resume` is loaded
evaluate = True      # boolean flag: run the validation pass at the end of the notebook
# `train_loader`, `test_loader` and `test_video` are used exactly as bound by the
# data-loading cell; re-assigning them to themselves here would be a no-op.

In [6]:
# Build the network with channel=3 and move it to the GPU.
# NOTE(review): `resnet101` is not defined in this notebook; it arrives through one of the
# star imports (presumably `network`, since it accepts a `channel` argument) — confirm.
model = resnet101(pretrained= True, channel=3).cuda()

In [7]:
# Loss, optimizer and learning-rate schedule.
criterion = nn.CrossEntropyLoss().cuda()
optimizer = torch.optim.SGD(model.parameters(), lr, momentum=0.9)
# 'min' mode with patience=1; verbose=True requests a message on each LR change.
scheduler = ReduceLROnPlateau(optimizer, 'min', patience=1,verbose=True)
# Keep a second reference to the bare network: the checkpoint is loaded through
# `tempmodel`, so it still targets the unwrapped model if `model` is rebound to a
# DataParallel wrapper by the optional line below.
tempmodel = model
# if you want to run on multi GPUs, uncomment next line and set device_ids.
#model = nn.DataParallel(model,device_ids=[0,1])

In [8]:
# Restore model weights and optimizer state from `resume` when a path is configured.
# NOTE(review): torch.load unpickles the file — only point `resume` at trusted checkpoints.
if resume and not os.path.isfile(resume):
    print("==> no checkpoint found at '{}'".format(resume))
elif resume:
    print("==> loading checkpoint '{}'".format(resume))
    checkpoint = torch.load(resume)
    start_epoch = checkpoint['epoch']
    best_prec1 = checkpoint['best_prec1']
    # Weights go in through `tempmodel`, the reference to the unwrapped network.
    tempmodel.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    print(
        "==> loaded checkpoint '{}' (epoch {}) (best_prec1 {})".format(
            resume, start_epoch, best_prec1
        )
    )

==> loading checkpoint '/home/lin/model_best.pth.tar'
==> loaded checkpoint '/home/lin/model_best.pth.tar' (epoch 31) (best_prec1 82.1305847168)


In [9]:
def frame2_video_level_accuracy(num_classes=101):
    """Score the accumulated per-video predictions against the ground truth.

    Reads the module-level ``dic_video_level_preds`` (video name -> summed
    frame scores), ``test_video`` (video name -> label, decremented by one
    here to obtain the class index) and ``criterion``.

    Args:
        num_classes: Length of each prediction vector. Defaults to 101, the
            value this function has always assumed.

    Returns:
        Tuple ``(top1, top5, loss)``: the two values returned by ``accuracy``
        for ``topk=(1, 5)`` converted to Python floats, and the criterion
        value as a NumPy array.
    """
    # Sorted names give a deterministic row order for the two arrays.
    names = sorted(dic_video_level_preds.keys())
    video_level_preds = np.zeros((len(names), num_classes))
    video_level_labels = np.zeros(len(names))
    for row, name in enumerate(names):
        video_level_preds[row, :] = dic_video_level_preds[name]
        video_level_labels[row] = int(test_video[name]) - 1

    # top1 / top5 on CPU tensors, loss on the GPU
    video_level_labels = torch.from_numpy(video_level_labels).long()
    video_level_preds = torch.from_numpy(video_level_preds).float()

    top1, top5 = accuracy(video_level_preds, video_level_labels, topk=(1, 5))
    loss = criterion(Variable(video_level_preds).cuda(), Variable(video_level_labels).cuda())

    top1 = float(top1.numpy())
    top5 = float(top5.numpy())

    return top1, top5, loss.data.cpu().numpy()

In [10]:
# Per-video sum of frame-level scores: filled by validate_1epoch() and read by
# frame2_video_level_accuracy().
dic_video_level_preds={}
def validate_1epoch():
    """Run one pass over ``test_loader`` and report video-level accuracy.

    Frame scores are summed per video (the part of each key before the first
    '/'), scored by ``frame2_video_level_accuracy`` and handed to
    ``record_info`` together with 'record/spatial/rgb_test.csv'.

    Uses the module-level ``model``, ``test_loader``, ``epoch`` and
    ``nb_epochs``; fills the module-level ``dic_video_level_preds``.

    Returns:
        Tuple ``(video_top1, video_loss)``.
    """
    print('==> Epoch:[{0}/{1}][validation stage]'.format(epoch, nb_epochs))
    batch_time = AverageMeter()
    # switch to evaluate mode
    model.eval()
    # Start from an empty accumulator so a second call (another epoch, or a
    # re-run of the cell) does not add onto the previous pass's sums. clear()
    # keeps the same dict object that frame2_video_level_accuracy() reads.
    dic_video_level_preds.clear()
    end = time.time()
    progress = tqdm(test_loader)
    # The per-frame label is not needed here: video labels come from test_video.
    for keys, data, _label in progress:
        # Plain .cuda() instead of .cuda(async=True): `async` is a reserved word
        # from Python 3.7 on, and a plain call is valid on every version.
        # NOTE(review): on PyTorch >= 0.4 `volatile` is ignored; wrap the loop in
        # torch.no_grad() when upgrading.
        data_var = Variable(data, volatile=True).cuda()

        # compute output
        output = model(data_var)
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        # Calculate video level prediction
        preds = output.data.cpu().numpy()
        for j in range(preds.shape[0]):
            videoName = keys[j].split('/',1)[0]
            if videoName not in dic_video_level_preds:
                # Copy so the stored row neither aliases nor keeps alive the batch array.
                dic_video_level_preds[videoName] = preds[j,:].copy()
            else:
                dic_video_level_preds[videoName] += preds[j,:]
    video_top1, video_top5, video_loss = frame2_video_level_accuracy()
    info = {'Epoch':[epoch],
            'Batch Time':[round(batch_time.avg,3)],
            'Loss':[round(video_loss,5)],
            'Prec@1':[round(video_top1,3)],
            'Prec@5':[round(video_top5,3)]}
    record_info(info, 'record/spatial/rgb_test.csv','test')
    return video_top1, video_loss

Run validation

In [3]:
# Evaluation-only run. validate_1epoch() reads `epoch` as a global (for its log
# line and the recorded info), so it has to be bound before the call.
if evaluate:
    epoch = 0
    prec1, val_loss = validate_1epoch()