In [20]:
import os
import shutil
import time

from PIL import Image
import torch.backends.cudnn as cudnn
import torch.nn.parallel
import torch.optim
from torch.nn.utils import clip_grad_norm

from dataset import TSNDataSet
from models import TSN
from opts import parser
from transforms import *

In [2]:
args_str = """ucf101 RGB \
    /media/e/vsd/data/ucf101_preprocessed/split_01/file_lists/train_rgb.txt \
    /media/e/vsd/data/ucf101_preprocessed/split_01/file_lists/test_rgb.txt \
    --arch BNInception --num_segments 3 \
    --gd 20 --lr 0.001 --lr_steps 30 60 --epochs 80 \
    -b 32 -j 4 \
    --snapshot_pref ucf101_bninception_"""

args = parser.parse_args(args_str.split())

In [3]:
arch = 'BNInception'
batch_size = 4
clip_gradient = 20.0
consensus_type = 'avg'
dataset = 'ucf101'
dropout = 0.5
epochs = 80
eval_freq = 5
evaluate = False
flow_prefix = ''
gpus = None
k = 3
loss_type = 'nll'
lr = 0.001
lr_steps = [30.0, 60.0]
modality = 'RGB'
momentum = 0.9
no_partialbn = False
num_segments = 3
print_freq = 10
resume = False
snapshot_pref = 'ucf101_bninception_'
start_epoch = 0
train_list = '/media/e/vsd/data/ucf101_preprocessed/split_01/file_lists/train_rgb.txt'
val_list = '/media/e/vsd/data/ucf101_preprocessed/split_01/file_lists/test_rgb.txt'
weight_decay = 0.0005
workers = 4

num_class = 108

checkpoint_path = '/media/d/vsd/tsn-pytorch/ucf101_bninception__rgb_model_best.pth.tar'

In [4]:
model = TSN(num_class, num_segments, modality,
                base_model=arch,
                consensus_type=consensus_type, dropout=dropout, partial_bn=not no_partialbn)

crop_size = model.crop_size
scale_size = model.scale_size
input_mean = model.input_mean
input_std = model.input_std
policies = model.get_optim_policies()
train_augmentation = model.get_augmentation()

model = torch.nn.DataParallel(model, device_ids=gpus).cuda()


Initializing TSN with base model: BNInception.
TSN Configurations:
    input_modality:     RGB
    num_segments:       3
    new_length:         1
    consensus_module:   avg
    dropout_ratio:      0.5
        


  own_state[name].copy_(param)


In [5]:
checkpoint = torch.load(checkpoint_path)
model.load_state_dict(checkpoint['state_dict'])

In [6]:
# if args.modality != 'RGBDiff':
#     normalize = GroupNormalize(input_mean, input_std)
# else:
#     normalize = IdentityTransform()

# if args.modality == 'RGB':
#     data_length = 1
# elif args.modality in ['Flow', 'RGBDiff']:
#     data_length = 5

data_length = 1
data_length = 5

normalize = IdentityTransform()

In [12]:
transform = torchvision.transforms.Compose([
                   GroupScale(int(scale_size)),
                   GroupCenterCrop(crop_size),
                   Stack(roll=arch == 'BNInception'),
                   ToTorchFormatTensor(div=arch != 'BNInception'),
                   normalize,
               ])

In [13]:
# val_loader = torch.utils.data.DataLoader(
#     TSNDataSet("", val_list, num_segments=num_segments,
#                new_length=data_length,
#                modality=modality,
#                image_tmpl="{:04d}.jpg" if modality in ['RGB', 'RGBDiff'] else "{:04d}.flo",
#                random_shift=False,
#                transform=torchvision.transforms.Compose([
#                    GroupScale(int(scale_size)),
#                    GroupCenterCrop(crop_size),
#                    Stack(roll=arch == 'BNInception'),
#                    ToTorchFormatTensor(div=arch != 'BNInception'),
#                    normalize,
#                ])),
#     batch_size=1, shuffle=False,
#     num_workers=workers, pin_memory=True)

In [8]:
model.eval()

Freezing BatchNorm2D except the first one.


DataParallel (
  (module): TSN (
    (base_model): BNInception (
      (conv1_7x7_s2): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
      (conv1_7x7_s2_bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
      (conv1_relu_7x7): ReLU (inplace)
      (pool1_3x3_s2): MaxPool2d (size=(3, 3), stride=(2, 2), dilation=(1, 1))
      (conv2_3x3_reduce): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
      (conv2_3x3_reduce_bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
      (conv2_relu_3x3_reduce): ReLU (inplace)
      (conv2_3x3): Conv2d(64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (conv2_3x3_bn): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True)
      (conv2_relu_3x3): ReLU (inplace)
      (pool2_3x3_s2): MaxPool2d (size=(3, 3), stride=(2, 2), dilation=(1, 1))
      (inception_3a_1x1): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1))
      (inception_3a_1x1_bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
      (

In [25]:
criterion = torch.nn.CrossEntropyLoss().cuda()

In [153]:
c = [1, 2, 3, 4]
c + c[-2:]

[1, 2, 3, 4, 3, 4]

In [158]:
SEGMENT_TIME = 3
SEQ_LENGTH = 25
FPS = 25

def split_frames(frames):
    def chunks(l, n):
        """Yield successive n-sized chunks from l."""
        for i in range(0, len(l), n):
            yield l[i:i + n]

    n_frames = len(frames)
    segment_length = FPS * SEGMENT_TIME

    for i, chunk in enumerate(chunks(frames, segment_length)):
        if len(chunk) != segment_length:
            times = segment_length // len(chunk)
            yield (chunk * times + chunk[:len(chunk) - segment_length * times],
                   i * SEGMENT_TIME, (i + 1) * SEGMENT_TIME)
        else:
            yield chunk, i * SEGMENT_TIME, (i + 1) * SEGMENT_TIME
        
        
def get_rgb_frames(rgb_frames_path):
    import glob
    
    return list(sorted(glob.glob(rgb_frames_path + '/' + '*jpg')))

def load_segment(frames):
    return [Image.open(frame).convert('RGB') for frame in frames]

def make_prediction_on_segment(segment):
    images = load_segment(segment)
    images = transform(images)
    images = images.unsqueeze(0)
    
    input_var = torch.autograd.Variable(images, volatile=True)
    
    output = model(input_var)
    
    _, pred = output.mean(dim=0).topk(5)
    return pred.cpu().data.numpy()

def build_class_index():
    return {i: class_ for i, class_ in enumerate(sorted(os.listdir('/media/d/vsd/data/ucf101/UCF-101/')))}

In [159]:
class_index = build_class_index()

In [160]:
rgb_frames_path = '/media/d/vsd/data/imitations_temp/rgb/IMG_0085/'
frames = get_rgb_frames(rgb_frames_path)

In [161]:
from tqdm import tqdm_notebook

preds = {}

for segment, start_time, end_time in tqdm_notebook(split_frames(frames)):
    preds[(start_time, end_time)] = [class_index[int(class_id)] for class_id in make_prediction_on_segment(segment)]

torch.Size([1, 225, 224, 224])
torch.Size([1, 225, 224, 224])

torch.Size([1, 225, 224, 224])
torch.Size([1, 225, 224, 224])
torch.Size([1, 225, 224, 224])
torch.Size([1, 225, 224, 224])
torch.Size([1, 414, 224, 224])



In [162]:
preds

{(0, 3): ['PizzaTossing',
  'shoot_bow',
  'MoppingFloor',
  'HammerThrow',
  'Bowling'],
 (3, 6): ['PizzaTossing',
  'Bowling',
  'BreastStroke',
  'BodyWeightSquats',
  'MoppingFloor'],
 (6, 9): ['Nunchucks',
  'ShavingBeard',
  'CleanAndJerk',
  'CricketBowling',
  'PizzaTossing'],
 (9, 12): ['shoot_bow', 'PlayingFlute', 'PizzaTossing', 'JumpRope', 'Archery'],
 (12, 15): ['BlowDryHair',
  'shoot_bow',
  'ApplyLipstick',
  'PizzaTossing',
  'Skiing'],
 (15, 18): ['PizzaTossing', 'shoot_gun', 'BlowDryHair', 'PushUps', 'Skiing'],
 (18, 21): ['Skiing',
  'PizzaTossing',
  'ApplyLipstick',
  'PlayingFlute',
  'shoot_bow']}

In [None]:
def make_predictions():
    def meke_prediction_on_segment(frames):
        frames = helpers.rescale_list(frames, SEQ_LENGTH)

        inception_frame_features = extractor_model.extract_on_batch(frames)
        inception_frame_features = np.expand_dims(inception_frame_features, axis=0)
        
        return model.model.predict_proba(inception_frame_features, verbose=0).ravel()
    
    def split_frames(frames):
        def chunks(l, n):
            """Yield successive n-sized chunks from l."""
            for i in range(0, len(l), n):
                yield l[i:i + n]
        
        n_frames = len(frames)
        segment_length = FPS * SEGMENT_TIME
        
        for i, chunk in enumerate(chunks(frames, segment_length)):
            if len(chunk) < SEQ_LENGTH:
                continue
            yield chunk, i * SEGMENT_TIME, (i + 1) * SEGMENT_TIME
        
    preds = {}
    
    for video in tqdm_notebook(get_videos(DATA_PATH), desc='Making predictions'):
        video_rgb_frames_path = os.path.join(RGB_FRAMES_PATH, video.name)
        frames = helpers.get_rgb_frames(video_rgb_frames_path)

        if SEGMENT_TIME:
            for segment, start_time, end_time in tqdm_notebook(split_frames(frames), leave=False):
                preds[(video.path, start_time, end_time)] = meke_prediction_on_segment(segment)
        else:
            preds[video.path] = meke_prediction_on_segment(frames)
        
    return preds

In [9]:
y_pred = []

for i, (input, target) in enumerate(val_loader):
    target = target.cuda(async=True)
    input_var = torch.autograd.Variable(input, volatile=True)
    target_var = torch.autograd.Variable(target, volatile=True)

    # compute output
    output = model(input_var)
    
    y_pred.extend(output)
    break