In [None]:
# Install decord if necessary
!pip install --upgrade decord

Collecting decord
[?25l  Downloading https://files.pythonhosted.org/packages/64/5e/e2be6a3a3a46275059574d9c6a1d422aa6c7c3cbf6614939b8a3c3f8f2d5/decord-0.5.2-py3-none-manylinux2010_x86_64.whl (14.1MB)
[K     |████████████████████████████████| 14.1MB 250kB/s 
Installing collected packages: decord
Successfully installed decord-0.5.2


Load pretrained model

In [3]:
#%% Define and load model
from pathlib import Path
import torch
import torch.nn as nn
import torchvision.models as models

with_cuda = False
# Define and load model
from pathlib import Path
import torch
import torch.nn as nn
import torchvision.models as models

with_cuda = False
path_prefix = Path('..')

if with_cuda:
    resnet50 = models.resnet50(pretrained=False, progress=True, num_classes=339).to('cuda')
else:
    resnet50 = models.resnet50(pretrained=False, progress=True, num_classes=339)

# Load pretrained weights (MiTv1)
#path_model = Path('/content/drive/MyDrive/resnet50_moments-fd0c4436.pth')
path_model = path_prefix / 'models/resnet50_moments-fd0c4436.pth'
resnet50.load_state_dict(torch.load(path_model))

# Evaluation mode
resnet50.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

Define transformations

In [4]:
#%% Transformations
import torchvision.transforms as transforms
transformation = transforms.Compose([
                                     transforms.ToPILImage(mode='RGB'), # required if the input image is a nd.array
                                     transforms.Resize(224), # To be changed to rescale to keep the aspect ration?
                                     transforms.CenterCrop((224, 224)),
                                     transforms.ToTensor(),
                                     transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                          std=[0.229, 0.224, 0.225])
])

Load categories

In [5]:
# %% Load categories

path_labels = path_prefix / 'labels/category_momentsv1.txt'

def load_categories():
    """Load categories."""
    with open(path_labels) as f:
        return [line.rstrip() for line in f.readlines()]

# load categories
categories = load_categories()

In [None]:
Load videos

In [6]:
#%% Sweep through files in subfolders of path_input
import os
path_input = path_prefix / 'data/MIT_sampleVideos_RAW_final'

l_videos = []
for path, subdirs, files in os.walk(path_input):
  for name in files:
    if name[-3:] == 'mp4':
      l_videos.append([path.split('/')[-1],   # category
                       name])                 # file name
    else:
      print('Ignored: ', name)

if l_videos:
  l_videos = sorted(l_videos)
print('Total nr. of MP4s: ', len(l_videos))


Ignored:  .DS_Store
Total nr. of MP4s:  1544


In [7]:
print(l_videos[:10])

[['aiming', 'yt-0gwUV4Ze-Hs_390.mp4'], ['aiming', 'yt-0qYbATyHm2A_59.mp4'], ['aiming', 'yt-2yYb3iQCivw_130.mp4'], ['aiming', 'yt-chT_6aIyhD4_47.mp4'], ['aiming', 'yt-fG9wZzs4jis_124.mp4'], ['aiming', 'yt-fM2iXUuaP7U_48.mp4'], ['aiming', 'yt-iVSy96zolvw_23.mp4'], ['applauding', 'yt-06tUmXhgnSY_4.mp4'], ['applauding', 'yt-A70byjNkwdA_4.mp4'], ['applauding', 'yt-E14-2TmbCD8_12.mp4']]


Extract prediction accuracies on MIFs (most informative frame, i.e. frame w/ highest prediction accuracy)

In [8]:
# %% Sweep through videos
import time
import decord
decord.bridge.set_bridge('native') # Seems to be the fastest option
from decord import cpu, gpu
from decord import VideoReader
from torch.nn import functional as F
import numpy as np
import pandas as pd

start = time.time()

l_mifs = []

# Iterate over entries in l_videos:
for j in range(len(l_videos[:10])):
  # Verbose
  if j%50 == 0:
    print(f'{j}/{len(l_videos)}')
  
  
  # Define path
  category, file_name = l_videos[j]
  cat_idx = categories.index(category)
  path_input_file = str(path_input / category/ file_name)
  
  
  # Load video with Decord.VideoReader
  vr = VideoReader(path_input_file)
  video_frames = vr.get_batch(range(0, len(vr), 1)).asnumpy()
  
  # Define empty array for accuracies
  pred_accuracies = np.zeros((video_frames.shape[0], ))

  # Iterate over frames
  for i in range(video_frames.shape[0]):
    if with_cuda:
      input = transformation(video_frames[i]).to('cuda')
    else:
      input = transformation(video_frames[i])
    
    # Classification:
    logit = resnet50.forward(input.unsqueeze(0))      # extract output to given input 
    h_x = F.softmax(logit, 1).data.squeeze()[cat_idx] # transform to softmax
    pred_accuracies[i]= h_x

  
  # Append to output list
  l_mifs.append([category, file_name,
                 np.argmax(pred_accuracies),
                 pred_accuracies[np.argmax(pred_accuracies)]])
   
stop = time.time()
duration = stop-start
print(f'\nTime elapsed:: {duration:.4f}s (~ {duration/j:.2f}s per file)')

0/1544

Time elapsed:: 199.2183s (~ 22.14s per file)


Stack together and save to csv as: \\
  `category, fname, mif_idx`

In [9]:
import pandas as pd

df = pd.DataFrame(l_mifs, columns=['category', 'fname', 'mif_idx', 'softmax[category]'])
print(df)
df.to_csv(path_prefix / 'saved/mifs.csv')

     category                   fname  mif_idx  softmax[category]
0      aiming  yt-0gwUV4Ze-Hs_390.mp4       58           0.607241
1      aiming   yt-0qYbATyHm2A_59.mp4        4           0.068653
2      aiming  yt-2yYb3iQCivw_130.mp4       41           0.407697
3      aiming   yt-chT_6aIyhD4_47.mp4        8           0.019289
4      aiming  yt-fG9wZzs4jis_124.mp4        1           0.354655
5      aiming   yt-fM2iXUuaP7U_48.mp4        0           0.051184
6      aiming   yt-iVSy96zolvw_23.mp4       43           0.270948
7  applauding    yt-06tUmXhgnSY_4.mp4       48           0.062576
8  applauding    yt-A70byjNkwdA_4.mp4        0           0.026515
9  applauding   yt-E14-2TmbCD8_12.mp4        1           0.540867


TESTING

In [16]:
from pathlib import Path
import pickle
import numpy as np

dict_path = path_prefix / 'saved/ResNet50_MiTv1_accuracies_per_category.pkl'
# Load from file
f = open(dict_path, 'rb')
accuracies_per_category = pickle.load(f)

l_categories = categories

category_name = 'aiming'
video_fname = 'yt-0gwUV4Ze-Hs_390.mp4'

per_frame_accuracies = np.array(accuracies_per_category[category_name][video_fname])

print(f'\t{video_fname} : Max/Min accuracy at frame:' \
f' {np.argmax(per_frame_accuracies)}/{np.argmin(per_frame_accuracies)}' \
f' with value: {per_frame_accuracies[np.argmax(per_frame_accuracies)]}' \
f' / {per_frame_accuracies[np.argmin(per_frame_accuracies)]}')

	yt-0gwUV4Ze-Hs_390.mp4 : Max/Min accuracy at frame: 58/23 with value: [0.60724086] / [0.083207]


In [29]:
df.loc[lambda df: df['fname'] == video_fname]

Unnamed: 0,category,fname,mif_idx,softmax[category]
0,aiming,yt-0gwUV4Ze-Hs_390.mp4,58,0.607241
