Install decord

In [1]:
!pip install --upgrade decord

Collecting decord
[?25l  Downloading https://files.pythonhosted.org/packages/64/5e/e2be6a3a3a46275059574d9c6a1d422aa6c7c3cbf6614939b8a3c3f8f2d5/decord-0.5.2-py3-none-manylinux2010_x86_64.whl (14.1MB)
[K     |████████████████████████████████| 14.1MB 245kB/s 
Installing collected packages: decord
Successfully installed decord-0.5.2


In [1]:
#%% Define and load model
from pathlib import Path
import torch
import torch.nn as nn
import torchvision.models as models

# Define: ResNet-50 with a 339-way output layer and no downloaded weights
# (pretrained=False); the weights come from the checkpoint loaded below.
# Fall back to the CPU when no GPU is visible so this cell still runs.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
resnet50 = models.resnet50(pretrained=False, progress=True, num_classes=339).to(device)
# Load pretrained weights (MiTv1)
path_model = Path('/content/drive/MyDrive/resnet50_moments-fd0c4436.pth')
# map_location='cpu' deserialises the checkpoint tensors on the CPU no matter
# which device they were saved from; load_state_dict then copies the values
# into the model's parameters on `device`.
resnet50.load_state_dict(torch.load(path_model, map_location='cpu'))
# Evaluation mode
resnet50.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [2]:
#%% Transformations
import torchvision.transforms as transforms
# Per-frame preprocessing steps, applied in the order listed.
_frame_pipeline = [
    # Needed when the incoming frame is an ndarray rather than a PIL image.
    transforms.ToPILImage(mode='RGB'),
    # TODO (original author): switch to a rescale that keeps the aspect ratio?
    # NOTE(review): per the torchvision docs an int size already scales the
    # shorter edge to 224 and preserves the aspect ratio - confirm and drop the TODO.
    transforms.Resize(224),
    transforms.CenterCrop((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
]
transformation = transforms.Compose(_frame_pipeline)

In [3]:
# %% Load categories
def load_categories(path=Path('/content/drive/MyDrive/category_momentsv1.txt')):
    """Load the category labels, one per line.

    Args:
        path: Text file holding one category name per line. Defaults to the
            category_momentsv1.txt location this notebook has always used, so
            existing no-argument calls behave as before.

    Returns:
        list[str]: One entry per line of the file, in file order, with
        trailing whitespace (including the newline) stripped. Blank lines are
        kept as empty strings, so list positions still match line positions.
    """
    with open(path) as f:
        # Iterating the handle yields the same lines as readlines() without
        # building an intermediate list first.
        return [line.rstrip() for line in f]

# load categories: a label's list position is what categories.index(...) later
# uses to pick that category's entry out of the model's softmax output
categories = load_categories()

In [4]:
#%% Sweep through files in subfolders of path_input
import os
path_input = Path('/content/drive/MyDrive/MIT_sampleVideos_RAW_final_25FPS').absolute()


def list_mp4_videos(root):
    """Collect every .mp4 file found below `root`.

    Args:
        root: Directory to walk recursively.

    Returns:
        list[list[str]]: Sorted `[category, file_name]` pairs, where
        `category` is the name of the directory that directly contains the
        file. Every file that does not end in '.mp4' is reported on stdout
        and left out.
    """
    found = []
    for dir_path, _subdirs, file_names in os.walk(root):
        category = os.path.basename(dir_path)
        for name in file_names:
            # Match the real extension: a bare name[-3:] == 'mp4' test would
            # also accept names such as 'clipmp4' that have no '.mp4' suffix.
            if name.endswith('.mp4'):
                found.append([category, name])
            else:
                print('Ignored: ', name)
    # sorted() copes with an empty list, so no emptiness guard is needed
    return sorted(found)


l_videos = list_mp4_videos(path_input)
print('Total nr. of MP4s: ', len(l_videos))


Ignored:  readme.txt
Ignored:  .DS_Store
Total nr. of MP4s:  1458


In [None]:
# Spot-check one [category, file name] entry of the sorted listing.
# numpy is imported here because no earlier cell of this notebook imports it,
# so the cell would otherwise raise NameError on a fresh kernel.
import numpy as np
np.array(l_videos)[100]

array(['balancing', 'yt-m3zrcNknWVE_253.mp4'], dtype='<U131')

In [None]:
# %% Sweep through videos
import time
import decord
decord.bridge.set_bridge('native') # Seems to be the fastest option
from decord import cpu, gpu
from decord import VideoReader
from torch.nn import functional as F
import numpy as np
import pandas as pd

verbose = True  # print a progress line every 50 videos

# Run inference on whichever device the model already lives on instead of
# hard-coding a device name.
model_device = next(resnet50.parameters()).device

start = time.time()

# One row per video:
# [category, file name, index of the frame with the highest softmax score for
#  the video's own category, that score]
l_mifs = []

n_videos = len(l_videos)
for j, (category, file_name) in enumerate(l_videos):
  if verbose and j % 50 == 0:
    print(f'{j}/{n_videos}')

  # Position of the video's category in the label list; used below to pick
  # that category's entry from the softmax output. Raises ValueError when the
  # folder name is not a known category.
  cat_idx = categories.index(category)
  path_input_file = str(path_input / category / file_name)

  # Load video with Decord.VideoReader
  vr = VideoReader(path_input_file)
  if len(vr) == 0:
    # argmax is undefined for an empty clip; report it rather than crash
    print(f'Skipped (no frames): {path_input_file}')
    continue
  # Decode every frame of the clip in one call
  video_frames = vr.get_batch(range(0, len(vr), 1)).asnumpy()
  n_frames = video_frames.shape[0]

  # Softmax score of the video's own category, one value per frame
  pred_accuracies = np.zeros((n_frames, ))

  # Inference only: no_grad() avoids building an autograd graph per frame
  with torch.no_grad():
    for i in range(n_frames):
      frame = transformation(video_frames[i]).to(model_device)

      # Classification: call the module itself (not .forward) so that any
      # registered hooks run; unsqueeze adds the batch dimension.
      logit = resnet50(frame.unsqueeze(0))
      # .item() turns the 0-dim device tensor into a plain Python float
      pred_accuracies[i] = F.softmax(logit, 1)[0, cat_idx].item()

  best_frame = int(np.argmax(pred_accuracies))
  l_mifs.append([category, file_name,
                 best_frame,
                 pred_accuracies[best_frame]])

stop = time.time()
duration = stop - start
# Average over the number of videos rather than the last loop index, which is
# one too small and is zero (or undefined) for fewer than two videos.
per_file = duration / n_videos if n_videos else 0.0
print(f'\nTime spent: {duration:.4f}s (~ {per_file:.2f}s per file)')

0/1458
50/1458
100/1458


Stack together and save to csv as: \\
  `category, fname, mif_idx, softmax[category]`

In [34]:
import pandas as pd

# Column labels, in the same order as the fields of each row in l_mifs
mif_columns = ['category', 'fname', 'mif_idx', 'softmax[category]']
df = pd.DataFrame(data=l_mifs, columns=mif_columns)
print(df)
# Write the table next to the videos; with to_csv's defaults the DataFrame
# index is written as well, as the first (unnamed) column.
df.to_csv('/content/drive/MyDrive/MIT_sampleVideos_RAW_final_25FPS/mifs.csv')

     category                   fname  mif_idx  softmax[category]
0      aiming  yt-0gwUV4Ze-Hs_390.mp4       50           0.627705
1      aiming   yt-0qYbATyHm2A_59.mp4        2           0.055494
2      aiming  yt-2yYb3iQCivw_130.mp4       36           0.409956
3      aiming   yt-chT_6aIyhD4_47.mp4        6           0.021016
4      aiming  yt-fG9wZzs4jis_124.mp4        1           0.331710
5      aiming   yt-fM2iXUuaP7U_48.mp4        0           0.050344
6      aiming   yt-iVSy96zolvw_23.mp4       37           0.208761
7  applauding    yt-06tUmXhgnSY_4.mp4       43           0.058240
8  applauding    yt-A70byjNkwdA_4.mp4        0           0.026100
9  applauding   yt-E14-2TmbCD8_12.mp4        1           0.539783


TESTING

In [23]:
# %%
from pathlib import Path
import pickle
import numpy as np
# NOTE(review): path_prefix and l_categories are not used in this cell; they
# are kept only in case other cells reference them.
path_prefix = Path().parent.absolute()
# Joining a prefix with an absolute path discards the prefix, so the pickle
# location is spelled out directly (same resulting path as before).
dict_path = Path('/content/drive/MyDrive/accuracies_per_category.pkl')
# Load from file; the context manager guarantees the handle is closed.
# NOTE: pickle.load can execute arbitrary code - only open pickles you trust.
with open(dict_path, 'rb') as f:
    accuracies_per_category = pickle.load(f)

#%%
l_categories = categories

category_name = 'aiming'
video_fname = 'yt-2yYb3iQCivw_130.mp4'

# Per-frame values stored for this video under its category
per_frame_accuracies = np.array(accuracies_per_category[category_name][video_fname])

# Locate the extremes once and reuse the indices for the value lookup
idx_max = np.argmax(per_frame_accuracies)
idx_min = np.argmin(per_frame_accuracies)

print(f'\t{video_fname} : Max/Min accuracy at frame:'
      f' {idx_max}/{idx_min}'
      f' with value: {per_frame_accuracies[idx_max]}'
      f' / {per_frame_accuracies[idx_min]}')
# %%

	yt-2yYb3iQCivw_130.mp4 : Max/Min accuracy at frame: 41/83 with value: [0.4076968] / [0.02076938]
