In [1]:
import os, json
import pickle as pkl
import subprocess
import warnings
from collections import Counter as C
from time import time

import numpy as np
import torch, torchvision
from easydict import EasyDict as edict
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision.datasets.video_utils import VideoClips

# Network configuration, declared as one nested literal. EasyDict turns the
# nested plain dict into an attribute-accessible node, so the settings are
# still read as `_C.NETWORK.<NAME>`.
_C = edict(
    {
        "NETWORK": {
            # Backbone selection.
            "BACKBONE": "res18",
            "BACKBONE_LOAD_PRETRAINED": True,
            # Temporal MLP.
            "TEMPORAL_MLP_DIMS": 512,
            "TEMPORAL_MLP_ACTIVATION": "LeakyReLU",
            # Transformer.
            "TRANSFORMER_DIMS": 512,
            "TRANSFORMER_HEADS": 8,
            "TRANSFORMER_ENCODER_CNT": 8,
            "TRANSFORMER_DROPOUT": 0.1,
            "TRANSFORMER_FEEDFORWARD_DIMS": 2048,
            # Positional dropout and number of output classes.
            "POSITIONAL_DROPOUT": 0.1,
            "NUM_CLASSES": 700,
        }
    }
)
# Bare last expression so the notebook renders the assembled config.
_C

{'NETWORK': {'BACKBONE': 'res18',
  'BACKBONE_LOAD_PRETRAINED': True,
  'TEMPORAL_MLP_DIMS': 512,
  'TEMPORAL_MLP_ACTIVATION': 'LeakyReLU',
  'TRANSFORMER_DIMS': 512,
  'TRANSFORMER_HEADS': 8,
  'TRANSFORMER_ENCODER_CNT': 8,
  'TRANSFORMER_DROPOUT': 0.1,
  'TRANSFORMER_FEEDFORWARD_DIMS': 2048,
  'POSITIONAL_DROPOUT': 0.1,
  'NUM_CLASSES': 700}}

In [2]:
# Dataset root directory; the trailing slash matters because paths below are
# built by plain string concatenation.
basepath = '/data/datasets/kinetics700_2020/'
# Read the validation annotations inside a context manager so the file handle
# is closed as soon as parsing finishes instead of lingering in the kernel.
with open(basepath + "full/val.json") as val_json_fp:
    val_dataset = json.load(val_json_fp)

In [3]:
# Display one (video key, annotation record) pair to inspect the record layout.
list(val_dataset.items())[990]

('47tFTCZxPNc',
 {'annotations': {'label': ['karaoke', 596], 'segment': [18.0, 28.0]},
  'duration': 10.0,
  'subset': 'validate',
  'url': 'https://www.youtube.com/watch?v=47tFTCZxPNc',
  'nb_frames': '150',
  'hw': [144, 192],
  'true_duration': '10.000000'})

In [4]:
from time import time
def bench(cmd, iter):
    """Run ``cmd`` ``iter`` times in a row and print the total elapsed seconds.

    Args:
        cmd: zero-argument callable to be timed.
        iter: how many consecutive calls to make.

    Returns:
        None. The wall-clock duration of the whole loop is printed.
    """
    started = time()
    for _ in range(iter):
        cmd()
    elapsed = time() - started
    print(elapsed)

In [5]:
class load_and_dump(Dataset):
    """Dataset that re-encodes one source video per index as a side effect.

    Fetching item ``idx`` runs ffmpeg on ``<video_folder><key>.mp4`` and asks it
    to write ``<save_folder><key>.mp4``. The value handed back is only the video
    key, so wrapping an instance in a multi-worker DataLoader is a way of
    running the conversions in parallel.

    Args:
        dataset: mapping from video key to its annotation record. Only the keys
            are used here.
        video_folder: directory holding the source ``.mp4`` files; must end
            with ``'/'`` because paths are built by concatenation.
        save_folder: directory receiving the converted files; must end with
            ``'/'`` and must differ from ``video_folder``.
    """

    # Value passed to ffmpeg's -vf option: resample to 10 fps, then scale so
    # that the shorter side becomes 256 pixels. `a` is the input aspect ratio;
    # for landscape input (a > 1) the height is fixed to 256, otherwise the
    # width is. The -2 lets ffmpeg derive the other side from the aspect ratio,
    # rounded to an even number. The single quotes are part of the filter
    # syntax (they protect the commas inside the expressions), not shell quoting.
    _VIDEO_FILTER = "fps=10,scale='if(gt(a,1),-2,256)':'if(gt(a,1),256,-2)'"

    def __init__(self, dataset, video_folder, save_folder):
        super(load_and_dump, self).__init__()
        # endswith() also rejects an empty string cleanly instead of raising
        # IndexError the way indexing with [-1] would.
        assert video_folder.endswith('/')
        assert save_folder.endswith('/')

        self.dataset = list(dataset.items())
        self.video_path = video_folder
        self.save_path = save_folder
        # Guard against writing the converted files over their own sources.
        assert self.video_path != self.save_path

    def __len__(self):
        """Return the number of videos in the annotation mapping."""
        return len(self.dataset)

    def __getitem__(self, idx):
        """Convert the ``idx``-th video with ffmpeg and return its key.

        The conversion is best-effort: if ffmpeg cannot be started or exits
        with a non-zero status a warning is emitted and the key is still
        returned, so one bad video does not abort the whole pass.
        """
        k, _ = self.dataset[idx]

        vid_path = self.video_path + k + '.mp4'
        save_path = self.save_path + k + '.mp4'

        # Drop audio (-an) and subtitles (-sn); resample and rescale the video.
        # The command is an argument list, not a shell string, so characters in
        # a key or folder name are never interpreted by a shell.
        cmd = [
            'ffmpeg',
            '-i', vid_path,
            '-vf', self._VIDEO_FILTER,
            '-an', '-sn',
            save_path,
        ]
        try:
            # stdin is detached so ffmpeg can never block a worker waiting for
            # interactive input.
            completed = subprocess.run(cmd, stdin=subprocess.DEVNULL)
        except OSError as exc:
            warnings.warn(f"could not launch ffmpeg for {k!r}: {exc}")
        else:
            if completed.returncode != 0:
                warnings.warn(
                    f"ffmpeg exited with status {completed.returncode} for {k!r}"
                )

        return k

In [6]:
def collate(x):
    """Identity collate function: return the list of samples untouched."""
    return x


# Number of samples requested per DataLoader batch.
batch_size = 60
# Stop here if the output directory for the converted videos is missing.
assert os.path.exists(basepath + 'val_cache_short_fixed/')

In [9]:
# Iterating the loader is what triggers the work: each fetched sample is one
# load_and_dump item. One worker process is started per sample in a batch.
dataset = DataLoader(
    load_and_dump(
        val_dataset,
        basepath + 'val/',
        basepath + 'val_cache_short_fixed/',
    ),
    batch_size=batch_size,
    num_workers=batch_size,
    collate_fn=collate,
)

# Keep every returned batch; report progress once every ten batches.
probes = []
for i, batch in enumerate(dataset):
    if not i % 10:
        print(i*batch_size,'/', len(val_dataset))
    probes.append(batch)

0 / 32732
600 / 32732
1200 / 32732
1800 / 32732
2400 / 32732
3000 / 32732
3600 / 32732
4200 / 32732
4800 / 32732
5400 / 32732
6000 / 32732
6600 / 32732
7200 / 32732
7800 / 32732
8400 / 32732
9000 / 32732
9600 / 32732
10200 / 32732
10800 / 32732
11400 / 32732
12000 / 32732
12600 / 32732
13200 / 32732
13800 / 32732
14400 / 32732
15000 / 32732
15600 / 32732
16200 / 32732
16800 / 32732
17400 / 32732
18000 / 32732
18600 / 32732
19200 / 32732
19800 / 32732
20400 / 32732
21000 / 32732
21600 / 32732
22200 / 32732
22800 / 32732
23400 / 32732
24000 / 32732
24600 / 32732
25200 / 32732
25800 / 32732
26400 / 32732
27000 / 32732
27600 / 32732
28200 / 32732
28800 / 32732
29400 / 32732
30000 / 32732
30600 / 32732
31200 / 32732
31800 / 32732
32400 / 32732


In [None]:
# Number of batches collected by the conversion loop.
len(probes)

In [None]:
# Show the first collected batch.
probes[0]

In [10]:
# Parse the validation annotations; the context manager closes the file handle
# once the JSON has been read instead of leaving it open in the kernel.
with open('/data/datasets/kinetics700_2020/full/val.json') as val_json_fp:
    val_json_file = json.load(val_json_fp)

In [12]:
# Display the first (video key, annotation record) pair of the loaded file.
list(val_json_file.items())[0]

('---QUuC4vJs',
 {'annotations': {'label': ['testifying', 104], 'segment': [84.0, 94.0]},
  'duration': 10.0,
  'subset': 'validate',
  'url': 'https://www.youtube.com/watch?v=---QUuC4vJs',
  'nb_frames': '300',
  'hw': [240, 320],
  'true_duration': '10.009972'})

In [24]:
# convert json annotations to CSV files as required in SlowFast code

def convert_json_to_csv_annotations(mode='train', basepath='/data/datasets/kinetics700_2020/'):
    """Write ``full/<mode>.csv`` from ``full/<mode>.json`` under ``basepath``.

    Every entry of the JSON mapping becomes one ``<video path>,<label id>``
    line. The video path is ``<basepath>/<mode>_cache_short_fixed/<key>.mp4``
    and the label id is the second element of the entry's
    ``annotations.label`` pair. Lines are joined with ``'\\n'`` and no trailing
    newline is written.

    Args:
        mode: split name used in the file and folder names (e.g. ``'train'``).
        basepath: dataset root directory; defaults to the location used
            throughout this notebook.

    Returns:
        None. The CSV is written to disk and the number of annotations is
        printed.

    Raises:
        OSError: if the JSON file cannot be read or the CSV cannot be written.
        json.JSONDecodeError: if the JSON file is malformed.
    """
    annotations_path = os.path.join(basepath, f'full/{mode}.json')
    csv_path = os.path.join(basepath, f'full/{mode}.csv')

    # Load and format everything before touching the CSV, so a missing or
    # malformed JSON never leaves a freshly truncated, empty CSV behind.
    with open(annotations_path) as json_fp:
        json_file = json.load(json_fp)

    print(f"length of the {mode} annotations file is : {len(json_file)}")

    lines = [
        ','.join([
            os.path.join(basepath, f'{mode}_cache_short_fixed/{k}.mp4'),
            str(v['annotations']['label'][1]),
        ])
        for k, v in json_file.items()
    ]

    # The context manager guarantees the data is flushed and the handle closed.
    with open(csv_path, 'w') as csv_file:
        csv_file.write('\n'.join(lines))
        
        
    

In [26]:
# Generate full/train.csv from the training annotations.
convert_json_to_csv_annotations('train')

length of the train annotations file is : 523098
