In [9]:
import torch 
import h5py

class VSMDataset(torch.utils.data.Dataset):
    """Video Summarizer Dataset
        Datasets: TVSum, Summe, VSUMM, CoSum, Visiocity

    Every top-level entry of the HDF5 file is treated as one video and is
    read fully into memory when the dataset is constructed; the file is
    closed again before ``__init__`` returns. Samples are addressed by the
    integer position of the video in the file's iteration order.
    """

    # Per-video HDF5 keys that are served as labels (``y``) instead of inputs.
    _LABEL_KEYS = ('gtscore', 'gtsummary', 'user_summary')

    def __init__(self, hdfs_path, split=None, 
                 googlenet=False, 
                 resnext=False, 
                 inceptionv3=False,
                 i3d_rgb=False,
                 i3d_flow=False,
                 resnet3d=False
                ):
        """
        Args:
           hdfs_path (string): path of the hdfs processed data
           split (dict): idxs of the train/test split (not implemented yet;
               a truthy value only triggers a warning and is otherwise ignored)
           googlenet (bool): keep the 'features' key of each video
           resnext (bool): keep the 'features_rn' key of each video
           inceptionv3 (bool): keep the 'features_iv3' key of each video
           i3d_rgb (bool): keep the 'features_rgb' key of each video
           i3d_flow (bool): keep the 'features_flow' key of each video
           resnet3d (bool): keep the 'features_3D' key of each video
        """
        # Optional feature keys paired with the flag that enables each of them.
        optional_features = (
            ('features', googlenet),
            ('features_rn', resnext),
            ('features_iv3', inceptionv3),
            ('features_rgb', i3d_rgb),
            ('features_flow', i3d_flow),
            ('features_3D', resnet3d),
        )
        # Anything listed here is excluded from the input dict: the label keys
        # always, plus every feature key whose flag is off.
        keys_to_avoid = set(self._LABEL_KEYS)
        keys_to_avoid.update(key for key, enabled in optional_features if not enabled)

        self.labels = {}
        self.data = {}

        # Open read-only and close deterministically. All values are copied
        # out with ``[...]`` below, so nothing refers to the file afterwards.
        with h5py.File(hdfs_path, 'r') as videos_info:
            for it, video in enumerate(videos_info):
                group = videos_info[video]
                labels = {}
                data = {}
                for key in group:
                    if key in self._LABEL_KEYS:
                        labels[key] = group[key][...]
                    elif key not in keys_to_avoid:
                        data[key] = group[key][...]

                # NOTE(review): str() of a bytes-valued entry yields its repr
                # (e.g. "b'...'"); kept unchanged so existing consumers see
                # the same value - confirm whether decoding is wanted.
                if "video_name" in data:
                    data["video_name"] = str(data["video_name"])

                self.labels[it] = labels
                self.data[it] = data

        if split:
            # TO-DO: apply the train/test split. Until then, make it visible
            # that the full set of videos is being returned.
            import warnings
            warnings.warn(
                "VSMDataset: 'split' is not implemented yet and is ignored; "
                "all videos are loaded.",
                stacklevel=2,
            )

    def __len__(self):
        """Return the number of videos loaded from the file."""
        return len(self.data)

    def __getitem__(self, index):
        """Return ``(inputs, labels)`` dicts for the video at ``index``."""
        X = self.data[index]
        y = self.labels[index]
        return X, y

    def get_feature(self, index, feature):
        """Return a single input entry ``feature`` of the video at ``index``.

        Raises:
            KeyError: if ``index`` or ``feature`` is not present.
        """
        return self.data[index][feature]

def show_sample(idx, hdfs_path):
    """Load the dataset stored at ``hdfs_path`` and return one sample.

    The dataset is built with its default arguments, so none of the optional
    feature keys are requested.

    Args:
        idx: position of the video to fetch.
        hdfs_path (string): path of the hdfs processed data.

    Returns:
        The ``(inputs, labels)`` pair for the video at ``idx``.
    """
    dataset = VSMDataset(hdfs_path=hdfs_path)
    inputs, targets = dataset[idx]
    return inputs, targets



In [10]:
# Keyword arguments forwarded to torch.utils.data.DataLoader.
params = dict(batch_size=1, num_workers=2)

# Number of epochs for a training loop.
max_epochs = 100

In [14]:
# HDF5 files that the loading loop below feeds to VSMDataset, one at a time.
# All entries are absolute paths on the original host.
paths = [
        # Files under datasets/object_features/
        "/data/shuaman/video_summarization/datasets/object_features/eccv16_dataset_summe_google_pool5.h5",
        "/data/shuaman/video_summarization/datasets/object_features/eccv16_dataset_tvsum_google_pool5.h5",
        # Files under datasets/dsnet/datasets/
        "/data/shuaman/video_summarization/datasets/dsnet/datasets/eccv16_dataset_ovp_google_pool5.h5",
        "/data/shuaman/video_summarization/datasets/dsnet/datasets/eccv16_dataset_summe_google_pool5.h5",
        "/data/shuaman/video_summarization/datasets/dsnet/datasets/eccv16_dataset_tvsum_google_pool5.h5",
        "/data/shuaman/video_summarization/datasets/dsnet/datasets/eccv16_dataset_youtube_google_pool5.h5",
        # "*_processed.h5" files under the project's src/ directory
        "/home/shuaman/video_sm/video_summarization/src/dataset_ovp_processed.h5",
        "/home/shuaman/video_sm/video_summarization/src/dataset_summe_processed.h5",
        "/home/shuaman/video_sm/video_summarization/src/dataset_tvsum_processed.h5",
        "/home/shuaman/video_sm/video_summarization/src/dataset_youtube_processed.h5",
        "/home/shuaman/video_sm/video_summarization/src/dataset_cosum_processed.h5",
        ]

In [15]:

for path in paths:
    # Switch on every optional feature flag so each optional key is requested.
    training_set = VSMDataset(
        path,
        googlenet=True,
        resnext=True,
        inceptionv3=True,
        i3d_rgb=True,
        i3d_flow=True,
        resnet3d=True,
    )
    training_generator = torch.utils.data.DataLoader(training_set, **params)

    # Drain the loader once; nothing is done with the batches here.
    # `video_info` and `label` stay bound to the last batch afterwards.
    for video_info, label in training_generator:
        pass
    print("done", path)

done /data/shuaman/video_summarization/datasets/object_features/eccv16_dataset_summe_google_pool5.h5
done /data/shuaman/video_summarization/datasets/object_features/eccv16_dataset_tvsum_google_pool5.h5
done /data/shuaman/video_summarization/datasets/dsnet/datasets/eccv16_dataset_ovp_google_pool5.h5
done /data/shuaman/video_summarization/datasets/dsnet/datasets/eccv16_dataset_summe_google_pool5.h5
done /data/shuaman/video_summarization/datasets/dsnet/datasets/eccv16_dataset_tvsum_google_pool5.h5
done /data/shuaman/video_summarization/datasets/dsnet/datasets/eccv16_dataset_youtube_google_pool5.h5
done /home/shuaman/video_sm/video_summarization/src/dataset_ovp_processed.h5
done /home/shuaman/video_sm/video_summarization/src/dataset_summe_processed.h5
done /home/shuaman/video_sm/video_summarization/src/dataset_tvsum_processed.h5
done /home/shuaman/video_sm/video_summarization/src/dataset_youtube_processed.h5
done /home/shuaman/video_sm/video_summarization/src/dataset_cosum_processed.h5


In [16]:
# Show the input dict of the last batch left bound by the loading loop.
video_info

{'change_points': tensor([[[   0,  194],
          [ 195,  254],
          [ 255,  389],
          [ 390,  494],
          [ 495,  974],
          [ 975, 1109],
          [1110, 1154],
          [1155, 1259],
          [1260, 1349],
          [1350, 1409],
          [1410, 1469],
          [1470, 1544],
          [1545, 1679],
          [1680, 1844],
          [1845, 1904],
          [1905, 2039],
          [2040, 2099],
          [2100, 2159],
          [2160, 2219],
          [2220, 2354],
          [2355, 2474],
          [2475, 2519],
          [2520, 2909],
          [2910, 2969],
          [2970, 3044],
          [3045, 3419],
          [3420, 3509],
          [3510, 3599],
          [3600, 3704],
          [3705, 3824],
          [3825, 4259],
          [4260, 4334],
          [4335, 4469],
          [4470, 5189],
          [5190, 5234],
          [5235, 5384],
          [5385, 5500]]]),
 'features': tensor([[[0.0340, 0.0433, 0.2418,  ..., 0.2822, 1.8960, 0.4547],
          [0.0

In [9]:
# Show the label dict of a batch.
# NOTE(review): this cell's execution count (9) is lower than the loading
# loop's (15), so the saved output may come from an earlier run - re-run it
# after the loop to be sure it matches `video_info`.
label

{'gtscore': tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1.,
          1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
     