In [14]:
import torch 
import h5py

class VSMDataset(torch.utils.data.Dataset):
    """Video Summarizer Dataset.

    Datasets: TVSum, Summe, VSUMM, CoSum, Visiocity

    Every top-level group of the HDF5 file is treated as one video. All of
    its entries are read eagerly into memory while the file is open, and the
    file is closed again before ``__init__`` returns. Samples are indexed by
    the position of the video group in the file's iteration order.
    """

    # Entries of a video group that are returned as labels; every other
    # entry is returned as part of the input dict.
    LABEL_KEYS = ('gtscore', 'gtsummary', 'user_summary')

    def __init__(self, hdfs_path, split=None, transform=None):
        """
        Args:
            hdfs_path (string): path of the hdfs processed data
            split (dict): idxs of the train/test split. Accepted for forward
                compatibility but not implemented yet: all videos are loaded.
            transform (callable, optional): stored as ``self.transform``;
                this class does not apply it itself.
        """
        self.labels = {}
        self.data = {}
        self.transform = transform

        # Open read-only and close deterministically: everything is copied
        # into memory below, so no handle needs to outlive the constructor.
        with h5py.File(hdfs_path, "r") as videos_info:
            for it, video in enumerate(videos_info):
                group = videos_info[video]
                labels, data = {}, {}
                # Single pass over the group, routing each entry by its key.
                for key in group:
                    target = labels if key in self.LABEL_KEYS else data
                    target[key] = group[key][...]

                if "video_name" in data:
                    data["video_name"] = self._as_text(data["video_name"])

                self.labels[it] = labels
                self.data[it] = data

        if split:
            # TODO: restrict the loaded videos to the indices given in split.
            pass

    @staticmethod
    def _as_text(value):
        """Return ``value`` as ``str``, decoding a single byte string.

        Calling ``str()`` on bytes yields the repr (``"b'name'"``) instead of
        the text, so a lone bytes value is decoded. Anything that is not a
        single text value falls back to ``str(value)``.
        """
        item = value
        if getattr(value, "size", None) == 1 and hasattr(value, "item"):
            item = value.item()
        if isinstance(item, bytes):
            return item.decode("utf-8", errors="replace")
        if isinstance(item, str):
            return item
        return str(value)

    def __len__(self):
        """Return the number of videos loaded from the file."""
        return len(self.data)

    def __getitem__(self, index):
        """Return ``(inputs, labels)`` dicts for the video at ``index``."""
        X = self.data[index]
        y = self.labels[index]

        return X, y

def show_sample(idx, hdfs_path):
    """Load the dataset stored at ``hdfs_path`` and return sample ``idx``.

    Args:
        idx: index of the sample to fetch from the dataset.
        hdfs_path (string): path of the hdfs processed data.

    Returns:
        tuple: the ``(video_info, label)`` pair produced by ``VSMDataset``.
    """
    dataset = VSMDataset(hdfs_path=hdfs_path)
    sample_inputs, sample_labels = dataset[idx]
    return sample_inputs, sample_labels



In [15]:
# Keyword arguments forwarded to torch.utils.data.DataLoader.
params = dict(batch_size=1, num_workers=2)

# Epoch budget for training runs.
max_epochs = 100

In [16]:
# HDF5 feature files to load, grouped by the directory they live in.
# The dsnet "ovp" entry previously read "eccv16_datvideo_50aset_ovp_..."; it
# is restored to "eccv16_dataset_ovp_...", the name shown in the recorded
# output of the loading loop.
paths = [
    # src/ copies
    "/home/shuaman/video_sm/video_summarization/src/eccv16_dataset_ovp_google_pool5.h5",
    "/home/shuaman/video_sm/video_summarization/src/eccv16_dataset_summe_google_pool5.h5",
    "/home/shuaman/video_sm/video_summarization/src/eccv16_dataset_tvsum_google_pool5.h5",
    "/home/shuaman/video_sm/video_summarization/src/eccv16_dataset_youtube_google_pool5.h5",
    "/home/shuaman/video_sm/video_summarization/src/eccv16_dataset_cosum_google_pool5.h5",
    # object_features/
    "/data/shuaman/video_summarization/datasets/object_features/eccv16_dataset_summe_google_pool5.h5",
    "/data/shuaman/video_summarization/datasets/object_features/eccv16_dataset_tvsum_google_pool5.h5",
    # dsnet/datasets/
    "/data/shuaman/video_summarization/datasets/dsnet/datasets/eccv16_dataset_ovp_google_pool5.h5",
    "/data/shuaman/video_summarization/datasets/dsnet/datasets/eccv16_dataset_summe_google_pool5.h5",
    "/data/shuaman/video_summarization/datasets/dsnet/datasets/eccv16_dataset_tvsum_google_pool5.h5",
    "/data/shuaman/video_summarization/datasets/dsnet/datasets/eccv16_dataset_youtube_google_pool5.h5",
]

In [17]:

# Smoke test: build a dataset and a DataLoader for every file in `paths` and
# iterate each loader once, so any sample that fails to load raises here.
for path in paths:
    training_set = VSMDataset(path)
    training_generator = torch.utils.data.DataLoader(training_set, **params)

    # Nothing is done with the batches; after the loop `video_info` and
    # `label` still hold the last batch of the last file.
    for video_info, label in training_generator:
        pass

    print("done", path)

done /home/shuaman/video_sm/video_summarization/src/eccv16_dataset_ovp_google_pool5.h5
done /home/shuaman/video_sm/video_summarization/src/eccv16_dataset_summe_google_pool5.h5
done /home/shuaman/video_sm/video_summarization/src/eccv16_dataset_tvsum_google_pool5.h5
done /home/shuaman/video_sm/video_summarization/src/eccv16_dataset_youtube_google_pool5.h5
done /home/shuaman/video_sm/video_summarization/src/eccv16_dataset_cosum_google_pool5.h5
done /data/shuaman/video_summarization/datasets/object_features/eccv16_dataset_summe_google_pool5.h5
done /data/shuaman/video_summarization/datasets/object_features/eccv16_dataset_tvsum_google_pool5.h5
done /data/shuaman/video_summarization/datasets/dsnet/datasets/eccv16_dataset_ovp_google_pool5.h5
done /data/shuaman/video_summarization/datasets/dsnet/datasets/eccv16_dataset_summe_google_pool5.h5
done /data/shuaman/video_summarization/datasets/dsnet/datasets/eccv16_dataset_tvsum_google_pool5.h5
done /data/shuaman/video_summarization/datasets/dsnet/d

In [18]:
# Display the input dict of the last batch left in scope by the DataLoader loop.
video_info

{'change_points': tensor([[[    0,   149],
          [  150,   479],
          [  480,  1964],
          [ 1965,  2504],
          [ 2505,  2669],
          [ 2670,  2969],
          [ 2970,  3239],
          [ 3240,  3824],
          [ 3825,  4334],
          [ 4335,  4514],
          [ 4515,  4634],
          [ 4635,  4709],
          [ 4710,  5564],
          [ 5565,  5744],
          [ 5745,  6089],
          [ 6090,  7079],
          [ 7080,  7139],
          [ 7140,  7199],
          [ 7200,  7364],
          [ 7365,  7544],
          [ 7545,  7904],
          [ 7905,  8129],
          [ 8130,  8309],
          [ 8310,  8534],
          [ 8535,  8699],
          [ 8700,  8939],
          [ 8940,  9464],
          [ 9465,  9944],
          [ 9945, 10124],
          [10125, 10304],
          [10305, 10499],
          [10500, 10528]]]),
 'features': tensor([[[0.0103, 0.0038, 0.0219,  ..., 0.0000, 0.0000, 0.0004],
          [0.0107, 0.0031, 0.0165,  ..., 0.0000, 0.0000, 0.0000],
    

In [9]:
# Display the label dict of the last batch left in scope by the DataLoader loop.
# NOTE(review): this cell's execution count (9) is lower than the cells before
# it, so the output shown may come from an earlier run — re-run to confirm.
label

{'gtscore': tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1.,
          1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
     