In [3]:
import h5py

import numpy as np

import argparse
import torch
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import DataLoader

import os

import json

In [4]:
parser = argparse.ArgumentParser(description='video features to LSTM Language Model')

# Location of data
parser.add_argument('--dataset', type=str, default='ActivityNet',
                    help='Name of the data class to use from data.py')
parser.add_argument('--data', type=str, default='data/ActivityNet/activity_net.v1-3.min.json',
                    help='location of the dataset')
parser.add_argument('--features', type=str, default=r'G:/machine_learning/activitynet/sub_activitynet_v1-3.c3d.hdf5',
                    help='location of the video features')
parser.add_argument('--labels', type=str, default='data/ActivityNet/labels.hdf5',
                    help='location of the proposal labels')
parser.add_argument('--vid-ids', type=str, default='data/ActivityNet/video_ids.json',
                    help='location of the video ids')
parser.add_argument('--save', type=str, default='data/models/default',
                    help='path to folder where to save the final model and log files and corpus')
parser.add_argument('--save-every', type=int, default=1,
                    help='Save the model every x epochs')
parser.add_argument('--clean', dest='clean', action='store_true',
                    help='Delete the models and the log files in the folder')
parser.add_argument('--W', type=int, default=128,
                    help='The rnn kernel size to use to get the proposal features')
parser.add_argument('--K', type=int, default=64,
                    help='Number of proposals')
parser.add_argument('--max-W', type=int, default=256,
                    help='maximum number of windows to return per video')

# Model options
parser.add_argument('--rnn-type', type=str, default='GRU',
                    help='type of recurrent net (RNN_TANH, RNN_RELU, LSTM, GRU)')
parser.add_argument('--rnn-num-layers', type=int, default=2,
                    help='Number of layers in rnn')
parser.add_argument('--rnn-dropout', type=int, default=0.0,
                    help='dropout used in rnn')
parser.add_argument('--video-dim', type=int, default=500,
                    help='dimensions of video (C3D) features')
parser.add_argument('--hidden-dim', type=int, default=512,
                    help='dimensions output layer of video network')

# Training options
parser.add_argument('--lr', type=float, default=0.1,
                    help='initial learning rate')
parser.add_argument('--dropout', type=float, default=0.0,
                    help='dropout between RNN layers')
parser.add_argument('--momentum', type=float, default=0.9,
                    help='SGD momentum')
parser.add_argument('--weight-decay', type=float, default=0,
                    help='SGD weight decay')
parser.add_argument('--epochs', type=int, default=100,
                    help='upper epoch limit')
parser.add_argument('--batch-size', type=int, default=1,
                    help='batch size')
parser.add_argument('--seed', type=int, default=1111,
                    help='random seed')
parser.add_argument('--cuda', action='store_true',
                    help='use CUDA')
parser.add_argument('--log-interval', type=int, default=100, metavar='N',
                    help='report interval')
parser.add_argument('--debug', dest='debug', action='store_true',
                    help='Print out debug sentences')
parser.add_argument('--num-samples', type=int, default=None,
                    help='Number of training samples to train with')
parser.add_argument('--shuffle', type=int, default=1,
                    help='whether to shuffle the data')
parser.add_argument('--nthreads', type=int, default=0,
                    help='number of worker threas used to load data')
parser.add_argument('--resume', dest='resume', action='store_true',
                    help='reload the model')

# Evaluate options
parser.add_argument('--num-vids-eval', type=int, default=500,
                    help='Number of videos to evaluate at each pass')
parser.add_argument('--iou-threshold', type=float, default=0.5,
                    help='threshold above which we say something is positive')
parser.add_argument('--num-proposals', type=int, default=None,
                    help='number of top proposals to evaluate')


parser.add_argument('--p_cls', type=float, default=0.3,
                    help='p_cls')

parser.add_argument('--cls_dim', type=float, default=273,
                    help='p_cls')

_StoreAction(option_strings=['--cls_dim'], dest='cls_dim', nargs=None, const=None, default=273, type=<class 'float'>, choices=None, help='p_cls', metavar=None)

In [5]:
args = parser.parse_args(args = [])

##### 2018.3.31 test

In [8]:
class ProposalDataset(object):
    """
    All dataset parsing classes will inherit from this class.
    """

    def __init__(self, args):
        """
        args must contain the following:
            data - the file that contains the Activity Net json data.
            features - the location of where the PCA C3D 500D features are.
        """
        assert os.path.exists(args.data)
        assert os.path.exists(args.features)
        self.data = json.load(open(args.data))
        self.features = h5py.File(args.features)
        if not os.path.exists(args.labels) or not os.path.exists(args.vid_ids):
            self.generate_labels(args)
        self.labels = h5py.File(args.labels)
        self.vid_ids = json.load(open(args.vid_ids))

    def generate_labels(self, args):
        """
        Overwrite based on dataset used
        """
        pass

    def iou(self, interval, featstamps, return_index=False):
        """
        Measures temporal IoU
        """
        start_i, end_i = interval[0], interval[1]
        output = 0.0
        gt_index = -1
        for i, (start, end) in enumerate(featstamps):
            intersection = max(0, min(end, end_i) - max(start, start_i))
            union = min(max(end, end_i) - min(start, start_i), end - start + end_i - start_i)
            overlap = float(intersection) / (union + 1e-8)
            if overlap >= output:
                output = overlap
                gt_index = i
        if return_index:
            return output, gt_index
        return output

    def timestamp_to_featstamp(self, timestamp, nfeats, duration):
        """
        Function to measure 1D overlap
        Convert the timestamps to feature indices
        """
        start, end = timestamp
        start = min(int(round(start / duration * nfeats)), nfeats - 1)
        end = max(int(round(end / duration * nfeats)), start + 1)
        return start, end

    def compute_proposals_stats(self, prop_captured):
        """
        Function to compute the proportion of proposals captured during labels generation.
        :param prop_captured: array of length nb_videos
        :return:
        """
        nb_videos = len(prop_captured)
        proportion = np.mean(prop_captured[prop_captured != -1])
        nb_no_proposals = (prop_captured == -1).sum()
        print("Number of videos in the dataset: {}".format(nb_videos))
        print("Proportion of videos with no proposals: {}".format(1. * nb_no_proposals / nb_videos))
        print("Proportion of action proposals captured during labels creation: {}".format(proportion))


class ActivityNet(ProposalDataset):
    """
    ActivityNet is responsible for parsing the raw activity net dataset and converting it into a
    format that DataSplit (defined below) can use. This level of abstraction is used so that
    DataSplit can be used with other dataset and we would only need to write a class similar
    to this one.
    """

    def __init__(self, args):
        super(self.__class__, self).__init__(args)
        self.durations = {}
        self.gt_times = {}
        self.w1 = self.vid_ids['w1']
        for split in ['training', 'validation', 'testing']:
            setattr(self, split + '_ids', self.vid_ids[split])
            for video_id in self.vid_ids[split]:
                self.durations[video_id] = self.data['database'][video_id]['duration']
                self.gt_times[video_id] = [ann['segment'] for ann in self.data['database'][video_id]['annotations']]
        self.cls_dim = args.cls_dim
        

    def generate_labels(self, args):
        """
        Overwriting parent class to generate action proposal labels
        """
        print("| Generating labels for action proposals")
        label_dataset = h5py.File(args.labels, 'w')
        
        #把classes onehot了
        already_appear_list = []
        cls_look_up_dict_emmbeding={}
        rank_cls = 0
        for _ in self.data['taxonomy']:
            if _ ['nodeName'] not in already_appear_list:
                already_appear_list.append(_ ['nodeName'])
                cls_look_up_dict_emmbeding[_ ['nodeName']] = rank_cls
                rank_cls+=1
        self.cls_dim = len(cls_look_up_dict_emmbeding) +1
        
        
        
        # bar = progressbar.ProgressBar(maxval=len(list(self.data['database'].keys()))).start()
        prop_captured = []
        prop_pos_examples = []
        video_ids = list(self.data['database'].keys())[:100]
        split_ids = {'training': [], 'validation': [], 'testing': [],
                     'w1': []}  # maybe find a better name since w1 is not a split
        for progress, video_id in enumerate(video_ids):
            features = self.features['v_' + video_id]['c3d_features']
            nfeats = features.shape[0]
            duration = self.data['database'][video_id]['duration']
            annotations = self.data['database'][video_id]['annotations']
            timestamps = [ann['segment'] for ann in annotations]
            featstamps = [self.timestamp_to_featstamp(x, nfeats, duration) for x in timestamps]
            nb_prop = len(featstamps)
            for i in range(nb_prop):
                if (featstamps[nb_prop - i - 1][1] - featstamps[nb_prop - i - 1][0]) > args.K / args.iou_threshold:
                    # we discard these proposals since they will not be captured for this value of K 
                    del featstamps[nb_prop - i - 1]
            if len(featstamps) == 0:
                if len(timestamps) == 0:
                    # no proposals il this video
                    prop_captured += [-1.]
                else:
                    # no proposals captured in this video since all have a length above threshold
                    prop_captured += [0.]
                continue
                # we keep track of the videos kept to update ids
            split_ids[self.data['database'][video_id]['subset']] += [video_id]
            
            
            gt_captured = []
            #几个输出的lable和weight
            #gru proposal的相关输出
            labels_prop_yk = np.zeros((nfeats, args.K))
            w_prop1 = np.zeros(args.K)########xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx一个视频只有一对w1，w2
            w_prop2 = np.zeros(args.K)
            
            print(w_prop1)#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
            
            #gru classifier的相关输出
            labels_cls_yt = np.zeros((nfeats, self.cls_dim))#xxxxxxxxxxxx
            labels_cls_yt[:,-1] = 1
            wcls = np.ones(nfeats)
            
            #最后分类器的相关输出
            label_overlap_iou = np.ones((nfeats,args.K))
 
 

            for t in range(nfeats):            
                for i in range (len(timestamps)):
                    if timestamps[i][0]<=t<=timestamps[i][1]:#判断是不是背景
                        print(len(cls_look_up_dict_emmbeding))
                        labels_cls_yt[ t, cls_look_up_dict_emmbeding[self.data['database'][video_id]['annotations'][i]['label']] ]=1
                        labels_cls_yt[t,-1]=0
                        
                    else:
                        wcls[t] = args.p_cls
                        
                for k in range(args.K):
                    iou, gt_index = self.iou([t - k, t + 1], featstamps, return_index=True)
                    label_overlap_iou[t, k] = np.maximum(iou, 0.001)
                    if iou >= args.iou_threshold:
                        w_prop1[k] += 1
                        labels_prop_yk[t, k] = 1
                        gt_captured += [gt_index]
                        count +=1

                        
                    else:
                        w_prop2[k] += 1
  
            temp_w_prop1 = w_prop1/(w_prop1+w_prop2)
            temp_w_prop2 = w_prop2/(w_prop1+w_prop2)
            w_prop1,w_prop2 = temp_w_prop1,temp_w_prop2
            
                        
            prop_captured += [1. * len(np.unique(gt_captured)) / len(timestamps)]
            if self.data['database'][video_id]['subset'] == 'training':
                prop_pos_examples += [np.sum(labels_prop_yk, axis=0) * 1. / nfeats]
            
            
            video_labels_prop_yk = label_dataset.create_dataset(video_id+'_labels_prop_yk', (nfeats, args.K), dtype='f')
            video_w_prop = label_dataset.create_dataset(video_id+'_w_prop', (2,args.K), dtype='f')#xxxxxxxxxxxxxxxxxxxxxxxxxxxx
            video_labels_cls_yt = label_dataset.create_dataset(video_id+'_labels_cls_yt', (nfeats, self.cls_dim), dtype='f')
            video_wcls = label_dataset.create_dataset(video_id+'_wcls', (nfeats,), dtype='f')
            video_label_overlap_iou = label_dataset.create_dataset(video_id+'_labels_overlap_iou', (nfeats,args.K), dtype='f')

            
            video_labels_prop_yk[...] = labels_prop_yk
            video_w_prop[0] = w_prop1
            video_w_prop[1] = w_prop2
            video_labels_cls_yt[...] = labels_cls_yt
            video_wcls[...] = wcls
            video_label_overlap_iou[...] = label_overlap_iou
            
            # bar.update(progress)
        split_ids['w1'] = np.array(prop_pos_examples).mean(axis=0).tolist()  # this will be used to compute the loss
        json.dump(split_ids, open(args.vid_ids, 'w'))
        self.compute_proposals_stats(np.array(prop_captured))
        # bar.finish()
        
        

In [9]:
class subSetActivityNet(ActivityNet):
    def __init__(self, video_ids, dataset, args):
        '''
        video_ids :should be [dataset.training_ids ,dataset.validation_ids, dataset.testing_ids]
        '''
        self.video_ids = video_ids
        self.features = dataset.features
        self.labels = dataset.labels
        self.durations = dataset.durations
        self.gt_times = dataset.gt_times
        self.cls_dim = dataset.cls_dim
        self.num_samples = args.num_samples
        self.W = args.W
        self.K = args.K
        self.max_W = args.max_W
        

    def __getitem__(self,index):
        video_id = self.video_ids[index]
        features = self.features['v_' + video_id]['c3d_features']

        labels_prop_yk = self.labels[video_id + '_labels_prop_yk']
        w_prop = self.labels[video_id + '_w_prop']
        labels_cls_yt = self.labels[video_id + '_labels_cls_yt']
        wcls = self.labels[video_id + '_wcls']
        label_overlap_iou = self.labels[video_id + '_labels_overlap_iou']


        nfeats = features.shape[0]    
        nWindows = max(1, nfeats - self.W + 1)


        sample = list(range(nWindows))
        if self.max_W < nWindows:
            sample = np.random.choice(nWindows, self.max_W)
            nWindows = self.max_W   

        feature_windows = np.zeros((nWindows, self.W, features.shape[1]))
        labels_prop_yk_windows = np.zeros((nWindows, self.W, args.K))
        labels_cls_yt_windows = np.zeros((nWindows, self.W, self.cls_dim))
        wcls_windows = np.zeros((nWindows, self.W))
        label_overlap_iou_windows = np.zeros((nWindows, self.W, args.K))

        for j, w_start in enumerate(sample):
            w_end = min(w_start + self.W, nfeats)
            feature_windows[j, 0:w_end - w_start, :] = features[w_start:w_end, :]
            labels_prop_yk_windows[j, 0:w_end - w_start, :] = labels_prop_yk[w_start:w_end, :]
            labels_cls_yt_windows[j, 0:w_end - w_start, :] = labels_cls_yt[w_start:w_end, :]
            wcls_windows[j, 0:w_end - w_start] = wcls[w_start:w_end]
            label_overlap_iou_windows[j, 0:w_end - w_start, :]=label_overlap_iou[w_start:w_end,:]



#         return torch.FloatTensor(feature_windows), torch.FloatTensor(labels_prop_yk_windows),\
#                 torch.FloatTensor(w_prop), torch.FloatTensor(labels_cls_yt_windows), \
#                 torch.FloatTensor(wcls_windows), torch.FloatTensor(label_overlap_iou)
            
        return feature_windows[...], labels_prop_yk_windows[...],\
                w_prop[...], labels_cls_yt_windows[...], \
                wcls_windows[...], label_overlap_iou[...]          
            
            
    def __len__(self):
        if self.num_samples is not None:
            # in case num sample is greater than the dataset itself
            return min(self.num_samples, len(self.video_ids))
        return len(self.video_ids)

In [10]:
aa = ActivityNet(args)

In [11]:
aa.cls_dim

273

In [12]:
trainset = subSetActivityNet(aa.training_ids, aa, args)

In [13]:
train_loader = DataLoader(trainset)

In [14]:
bb = enumerate(train_loader)

In [15]:
aa = bb.__next__()

RuntimeError: tried to construct a tensor from a nested float sequence, but found an item of type numpy.float32 at index (0, 0)

In [16]:
aa

<__main__.ActivityNet at 0x1f668ea57b8>

In [48]:
args.labels

'data/ActivityNet/labels.hdf5'

In [6]:
file = h5py.File(args.labels)

In [50]:
[_ for _ in file.keys()]

['0Gr4aKQzGYk_labels_cls_yt',
 '0Gr4aKQzGYk_labels_overlap_iou',
 '0Gr4aKQzGYk_labels_prop_yk',
 '0Gr4aKQzGYk_w_prop',
 '0Gr4aKQzGYk_wcls',
 '3joaQzU05MY_labels_cls_yt',
 '3joaQzU05MY_labels_overlap_iou',
 '3joaQzU05MY_labels_prop_yk',
 '3joaQzU05MY_w_prop',
 '3joaQzU05MY_wcls',
 '5Bo0gFXxDQk_labels_cls_yt',
 '5Bo0gFXxDQk_labels_overlap_iou',
 '5Bo0gFXxDQk_labels_prop_yk',
 '5Bo0gFXxDQk_w_prop',
 '5Bo0gFXxDQk_wcls',
 '6uhLrPgbpUA_labels_cls_yt',
 '6uhLrPgbpUA_labels_overlap_iou',
 '6uhLrPgbpUA_labels_prop_yk',
 '6uhLrPgbpUA_w_prop',
 '6uhLrPgbpUA_wcls',
 '97McCuWAynA_labels_cls_yt',
 '97McCuWAynA_labels_overlap_iou',
 '97McCuWAynA_labels_prop_yk',
 '97McCuWAynA_w_prop',
 '97McCuWAynA_wcls',
 'GCtrfXIBbwA_labels_cls_yt',
 'GCtrfXIBbwA_labels_overlap_iou',
 'GCtrfXIBbwA_labels_prop_yk',
 'GCtrfXIBbwA_w_prop',
 'GCtrfXIBbwA_wcls',
 'IGcsVPa34Hc_labels_cls_yt',
 'IGcsVPa34Hc_labels_overlap_iou',
 'IGcsVPa34Hc_labels_prop_yk',
 'IGcsVPa34Hc_w_prop',
 'IGcsVPa34Hc_wcls',
 'JDg--pjY5gg_labels

In [7]:
file['sjyZWmvTGA4_w_prop'].value

array([[ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.41509435,  0.40566039,
         0.41509435,  0.4245283 ,  0.41509435,  0.4245283 ,  0.4245283 ,
         0.41509435,  0.41509435,  0.41509435,  0.40566039,  0.40566039,
         0.40566039,  0.39622641,  0.39622641,  0.39622641,  0.38679245,
         0.38679245,  0.38679245,  0.3773585 ,  0.3773585 ],
       [ 1.        ,  1.        ,  1.        ,  1.        ,  1.

In [33]:
[_ for _ in aa.labels.keys()]

['0Gr4aKQzGYk_label_overlap_iou',
 '0Gr4aKQzGYk_labels_cls_yt',
 '0Gr4aKQzGYk_labels_prop_yk',
 '0Gr4aKQzGYk_w_prop',
 '0Gr4aKQzGYk_wcls',
 '3joaQzU05MY_label_overlap_iou',
 '3joaQzU05MY_labels_cls_yt',
 '3joaQzU05MY_labels_prop_yk',
 '3joaQzU05MY_w_prop',
 '3joaQzU05MY_wcls',
 '5Bo0gFXxDQk_label_overlap_iou',
 '5Bo0gFXxDQk_labels_cls_yt',
 '5Bo0gFXxDQk_labels_prop_yk',
 '5Bo0gFXxDQk_w_prop',
 '5Bo0gFXxDQk_wcls',
 '6uhLrPgbpUA_label_overlap_iou',
 '6uhLrPgbpUA_labels_cls_yt',
 '6uhLrPgbpUA_labels_prop_yk',
 '6uhLrPgbpUA_w_prop',
 '6uhLrPgbpUA_wcls',
 '97McCuWAynA_label_overlap_iou',
 '97McCuWAynA_labels_cls_yt',
 '97McCuWAynA_labels_prop_yk',
 '97McCuWAynA_w_prop',
 '97McCuWAynA_wcls',
 'GCtrfXIBbwA_label_overlap_iou',
 'GCtrfXIBbwA_labels_cls_yt',
 'GCtrfXIBbwA_labels_prop_yk',
 'GCtrfXIBbwA_w_prop',
 'GCtrfXIBbwA_wcls',
 'IGcsVPa34Hc_label_overlap_iou',
 'IGcsVPa34Hc_labels_cls_yt',
 'IGcsVPa34Hc_labels_prop_yk',
 'IGcsVPa34Hc_w_prop',
 'IGcsVPa34Hc_wcls',
 'JDg--pjY5gg_label_overlap

In [31]:
aa.features

<HDF5 file "sub_activitynet_v1-3.c3d.hdf5" (mode r+)>

In [12]:
len([_ for _ in aa.labels.keys()])

135

In [8]:
myactivitynet = ActivityNet2(args)

In [9]:
self = myactivitynet

In [10]:
label_dataset = h5py.File(args.labels+'3.31', 'w')

In [11]:
        already_appear_list = []
        cls_look_up_dict_emmbeding={}
        rank_cls = 0
        for _ in self.data['taxonomy']:
            if _ ['nodeName'] not in already_appear_list:
                already_appear_list.append(_ ['nodeName'])
                cls_look_up_dict_emmbeding[_ ['nodeName']] = rank_cls
                rank_cls+=1
        cls_dim = len(cls_look_up_dict_emmbeding)+1  

In [18]:
len(cls_look_up_dict_emmbeding)

272

In [15]:
cls_dim

273

In [18]:
labels_cls_yt.shape

(519, 273)

In [13]:
        prop_captured = []
        prop_pos_examples = []
        video_ids = list(self.data['database'].keys())
        split_ids = {'training': [], 'validation': [], 'testing': [],
                     'w1': []} 

In [14]:
enumerateor_video_ids = enumerate(video_ids[:100])

In [15]:
progress, video_id = enumerateor_video_ids.__next__()

features = self.features['v_' + video_id]['c3d_features']
nfeats = features.shape[0]
duration = self.data['database'][video_id]['duration']
annotations = self.data['database'][video_id]['annotations']
timestamps = [ann['segment'] for ann in annotations]
featstamps = [self.timestamp_to_featstamp(x, nfeats, duration) for x in timestamps]

nb_prop = len(featstamps)

print("nb_prop:{}".format(nb_prop))

print('featstamps:{}'.format(featstamps))

for i in range(nb_prop):
    if (featstamps[nb_prop - i - 1][1] - featstamps[nb_prop - i - 1][0]) > args.K / args.iou_threshold:
        # we discard these proposals since they will not be captured for this value of K 
        del featstamps[nb_prop - i - 1]
if len(featstamps) == 0:
    print('need continue')

nb_prop:1
featstamps:[(0, 461)]
need continue


In [131]:
duration

50.48

In [132]:
timestamps

[[26.214528861154445, 40.54198907956318]]

In [16]:
gt_captured = []

labels_prop_yk = np.zeros((nfeats, args.K))
w_prop1 = w_prop2 = np.zeros(args.K)########xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx一个视频只有一对w1，w2

#gru classifier的相关输出
labels_cls_yt = np.zeros((nfeats, cls_dim))#xxxxxxxxxxxx
labels_cls_yt[:,-1] = 1
wcls = np.ones(nfeats)

#最后分类器的相关输出
label_overlap_iou = np.ones((nfeats,args.K))

In [166]:
for t in range(nfeats):            
    for i in range (len(timestamps)):
        if timestamps[i][0]<=t<=timestamps[i][1]:#判断是不是背景

            labels_cls_yt[t][ cls_look_up_dict_emmbeding[self.data['database'][video_id]['annotations'][i]['label']] ]=1
            labels_cls_yt[t,-1]=0

        else:
            wcls[t] = args.p_cls

    for k in range(args.K):
        iou, gt_index = self.iou([t - k, t + 1], featstamps, return_index=True)
        label_overlap_iou[t, k] = np.maximum(iou, 0.001)
        if iou >= args.iou_threshold:
            labels_prop_yk[t, k] = 1
            gt_captured += [gt_index]

            w_prop1[k] += 1######xxxxxxxxxxxxxxxxxxxxxxxxxxxx
        else:
            w_prop2[k] +=1######xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx

temp_w_prop1 = w_prop1/(w_prop1+w_prop2)
temp_w_prop2 = w_prop2/(w_prop1+w_prop2)
w_prop1,w_prop2 = temp_w_prop1,temp_w_prop2

In [179]:
labels_prop_yk

array([[ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       ..., 
       [ 0.,  0.,  0., ...,  0.,  0.,  1.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.]])

In [178]:
label_overlap_iou

array([[ 0.001     ,  0.001     ,  0.001     , ...,  0.001     ,
         0.001     ,  0.001     ],
       [ 0.001     ,  0.001     ,  0.001     , ...,  0.001     ,
         0.001     ,  0.001     ],
       [ 0.001     ,  0.001     ,  0.001     , ...,  0.001     ,
         0.001     ,  0.001     ],
       ..., 
       [ 0.001     ,  0.001     ,  0.001     , ...,  0.48571429,
         0.5       ,  0.51428571],
       [ 0.001     ,  0.001     ,  0.001     , ...,  0.46478873,
         0.47887324,  0.49295775],
       [ 0.001     ,  0.001     ,  0.001     , ...,  0.44444444,
         0.45833333,  0.47222222]])

In [181]:
wcls

array([ 0.3,  0.3,  0.3,  0.3,  0.3,  0.3,  0.3,  0.3,  0.3,  0.3,  0.3,
        0.3,  0.3,  0.3,  0.3,  0.3,  0.3,  0.3,  0.3,  0.3,  0.3,  0.3,
        0.3,  0.3,  0.3,  0.3,  0.3,  1. ,  1. ,  1. ,  1. ,  1. ,  1. ,
        1. ,  1. ,  1. ,  1. ,  1. ,  1. ,  1. ,  1. ,  0.3,  0.3,  0.3,
        0.3,  0.3,  0.3,  0.3,  0.3,  0.3,  0.3,  0.3,  0.3,  0.3,  0.3,
        0.3,  0.3,  0.3,  0.3,  0.3,  0.3,  0.3,  0.3,  0.3,  0.3,  0.3,
        0.3,  0.3,  0.3,  0.3,  0.3,  0.3,  0.3,  0.3,  0.3,  0.3,  0.3,
        0.3,  0.3,  0.3,  0.3,  0.3,  0.3,  0.3,  0.3,  0.3,  0.3,  0.3,
        0.3,  0.3,  0.3,  0.3,  0.3,  0.3,  0.3,  0.3,  0.3,  0.3,  0.3,
        0.3,  0.3,  0.3,  0.3,  0.3,  0.3,  0.3,  0.3,  0.3,  0.3,  0.3,
        0.3,  0.3,  0.3,  0.3,  0.3,  0.3,  0.3,  0.3,  0.3,  0.3,  0.3,
        0.3,  0.3,  0.3,  0.3,  0.3,  0.3,  0.3,  0.3,  0.3,  0.3,  0.3,
        0.3,  0.3,  0.3,  0.3,  0.3,  0.3,  0.3,  0.3,  0.3,  0.3,  0.3,
        0.3,  0.3,  0.3,  0.3,  0.3,  0.3,  0.3])

In [167]:
w_prop1

array([ 0.0033004 ,  0.0033004 ,  0.0033004 ,  0.0033004 ,  0.0033004 ,
        0.0033004 ,  0.0033004 ,  0.0033004 ,  0.0033004 ,  0.0033004 ,
        0.0033004 ,  0.0033004 ,  0.0033004 ,  0.0033004 ,  0.0033004 ,
        0.0033004 ,  0.0033004 ,  0.0033004 ,  0.0033004 ,  0.0033004 ,
        0.0033004 ,  0.1419173 ,  0.14851811,  0.1419173 ,  0.14851811,
        0.15511891,  0.14851811,  0.15511891,  0.16171972,  0.15511891,
        0.16171972,  0.16832052,  0.16171972,  0.16832052,  0.17492133,
        0.16832052,  0.17492133,  0.18152213,  0.17492133,  0.18152213,
        0.18812294,  0.18152213,  0.18812294,  0.19472374,  0.18812294,
        0.19472374,  0.20132455,  0.19472374,  0.20132455,  0.20792535,
        0.20132455,  0.20792535,  0.21452616,  0.20792535,  0.21452616,
        0.22112696,  0.21452616,  0.22112696,  0.22772777,  0.22112696,
        0.22772777,  0.23432857,  0.22772777,  0.23432857])

In [168]:
w_prop2

array([ 0.9966996 ,  0.9966996 ,  0.9966996 ,  0.9966996 ,  0.9966996 ,
        0.9966996 ,  0.9966996 ,  0.9966996 ,  0.9966996 ,  0.9966996 ,
        0.9966996 ,  0.9966996 ,  0.9966996 ,  0.9966996 ,  0.9966996 ,
        0.9966996 ,  0.9966996 ,  0.9966996 ,  0.9966996 ,  0.9966996 ,
        0.9966996 ,  0.8580827 ,  0.85148189,  0.8580827 ,  0.85148189,
        0.84488109,  0.85148189,  0.84488109,  0.83828028,  0.84488109,
        0.83828028,  0.83167948,  0.83828028,  0.83167948,  0.82507867,
        0.83167948,  0.82507867,  0.81847787,  0.82507867,  0.81847787,
        0.81187706,  0.81847787,  0.81187706,  0.80527626,  0.81187706,
        0.80527626,  0.79867545,  0.80527626,  0.79867545,  0.79207465,
        0.79867545,  0.79207465,  0.78547384,  0.79207465,  0.78547384,
        0.77887304,  0.78547384,  0.77887304,  0.77227223,  0.77887304,
        0.77227223,  0.76567143,  0.77227223,  0.76567143])

##### 2018.3.31 test

In [208]:
features

<HDF5 dataset "c3d_features": shape (150, 500), type "<f8">

In [183]:
args.max_W

256

In [184]:
aa= [1,2,3,4,5,6,7]

In [207]:
np.random.choice(4, 2)

array([3, 1])

In [251]:
def __getitem__(self,idex):
    video_id = self.video_ids[index]
    features = self.features['v_' + video_id]['c3d_features']
    
    labels_prop_yk = self.labels[video_id + '_labels_prop_yk']
    w_prop = self.labels[video_id + '_w_prop']
    labels_cls_yt = self.labels[video_id + 'labels_cls_yt']
    wcls = self.labels[video_id + '_wcls']
    label_overlap_iou = self.labels[video_id + '_overlap_iou']
    
    
    nfeats = features.shape[0]    
    nWindows = max(1, nfeats - self.W + 1)
    
    
    sample = list(range(nWindows))
    if self.max_W < nWindows:
        sample = np.random.choice(nWindows, self.max_W)
        nWindows = self.max_W   
        
    feature_windows = np.zeros((nWindows, self.W, features.shape[1]))
    labels_prop_yk_windows = np.zeros((nWindows, self.W, args.K))
    labels_cls_yt_windows = np.zeros((nWindows, self.W, self.cls_dim))
    wcls_windows = np.zeros((nWindows, self.W))
    label_overlap_iou_windows = np.zeros((nWindows, self.W, args.K))
    
    for j, w_start in enumerate(sample):
        w_end = min(w_start + self.W, nfeats)
        feature_windows[j, 0:w_end - w_start, :] = features[w_start:w_end, :]
        labels_prop_yk_windows[j, 0:w_end - w_start, :] = labels_prop_yk[w_start:w_end, :]
        labels_cls_yt_windows[j, 0:w_end - w_start, :] = labels_cls_yt[w_start:w_end, :]
        wcls_windows[j, 0:w_end - w_start] = wcls[w_start:w_end]
        label_overlap_iou_windows[j, 0:w_end - w_start, :]=label_overlap_iou[w_start:w_end,:]

    
    
    return torch.FloatTensor(feature_windows), torch.FloatTensor(labels_prop_yk_windows),\
            torch.FloatTensor(w_prop), torch.FloatTensor(labels_cls_yt_windows), \
            torch.FloatTensor(wcls_windows), torch.FloatTensor(label_overlap_iou)

In [250]:
label_overlap_iou.shape

(150, 64)

In [234]:
args.K

64

In [224]:
nWindows

23

In [225]:
feature_windows = np.zeros((nWindows, self.W, features.shape[1]))

In [227]:
feature_windows.shape

(23, 128, 500)

In [229]:
sample_enumerate = enumerate(sample)

In [230]:
j, w_start  = sample_enumerate.__next__()

In [231]:
w_end = min(w_start + self.W, nfeats)

In [None]:
feature_windows[j, 0:w_end - w_start, :] = features[w_start:w_end, :]

In [243]:
a = /
1

SyntaxError: invalid syntax (<ipython-input-243-17de48e73390>, line 1)

In [247]:
a = (1,\
    2)

SyntaxError: invalid syntax (<ipython-input-247-cd8aca8d5385>, line 1)

In [245]:
a

123