# Tasks
* graph generator:
    * a graph generator that builds graphs according to temporal adjacency (other features to be used later)
    * nodes are short video clips in each sliding window
    * Edges are defined as the distance of two windows.
    * Weight of edges should be between 0 and 1
    * The parameters should be tweakable: window length, step size, ...
    * each node has the information of start time, end time, and feature vector.
    * The result should be an adjacency list of nodes, also carrying the weight information.
    * make the graph symmetric
    
* Questions:
    - What would be a good way of representing the nodes.
        + Objects: learn python
        
# Steps
## Make the nodes:
Nodes are short video clips; the feature of a node is the concatenated histogram of four 4000-bin descriptors (4*4000 = 16000 dimensions).

## Make the edges:
The edges are similarities between nodes: use the overlap for time windows, and use the following function for features:
$$e^{-\frac{1}{\sigma^2}\|x_1-x_2\|_2^2}$$
Include only the top K edges for each node, where K is a parameter.
Make the graph symmetric.

## Stats from UCF101 dataset
| |frames|seconds|
|---|---|--|
|max length|1776|   71.04 |
|min length|29| 1.06666666667 |
|mean|186.528562849|   7.2053465966 |
|median|167|  6.48 |
|std|97.7848059441 |  3.75795650803 |

In [1]:
from pprint import pprint
from scipy import io as sio
import time
# Root directory of the local checkout. Several data paths in the cells below
# are built by plain string concatenation onto it, so the trailing slash matters.
BASE_DIR = '/Users/baroc/repos/VideoActionRecognition/'

In [6]:
class Node:
    """One sliding-window clip of a video, used as a node of the graph.

    Attributes:
        id: index of the node inside its video's node list (set via set_id).
        start, end: first and last frame of the window.
        fps: frame rate of the video.
        videoname: name of the video the window was cut from.
        trajectories: Trajectory objects attached with add_trajectory
            (empty unless the caller explicitly adds them).
        histogram: concatenated 4 * 4000-bin histogram set by add_feature.
        allOverLapLabels: {label: [[[label_start, label_end], overlap], ...]}
            for every temporal label given to add_label.
        labels: labels whose overlap with the window is above 0.5
            (the same label may appear several times).
        features: only present after record_feature() has been called.
    """

    def __init__(self, start, end, fps, videoname):
        self.id = 0
        self.start = start
        self.end = end
        self.fps = fps
        self.videoname = videoname
        self.trajectories = []
        self.histogram = []
        self.allOverLapLabels = dict()  # {overlapping label: [[[lb_str_t, lb_end_t], overlap], ...]}
        self.labels = []  # only the labels that are considered positive.
        # TODO: add ground truth

    def add_trajectory(self, traj):
        """Attach a trajectory object to this node (call only when needed)."""
        self.trajectories.append(traj)

    def add_label(self, tlabel_info, overlap):
        """Record a temporal label that overlaps this window.

        tlabel_info looks like
        ['video_validation_0000051', [67.5, 75.9], 'Billiards'];
        overlap is the overlap score between the window and that interval.
        """
        label = tlabel_info[2]
        self.allOverLapLabels.setdefault(label, []).append([tlabel_info[1], overlap])
        # A label counts as positive only when the overlap exceeds 0.5.
        # It is possible that there are multiple same labels for a node.
        if overlap > 0.5:
            self.labels.append(label)

    def record_feature(self):
        """Collect the raw per-trajectory features from self.trajectories.

        Builds self.features, a dict of lists keyed by 'mean_x', 'mean_y',
        'traj_idx', 'hog', 'hof' and 'mbh'. Every trajectory is counted once;
        its coverage is ignored. If coverage is needed later, store
        [value, traj.coverage] pairs instead.

        Raises:
            ValueError: if no trajectories were added to this node.
        """
        if not self.trajectories:
            raise ValueError("record_features not usable. Because no features. Use add_feature in loop instead. ")
        names = ('mean_x', 'mean_y', 'traj_idx', 'hog', 'hof', 'mbh')
        # Rebuilt from scratch on every call so repeated calls do not duplicate entries.
        features = dict((name, []) for name in names)
        for traj in self.trajectories:
            for name in names:
                features[name].append(getattr(traj, name))
        self.features = features

    def add_feature(self, mean_x, mean_y, traj, hog, hof, mbh):
        """Compute and store the 16,000-dimensional histogram of the window.

        traj, hog, hof and mbh are sequences of integer codes; each is
        binned into 4000 bins over the range (0, 4000) and the four
        histograms are concatenated in the order traj, hog, hof, mbh.
        mean_x and mean_y are accepted for interface compatibility but are
        currently not stored.
        """
        hists = [np.histogram(values, 4000, (0, 4000))[0]
                 for values in (traj, hog, hof, mbh)]
        self.histogram = np.concatenate(hists)

    def set_id(self, idx):
        """Set the index of this node inside its video's node list."""
        self.id = idx

# Next: implement the method `add_feature`,
modify the related code, and add the histogram

In [7]:
class Trajectory:
    """Plain record holding one trajectory and its descriptor codes."""

    def __init__(self, frame_num, mean_x, mean_y, traj_idx, hog, hof, mbh, coverage):
        # Position information of the trajectory.
        self.frame_num, self.mean_x, self.mean_y = frame_num, mean_x, mean_y
        # Descriptor codes.
        self.traj_idx = traj_idx
        self.hog, self.hof, self.mbh = hog, hof, mbh
        # The portion of trajectory included in the window.
        self.coverage = coverage


In [8]:
def computeOverlap(window_start, window_end, label_start, label_end):
    """Return the overlap ratio between a window and a label interval.

    The ratio is the length of the intersection divided by the length of
    the span from the earlier start to the later end. Intervals that do
    not overlap, or that only touch at an endpoint, give 0.

    The division is forced to floating point so that integer arguments
    (e.g. frame numbers) are not truncated by Python 2 integer division.
    """
    if not (window_start < label_end and label_start < window_end):
        return 0
    intersection = min(window_end, label_end) - max(window_start, label_start)
    span = max(window_end, label_end) - min(window_start, label_start)
    return float(intersection) / span

In [20]:
def generateNode(video_info, video_tLabelList, windowSize = 150, stepSize = 100, trajLength = 15):# by frame, default value comes from the Thumos report.
    """Cut one video into sliding windows and return one Node per window.

    Before calling this function, find the video_info and video_tLabelList
    that belong to the video.

    Args:
        video_info: metadata record of the video; the name is read from
            video_info[0][0], the number of frames from video_info[8][0][0]
            and the frame rate from video_info[9][0][0].
        video_tLabelList: temporal labels of this video, each of the form
            [video_name, [start_time, end_time], label], times in seconds.
        windowSize: window length in frames.
        stepSize: distance in frames between two consecutive window starts.
        trajLength: number of frames covered by one trajectory; the first
            column of the feature file is the trajectory's end frame.

    Returns:
        A list of Node objects whose ids are their positions in the list.

    The feature file BASE_DIR/TH14_validation_features/<video_name>.txt is
    expected to hold one tab-separated trajectory per line:
    end_frame, mean_x, mean_y, traj_id, hog, hof, mbh.
    """
    video_name = video_info[0][0]
    duration_frame = int(video_info[8][0][0])
    fps = float(video_info[9][0][0])
    # read in the file and form a list of trajectory features
    # later: make the directory as a variable
    with open(BASE_DIR + 'TH14_validation_features/'+video_name+'.txt','r') as f:
        trajs = [line.split('\t')[:-1] for line in f]

    # Window start frames. In the dataset no trajectory ends on the last
    # frame, so duration_frame + 1 is not needed here.
    frame_step_list = range(1, duration_frame, stepSize)
    # Index of the first trajectory of the next window; it is discovered
    # while the current window is being filled.
    next_traj_idx = 0
    node_list = []

    for i, window_start in enumerate(frame_step_list):  # For each window
        # Both start and end are inclusive.
        if window_start + windowSize < duration_frame:
            window_end = window_start + windowSize - 1
        elif window_start + windowSize < duration_frame + stepSize:
            # The last window is truncated at the end of the video.
            window_end = duration_frame
        else:
            # Extra windows
            break

        traj_start_idx = next_traj_idx
        next_traj_set = False
        if traj_start_idx < len(trajs):
            end_frame = int(trajs[traj_start_idx][0])
            start_frame = end_frame - trajLength + 1
        else:
            # No trajectory available (e.g. empty feature file); the loop
            # below is skipped by its index check.
            end_frame = start_frame = 0
        new_node = Node(window_start, window_end, fps, video_name)

        # Raw feature codes of the window, later turned into the 4*4000 histogram.
        features = dict()
        features['mean_x'] = []
        features['mean_y'] = []
        features['traj'] = []
        features['hog'] = []
        features['hof'] = []
        features['mbh'] = []

        # NOTE(review): the condition tests the frames of the trajectory read
        # in the previous iteration, so the first trajectory lying outside the
        # window is still appended before the loop stops. Kept as is because
        # the next_traj_idx bookkeeping relies on it -- confirm whether the
        # extra trajectory is intended.
        while (traj_start_idx < len(trajs)
               and (window_start <= end_frame <= window_end
                    or window_start <= start_frame <= window_end)):
            traj = trajs[traj_start_idx]  # Current trajectory
            end_frame = int(traj[0])
            start_frame = end_frame - trajLength + 1
            # The first trajectory that reaches the next window start is
            # where the next window begins scanning.
            if not next_traj_set and i+1 < len(frame_step_list) and end_frame >= frame_step_list[i+1]:
                next_traj_idx = traj_start_idx
                next_traj_set = True

            features['mean_x'].append(float(traj[1]))
            features['mean_y'].append(float(traj[2]))
            features['traj'].append(int(traj[3]))
            features['hog'].append(int(traj[4]))
            features['hof'].append(int(traj[5]))
            features['mbh'].append(int(traj[6]))
            traj_start_idx += 1

        # Temporal labels are given in seconds, so convert the window from
        # frames to seconds (frame / fps) before measuring the overlap.
        window_start_time = window_start / fps
        window_end_time = window_end / fps
        for tlabel_info in video_tLabelList:
            over_lap_score = computeOverlap(window_start_time, window_end_time, tlabel_info[1][0], tlabel_info[1][1])
            if over_lap_score > 0:
                new_node.add_label(tlabel_info, over_lap_score)
        # compute and add histogram
        new_node.add_feature(features['mean_x'],features['mean_y'],features['traj'],features['hog'],features['hof'],features['mbh'])
        # The id of a node is its position in the returned list.
        new_node.set_id(len(node_list))
        node_list.append(new_node)
    return node_list

Example: loading and accessing one of the feature file, uncomment to see results

In [11]:
# test_feature_file = '/Users/baroc/repos/VideoDetection/TH14_validation_features/video_validation_0000001.txt'
# with open(test_feature_file,'r') as f:
#     trajs = f.readlines()
# trajs = [x.split('\t')[:-1] for x in trajs]
# pprint(trajs[:5])
# # for x in lines:
# #     if(len(x)!=7 ):
# #         print x
# # Every line is good. 
# # [ end_frame, mean_x, mean_y, traj_id, hog, hof, mbh ]

### Load the labels
This following snippet reads and sorts the temporal labels for videos.

In [12]:
import os
import numpy as np
# Read every "<Class>_val.txt" annotation file and build one sorted list of
# [video_name, [start_time, end_time], class_name] entries.
TLBL_DIR = BASE_DIR + 'TH14_Temporal_annotations_validation/annotation/' #'./''
filelist = os.listdir(TLBL_DIR)
tLabelList = []
for filename in filelist:
    if filename.endswith("_val.txt"):
        class_name = filename[:-len("_val.txt")]
        with open(TLBL_DIR+filename,'r') as f:
            tLabels = f.readlines()
        # Each line is "<video_name>  <start> <end>" (two spaces after the name).
        # rstrip keeps the last character when the file has no trailing newline.
        tLabels = [x.rstrip('\r\n').split('  ') for x in tLabels]
        # A list comprehension (not map) so the interval is a real list on
        # Python 3 as well; the entries are sorted and indexed later.
        tLabels = [[x[0], [float(v) for v in x[1].split(' ')], class_name] for x in tLabels]
        tLabelList.extend(tLabels)
    else:
        print('Not a txt file: '+filename)
tLabelList = sorted(tLabelList)
# Names of the videos that have at least one temporal label.
videonames = sorted(list(set([x[0] for x in tLabelList])))
pprint(tLabelList[:5])
pprint(tLabelList[170:175])
# print '\n'
pprint(videonames[:10])

[['video_validation_0000051', [67.5, 75.9], 'Billiards'],
 ['video_validation_0000051', [85.9, 90.6], 'Billiards'],
 ['video_validation_0000051', [139.3, 148.2], 'Billiards'],
 ['video_validation_0000052', [24.3, 24.8], 'Billiards'],
 ['video_validation_0000053', [9.1, 13.8], 'Billiards']]
[['video_validation_0000162', [152.5, 155.1], 'Diving'],
 ['video_validation_0000162', [155.8, 158.5], 'CliffDiving'],
 ['video_validation_0000162', [155.8, 158.5], 'Diving'],
 ['video_validation_0000162', [163.0, 164.0], 'Ambiguous'],
 ['video_validation_0000162', [164.1, 167.1], 'CliffDiving']]
['video_validation_0000051',
 'video_validation_0000052',
 'video_validation_0000053',
 'video_validation_0000054',
 'video_validation_0000055',
 'video_validation_0000056',
 'video_validation_0000057',
 'video_validation_0000058',
 'video_validation_0000059',
 'video_validation_0000060']


In the temporal action detection task, we only care about the 200 videos with temporal label. The next snippet extracts the meta data of these videos.

In [13]:
# Load the validation-set metadata and keep the records of the labelled videos.
mat_file_str=BASE_DIR+"validation_set_meta/validation_set_meta/validation_set.mat"
mat = sio.loadmat(mat_file_str)
meta_array_1010 = mat['validation_videos'][0] # 1010 entries in meta_array
# The last 7 characters of a video name are its 1-based number.
id_list = [int(x[-7:])-1 for x in videonames] # a list of zero based indices
meta_array_200 = meta_array_1010[id_list]
len(meta_array_200)

200

Now there are two ways to get meta data, meta_1010 is better for access by index,  meta_200 is better for looping through

#### Test Node generation
The following few lines of code **tests** the generate node function:
* First, load the meta data of videos

In [14]:
# mat_file_str="/Users/baroc/repos/VideoDetection/validation_set_meta/validation_set_meta/validation_set.mat"
# mat = sio.loadmat(mat_file_str)
# meta_array = mat['validation_videos'][0] # 1010 entries in meta_array
# NUM_VIDEOS = 1010

pprint([x for x in meta_array_200 if len(x[4])>0 ]) # These are videos with multiple actions
# Pick one video for the test; the index is zero based, so 162 is video number 163.
video_info = meta_array_1010[162]
print(video_info[0][0]) # video name
print(video_info[8][0][0]) # number of frames
# Temporal labels that belong to the selected video.
video_tLabelList = [x for x in tLabelList if x[0]==video_info[0][0]]
pprint(video_tLabelList)
meta_array_1010[161]

[([u'video_validation_0000161'], [u'ACAPULCO MEXICO  - CLIFF DIVERS OF LA QUEBRADA.mpeg'], [u'CliffDiving'], [[22]], [[array([u'Diving'], 
      dtype='<U6')]], [[26]], [u'NO'], [[61.83]], [[1852]], [[30]], [[180]], [[320]]),
 ([u'video_validation_0000162'], [u'Celebrating 5 Years of the Red Bull Cliff Diving World Series.mpeg'], [u'CliffDiving'], [[22]], [[array([u'Diving'], 
      dtype='<U6')]], [[26]], [u'NO'], [[198.136]], [[5941]], [[30]], [[180]], [[320]]),
 ([u'video_validation_0000163'], [u'Cliff Diving in Norway - Red Bull Cliff Diving World Series 2012 Grimstad.mpeg'], [u'CliffDiving'], [[22]], [[array([u'Diving'], 
      dtype='<U6'), array([u'Rowing'], 
      dtype='<U6'), array([u'Surfing'], 
      dtype='<U7')]], [[26], [76], [88]], [u'NO'], [[233.976]], [[7016]], [[30]], [[180]], [[320]]),
 ([u'video_validation_0000164'], [u'Cliff diving qualification - Red Bull Cliff Diving World Series 2012.mpeg'], [u'CliffDiving'], [[22]], [[array([u'Diving'], 
      dtype='<U6')]], 

([u'video_validation_0000162'], [u'Celebrating 5 Years of the Red Bull Cliff Diving World Series.mpeg'], [u'CliffDiving'], [[22]], [[array([u'Diving'], 
      dtype='<U6')]], [[26]], [u'NO'], [[198.136]], [[5941]], [[30]], [[180]], [[320]])

* Invoke the `generateNode` function and time it. 150.58s

In [21]:
# Time one generateNode call with the default window and step sizes.
t = time.time()
node_list = generateNode(video_info, video_tLabelList)#, 100, 50)
print('time elapse %.2f'% (time.time()-t) )# 19.85

time elapse 13.27


19.85 for original node implementation of feature data
13.27 for new histogram implementation + light members

* Test if the result is right for the original implementation

In [23]:
# # node_list[0].start
# # node_list[0].end

# trajs = node_list[2].trajectories
# print('All in all '+str(len(trajs))+' trajectories' )
# # [x.coverage for x in trajs[5000:5010]]
# print "end "+str(trajs[-1].frame_num) # problem
# print trajs[5000].frame_num
# print trajs[0].frame_num
# print trajs[-1].coverage
# print '\n'
# print len(node_list[2].features['hof'])
# print len(node_list[2].features['hog'])
# print len(node_list[2].features['mean_x'])
# print len(node_list[2].features['mean_y'])

In [28]:
# Inspect the histogram of the third node: its length and its first 50 bins.
hist = node_list[2].histogram
# print('All in all '+str(len(hist))+' histogram dimensions' )
# [x.coverage for x in trajs[5000:5010]]
print(len(hist))
hist[:50]

16000


array([ 0,  0,  2,  0,  0,  0,  1,  7,  4,  1, 18,  0,  4,  8,  1,  1,  1,
        0,  0,  4,  0,  0,  0,  1,  0,  7,  0,  7,  4,  1, 11,  2,  6,  2,
        2, 32,  0,  3, 10,  2,  4,  0,  2,  0,  8, 25,  1,  4,  0,  2])

In [24]:
# Number of windows (nodes) generated for the test video.
len(node_list)

70

* The result is right

Till now, we are able to generate nodes from feature files.

Next step would be save the node information into a file.

So later we can use it to generate edge/graph

But before that, I have a few questions on saving the data. 
* Of course, I can simply pickle it, but considering the matlab compatibility, might be better to use .mat
* How to save

In [147]:
# [[x.coverage,x.frame_num] for x in trajs[6589:]]
# len(trajs[6589:])

In [30]:
# Scratch check: casting one operand to float gives true division for integers.
a = 2
b = 3
float(a)/b

0.6666666666666666

### Experiment with pickle
Skip this part, since this is only experimenting

In [47]:
import pickle
import cPickle
# Compare cPickle and pickle when saving the node list with the default protocol.
# The files are opened in `with` blocks so the handles are closed (and the
# data flushed) before the elapsed time is printed.
print(video_info[0][0])
t = time.time()
with open( BASE_DIR + video_info[0][0]+".cp", "wb" ) as cp_out:
    cPickle.dump( node_list, cp_out )
print(time.time()-t)
t = time.time()
with open( BASE_DIR + video_info[0][0]+".p", "wb" ) as p_out:
    pickle.dump( node_list, p_out )
print(time.time()-t)

video_validation_0000163
26.4432079792
106.701439142


Results show that when pickling one video (video_validation_0000163), cPickle takes 26.4432079792 s to save a 251.4 MB file, while pickle takes 106.701439142 s to save a 224.5 MB file.

In [53]:
# Time loading the two files written by the dump experiment (both live under
# BASE_DIR). Only the load itself is timed, not opening the file.
# NOTE: unpickling executes code from the file -- only load files you wrote yourself.
with open(BASE_DIR+video_info[0][0]+".cp", 'rb') as cPFile:
    t = time.time()
    cP = cPickle.load(cPFile)
    print(time.time()-t)
with open(BASE_DIR+video_info[0][0]+".p", 'rb') as PFile:
    t = time.time()
    P = cPickle.load(PFile)
    print(time.time()-t)

114.192363024
131.941593885


pickle load .cp 118.874449968 .p 129.021311998
cpickle load .cp 114.192363024 .p 131.941593885

Here I would like to try highest protocol

In [55]:
# Same dump experiment with protocol=pickle.HIGHEST_PROTOCOL.
# `with` closes the files so the handles do not leak.
t = time.time()
with open( "/Users/baroc/repos/VideoDetection/"+video_info[0][0]+".cp", "wb" ) as cp_out:
    cPickle.dump( node_list, cp_out, protocol=pickle.HIGHEST_PROTOCOL )
print(time.time()-t)
t = time.time()
with open( "/Users/baroc/repos/VideoDetection/"+video_info[0][0]+".p", "wb" ) as p_out:
    pickle.dump( node_list, p_out, protocol=pickle.HIGHEST_PROTOCOL )
print(time.time()-t)

68.3370029926
189.640047789


With the highest protocol, cPickle takes 68.3370029926 s to save the video to a 122.5 MB file, and pickle takes 189.640047789 s to save to a 122.5 MB file.

In [57]:
# Time loading the two highest-protocol files; only the load itself is timed.
# `with` closes each file even if loading fails.
with open("/Users/baroc/repos/VideoDetection/"+video_info[0][0]+".cp", 'rb') as cPFile:
    t = time.time()
    cP = cPickle.load(cPFile)
    print(time.time()-t)
with open("/Users/baroc/repos/VideoDetection/"+video_info[0][0]+".p", 'rb') as PFile:
    t = time.time()
    P = cPickle.load(PFile)
    print(time.time()-t)

124.124447107
129.871397972


Loading cP: 124.124447107 .p: 129.871397972

In [58]:
# Number of trajectories stored on the third node of the unpickled list.
len(P[2].trajectories)

17989

### Final pickle

import cPickle
# Final choice: cPickle with the highest protocol. The file is closed by the
# `with` block before the elapsed time is printed.
t = time.time()
with open( BASE_DIR+video_info[0][0]+".p", "wb" ) as p_out:
    cPickle.dump( node_list, p_out, protocol=cPickle.HIGHEST_PROTOCOL )
print(time.time()-t) # 14.753661871

In [32]:
# Load the node list back and time the load; `with` closes the file even if
# loading fails.
with open(BASE_DIR+video_info[0][0]+".p", 'rb') as cPFile:
    t = time.time()
    cP = cPickle.load(cPFile)
    print(time.time()-t)

0.0107200145721


14.753661871 to save, 131.248260021 to load for original heavy implementation, about 122 mb

0.0320661067963 to save, 0.0107200145721 to load for new light implementation, only 9 mb

### Computing the histogram of a node
#### Test histogram

In [52]:
# Prototype of the histogram computation: bin each descriptor list into 4000
# bins over (0, 4000) and concatenate them in the order traj, hog, hof, mbh.
# NOTE(review): this cell reads node.features, which Node.__init__ no longer
# creates (the assignments are commented out in the class), so it only runs
# on nodes that still carry a `features` dict.
hist_list = []
hist_hog = np.histogram(node_list[0].features['hog'],4000,(0,4000))[0]
hist_hof = np.histogram(node_list[0].features['hof'],4000,(0,4000))[0]
hist_mbh = np.histogram(node_list[0].features['mbh'],4000,(0,4000))[0]
hist_traj = np.histogram(node_list[0].features['traj_idx'],4000,(0,4000))[0]
# node_list[0].features['hog'] #2955, 3883, 2955, 2332, 2311, 2216,
# hist_feature = np.append(hist_hog, hist_hof)
# hist_feature = np.append(hist_feature, hist_mbh)
# hist_feature = np.append(hist_feature, hist_traj)
hist_feature = np.concatenate((hist_traj, hist_hog, hist_hof, hist_mbh))
len(hist_feature)
# type(hist_hog)
# The first 4000 entries of the concatenation are the traj histogram.
print(hist_feature[0:20])
print(hist_traj[0:20])

[ 6 20  3 14  9  2  9 17 12  6 26  0 13 15  1  0  3  2  8  0]
[ 6 20  3 14  9  2  9 17 12  6 26  0 13 15  1  0  3  2  8  0]


In [51]:
# Entries 4000 onward of the concatenation are the hog histogram.
print(hist_feature[4000:4020])
print(hist_hog[0:20])

[ 2  1  8  3  3  0  0  0  0  0 17  0  4 16  0  0  0  0  0 14]
[ 2  1  8  3  3  0  0  0  0  0 17  0  4 16  0  0  0  0  0 14]


#### Final histogram
is in the code

**OMG, I should have just computed the histogram, fkr**

#### After finishing all the functions this will be the start point? Maybe
The program start point: 

In [None]:
# Planned entry point: generate the nodes of every video in the validation set.
USR_WIN = 150   # window length in frames
USR_STEP = 100  # step between window starts in frames
# Same metadata file as loaded earlier in the notebook, resolved from BASE_DIR
# rather than a second hard-coded checkout path.
mat_file_str=BASE_DIR+"validation_set_meta/validation_set_meta/validation_set.mat"
mat = sio.loadmat(mat_file_str)
meta_array = mat['validation_videos'][0]
NUM_VIDEOS = 1010
for i in range(NUM_VIDEOS):
    video_info = meta_array[i]
# a structure of ([u'video_validation_0000102'], [u'Boxing Tips  - How to Punch a Boxing Bag.mpeg'], [u'BoxingPunchingBag'], [[17]], 
#    [[array([u'Punch'], dtype='<U5')]], [[71]], [u'NO'], [[152.84]], [[4582]], [[30]], [[180]], [[320]])
    video_name = video_info[0][0]
    # generateNode needs the temporal labels of this video as its second
    # argument; window and step size are passed by keyword so they cannot be
    # mistaken for the label list.
    video_tLabelList = [x for x in tLabelList if x[0]==video_name]
    # NOTE(review): this loops over all 1010 videos; confirm that a feature
    # file exists for each of them, not only for the labelled ones.
    node_list = generateNode(video_info, video_tLabelList, windowSize=USR_WIN, stepSize=USR_STEP)
    # TODO: save node_list for this video before moving on to the next one.

# New Task of Classification
* Mainly extract the nodes, and have a list of objects to represent the nodes.
* each node should contain as much information as possible
* 