In [1]:
import os
import pandas as pd

path_dir = "label/UCF-101"

def new_csv(path_dir):
    '''
    [Args]
    path_dir : path to folder contain train.csv, val.csv and newClassInd.txt which hold labels of class remained

    [Result]
    Create new_train.csv and new_val.csv save at path_dir 

    '''
    # Read path
    train = os.path.join(path_dir,'train.csv')
    val = os.path.join(path_dir,'val.csv')
    new_class = os.path.join(path_dir,'newClassInd.txt')
    new_train = os.path.join(path_dir,'new_train.csv')
    new_val = os.path.join(path_dir,'new_val.csv')

    if not os.path.exists(new_train):
        open(new_train, 'w').close() 
    if not os.path.exists(new_val):
        open(new_val, 'w').close() 

    df_csv_train= pd.read_csv(train)
    df_csv_val = pd.read_csv(val)

    # Take lables to keep in new file csv
    with open(new_class,'r') as f:
        labels_keep = set(line.strip().split(' ')[1] for line in f)

    # Filter train.csv
    df_filtered_train = df_csv_train[df_csv_train['label'].isin(labels_keep)]
    df_filtered_train.to_csv(new_train, index=False)

    # Filter val.csv
    df_filtered_val = df_csv_val[df_csv_val['label'].isin(labels_keep)]
    df_filtered_val.to_csv(new_val, index=False)

new_csv(path_dir)




In [1]:
import glob


file = glob.glob('data/UCF-101-DB/*/*/n_frames')

In [4]:
temp = []

for f in file:
    with open(f) as p:
        temp.append(int(p.read()))

In [7]:
import numpy as np 

np.mean(temp)

186.6584084084084

In [1]:
import math
import random

class LoopPadding(object):

    def __init__(self, size):
        self.size = size

    def __call__(self, frame_indices):
        out = frame_indices

        for index in out:
            if len(out) >= self.size:
                break
            out.append(index)

        return out

class TemporalEvenCrop(object):

    def __init__(self, size, n_samples=1, percentage = .5):
        self.size = size
        self.n_samples = n_samples
        self.loop = LoopPadding(size)
        self.per = percentage

    def sampling(self, frame_count):

        frame_indices = [i for i in range(frame_count)]

        n_frames = int(len(frame_indices)*self.per)
        stride = max(
            1, math.ceil((n_frames - 1 - self.size) / (self.n_samples - 1)))
        
        # stride = 1

        out = []
        for begin_index in frame_indices[::stride]:
            if len(out) >= self.n_samples:
                break
            end_index = min(frame_indices[-1] + 1, begin_index + self.size)
            sample = list(range(begin_index, end_index))

            if len(sample) < self.size:
                out.append(self.loop(sample))
                break
            else:
                out.append(sample)

        return out

In [1]:
import os
import glob

def num_images(dataset_path,dst_path):

    if not os.path.exists(dst_path):
        os.makedirs(dst_path)

    for image_file in glob.glob(dataset_path+'/*/*/'):
        frames_count =0
        for files in os.listdir(image_file):
            if files.endswith('.jpg'):
                frames_count +=1

        if frames_count<=0:
            print('{} does not have any frames'.format(image_file))
            continue
        
        count_frame_path = os.path.join(dst_path,image_file.split('/',maxsplit=1)[-1])
        if not os.path.exists(count_frame_path):
            os.makedirs(count_frame_path)

        with open(os.path.join(count_frame_path,'n_frames'),'w') as dst_file:
            dst_file.write(str(frames_count))

num_images("UCF-101-JPG","UCF-101-JPG")

In [2]:
class SequenceSampler(object):
    def __init__(self, len_scale, frame_skip = 4, **kwags):

        assert  len_scale < frame_skip, "len_scale should be smaller than frame_skip"

        self.len_scale = len_scale
        self.frame_skip = frame_skip
        self.lenght_video = 187

    def sampling(self, video_len):
        
        start_index = 0

        end_index = int(self.lenght_video)

        offset = 0
        if end_index > video_len:
            offset = end_index - video_len

            indices = [i for i in range(start_index, video_len)]
        
            indices += [video_len - 1 for i in range(0, offset)]
        else:
            indices = [i for i in range(start_index, end_index)]

        # indices = [indices[i] for i in range(0, len(indices), self.frame_skip + 1)]
        # return indices
        indices_list = []
        for i in range(0, len(indices), self.frame_skip + self.len_scale):
            indices = []
            for j in range(self.len_scale):
                indices.append(i + j)
            indices_list.append(indices)

        return indices_list

In [5]:
class FixLenghtSampler(object):
    def __init__(self, lenght = 0, skip = 0):
        
        self.lenght = lenght
        self.skip = skip
        
        pass

    def sampling(self, video_len = None):
        indices_list = [i*(self.skip + 1) for i in range(0, self.lenght)]
        return indices_list

a = FixLenghtSampler(10, 2)
print(a.sampling())
a = FixLenghtSampler(10, 1)
print(a.sampling())
a = FixLenghtSampler(10, 0)
print(a.sampling())


[0, 3, 6, 9, 12, 15, 18, 21, 24, 27]
[0, 2, 4, 6, 8, 10, 12, 14, 16, 18]
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]


In [6]:
a = SequenceSampler(2, 4)
a.sampling(187)

[[0, 1],
 [6, 7],
 [12, 13],
 [18, 19],
 [24, 25],
 [30, 31],
 [36, 37],
 [42, 43],
 [48, 49],
 [54, 55],
 [60, 61],
 [66, 67],
 [72, 73],
 [78, 79],
 [84, 85],
 [90, 91],
 [96, 97],
 [102, 103],
 [108, 109],
 [114, 115],
 [120, 121],
 [126, 127],
 [132, 133],
 [138, 139],
 [144, 145],
 [150, 151],
 [156, 157],
 [162, 163],
 [168, 169],
 [174, 175],
 [180, 181],
 [186, 187]]

In [1]:
import csv
import os
import pandas as pd

def load_train_test(train_file, val_file):
    with open(train_file) as f:
        t_lines = f.readlines()
        t_lines = [line.rstrip() for line in t_lines]

    with open(val_file) as f:
        v_lines = f.readlines()
        v_lines = [line.rstrip() for line in v_lines]

    t_vids = []
    t_labels = []
    for line in t_lines:
        label, vid = line.split('/')
        vid = vid.split('.avi')[0]
        t_vids.append(vid)
        t_labels.append(label)

    v_vids = []
    v_labels = []
    for line in v_lines:
        label, vid = line.split('/')
        vid = vid.split('.avi')[0]
        v_vids.append(vid)
        v_labels.append(label)

    train_file = train_file.split('.')[0]+'.csv'
    val_file = val_file.split('.')[0]+'.csv'
    true_file = train_file.split('train')[0]+'truelist.csv'

    train_dict = {'label':t_labels,
                  'id':t_vids,
                  'split':['train' for _ in t_labels]}

    val_dict = {'label':v_labels,
                'id':v_vids,
                'split':['val' for _ in v_labels]}

    true_dict = {'label':t_labels + v_labels,
                'id':t_vids + v_vids,
                'split':['train' for _ in t_labels] + ['val' for _ in v_labels]}


    (pd.DataFrame.from_dict(data=train_dict).to_csv(train_file, header=True, sep=',', index=False))
    (pd.DataFrame.from_dict(data=val_dict).to_csv(val_file, header=True, sep=',', index=False))
    (pd.DataFrame.from_dict(data=true_dict).to_csv(true_file, header=True, sep=',', index=False))


file_pairs = [['label/UCF-101/trainlist01.txt','label/UCF-101/testlist01.txt'],
              ['label/UCF-101/trainlist02.txt','label/UCF-101/testlist02.txt'],
              ['label/UCF-101/trainlist03.txt','label/UCF-101/testlist03.txt']]
for f_pair in file_pairs:
    load_train_test(f_pair[0],f_pair[1])


In [15]:
import random

class Auto_skip(object):
    def __init__(self, num_frame, percentage = 0.5):
        self.num_frame = num_frame
        self.per = percentage

    def sampling(self, frame_count = 187):

        n_frames = int(frame_count*self.per)
        
        if n_frames >= self.num_frame:
            frames = random.sample(range(0, n_frames), self.num_frame)
            frames.sort()
        else:
            frames = [i for i in range(0, n_frames)]

            for i in range(n_frames, self.num_frame):
                frames.append(n_frames - 1)
        

        return frames

In [16]:
a = Auto_skip(10, 0.5)

In [26]:
a.sampling(25)

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

In [87]:
import math, random
class LoopPadding(object):

    def __init__(self, size):
        self.size = size

    def __call__(self, frame_indices):
        out = frame_indices

        for index in out:
            if len(out) >= self.size:
                break
            out.append(index)

        return out

class TemporalRandomEvenCrop(object):

    def __init__(self, size, n_samples=1, percentage = .5):
        self.size = size
        self.n_samples = n_samples
        self.loop = LoopPadding(size)
        self.per = percentage

    def sampling(self, frame_count):


        n_frames = int(frame_count*self.per)
        frame_indices = [i for i in range(n_frames)]

        stride = 1
        if self.n_samples != 1:
            stride = max(
                1, math.ceil((n_frames - 1 - self.size) / (self.n_samples - 1)))
        print(frame_indices[::stride])
        # stride = 1

        out = []
        count = self.n_samples
        while count != 0:
            for begin_index in frame_indices[::stride]:
                count -= 1
                if len(out) >= self.n_samples:
                    break

                if stride != 1 and stride > self.size:
                    end_index = min(frame_indices[-1] + 1, begin_index + stride)
                else:
                    end_index = min(frame_indices[-1] + 1, begin_index + self.size)


                offset = end_index - begin_index
                if offset >= self.size:
                    sample = random.sample(range(begin_index, end_index), self.size)
                else:
                    sample = random.sample(range(begin_index, end_index), offset)

                sample.sort()

                if len(sample) < self.size:
                    out.append(self.loop(sample))
                    break
                else:
                    out.append(sample)

                if count == 0:
                    break

        return out

class TemporalEvenCrop(object):

    def __init__(self, size, n_samples=1, percentage = .5):
        self.size = size
        self.n_samples = n_samples
        self.loop = LoopPadding(size)
        self.per = percentage

    def sampling(self, frame_count):


        n_frames = int(frame_count*self.per)
        frame_indices = [i for i in range(n_frames)]

        stride = 1
        if self.n_samples != 1:
            stride = max(
                1, math.ceil((n_frames - 1 - self.size) / (self.n_samples - 1)))
        print(frame_indices[::stride])
        # stride = 1

        out = []
        count = self.n_samples
        while count != 0:
            for begin_index in frame_indices[::stride]:
                count -= 1
                if len(out) >= self.n_samples:
                    break

                if stride != 1 and stride > self.size:
                    end_index = min(frame_indices[-1] + 1, begin_index + stride)
                else:
                    end_index = min(frame_indices[-1] + 1, begin_index + self.size)


                offset = end_index - begin_index
                if offset >= self.size:
                    start = random.sample(range(begin_index, end_index - self.size + 1), 1)[0]
                    sample = list(range(start, start + self.size))
                else:
                    sample = random.sample(range(begin_index, end_index), offset)

                sample.sort()


                if len(sample) < self.size:
                    out.append(self.loop(sample))
                    break
                else:
                    out.append(sample)

                if count == 0:
                    break

        return out

In [88]:
a = TemporalRandomEvenCrop(16, 3, 0.5)
b = TemporalEvenCrop(16, 3, 0.5)

In [105]:
b.sampling(50)

[0, 4, 8, 12, 16, 20, 24]


[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
 [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
 [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]]

In [103]:
a.sampling(187)

[0, 38, 76]


[[3, 6, 8, 10, 11, 13, 14, 16, 19, 20, 22, 26, 33, 35, 36, 37],
 [38, 39, 40, 43, 46, 53, 54, 55, 58, 61, 62, 65, 68, 70, 73, 74],
 [76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 88, 89, 90, 91, 92]]