MIT License

Copyright (c) 2021 Taiki Miyagawa and Akinori F. Ebihara

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

# Train/Valid/Test Splitting (png files)
Running this notebook creates DATADIR/HMDB51clip79tvt or DATADIR/HMDB51clip200tvt, depending on `duration` (run it once per duration and split number).

### HMDB51 Stat
- duration = 79
    - split1
        - Nums of clips in tr:va:te = 5335 : 524 : 2478
        - Nums of frames in tr:va:te = 421.465k : 41.396k : 195.762k
        - Num of clips unused (ID=0): 2347
        
- duration = 200
    - split1
        - Nums of clips in tr:va:te = 3565 : 328 : 1697
        - Nums of frames in tr:va:te = 713.0k : 65.6k : 339.4k
        - Num of clips unused (ID=0): 1660

In [1]:
from glob import glob
import os, shutil
from copy import copy
import statistics
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# User-defined params
# DATADIR: root directory of the datasets. The cells below read clips from
#     DATADIR/HMDB51clip<duration> and split lists from DATADIR/HMDB51/labelstvt,
#     and write the re-split copy to DATADIR/HMDB51clip<duration>tvt.
# duration: number of frames per clip; selects which clipped dataset is split.
# splitnum: which official HMDB51 split to use.
DATADIR = "Define this first. E.g., /data/t-miyagawa"
duration = 200 # 79 or 200
splitnum = 1 # Official splitting. 1, 2, or 3.

In [2]:
# Paths
newtrdir = "{}/HMDB51clip{}tvt/train0{}".format(DATADIR, duration, splitnum) # train images are to be stored here.
newvadir = "{}/HMDB51clip{}tvt/valid0{}".format(DATADIR, duration, splitnum) # validation images are to be stored here.
newtedir = "{}/HMDB51clip{}tvt/test0{}".format(DATADIR, duration, splitnum) # test images are to be stored here.
allclippaths = sorted(glob("{}/HMDB51clip{}/*/*".format(DATADIR, duration))) # all clip directories (<class>/<clip>) after clipping.

# Get train/val/test split lists
trtxt = "{}/HMDB51/labelstvt/trainlist0{}.txt".format(DATADIR, splitnum)
vatxt = "{}/HMDB51/labelstvt/validlist0{}.txt".format(DATADIR, splitnum)
tetxt = "{}/HMDB51/labelstvt/testlist0{}.txt".format(DATADIR, splitnum)

# Create the output directories. exist_ok=True keeps the cell re-runnable and
# avoids the check-then-create race of os.path.exists() + os.makedirs().
for _newdir in (newtrdir, newvadir, newtedir):
    os.makedirs(_newdir, exist_ok=True)


def _read_split_list(txtpath):
    """Read one split-list file and return its entries, one per line.

    Args:
        txtpath: A string. Path to a train/valid/test list file.

    Returns:
        A list of strings: every line of the file with its last two
        characters removed.
    """
    with open(txtpath, mode="r") as f:
        # NOTE(review): the last two characters are presumably a separator plus
        # the newline -- confirm against the code that writes the label files.
        # A final line without a trailing newline would lose one real character.
        return [line[:-2] for line in f]


trf = _read_split_list(trtxt)
vaf = _read_split_list(vatxt)
tef = _read_split_list(tetxt)

# """
# Returns:
#     newtrdir: A string. Name of the new training dataset directory.
#     newvadir: A string. Name of the new validation dataset directory.
#     newtedir: A string. Name of the new test dataset directory.
#     allclippaths: A list of string paths to all clip directories with length = duration.
#         Sorted in alphabetical order.
#         Len = Num of total clips after the re-clipping.
#         E.g.,
#             ['DATADIR/HMDB51clip79/brush_hair/April_09_brush_hair_u_nm_np1_ba_goo_0_cc01',
#              'DATADIR/HMDB51clip79/brush_hair/April_09_brush_hair_u_nm_np1_ba_goo_0_cc02',
#              'DATADIR/HMDB51clip79/brush_hair/April_09_brush_hair_u_nm_np1_ba_goo_0_cc03', ...]
#
#     trf: A list. Len = num of training videos (before re-clipping to length = duration).
#         One video can yield several clips, so the total num of training clips
#         with length = duration can exceed len(trf).
#         E.g.,
#             ['/April_09_brush_hair_u_nm_np1_ba_goo_0',
#              '/April_09_brush_hair_u_nm_np1_ba_goo_1',
#              '/April_09_brush_hair_u_nm_np1_ba_goo_2',
#              '/Aussie_Brunette_Brushing_Hair_II_brush_hair_u_nm_np1_ri_med_3',
#              '/Aussie_Brunette_Brushing_Hair_II_brush_hair_u_nm_np2_le_goo_0',
#              '/Aussie_Brunette_Brushing_Hair_II_brush_hair_u_nm_np2_le_goo_1',
#              '/Aussie_Brunette_Brushing_Hair_II_brush_hair_u_nm_np2_le_med_2', ...]
#     vaf: A list. Validation set. Same format as trf.
#     tef: A list. Test set. Same format as trf.
# """

In [3]:
# Get clip paths of tr/va/te
# Every clip directory is named '<video name>_ccNN'. The '_ccNN' suffix is
# stripped and the remaining '/<video name>' is looked up *exactly* in the
# split lists. (A plain substring test would let '/X_1' also match
# '/X_13_cc01', assigning clips to the wrong subset or appending them twice.)
# Assumes split-list entries have the form '/<video name>'.
trclippaths = []
vaclippaths = []
teclippaths = []

# Sets give O(1) membership tests instead of scanning every list per clip.
_trset = set(trf)
_vaset = set(vaf)
_teset = set(tef)

for _c, v in enumerate(allclippaths):
    # Verbose
    if ((_c + 1) % 1000 == 0) or _c == 0:
        print("Iter {}/{}".format(_c + 1, len(allclippaths)))

    # 'DATADIR/HMDB51clip79/brush_hair/X_0_cc01' -> '/X_0_cc01' -> '/X_0'
    _clip = "/" + v.rsplit("/", 1)[-1]
    # Fall back to the full clip name if there is no '_cc' suffix to strip.
    _video = _clip.rpartition("_cc")[0] or _clip

    # Hit flags (0 or 1) for each subset, used by the assertion below
    _tmptr = int(_video in _trset)
    _tmpva = int(_video in _vaset)
    _tmpte = int(_video in _teset)

    if (_tmptr, _tmpva, _tmpte) == (0, 0, 0):
        # The video is in none of the lists, i.e., unused in this split.
        print("Skipped because ID = 0: {}, {}".format(_c, v))
        continue
    assert _tmptr + _tmpva + _tmpte == 1, "Multi-hit is not impossible in principle: {}, {}, {}, {}".format(_tmptr, _tmpva, _tmpte, v)

    # Exactly one flag is set here.
    if _tmptr:
        trclippaths.append(v)
    elif _tmpva:
        vaclippaths.append(v)
    else:
        teclippaths.append(v)

print("Done.")
trclippaths = sorted(trclippaths)
vaclippaths = sorted(vaclippaths)
teclippaths = sorted(teclippaths)

Iter 1/7251
Skipped because ID = 0: 7, /data/t-miyagawa/HMDB51clip200/brush_hair/Aussie_Brunette_Brushing_Hair_II_brush_hair_u_nm_np1_ba_goo_4_cc01
Skipped because ID = 0: 8, /data/t-miyagawa/HMDB51clip200/brush_hair/Aussie_Brunette_Brushing_Hair_II_brush_hair_u_nm_np1_ba_goo_4_cc02
Skipped because ID = 0: 51, /data/t-miyagawa/HMDB51clip200/brush_hair/Brushing_Her_Hair__[_NEW_AUDIO_]_UPDATED!!!!_brush_hair_h_cm_np1_le_goo_3_cc01
Skipped because ID = 0: 61, /data/t-miyagawa/HMDB51clip200/brush_hair/Brushing_my_Long_Hair__February_2009_brush_hair_u_nm_np1_fr_goo_0_cc01
Skipped because ID = 0: 87, /data/t-miyagawa/HMDB51clip200/brush_hair/Chanta!____THAT_IS_long_blonde_hair!_brush_hair_u_cm_np1_le_goo_1_cc01
Skipped because ID = 0: 88, /data/t-miyagawa/HMDB51clip200/brush_hair/Chanta!____THAT_IS_long_blonde_hair!_brush_hair_u_cm_np1_le_goo_1_cc02
Skipped because ID = 0: 133, /data/t-miyagawa/HMDB51clip200/brush_hair/Slave_brush_my_hair_brush_hair_u_cm_np2_le_goo_0_cc01
Skipped because ID 

Skipped because ID = 0: 905, /data/t-miyagawa/HMDB51clip200/dive/50_FIRST_DATES_dive_f_cm_np1_ri_bad_35_cc01
Skipped because ID = 0: 906, /data/t-miyagawa/HMDB51clip200/dive/96_Meter_Bungy-Jump__Jauntal_02_dive_f_cm_np1_ri_bad_0_cc01
Skipped because ID = 0: 909, /data/t-miyagawa/HMDB51clip200/dive/BASE_JUMPING_COMPILATION_PART_3_AMAZING!!!!!!!_dive_f_cm_np1_fr_bad_5_cc01
Skipped because ID = 0: 910, /data/t-miyagawa/HMDB51clip200/dive/BASE_JUMPING_COMPILATION_PART_3_AMAZING!!!!!!!_dive_f_cm_np1_le_bad_0_cc01
Skipped because ID = 0: 915, /data/t-miyagawa/HMDB51clip200/dive/BASE_Jumping_Compilation_-_Brilliant_dive_f_cm_np1_le_bad_1_cc01
Skipped because ID = 0: 919, /data/t-miyagawa/HMDB51clip200/dive/BASE_Jumping_Compilation_-_Brilliant_dive_l_cm_np1_ba_bad_2_cc01
Skipped because ID = 0: 922, /data/t-miyagawa/HMDB51clip200/dive/BASE_Jumping_Compilation_-_Brilliant_dive_u_cm_np1_ri_bad_6_cc01
Skipped because ID = 0: 924, /data/t-miyagawa/HMDB51clip200/dive/Basejumping_Compilation_Part_3_

Skipped because ID = 0: 1667, /data/t-miyagawa/HMDB51clip200/fall_floor/THE_PROTECTOR_fall_floor_f_nm_np1_ba_bad_74_cc01
Skipped because ID = 0: 1668, /data/t-miyagawa/HMDB51clip200/fall_floor/THE_PROTECTOR_fall_floor_f_nm_np1_ba_bad_94_cc01
Skipped because ID = 0: 1676, /data/t-miyagawa/HMDB51clip200/fall_floor/THE_PROTECTOR_fall_floor_f_nm_np1_le_bad_67_cc01
Skipped because ID = 0: 1683, /data/t-miyagawa/HMDB51clip200/fall_floor/TheBoondockSaints_fall_floor_h_cm_np1_ri_bad_117_cc01
Skipped because ID = 0: 1685, /data/t-miyagawa/HMDB51clip200/fall_floor/TheBoondockSaints_fall_floor_u_cm_np1_fr_med_108_cc01
Skipped because ID = 0: 1686, /data/t-miyagawa/HMDB51clip200/fall_floor/TheBoondockSaints_fall_floor_u_cm_np1_fr_med_62_cc01
Skipped because ID = 0: 1689, /data/t-miyagawa/HMDB51clip200/fall_floor/TheBoondockSaints_fall_floor_u_nm_np1_fr_med_70_cc01
Skipped because ID = 0: 1692, /data/t-miyagawa/HMDB51clip200/fall_floor/TheLittleShopofHorrors_fall_floor_u_cm_np1_ri_med_10_cc01
Skipp

Skipped because ID = 0: 2493, /data/t-miyagawa/HMDB51clip200/jump/St__Louis_Goalkeeping__Academy_elite_training_jump_f_cm_np1_le_bad_6_cc01
Skipped because ID = 0: 2508, /data/t-miyagawa/HMDB51clip200/jump/St__Louis_Goalkeeping__Academy_elite_training_jump_f_nm_np1_ri_bad_2_cc01
Skipped because ID = 0: 2514, /data/t-miyagawa/HMDB51clip200/jump/THE_PROTECTOR_jump_f_cm_np1_ba_bad_14_cc01
Skipped because ID = 0: 2517, /data/t-miyagawa/HMDB51clip200/jump/THE_PROTECTOR_jump_f_nm_np1_ba_bad_11_cc01
Skipped because ID = 0: 2519, /data/t-miyagawa/HMDB51clip200/jump/THE_PROTECTOR_jump_f_nm_np1_ba_bad_90_cc01
Skipped because ID = 0: 2524, /data/t-miyagawa/HMDB51clip200/jump/TheBoondockSaints_jump_u_cm_np1_ri_bad_103_cc01
Skipped because ID = 0: 2526, /data/t-miyagawa/HMDB51clip200/jump/TheBoondockSaints_jump_u_nm_np1_fr_bad_73_cc01
Skipped because ID = 0: 2527, /data/t-miyagawa/HMDB51clip200/jump/ThePerfectScore_jump_u_cm_np1_fr_bad_4_cc01
Skipped because ID = 0: 2528, /data/t-miyagawa/HMDB51cli

Skipped because ID = 0: 2933, /data/t-miyagawa/HMDB51clip200/laugh/American_History_X_laugh_h_nm_np1_fr_med_14_cc01
Skipped because ID = 0: 2939, /data/t-miyagawa/HMDB51clip200/laugh/Best_Of_Skype_Laughter_Chain_laugh_h_nm_np1_fr_goo_13_cc01
Skipped because ID = 0: 2940, /data/t-miyagawa/HMDB51clip200/laugh/Best_Of_Skype_Laughter_Chain_laugh_h_nm_np1_fr_goo_14_cc01
Skipped because ID = 0: 2945, /data/t-miyagawa/HMDB51clip200/laugh/Best_Of_Skype_Laughter_Chain_laugh_h_nm_np1_fr_med_0_cc01
Skipped because ID = 0: 2946, /data/t-miyagawa/HMDB51clip200/laugh/Best_Of_Skype_Laughter_Chain_laugh_h_nm_np1_fr_med_0_cc02
Skipped because ID = 0: 2952, /data/t-miyagawa/HMDB51clip200/laugh/Best_Of_Skype_Laughter_Chain_laugh_h_nm_np1_fr_med_27_cc01
Skipped because ID = 0: 2977, /data/t-miyagawa/HMDB51clip200/laugh/Can_t_stop_laughing_laugh_u_cm_np1_fr_goo_1_cc01
Skipped because ID = 0: 2978, /data/t-miyagawa/HMDB51clip200/laugh/Can_t_stop_laughing_laugh_u_cm_np1_fr_goo_1_cc02
Skipped because ID = 0: 

Skipped because ID = 0: 3754, /data/t-miyagawa/HMDB51clip200/pushup/DC_Personal_Trainer_-_How_To_Do_A_Push-up_The_RIGHT_Way_pushup_f_nm_np1_ri_goo_1_cc01
Skipped because ID = 0: 3756, /data/t-miyagawa/HMDB51clip200/pushup/Day_19__100_Pushups_a_Day_Challenge!_(_Break_Your_Rules_)_pushup_f_nm_np1_fr_goo_1_cc01
Skipped because ID = 0: 3854, /data/t-miyagawa/HMDB51clip200/ride_bike/Fahrrad_fahren_mit_Albert_ride_bike_f_cm_np1_ba_med_3_cc01
Skipped because ID = 0: 3864, /data/t-miyagawa/HMDB51clip200/ride_bike/Justin_lernt_Fahrrad_fahren_ride_bike_f_cm_np1_fr_med_0_cc01
Skipped because ID = 0: 3901, /data/t-miyagawa/HMDB51clip200/ride_bike/Radfahren_um_die_Aggertalsperre_06_09_2009_ride_bike_f_cm_np2_ri_med_1_cc01
Skipped because ID = 0: 3927, /data/t-miyagawa/HMDB51clip200/ride_horse/ALeapToFreedom_ride_horse_f_nm_np1_ba_med_1_cc01
Skipped because ID = 0: 3937, /data/t-miyagawa/HMDB51clip200/ride_horse/Alifestyle_ride_horse_f_cm_np1_le_med_7_cc01
Skipped because ID = 0: 3938, /data/t-miyag

Skipped because ID = 0: 4570, /data/t-miyagawa/HMDB51clip200/shoot_ball/ReggieMillerTakesonThreeAverageGuysinaShootout_shoot_ball_u_nm_np1_ba_med_2_cc01
Skipped because ID = 0: 4573, /data/t-miyagawa/HMDB51clip200/shoot_ball/ReggieMillerTakesonThreeAverageGuysinaShootout_shoot_ball_u_nm_np1_ba_med_5_cc01
Skipped because ID = 0: 4576, /data/t-miyagawa/HMDB51clip200/shoot_ball/ReggieMillerTakesonThreeAverageGuysinaShootout_shoot_ball_u_nm_np1_ba_med_8_cc01
Skipped because ID = 0: 4608, /data/t-miyagawa/HMDB51clip200/shoot_bow/ArcheryFastShooting_shoot_bow_u_nm_np1_fr_med_0_cc01
Skipped because ID = 0: 4624, /data/t-miyagawa/HMDB51clip200/shoot_bow/ArcheryFastShooting_shoot_bow_u_nm_np1_fr_med_2_cc01
Skipped because ID = 0: 4625, /data/t-miyagawa/HMDB51clip200/shoot_bow/ArcheryFastShooting_shoot_bow_u_nm_np1_fr_med_3_cc01
Skipped because ID = 0: 4626, /data/t-miyagawa/HMDB51clip200/shoot_bow/ArcheryFastShooting_shoot_bow_u_nm_np1_fr_med_4_cc01
Skipped because ID = 0: 4630, /data/t-miyagaw

Skipped because ID = 0: 5439, /data/t-miyagawa/HMDB51clip200/somersault/Purzelbaum_bei_der_Aufstiegsfeier_somersault_f_cm_np1_ri_med_1_cc01
Skipped because ID = 0: 5441, /data/t-miyagawa/HMDB51clip200/somersault/Rush_Hour_4_Turnk_r_somersault_f_cm_np2_le_med_0_cc01
Skipped because ID = 0: 5444, /data/t-miyagawa/HMDB51clip200/somersault/Schwebebalken_Uni_07_08_somersault_f_cm_np1_le_med_1_cc01
Skipped because ID = 0: 5447, /data/t-miyagawa/HMDB51clip200/somersault/So_turnt_man_richtig!!!_somersault_f_cm_np1_fr_med_0_cc01
Skipped because ID = 0: 5450, /data/t-miyagawa/HMDB51clip200/somersault/Sport_LK_Bodenturnk_r_somersault_f_cm_np1_le_bad_3_cc01
Skipped because ID = 0: 5453, /data/t-miyagawa/HMDB51clip200/somersault/Sport_LK_Bodenturnk_r_somersault_f_cm_np3_ba_bad_0_cc01
Skipped because ID = 0: 5455, /data/t-miyagawa/HMDB51clip200/somersault/Turn_pr_fung_glaser_schule_somersault_f_cm_np1_le_med_1_cc01
Skipped because ID = 0: 5458, /data/t-miyagawa/HMDB51clip200/somersault/Turnen_Lisa_e

Skipped because ID = 0: 5863, /data/t-miyagawa/HMDB51clip200/sword/Medieval_Times_Knights_Battle_Sword_Fight_sword_f_cm_np2_le_med_1_cc01
Skipped because ID = 0: 5867, /data/t-miyagawa/HMDB51clip200/sword/Medieval_knight_fight_sword_f_cm_np1_ba_med_5_cc01
Skipped because ID = 0: 5868, /data/t-miyagawa/HMDB51clip200/sword/Medieval_knight_fight_sword_f_cm_np1_le_med_0_cc01
Skipped because ID = 0: 5869, /data/t-miyagawa/HMDB51clip200/sword/Medieval_knight_fight_sword_f_cm_np1_le_med_2_cc01
Skipped because ID = 0: 5875, /data/t-miyagawa/HMDB51clip200/sword/Medieval_sword_fight_sword_f_cm_np2_ba_med_3_cc01
Skipped because ID = 0: 5881, /data/t-miyagawa/HMDB51clip200/sword/Obi-Wan_Kenobi__Anakin_Skywalker_and_Yoda_vs__Count_Dooku_sword_u_cm_np2_ri_bad_0_cc01
Skipped because ID = 0: 5885, /data/t-miyagawa/HMDB51clip200/sword/Pirates_6_sword_f_cm_np2_ba_med_6_cc01
Skipped because ID = 0: 5888, /data/t-miyagawa/HMDB51clip200/sword/Return_of_the_King_4_sword_u_cm_np1_ba_med_1_cc01
Skipped becaus

Skipped because ID = 0: 6171, /data/t-miyagawa/HMDB51clip200/talk/Two_Towers_5_talk_h_nm_np1_fr_goo_5_cc01
Skipped because ID = 0: 6172, /data/t-miyagawa/HMDB51clip200/talk/Two_Towers_7_talk_h_cm_np1_fr_goo_0_cc01
Skipped because ID = 0: 6178, /data/t-miyagawa/HMDB51clip200/talk/jonhs_netfreemovies_holygrail_talk_h_nm_np1_fr_med_1_cc01
Skipped because ID = 0: 6179, /data/t-miyagawa/HMDB51clip200/talk/jonhs_netfreemovies_holygrail_talk_h_nm_np1_fr_med_1_cc02
Skipped because ID = 0: 6186, /data/t-miyagawa/HMDB51clip200/talk/jonhs_netfreemovies_holygrail_talk_u_nm_np1_le_med_17_cc01
Skipped because ID = 0: 6187, /data/t-miyagawa/HMDB51clip200/talk/jonhs_netfreemovies_holygrail_talk_u_nm_np1_le_med_17_cc02
Skipped because ID = 0: 6188, /data/t-miyagawa/HMDB51clip200/talk/jonhs_netfreemovies_holygrail_talk_u_nm_np1_le_med_17_cc03
Skipped because ID = 0: 6194, /data/t-miyagawa/HMDB51clip200/talk/prelinger_LetsPlay1949_talk_h_nm_np1_fr_goo_17_cc01
Skipped because ID = 0: 6196, /data/t-miyagaw

Skipped because ID = 0: 6730, /data/t-miyagawa/HMDB51clip200/walk/EVOLUTION_walk_f_nm_np1_fr_med_24_cc01
Skipped because ID = 0: 6731, /data/t-miyagawa/HMDB51clip200/walk/EVOLUTION_walk_f_nm_np1_ri_med_19_cc01
Skipped because ID = 0: 6732, /data/t-miyagawa/HMDB51clip200/walk/Eurotrip_walk_f_nm_np1_fr_med_5_cc01
Skipped because ID = 0: 6734, /data/t-miyagawa/HMDB51clip200/walk/Eurotrip_walk_u_cm_np1_fr_med_6_cc01
Skipped because ID = 0: 6735, /data/t-miyagawa/HMDB51clip200/walk/EverythingisIlluminated_walk_f_nm_np1_fr_med_1_cc01
Skipped because ID = 0: 6737, /data/t-miyagawa/HMDB51clip200/walk/Fellowship_2_walk_f_nm_np2_ri_bad_2_cc01
Skipped because ID = 0: 6738, /data/t-miyagawa/HMDB51clip200/walk/Fellowship_2_walk_u_nm_np2_fr_bad_3_cc01
Skipped because ID = 0: 6739, /data/t-miyagawa/HMDB51clip200/walk/Fellowship_6_walk_f_cm_np1_fr_med_11_cc01
Skipped because ID = 0: 6740, /data/t-miyagawa/HMDB51clip200/walk/Finding_Forrester_1_walk_f_nm_np1_ba_med_5_cc01
Skipped because ID = 0: 6741, 

In [4]:
# Report how many clips were assigned to each subset.
_numtr, _numva, _numte = (len(_paths) for _paths in (trclippaths, vaclippaths, teclippaths))
print("Nums of clips in tr:va:te = {} : {} : {}".format(_numtr, _numva, _numte))

# Clips that were assigned to no subset at all.
_numskipped = len(allclippaths) - (_numtr + _numva + _numte)
print("Num skipped clips: {}".format(_numskipped))

Nums of clips in tr:va:te = 3565 : 328 : 1697
Num skipped clips: 1660


In [5]:
# Get image paths of tr/va/te
# Only the `glob` function is imported at the top of the file, so the escaping
# helper is imported here.
from glob import escape as _glob_escape


def _list_frames(clippaths):
    """Return the sorted paths of all PNG frames under the given clip directories.

    Args:
        clippaths: A list of strings. Paths to clip directories.

    Returns:
        A sorted list of strings. Paths to every '*.png' directly inside
        each directory of `clippaths`.
    """
    frames = []
    for clipdir in clippaths:
        # Clip names can contain glob metacharacters (e.g., '[_NEW_AUDIO_]');
        # escape the directory part so that it is matched literally. Unlike a
        # hand-written bracket replacement this also covers '*' and '?'.
        frames.extend(glob(_glob_escape(clipdir) + "/*.png"))
    return sorted(frames)


trimgpaths = _list_frames(trclippaths)
vaimgpaths = _list_frames(vaclippaths)
teimgpaths = _list_frames(teclippaths)

# Every clip must contain exactly `duration` frames.
assert len(trimgpaths) == len(trclippaths) * duration
assert len(vaimgpaths) == len(vaclippaths) * duration
assert len(teimgpaths) == len(teclippaths) * duration

print("Num of frames in tr:va:te = {}k : {}k : {}k".format(len(trimgpaths)/1000, len(vaimgpaths)/1000, len(teimgpaths)/1000))

# """
# Returns:
#     trimgpaths: A list with len = num of all frames in the training dataset.
#         E.g.,
#             ['DATADIR/HMDB51clip79/brush_hair/April_09_brush_hair_u_nm_np1_ba_goo_0_cc01/0001.png',
#              'DATADIR/HMDB51clip79/brush_hair/April_09_brush_hair_u_nm_np1_ba_goo_0_cc01/0002.png',
#              'DATADIR/HMDB51clip79/brush_hair/April_09_brush_hair_u_nm_np1_ba_goo_0_cc01/0003.png',
#                ...]
#     vaimgpaths: A list with len = num of all frames in the validation dataset.
#     teimgpaths: A list with len = num of all frames in the test dataset.
# """

Num of frames in tr:va:te = 713.0k : 65.6k : 339.4k


## Copy Image Data

In [6]:
# Copy training images
# Each frame is copied to newtrdir/<class>/<clip>/<frame>, i.e., the last three
# components of the source path are kept under the new root.
_lastdir = None
for _c, oldpath in enumerate(trimgpaths):
    if (_c + 1) % 5000 == 0:
        print("Train set iter {} / {}: {}".format(_c + 1, len(trimgpaths), oldpath))

    # oldpath: 'DATADIR/HMDB51clip79/wave/prideandprejudice1_wave_f_nm_np1_ri_med_14_cc02/0079.png'
    #     classname: 'wave'
    #     clipname: 'prideandprejudice1_wave_f_nm_np1_ri_med_14_cc02'
    #     imgname: '0079.png'
    _, classname, clipname, imgname = oldpath.rsplit("/", 3)

    newdir = "{}/{}/{}".format(newtrdir, classname, clipname)
    # Frames of one clip are adjacent in the sorted list, so the directory only
    # needs to be created when it changes. exist_ok=True keeps re-runs safe.
    if newdir != _lastdir:
        os.makedirs(newdir, exist_ok=True)
        _lastdir = newdir

    newpath = "{}/{}".format(newdir, imgname)
    shutil.copy(oldpath, newpath)

Train set iter 5000 / 713000: /data/t-miyagawa/HMDB51clip200/brush_hair/Brunette_Foxyanya_ultra_silky_long_hair_brushing_hairjob_brush_hair_h_cm_np2_ri_goo_6_cc01/0200.png
Train set iter 10000 / 713000: /data/t-miyagawa/HMDB51clip200/brush_hair/Brushing_my_waist_lenth_hair_brush_hair_u_nm_np1_ba_goo_0_cc02/0200.png
Train set iter 15000 / 713000: /data/t-miyagawa/HMDB51clip200/brush_hair/My_Hair_Routine_brush_hair_h_nm_np1_le_goo_0_cc02/0200.png
Train set iter 20000 / 713000: /data/t-miyagawa/HMDB51clip200/brush_hair/brushing_hair_2_brush_hair_h_nm_np1_ba_med_0_cc01/0200.png
Train set iter 25000 / 713000: /data/t-miyagawa/HMDB51clip200/brush_hair/indianrapunzels_com---silky_long_hair_brushing_brush_hair_u_cm_np2_ri_goo_1_cc01/0200.png
Train set iter 30000 / 713000: /data/t-miyagawa/HMDB51clip200/cartwheel/Cartwheel_Contest_cartwheel_f_cm_np1_fr_med_2_cc01/0200.png
Train set iter 35000 / 713000: /data/t-miyagawa/HMDB51clip200/cartwheel/Jessica_Heinz_-_Leverkusen_Cup_2006__floor_routine_c

Train set iter 305000 / 713000: /data/t-miyagawa/HMDB51clip200/kiss/BestKisses_kiss_h_nm_np2_le_med_3_cc01/0200.png
Train set iter 310000 / 713000: /data/t-miyagawa/HMDB51clip200/kiss/Best_Kisses_in_TV_Movies_1_3_kiss_u_nm_np2_le_goo_9_cc02/0200.png
Train set iter 315000 / 713000: /data/t-miyagawa/HMDB51clip200/kiss/Hangingbyamomentfavoritekisses_kiss_u_cm_np2_le_goo_8_cc01/0200.png
Train set iter 320000 / 713000: /data/t-miyagawa/HMDB51clip200/kiss/TVs_Best_Kisses_Top_50-_(40_to_31)_kiss_u_nm_np2_le_goo_6_cc02/0200.png
Train set iter 325000 / 713000: /data/t-miyagawa/HMDB51clip200/laugh/Best_Of_Skype_Laughter_Chain_laugh_h_nm_np2_fr_med_19_cc01/0100.png
Train set iter 330000 / 713000: /data/t-miyagawa/HMDB51clip200/laugh/Crazy_Drunk_Girl_Laugh_laugh_h_cm_np1_fr_goo_1_cc01/0200.png
Train set iter 335000 / 713000: /data/t-miyagawa/HMDB51clip200/laugh/Skype_Laughter_Chain_reaction_1_laugh_f_nm_np3_fr_med_2_cc02/0200.png
Train set iter 340000 / 713000: /data/t-miyagawa/HMDB51clip200/pick/

Train set iter 615000 / 713000: /data/t-miyagawa/HMDB51clip200/sword/Baddest_Fight_Scenes_EVER!_-_Kill_Bill__Vol___1_-_vs__Crazy_88_s_sword_f_nm_np6_ri_med_1_cc01/0200.png
Train set iter 620000 / 713000: /data/t-miyagawa/HMDB51clip200/sword/Fechten_mit_dem_langen_Schwert_sword_u_cm_np2_fr_med_4_cc01/0200.png
Train set iter 625000 / 713000: /data/t-miyagawa/HMDB51clip200/sword/Stage_Combat_4_-_Sword_Fight_sword_f_cm_np2_le_bad_1_cc01/0200.png
Train set iter 630000 / 713000: /data/t-miyagawa/HMDB51clip200/sword_exercise/Eishin-Ryu_Iaido_sword_exercise_f_cm_np1_ri_bad_7_cc01/0200.png
Train set iter 635000 / 713000: /data/t-miyagawa/HMDB51clip200/sword_exercise/Iaido_13_katas_sword_exercise_f_nm_np1_fr_bad_6_cc01/0200.png
Train set iter 640000 / 713000: /data/t-miyagawa/HMDB51clip200/sword_exercise/Sword_and_Targe_Solo_Exercises_sword_exercise_f_cm_np1_ri_med_7_cc01/0200.png
Train set iter 645000 / 713000: /data/t-miyagawa/HMDB51clip200/talk/Fellowship_6_talk_h_cm_np1_fr_goo_7_cc01/0200.pn

In [7]:
# Copy validation images
for _c, oldpath in enumerate(vaimgpaths):
    if (_c + 1) % 5000 == 0:
        print("Validation set iter {} / {}: {}".format(_c + 1, len(vaimgpaths), oldpath))

        # oldpath: 'DATADIR/HMDB51clip79/wave/prideandprejudice1_wave_f_nm_np1_ri_med_14_cc02/0079.png'
    imgname = oldpath[oldpath.rfind("/") + 1 :]
        # '0079.png'
    oldpath2 = oldpath[: oldpath.rfind("/")]
        # 'DATADIR/HMDB51clip79/wave/prideandprejudice1_wave_f_nm_np1_ri_med_14_cc02'
    clipname = oldpath2[oldpath2.rfind("/") + 1:]
        # 'prideandprejudice1_wave_f_nm_np1_ri_med_14_cc02'
    oldpath3 = oldpath2[:oldpath2.rfind("/")]
        # 'DATADIR/HMDB51clip79/wave'
    classname = oldpath3[oldpath3.rfind("/") + 1:]
        # 'wave'
    
    newdir = newvadir + "/{}/{}".format(classname, clipname)
    if not os.path.exists(newdir):
        os.makedirs(newdir)

    newpath = newdir + "/{}".format(imgname)
    assert newpath[newpath[:newpath[:newpath.rfind("/")].rfind("/")].rfind("/"):] == oldpath[oldpath[:oldpath[:oldpath.rfind("/")].rfind("/")].rfind("/"):],\
    "{}\n{}".format(newpath[newpath[:newpath[:newpath.rfind("/")].rfind("/")].rfind("/"):], oldpath[oldpath[:oldpath[:oldpath.rfind("/")].rfind("/")].rfind("/"):])
    shutil.copy(oldpath, newpath)  

Validation set iter 5000 / 65600: /data/t-miyagawa/HMDB51clip200/climb_stairs/Treppensteigen_climb_stairs_l_cm_np1_ba_med_1_cc01/0200.png
Validation set iter 10000 / 65600: /data/t-miyagawa/HMDB51clip200/eat/WeddingCrashers_eat_h_cm_np1_fr_med_4_cc01/0200.png
Validation set iter 15000 / 65600: /data/t-miyagawa/HMDB51clip200/golf/rory_hie_high_school__now_USC_golf_f_nm_np1_ri_goo_0_cc01/0200.png
Validation set iter 20000 / 65600: /data/t-miyagawa/HMDB51clip200/kick/kick__bruce_lee_v_s_japanese_kick_f_cm_np1_le_bad_19_cc01/0200.png
Validation set iter 25000 / 65600: /data/t-miyagawa/HMDB51clip200/pick/pick_up_trash_says_yeti_pick_f_cm_np1_le_med_2_cc01/0200.png
Validation set iter 30000 / 65600: /data/t-miyagawa/HMDB51clip200/punch/kick__bruce_lee_v_s_japanese_punch_u_cm_np1_ri_bad_2_cc01/0200.png
Validation set iter 35000 / 65600: /data/t-miyagawa/HMDB51clip200/run/metacafe_coolsoccer_run_f_cm_np10_le_bad_2_cc01/0200.png
Validation set iter 40000 / 65600: /data/t-miyagawa/HMDB51clip200/

In [8]:
# Copy test images
for _c, oldpath in enumerate(teimgpaths):
    if (_c + 1) % 5000 == 0:
        print("Test set iter {} / {}: {}".format(_c + 1, len(teimgpaths), oldpath))

        # oldpath: 'DATADIR/HMDB51clip79/wave/prideandprejudice1_wave_f_nm_np1_ri_med_14_cc02/0079.png'
    imgname = oldpath[oldpath.rfind("/") + 1 :]
        # '0079.png'
    oldpath2 = oldpath[: oldpath.rfind("/")]
        # 'DATADIR/HMDB51clip79/wave/prideandprejudice1_wave_f_nm_np1_ri_med_14_cc02'
    clipname = oldpath2[oldpath2.rfind("/") + 1:]
        # 'prideandprejudice1_wave_f_nm_np1_ri_med_14_cc02'
    oldpath3 = oldpath2[:oldpath2.rfind("/")]
        # 'DATADIR/HMDB51clip79/wave'
    classname = oldpath3[oldpath3.rfind("/") + 1:]
        # 'wave'
    
    newdir = newtedir + "/{}/{}".format(classname, clipname)
    if not os.path.exists(newdir):
        os.makedirs(newdir)

    newpath = newdir + "/{}".format(imgname)
    assert newpath[newpath[:newpath[:newpath.rfind("/")].rfind("/")].rfind("/"):] == oldpath[oldpath[:oldpath[:oldpath.rfind("/")].rfind("/")].rfind("/"):],\
    "{}\n{}".format(newpath[newpath[:newpath[:newpath.rfind("/")].rfind("/")].rfind("/"):], oldpath[oldpath[:oldpath[:oldpath.rfind("/")].rfind("/")].rfind("/"):])
    shutil.copy(oldpath, newpath)  

Test set iter 5000 / 339400: /data/t-miyagawa/HMDB51clip200/brush_hair/Chanta!____THAT_IS_long_blonde_hair!_brush_hair_u_cm_np1_fr_goo_2_cc01/0200.png
Test set iter 10000 / 339400: /data/t-miyagawa/HMDB51clip200/brush_hair/brushing_jrs_hair_brush_hair_u_cm_np2_le_goo_1_cc02/0200.png
Test set iter 15000 / 339400: /data/t-miyagawa/HMDB51clip200/cartwheel/Parada_de_mao_cartwheel_f_cm_np1_fr_med_3_cc01/0200.png
Test set iter 20000 / 339400: /data/t-miyagawa/HMDB51clip200/catch/Goalkeeper_Training_Day_#_2_catch_f_cm_np1_fr_bad_1_cc01/0200.png
Test set iter 25000 / 339400: /data/t-miyagawa/HMDB51clip200/chew/How_to_Do_Basic_Mime_Skills_-_How_to_Mime_Chewing_Gum_chew_h_cm_np1_fr_med_0_cc01/0200.png
Test set iter 30000 / 339400: /data/t-miyagawa/HMDB51clip200/clap/103_years_old_japanese_woman__Nao_is_clapping_with_piano_music_by_beethoven_clap_u_nm_np1_fr_med_0_cc01/0200.png
Test set iter 35000 / 339400: /data/t-miyagawa/HMDB51clip200/clap/The_Slow_Clap_clap_u_nm_np1_fr_med_14_cc01/0200.png
Te

Test set iter 305000 / 339400: /data/t-miyagawa/HMDB51clip200/sword_exercise/samurai_training_sword_exercise_f_cm_np1_fr_med_3_cc02/0200.png
Test set iter 310000 / 339400: /data/t-miyagawa/HMDB51clip200/talk/Prelinger_FamilyLi1949_talk_h_nm_np1_fr_goo_2_cc01/0200.png
Test set iter 315000 / 339400: /data/t-miyagawa/HMDB51clip200/throw/KnifeThrowingJackDaggerDemoReel_throw_f_nm_np1_fr_med_2_cc03/0200.png
Test set iter 320000 / 339400: /data/t-miyagawa/HMDB51clip200/turn/A_Beautiful_Mind_1_turn_h_nm_np1_ri_goo_10_cc01/0200.png
Test set iter 325000 / 339400: /data/t-miyagawa/HMDB51clip200/turn/Veoh_Alpha_Dog_2_turn_u_nm_np2_ri_med_18_cc01/0200.png
Test set iter 330000 / 339400: /data/t-miyagawa/HMDB51clip200/walk/Panic_in_the_Streets_walk_u_nm_np1_fr_med_4_cc01/0200.png
Test set iter 335000 / 339400: /data/t-miyagawa/HMDB51clip200/wave/Bush_Wave_vs__Obama_Wave_and_Which_Commander-in-chief_Salutes_the_Best__You_Decide_wave_u_cm_np1_fr_med_3_cc01/0200.png
