In [1]:
import glob, json, os, sys, time
from jsonmerge import merge
from shutil import copyfile

### Create Train and Validate Folders

In [2]:
# Minute-resolution local timestamp (YYYYMMDDHHMM) identifying this run;
# strftime() formats the current local time when no time tuple is passed.
now = time.strftime("%Y%m%d%H%M")

print('Training Dataset:', now, sep='\n')

Training Dataset:
201807250642


In [3]:
# Mount point of the file system; every path below is derived from it.
file_system = '/mnt'
storage_root = os.path.join(file_system, 'ntfisheriesstoreeastdev')

# Folder tree that is searched for labeled frames.
input_frames_path = os.path.join(storage_root, 'labeledframes')

# Parent folder of all generated training datasets.
model_training_path = os.path.join(storage_root, 'modeltraining')

# Each run gets its own folder named after the `now` timestamp, holding a
# train/ and a validate/ sub-folder.
run_path = os.path.join(model_training_path, now)
output_model_training_train_path = os.path.join(run_path, 'train')
output_model_training_validate_path = os.path.join(run_path, 'validate')

# Merged annotation files, one per split, stored inside the split's folder.
output_model_training_train_json = os.path.join(
    output_model_training_train_path, 'via_region_data_train.json')
output_model_training_validate_json = os.path.join(
    output_model_training_validate_path, 'via_region_data_validate.json')

In [4]:
# Create the train/ and validate/ output folders (including missing parents).
# exist_ok=True keeps this cell re-runnable: `now` only has minute resolution,
# so executing the cell twice within the same minute would otherwise raise
# FileExistsError.
os.makedirs(output_model_training_train_path, exist_ok=True)
os.makedirs(output_model_training_validate_path, exist_ok=True)

In [5]:
def print_list(l):
    """Print each element of the iterable ``l`` on its own line."""
    for line in map(str, l):
        print(line)

In [6]:
folders = []

# Collect leaf directories (those without sub-directories) that contain at
# least one *.json file.
for dirpath, dirnames, filenames in os.walk(input_frames_path):
    if dirnames:
        continue
    # any() stops at the first match instead of building the full list of paths.
    if any(glob.iglob(os.path.join(dirpath, '*.json'))):
        folders.append(dirpath)

# os.walk yields directories in an unspecified, file-system dependent order.
# The train/validate split is taken by list position, so sort the folders to
# make that split reproducible from run to run.
folders.sort()

print('Folders containing labeled frames:')
print_list(folders)

Folders containing labeled frames:
/mnt/ntfisheriesstoreeastdev/labeledframes/Bathurst/Bathurst-20160415/BICPB3-20160417-1.MP4
/mnt/ntfisheriesstoreeastdev/labeledframes/Bathurst/Bathurst-20161010/BICPB1-20161011-3.MP4
/mnt/ntfisheriesstoreeastdev/labeledframes/Bathurst/Bathurst-20161010/BICPB2-20161011-1.MP4
/mnt/ntfisheriesstoreeastdev/labeledframes/Bathurst/Bathurst-20161010/BICPB2-20161011-3.MP4
/mnt/ntfisheriesstoreeastdev/labeledframes/Bathurst/Bathurst-20161010/BICPB3-20161011-3.MP4
/mnt/ntfisheriesstoreeastdev/labeledframes/Bathurst/Bathurst-20161010/BIH1B3-20161010-1.MP4
/mnt/ntfisheriesstoreeastdev/labeledframes/Bathurst/Bathurst-20161010/BIH2B3-20161010-2.MP4
/mnt/ntfisheriesstoreeastdev/labeledframes/Bathurst/Bathurst-20161010/BIH2B3-20161010-3.MP4
/mnt/ntfisheriesstoreeastdev/labeledframes/Bathurst/Bathurst-20161010/BOH3B2-20161011-2.MP4
/mnt/ntfisheriesstoreeastdev/labeledframes/Lorna/Lorna-20170705/LIS3B2-20170706-3.MP4
/mnt/ntfisheriesstoreeastdev/labeledframes/Lorna/Lo

### Train and Validate Sets

In [7]:
# Split the folder list by position: the first 80% (rounded down) is used for
# training, the remainder for validation.
# NOTE(review): the split is not shuffled, so the validation set is always the
# tail of `folders` - confirm this is intended.
folders_count = len(folders)
folders_train_count = int(folders_count * .8)

folders_train, folders_validate = (
    folders[:folders_train_count],
    folders[folders_train_count:],
)

for heading, selection in (
    ('Folders being used to create Training dataset', folders_train),
    ('Folders being used to create Validation dataset', folders_validate),
):
    print(heading)
    print_list(selection)

Folders being used to create Training dataset
/mnt/ntfisheriesstoreeastdev/labeledframes/Bathurst/Bathurst-20160415/BICPB3-20160417-1.MP4
/mnt/ntfisheriesstoreeastdev/labeledframes/Bathurst/Bathurst-20161010/BICPB1-20161011-3.MP4
/mnt/ntfisheriesstoreeastdev/labeledframes/Bathurst/Bathurst-20161010/BICPB2-20161011-1.MP4
/mnt/ntfisheriesstoreeastdev/labeledframes/Bathurst/Bathurst-20161010/BICPB2-20161011-3.MP4
/mnt/ntfisheriesstoreeastdev/labeledframes/Bathurst/Bathurst-20161010/BICPB3-20161011-3.MP4
/mnt/ntfisheriesstoreeastdev/labeledframes/Bathurst/Bathurst-20161010/BIH1B3-20161010-1.MP4
/mnt/ntfisheriesstoreeastdev/labeledframes/Bathurst/Bathurst-20161010/BIH2B3-20161010-2.MP4
/mnt/ntfisheriesstoreeastdev/labeledframes/Bathurst/Bathurst-20161010/BIH2B3-20161010-3.MP4
/mnt/ntfisheriesstoreeastdev/labeledframes/Bathurst/Bathurst-20161010/BOH3B2-20161011-2.MP4
/mnt/ntfisheriesstoreeastdev/labeledframes/Lorna/Lorna-20170705/LIS3B2-20170706-3.MP4
Folders being used to create Validation 

### Train and Validate JSON

In [8]:
def load_json(file_path):
    """Parse the JSON file at ``file_path`` and return the decoded object.

    Returns ``None`` without opening anything when ``file_path`` is falsy
    (for example ``None`` or an empty string).
    """
    if not file_path:
        return None

    with open(file_path, 'r') as handle:
        return json.load(handle)

In [9]:
def merge_json(folders):
    """Merge every ``*.json`` file found directly inside ``folders`` into one object.

    Args:
        folders: iterable of directory paths; each one is searched
            (non-recursively) for files matching ``*.json``.

    Returns:
        The result of folding all loaded documents together with
        ``jsonmerge.merge``, starting from an empty dict. The empty dict is
        returned unchanged when no JSON file is found.
    """
    merged_json = {}

    for folder in folders:
        for file_path in glob.iglob(os.path.join(folder, '*.json')):
            # The accumulator starts as {} and is never None, so the first
            # file needs no special-casing: every file is merged the same way.
            merged_json = merge(merged_json, load_json(file_path))

    return merged_json

In [10]:
# Merge the JSON files of all training folders into a single object.
json_train = merge_json(folders_train)

In [11]:
# Merge the JSON files of all validation folders into a single object.
json_validate = merge_json(folders_validate)

In [12]:
# Write the merged training JSON to via_region_data_train.json in the train folder.
with open(output_model_training_train_json, 'w') as f:
    json.dump(json_train, f)

In [13]:
# Write the merged validation JSON to via_region_data_validate.json in the validate folder.
with open(output_model_training_validate_json, 'w') as f:
    json.dump(json_validate, f)

### Train and Validate Data

In [14]:
def copy_frames(folders, output_folder_path):
    """Copy the files of every folder in ``folders`` into ``output_folder_path``.

    Entries whose names end in ``.csv``, ``.db`` or ``.json`` are skipped;
    everything else is copied under its original file name. A progress line
    is printed per folder and a final line once all folders are done.
    """
    skipped_suffixes = (".csv", ".db", ".json")

    for folder in folders:
        print('Copying folder {0}'.format(folder))

        for file_name in os.listdir(folder):
            if file_name.endswith(skipped_suffixes):
                continue
            # NOTE(review): all folders are copied into one flat destination,
            # so two source files with the same name would overwrite each
            # other - confirm file names are unique across folders.
            copyfile(
                os.path.join(folder, file_name),
                os.path.join(output_folder_path, file_name),
            )

    print('Finished copying...')

In [15]:
# Copy the files of the training folders into the train output folder.
copy_frames(folders_train, output_model_training_train_path)

Copying folder /mnt/ntfisheriesstoreeastdev/labeledframes/Bathurst/Bathurst-20160415/BICPB3-20160417-1.MP4
Copying folder /mnt/ntfisheriesstoreeastdev/labeledframes/Bathurst/Bathurst-20161010/BICPB1-20161011-3.MP4
Copying folder /mnt/ntfisheriesstoreeastdev/labeledframes/Bathurst/Bathurst-20161010/BICPB2-20161011-1.MP4
Copying folder /mnt/ntfisheriesstoreeastdev/labeledframes/Bathurst/Bathurst-20161010/BICPB2-20161011-3.MP4
Copying folder /mnt/ntfisheriesstoreeastdev/labeledframes/Bathurst/Bathurst-20161010/BICPB3-20161011-3.MP4
Copying folder /mnt/ntfisheriesstoreeastdev/labeledframes/Bathurst/Bathurst-20161010/BIH1B3-20161010-1.MP4
Copying folder /mnt/ntfisheriesstoreeastdev/labeledframes/Bathurst/Bathurst-20161010/BIH2B3-20161010-2.MP4
Copying folder /mnt/ntfisheriesstoreeastdev/labeledframes/Bathurst/Bathurst-20161010/BIH2B3-20161010-3.MP4
Copying folder /mnt/ntfisheriesstoreeastdev/labeledframes/Bathurst/Bathurst-20161010/BOH3B2-20161011-2.MP4
Copying folder /mnt/ntfisheriesstoree

In [16]:
# Copy the files of the validation folders into the validate output folder.
copy_frames(folders_validate, output_model_training_validate_path)

Copying folder /mnt/ntfisheriesstoreeastdev/labeledframes/Lorna/Lorna-20170705/LOS5B2-20170705-3.MP4
Copying folder /mnt/ntfisheriesstoreeastdev/labeledframes/Lorna/Lorna-20170817/LIS2B4-20170817-1.MP4
Copying folder /mnt/ntfisheriesstoreeastdev/labeledframes/Lorna/Lorna-20171030/LIS2B3-20171030-4.MP4
Finished copying...
