In [123]:
import json
import re

class fubot_config():
    """Generator configuration parsed from a JSON file.

    The JSON document is stored in ``params``. The lists found under
    ``params['training']['input_features']`` and
    ``params['training']['output_features']`` are expanded into
    ``feat_group`` objects, a flat list of column headers and the total
    number of feature elements.
    """

    class feat_group():
        """One feature entry of the form ``[name, bone_name, [element, ...]]``.

        Every element string is matched against ``<type>[<components>]_<space>``
        (e.g. ``pos[xyz]_w``); each character between the brackets becomes one
        ``feat_type`` stored in ``elements``. An empty ``bone_name`` falls back
        to ``name``.
        """

        # Pattern for one element string; compiled once and shared by all groups.
        rgx_el = re.compile(r'(.+)\[(.+)\]_(.)')

        class feat_type():
            """A single scalar component of a feature.

            ``S`` (space), ``T`` (type) and ``C`` (component) hold the numeric
            codes from the lookup tables below; a code of 0 means the key was
            not recognised. ``C_id`` is the index used to read the component
            out of a value sequence.
            """

            lut_C = {'w': 1, 'x':2, 'y':3, 'z':4}
            lut_C_inv = list(lut_C.keys())
            lut_S = {'l': 1, 'w':2}
            lut_T = {'pos': 1, 'qrot':2, 'rot':3}

            def __init__(self, S, T, C):
                # Unknown keys map to 0 so that `valid` reports the problem
                # instead of the constructor raising KeyError.
                self.S = self.lut_S.get(S, 0)
                self.T = self.lut_T.get(T, 0)
                self.C = self.lut_C.get(C, 0)
                # 'qrot' values are indexed w,x,y,z (offset 1); every other
                # type is indexed x,y,z (offset 2).
                self.C_id = self.C - (1 if self.T == 2 else 2)
                # A negative index (e.g. 'w' on a non-quaternion type) would
                # silently read from the end of the value sequence.
                self.valid = (self.S * self.T * self.C) > 0 and self.C_id >= 0

            def getValue(self, v_local, v_world):
                """Return this component from ``v_local`` (space 'l') or ``v_world``."""
                return v_local[self.C_id] if self.S == 1 else v_world[self.C_id]

            def getPostFix(self):
                """Return the header suffix: ``_p<c>`` for 'pos', ``_r<c>`` otherwise."""
                pt = 'p' if self.T == 1 else 'r'
                pc = self.lut_C_inv[self.C - 1]

                return f'_{pt}{pc}'

        def __init__(self, feature_config):
            self.name = None
            self.bone_name = None
            self.elements = []
            self.size = 0
            self.__parse__(feature_config)

        def __parse__(self, feature_config):
            """Fill ``name``, ``bone_name``, ``elements`` and ``size`` from ``feature_config``.

            Malformed element strings and unknown space/type/component keys
            are reported with ``print`` and skipped.
            """
            self.name = feature_config[0]
            self.bone_name = feature_config[1] if len(feature_config[1]) > 0 else self.name

            #Feature Elements
            for f_el in feature_config[2]:
                m = self.rgx_el.match(f_el)
                if m is None:
                    print(f'Invalid feature element found. ({self.name} >> {f_el})')
                    continue

                g = m.groups()
                for comp in g[1]:
                    ft = self.feat_type(g[2], g[0], comp)
                    if not ft.valid:
                        print(f'Invalid feature element found. ({self.name} >> {f_el} [{g[2]}, {g[0]}, {comp}])')
                        continue

                    self.elements.append(ft)

            self.size = len(self.elements)

    def __init__(self, config_path):
        # Lists of feat_group objects, their flat column headers and the
        # total number of feature elements, for outputs and inputs.
        self.features_output: list = []
        self.features_output_header = []
        self.features_output_size = 0

        self.features_input: list = []
        self.features_input_header = []
        self.features_input_size = 0
        self.params = None

        self.__loadConfig__(config_path)

    def _append_features(self, features, groups, header):
        """Parse ``features`` into ``groups``/``header`` in place; return the element count added."""
        added = 0
        if features is None:
            return added

        for feat in features:
            fg = self.feat_group(feat)
            groups.append(fg)
            added += fg.size
            #Create Header
            header.extend(f'{fg.bone_name}{ft.getPostFix()}' for ft in fg.elements)

        return added

    def __loadConfig__(self, config_path):
        """Read the JSON file at ``config_path`` and build the input/output feature tables."""
        with open(config_path, encoding='utf-8') as f:
            self.params = json.load(f)

        #Training Features
        #input
        self.features_input_size += self._append_features(
            self.params['training']['input_features'],
            self.features_input,
            self.features_input_header,
        )

        #output
        self.features_output_size += self._append_features(
            self.params['training']['output_features'],
            self.features_output,
            self.features_output_header,
        )

# Notebook cell driver: load the generator configuration from a path relative
# to the working directory and print the parsed JSON parameters.
config = fubot_config('../samples/generator_config.json')
print(config.params)

{'fubot_root': '../../', 'output_root': '../Samples/output', 'output_name': 'dataset_6p', 'split': {'categories': ['train', 'val', 'test'], 'ratios': [0.65, 0.25, 0.1], 'min_chunk_size': 30, 'max_chunk_size': 300, 'seed': -1}, 'training': {'input_features': [['Head', '', ['pos[xyz]_w', 'qrot[wxyz]_w']], ['Hips', '', ['pos[xyz]_w', 'qrot[wxyz]_w']], ['LeftHand', '', ['pos[xyz]_w', 'qrot[wxyz]_w']], ['RightHand', '', ['pos[xyz]_w', 'qrot[wxyz]_w']], ['LeftToes', '', ['pos[xyz]_w', 'qrot[wxyz]_w']], ['RightToes', '', ['pos[xyz]_w', 'qrot[wxyz]_w']]], 'output_features': [['Hips', '', ['pos[xyz]_l', 'qrot[wxyz]_l']], ['Spine', '', ['qrot[wxyz]_l']], ['Chest', '', ['qrot[wxyz]_l']], ['UpperChest', '', ['qrot[wxyz]_l']], ['Neck', '', ['qrot[wxyz]_l']], ['Head', '', ['qrot[wxyz]_l']], ['LeftShoulder', '', ['qrot[wxyz]_l']], ['LeftUpperArm', '', ['qrot[wxyz]_l']], ['LeftLowerArm', '', ['qrot[wxyz]_l']], ['LeftHand', '', ['qrot[wxyz]_l']], ['RightShoulder', '', ['qrot[wxyz]_l']], ['RightUpperArm

In [140]:
import os
import json
import random
import datetime

def calculate_frames_and_chunks(num_frames, split, minC, maxC):
    """Return ``(frames, chunks)`` for one split category.

    ``frames`` is ``num_frames * split`` truncated to an int. ``chunks`` is
    the number of whole ``maxC``-sized chunks that fit into ``frames``, plus
    one extra chunk when the leftover is strictly greater than ``minC``.
    """
    frame_count = int(num_frames * split)
    whole_chunks = int(frame_count / maxC)
    leftover = frame_count - (whole_chunks * maxC)

    # Only a leftover longer than the minimum chunk size earns its own chunk.
    chunk_count = whole_chunks + 1 if leftover > minC else whole_chunks
    return frame_count, chunk_count

def chunk_frames(num_frames, split_params):
    """Plan how ``num_frames`` frames are cut into per-category chunks.

    Reads ``min_chunk_size``, ``max_chunk_size``, ``split_ratios`` and
    ``split_categories`` from ``split_params``. For every category the frame
    and chunk counts come from ``calculate_frames_and_chunks``; the chunk
    order across categories is shuffled with the module-level ``random``
    state.

    Returns ``(split_order, split_params)``. ``split_order`` is a list of
    ``'<category>_<chunk size>'`` strings in emission order. ``split_params``
    is the same dict, updated in place with ``sp_frames``, ``sp_chunks``,
    ``tot_frames`` and ``tot_chunks``. The frame counts describe the frames
    that are actually assigned to chunks: a tail that is not longer than
    ``min_chunk_size`` gets no chunk and is therefore not counted.
    """
    #read params
    min_chunk_size = split_params['min_chunk_size']
    max_chunk_size = split_params['max_chunk_size']
    ratios = split_params['split_ratios']
    categories = split_params['split_categories']

    sp_frames = []
    sp_chunks = []
    rnd_distribution = []
    for cat_id in range(len(categories)):
        frames, chunks = calculate_frames_and_chunks(
            num_frames, ratios[cat_id], min_chunk_size, max_chunk_size)
        # When the tail is too short for a chunk of its own it is dropped, so
        # only `chunks * max_chunk_size` frames are really emitted.
        frames = min(frames, chunks * max_chunk_size)

        sp_frames.append(frames)
        sp_chunks.append(chunks)
        rnd_distribution.extend([cat_id] * chunks)

    #store params
    split_params['sp_frames'] = sp_frames
    split_params['sp_chunks'] = sp_chunks
    split_params['tot_frames'] = sum(sp_frames)
    split_params['tot_chunks'] = sum(sp_chunks)

    random.shuffle(rnd_distribution)

    # Hand out frames chunk by chunk: full-size chunks first, the shorter
    # tail chunk (if any) last within each category.
    remaining = list(sp_frames)
    split_order = []
    for rnd_id in rnd_distribution:
        chunk_size = min(max_chunk_size, remaining[rnd_id])
        remaining[rnd_id] -= chunk_size

        split_order.append(f'{categories[rnd_id]}_{chunk_size}')

    return split_order, split_params

def split_source_file(split_params):
    """Split one BVH source file into per-category chunk files.

    Reads ``sample_name``, ``sample_source_path`` and ``sample_output`` from
    ``split_params``. Everything up to and including the line starting with
    ``MOTION`` is copied to the top of every chunk file; the following
    ``Frames:`` line gives the frame count that is handed to
    ``chunk_frames``, and the line after that is copied verbatim as the
    frame-time line. Chunks are written to
    ``<sample_output>/<category>/bvh/<sample_name>_<n>_b.bvh`` where ``n``
    counts the chunks of that category from 0.

    Returns the ``split_params`` dict as updated by ``chunk_frames``.

    Raises:
        ValueError: if the file has no ``MOTION`` line or the line after it
            does not contain ``:`` followed by an integer.
    """
    #read params
    sample_name = split_params['sample_name']
    source_file = split_params['sample_source_path']
    output_root = split_params['sample_output']

    with open(source_file) as f_in:
        #collect hierarchy
        hierarchy_lines = []
        while True:
            line = f_in.readline()
            if not line:
                # readline() returns '' at EOF; without this check a file
                # lacking a MOTION section would loop forever.
                raise ValueError(f'No MOTION section found in {source_file}')
            hierarchy_lines.append(line)
            if line.startswith('MOTION'):
                break
        hierarchy_str = ''.join(hierarchy_lines)

        #split bvh motion
        frames_line = f_in.readline()
        # Accept any whitespace after the colon ("Frames: 10", "Frames:\t10",
        # "Frames:    10"), not only exactly four spaces.
        _, colon, frames_value = frames_line.partition(':')
        if not colon:
            raise ValueError(f'Malformed frame count line in {source_file}: {frames_line!r}')
        source_num_frames = int(frames_value)

        split_key, split_params = chunk_frames(source_num_frames, split_params)
        frame_time = f_in.readline()

        split_cat_counter = {}
        for split_entry in split_key:
            # Entries are '<category>_<frames>'; split on the LAST underscore
            # so category names may contain underscores themselves.
            split_cat, _, frames_txt = split_entry.rpartition('_')
            num_frames = int(frames_txt)

            # Per-category running index, starting at 0.
            split_cat_counter[split_cat] = split_cat_counter.get(split_cat, -1) + 1

            filepath = os.path.join(output_root, split_cat, 'bvh')
            os.makedirs(filepath, exist_ok=True)
            filepath = os.path.join(filepath, f'{sample_name}_{split_cat_counter[split_cat]}_b.bvh')

            with open(filepath, 'w') as f_out:
                f_out.write(hierarchy_str)
                f_out.write(f'Frames:    {num_frames}\n')
                f_out.write(frame_time)

                #Frames
                for _ in range(num_frames):
                    f_out.write(f_in.readline())

    return split_params

def create_sample_metafile(split_params, gds_meta):
    """Write the per-sample metadata JSON and fold its counts into ``gds_meta``.

    The file is written to ``<sample_output>/meta/<sample_name>.json`` and
    holds the sample identity, the chunking parameters and the chunk/frame
    counts (total and per split category) taken from ``split_params``.
    ``gds_meta`` is updated in place: the sample name is appended to
    ``sample_ids`` and the same counts are added to ``total_chunks`` and
    ``total_frames``.
    """
    meta_dir = os.path.join(split_params['sample_output'], 'meta')
    os.makedirs(meta_dir, exist_ok=True)

    # Assemble the record key by key; insertion order is the JSON key order.
    record = {}
    record['name'] = split_params['sample_name']
    record['game_id'] = split_params['game_id']
    record['sample_id'] = split_params['sample_id']
    record['source_file'] = split_params['source_file_path']
    record['min_chunk_size'] = split_params['min_chunk_size']
    record['max_chunk_size'] = split_params['max_chunk_size']
    record['split_categories'] = split_params['split_categories']
    record['split_ratios'] = split_params['split_ratios']
    record['num_chunks'] = {'total': split_params['tot_chunks']}
    record['num_frames'] = {'total': split_params['tot_frames']}

    # Totals of the enclosing game summary.
    gds_meta['sample_ids'].append(record['name'])
    gds_meta['total_chunks']['total'] += split_params['tot_chunks']
    gds_meta['total_frames']['total'] += split_params['tot_frames']

    # Per-category counts, for both the sample record and the game summary.
    category_names = split_params['split_categories']
    for idx in range(len(category_names)):
        label = category_names[idx]
        record['num_chunks'][label] = split_params['sp_chunks'][idx]
        record['num_frames'][label] = split_params['sp_frames'][idx]

        gds_meta['total_chunks'][label] += split_params['sp_chunks'][idx]
        gds_meta['total_frames'][label] += split_params['sp_frames'][idx]

    target = os.path.join(meta_dir, split_params['sample_name'] + '.json')
    with open(target, 'w') as out:
        json.dump(record, out, indent=2)

def chunk_dataset(config: "fubot_config"):
    """Split every sample of the source dataset into chunks and write the metadata.

    ``config.params`` supplies ``fubot_root`` (source dataset root),
    ``output_root`` / ``output_name`` (destination) and the ``split``
    settings (``categories``, ``ratios``, ``min_chunk_size``,
    ``max_chunk_size``, ``seed``). Each sub-directory of
    ``<fubot_root>/meta`` is treated as one game and each file inside it as
    one sample metadata JSON with ``source_file``, ``game_id`` and
    ``sample_id`` keys. Every sample is passed through ``split_source_file``
    and ``create_sample_metafile``; the accumulated summary is written to
    ``<output_root>/<output_name>/<output_name>_meta.json``.

    A negative ``seed`` is replaced by the current Unix time in whole
    seconds; the seed actually used is stored as ``split_seed``. Games and
    samples are processed in sorted order so that a given seed produces the
    same split regardless of directory listing order.
    """
    #Random Seed
    rnd_seed = config.params['split']['seed']
    if rnd_seed < 0:
        # An int keeps the seed valid for random.seed() and serialisable in
        # the metadata JSON below.
        rnd_seed = int(datetime.datetime.now().timestamp())

    random.seed(rnd_seed)

    #Split Params
    sp = {
        'dataset_root': config.params['fubot_root'],
        'output_root': os.path.join(config.params['output_root'], config.params['output_name']),
        'split_categories': config.params['split']['categories'],
        'split_ratios': config.params['split']['ratios'],
        'min_chunk_size': config.params['split']['min_chunk_size'],
        'max_chunk_size': config.params['split']['max_chunk_size']
    }

    dataset_root = sp['dataset_root']
    meta_root_path = os.path.join(dataset_root, 'meta')
    # os.walk order is arbitrary; sort so the seeded shuffle is reproducible.
    game_ids = sorted(next(os.walk(meta_root_path))[1])

    def summary_dict():
        """Return a zeroed counter dict with 'total' and one key per category."""
        d = {'total': 0}
        for c in sp['split_categories']:
            d[c] = 0

        return d

    def append_dicts(a, b):
        """Add the counters of ``b`` onto ``a`` in place."""
        a['total'] += b['total']
        for c in sp['split_categories']:
            a[c] += b[c]

    ds_meta = {
        'name': config.params['output_name'],
        'sample_rate': 30, #fixed for now
        'length': -1,
        'split_categories': sp['split_categories'],
        'split_ratios': sp['split_ratios'],
        'split_seed': rnd_seed,
        'min_chunk_size': sp['min_chunk_size'],
        'max_chunk_size': sp['max_chunk_size'],
        'total_chunks': summary_dict(),
        'total_frames': summary_dict(),
        'game_ids': list()
    }

    for game_id in game_ids:
        meta_game_root_path = os.path.join(meta_root_path, game_id)
        game_samples = sorted(next(os.walk(meta_game_root_path))[2])

        #update DS META
        game_ds_meta = {
            'name': game_id,
            'sample_ids': list(),
            'total_chunks': summary_dict(),
            'total_frames': summary_dict(),
        }
        ds_meta['game_ids'].append(game_ds_meta)

        for game_sample in game_samples:
            meta_sample_path = os.path.join(meta_game_root_path, game_sample)
            with open(meta_sample_path) as f:
                sample_json = json.load(f)

            sp['sample_source_path'] = os.path.join(dataset_root, sample_json['source_file'])
            sp['source_file_path'] = sample_json['source_file']
            sample_name = os.path.basename(sample_json['source_file'])
            sample_name = os.path.splitext(sample_name)[0]
            sp['sample_name'] = sample_name
            sp['game_id'] = sample_json['game_id'].lower()
            sp['sample_id'] = sample_json['sample_id']
            sp['sample_output'] = os.path.join(sp['output_root'], sp['game_id'])

            sp = split_source_file(sp)
            #Generate Metafile
            create_sample_metafile(sp, game_ds_meta)

        append_dicts(ds_meta['total_chunks'], game_ds_meta['total_chunks'])
        append_dicts(ds_meta['total_frames'], game_ds_meta['total_frames'])

    ds_meta['length'] = str(datetime.timedelta(seconds=ds_meta['total_frames']['total']/ds_meta['sample_rate']))

    #Save DS Meta
    # The output directory does not exist yet when no sample was processed.
    os.makedirs(sp['output_root'], exist_ok=True)
    with open(os.path.join(sp['output_root'], config.params['output_name'] + '_meta.json'), 'w') as f:
        json.dump(ds_meta, f, indent=2)

# Notebook cell driver: reload the generator configuration (path relative to
# the working directory) and run the chunking over the dataset it points to.
config = fubot_config('../samples/generator_config.json')
chunk_dataset(config)
