In [1]:
import json
import argparse
from os import listdir, mkdir
from os.path import isfile, join, exists

import numpy as np
from pymo.preprocessing import *
from sklearn.pipeline import Pipeline
from pymo.parsers import BVHParser
from pymo.viz_tools import *


In [2]:
# parser = argparse.ArgumentParser()
# parser.add_argument('--transcript_dir', dest='transcript_dir',
#                     type=str, help='Directory for transcripts')
# parser.add_argument('--motion_dir', dest='motion_dir',
#                     type=str, help='Directory for motion (BVH) recordings')
# parser.add_argument('--transcript_out_dir', dest='transcript_out_dir',
#                     type=str, help='Output directory for processed transcripts')
# args = parser.parse_args()

# motion_dir = args.motion_dir
# transcript_dir = args.transcript_dir
# transcript_out_dir = args.transcript_out_dir


In [3]:
# Input and output locations, relative to the notebook's working directory:
# motion recordings, their transcripts, and where processed transcripts go.
motion_dir, transcript_dir, transcript_out_dir = (
    "../data/V1/allRec/",
    "../data/V1/allRecTranscripts/",
    "../data/V1/transcriptsProcessed/",
)

In [4]:
# Every regular file in motion_dir, sorted by name so that motion_files and
# transcript_files stay index-aligned.
recording_files = sorted(
    name for name in listdir(motion_dir) if isfile(join(motion_dir, name)))
motion_files = [join(motion_dir, name) for name in recording_files]
# A transcript carries its recording's file name with the "bvh" extension
# swapped for "json". Only the trailing extension is rewritten: a plain
# str.replace would also alter a "bvh" occurring earlier in the name.
transcript_files = [
    join(transcript_dir,
         (name[:-len("bvh")] + "json") if name.endswith(".bvh") else name)
    for name in recording_files]


In [5]:
# Length of one clip, in seconds: it is compared against transcript timestamps
# given as second-valued strings such as "507.300s", and the output file names
# embed it as "{clip_size}s".
clip_size = 5 # s

In [6]:
def string_to_float(str_time: str) -> float:
  """Convert a timestamp string such as "507.300s" to a float.

  Args:
    str_time: A decimal number, optionally followed by a single "s" suffix.
      Surrounding whitespace is ignored.

  Returns:
    The numeric value as a float.

  Raises:
    ValueError: If what remains after removing the suffix is not a number.
  """
  stripped = str_time.strip()
  # Drop the unit suffix only when it is actually present, so that an
  # unsuffixed value like "12.5" does not silently lose its last digit.
  if stripped.endswith("s"):
    stripped = stripped[:-1]
  return float(stripped)
string_to_float("507.300s")


507.3

In [7]:
def _collect_words(transcript):
  """Return the word entries of every section of a parsed transcript, in order.

  Only the first alternative of each section is read; a warning is printed
  when a section does not have exactly one alternative.
  """
  words = []
  for section in transcript:
    if len(section["alternatives"]) != 1:
      print("\n\n\nBe careful, there are more than an alternative!\n\n\n")
    words += section["alternatives"][0]["words"]
  return words


def _split_into_clips(words, clip_size, start_offset):
  """Group time-stamped words into back-to-back clips of clip_size each."""
  clips = []
  clip_start = start_offset
  next_word = 0
  while next_word < len(words):
    clip_end = clip_start + clip_size

    # A word that began in an earlier clip but ends inside this one belongs to
    # this clip as well, so step back over such words. The bound is
    # `next_word > 0` so that the very first word can be carried over too.
    while next_word > 0 and \
        clip_start < string_to_float(words[next_word - 1]["end_time"]) < clip_end:
      next_word -= 1

    # Take every word that starts before the clip ends. A clip that precedes
    # the first word simply collects nothing here, so leading silence yields
    # empty clips without a separate padding pass.
    clip_words = []
    while next_word < len(words) and \
        string_to_float(words[next_word]["start_time"]) < clip_end:
      clip_words.append(words[next_word]["word"])
      next_word += 1

    clips.append({"start": clip_start, "end": clip_end, "words": clip_words})
    clip_start = clip_end
  return clips


def sectionize(transcript_files, clip_size, start_offset=0):
  """Split the words of each transcript file into fixed-length clips.

  Each file is loaded as JSON: a list of sections, each holding an
  "alternatives" list whose first entry has a "words" list; every word entry
  provides "word", "start_time" and "end_time".

  Args:
    transcript_files: Paths of the transcript JSON files to process.
    clip_size: Clip length, in the unit of the transcript timestamps
      (seconds, judging by their "s" suffix, e.g. "507.300s").
    start_offset: Start of the first clip, in the same unit.

  Returns:
    One list per transcript file, in input order. Each is a list of dicts
    {"start": ..., "end": ..., "words": [...]} covering consecutive clips
    from start_offset up to the clip containing the start of the last word.
    A word is listed in the clip in which it starts and, if it ends inside
    the following clip, in that clip as well. A transcript without words
    yields an empty list.

  Raises:
    ValueError: If clip_size is not positive (the clips could never advance).

  Side effects:
    Creates the directory named by the global transcript_out_dir if it does
    not exist yet.
  """
  if clip_size <= 0:
    raise ValueError("clip_size must be positive")

  all_transcripts = []
  for transcript_file in transcript_files:
    with open(transcript_file) as f:
      transcript = json.load(f)

    words = _collect_words(transcript)

    # Make sure the output directory exists; later cells write into it.
    if not exists(transcript_out_dir):
      mkdir(transcript_out_dir)

    all_transcripts.append(_split_into_clips(words, clip_size, start_offset))
  return all_transcripts

In [8]:
# Two segmentations of the same transcripts: clips starting at 0 and clips
# shifted by 2.5.
all_transcripts_0 = sectionize(
    transcript_files=transcript_files, clip_size=clip_size, start_offset=0)
all_transcripts_2_5 = sectionize(
    transcript_files=transcript_files, clip_size=clip_size, start_offset=2.5)

In [12]:
# Largest number of words found in any single clip, for each segmentation.
max_num_words_per_section = max(
    max(len(section['words']) for section in transcript)
    for transcript in all_transcripts_0)
max_num_words_per_section2_5 = max(
    max(len(section['words']) for section in transcript)
    for transcript in all_transcripts_2_5)
max_num_words_per_section, max_num_words_per_section2_5


(45, 38)

In [10]:
# Total number of clips across all transcripts, for each segmentation.
num_sections_total_0 = sum(map(len, all_transcripts_0))
num_sections_total_2_5 = sum(map(len, all_transcripts_2_5))
num_sections_total_0, num_sections_total_2_5


(2641, 2629)

In [11]:
# Write both segmentations as JSON files into transcript_out_dir.
with open(join(transcript_out_dir, f"processed_words_into_sections_{clip_size}s_offset{0}s.json"), "w") as f:
  json.dump(all_transcripts_0, f)
# NOTE(review): `2_5` is the integer literal 25 (underscores are only digit
# separators), so this file name ends in "offset25s.json" although the data
# was built with start_offset=2.5. Left unchanged because other code may
# already read that name; confirm before renaming.
with open(join(transcript_out_dir, f"processed_words_into_sections_{clip_size}s_offset{2_5}s.json"), "w") as f:
  json.dump(all_transcripts_2_5, f)


In [None]:
# Motion preprocessing pipeline; the steps are applied in the order listed.
data_pipe = Pipeline([
    ('param', MocapParameterizer('position')),
    ('rcpn', RootCentricPositionNormalizer()),
    # NOTE(review): 'abdolute' looks like a typo, but this string is a runtime
    # value handed to RootTransformer; check the method names PyMO accepts
    # before changing it.
    ('delta', RootTransformer('abdolute_translation_deltas')),
    ('const', ConstantsRemover()),
    ('np', Numpyfier()),
    ('down', DownSampler(2)),
    ('stdscale', ListStandardScaler())
])
# Parse the motion recordings with a fresh BVHParser per file.
parsed_data_list = []
for motion_file in motion_files:
  parser = BVHParser()
  parsed_data = parser.parse(motion_file)
  parsed_data_list.append(parsed_data)
  # NOTE(review): this break stops after the first file, so parsed_data_list
  # holds at most one recording; confirm whether that is still intended.
  break

In [None]:
# Fit the pipeline on the parsed recordings and transform them. The collected
# parsed_data_list is used rather than the loop variable parsed_data, which
# would be undefined when motion_files is empty and would cover only the last
# file if more than one were parsed.
piped_data = data_pipe.fit_transform(parsed_data_list)

In [None]:
# Store the pipeline output on disk; np.save appends ".npy" to a file name
# that lacks it, so the file written is ./processed_motion.npy.
np.save("./processed_motion", piped_data)