In [1]:
import datetime
import glob
import numpy as np
import os
import soundfile as sf
import sys

sys.path.append(os.path.join("..", "src"))
import localmodule

In [2]:
# Project-wide settings are resolved through localmodule.
data_dir = localmodule.get_data_dir()
dataset_name = localmodule.get_dataset_name()

# Input tree: <data_dir>/<dataset_name>_audio-clips/original
dataset_wav_name = "_".join((dataset_name, "audio-clips"))
dataset_wav_dir = os.path.join(data_dir, dataset_wav_name)
original_dataset_wav_dir = os.path.join(dataset_wav_dir, "original")

# Output directory: <data_dir>/<dataset_name>_concatenated-audio,
# created on first use.
concatenated_wav_name = "_".join((dataset_name, "concatenated-audio"))
concatenated_dir = os.path.join(data_dir, concatenated_wav_name)
if not os.path.exists(concatenated_dir):
    os.makedirs(concatenated_dir)

units = localmodule.get_units()

# Sample rate used for every index computation below and for the files
# written to concatenated_dir.
sr = 24000

# Durations are multiplied by sr to obtain sample counts.
clip_duration = 0.5
truncated_clip_duration = 0.5

# Keep a window of truncated_clip_length samples centered on the middle of
# each clip. With the durations above, the window is [0, 12000).
mid_clip_position = int(0.5 * sr * clip_duration)
truncated_clip_length = int(sr * truncated_clip_duration)
half_truncated_clip_length = int(0.5 * truncated_clip_length)
clip_start = mid_clip_position - half_truncated_clip_length
clip_stop = mid_clip_position + half_truncated_clip_length

start_time = datetime.datetime.now()
print(str(start_time) + " Start")
print("")

# Index, within a clip's file name, of the character that encodes its label:
# "0" selects the clip for the negatives file, "1" for the positives file.
# NOTE(review): this relies on a fixed-width file-name layout -- confirm
# against the code that generates the clips.
LABEL_CHAR_INDEX = 23


def _timestamp():
    """Return the current local date and time as a string, for log lines."""
    return str(datetime.datetime.now())


def _concatenate_clips(clip_paths, start, stop, expected_sr):
    """Read audio clips, keep samples [start, stop) of each, and join them.

    Parameters
    ----------
    clip_paths : list of str
        Paths of the audio files to read, in output order.
    start, stop : int
        First (inclusive) and last (exclusive) sample index kept per clip.
    expected_sr : int
        Sample rate every clip must have.

    Returns
    -------
    numpy.ndarray or None
        The truncated clips joined along the first axis, or None when
        clip_paths is empty (np.concatenate rejects an empty sequence).

    Raises
    ------
    ValueError
        If the window is invalid, if a clip has another sample rate, or if a
        clip is shorter than `stop` samples.
    """
    if not 0 <= start <= stop:
        raise ValueError(
            "Invalid clip window [{}, {})".format(start, stop))

    segments = []
    for clip_path in clip_paths:
        clip, clip_sr = sf.read(clip_path)
        if clip_sr != expected_sr:
            # Writing these samples under expected_sr would silently change
            # the playback speed of the clip.
            raise ValueError(
                "{}: sample rate is {}, expected {}".format(
                    clip_path, clip_sr, expected_sr))
        if len(clip) < stop:
            # A slice would quietly return fewer samples; fail loudly and
            # name the offending file instead.
            raise ValueError(
                "{}: has {} samples, needs at least {}".format(
                    clip_path, len(clip), stop))
        # A plain slice avoids building an index array for every clip.
        segments.append(clip[start:stop])

    if not segments:
        return None
    return np.concatenate(segments)


for unit_str in units:
    in_unit_dir = os.path.join(original_dataset_wav_dir, unit_str)
    clip_paths = sorted(glob.glob(os.path.join(in_unit_dir, "*.wav")))
    print(_timestamp() + " " + unit_str)

    # Same processing for both classes; only the label character and the
    # output file suffix differ.
    for label_char, label_name in (("0", "negatives"), ("1", "positives")):
        # Slicing (rather than indexing) makes file names shorter than
        # LABEL_CHAR_INDEX + 1 characters compare unequal instead of raising.
        labeled_paths = [
            clip_path for clip_path in clip_paths
            if os.path.basename(clip_path)[
                LABEL_CHAR_INDEX:LABEL_CHAR_INDEX + 1] == label_char]

        concatenated_clips = _concatenate_clips(
            labeled_paths, clip_start, clip_stop, sr)
        output_str = unit_str + "_" + label_name + ".wav"
        output_path = os.path.join(concatenated_dir, output_str)

        if concatenated_clips is None:
            # Nothing to write for this class. A file left at output_path by
            # an earlier run is not touched.
            print(_timestamp() + " No " + label_name + " in " + unit_str
                  + "; skipped " + output_str)
        else:
            sf.write(output_path, concatenated_clips, sr)
            print(_timestamp() + " Finished " + label_name)
    print("")

2017-07-28 13:50:36.221153 Start

2017-07-28 13:50:36.311076 unit01
2017-07-28 13:50:44.707109 Finished negatives
2017-07-28 13:50:52.888792 Finished positives

2017-07-28 13:50:53.033885 unit02
2017-07-28 13:51:06.383031 Finished negatives
2017-07-28 13:51:18.305327 Finished positives

2017-07-28 13:51:18.586122 unit03
2017-07-28 13:51:44.052889 Finished negatives
2017-07-28 13:52:06.869934 Finished positives

2017-07-28 13:52:07.031918 unit05
2017-07-28 13:52:21.341356 Finished negatives
2017-07-28 13:52:34.398722 Finished positives

2017-07-28 13:52:34.596440 unit07
2017-07-28 13:52:52.224234 Finished negatives
2017-07-28 13:53:08.374621 Finished positives

2017-07-28 13:53:08.584289 unit10
2017-07-28 13:53:25.721165 Finished negatives
2017-07-28 13:53:42.928726 Finished positives

