In [1]:
import datetime
import numpy as np
import os
import soundfile as sf
import sys

# Make the sibling "src" folder importable before importing from it. The path
# is relative, so it is resolved against the kernel's current working
# directory -- presumably the notebook's own folder; verify if run elsewhere.
sys.path.append(os.path.join("..", "src"))
# NOTE(review): `paths` is not a standard-library module; presumably it is the
# project-local module found through the entry appended above -- confirm.
import paths

In [None]:
# --- Input / output locations ------------------------------------------------
# Everything lives under the data directory reported by the `paths` module.
data_dir = paths.get_data_dir()
clips_dir = os.path.join(data_dir, "BirdVox-70k")
original_clips_dir = os.path.join(clips_dir, "original")
concatenated_dir = os.path.join(data_dir, "BirdVox-70k_concatenated")
if not os.path.exists(concatenated_dir):
    os.makedirs(concatenated_dir)

# --- Units to process --------------------------------------------------------
units = [1, 2, 3, 5, 7, 10]
n_units = len(units)

# --- Truncation window -------------------------------------------------------
# `sr` is the sample rate passed to sf.write further down; the durations are
# multiplied by it to obtain positions and lengths in samples.
sr = 24000
clip_duration = 0.5
truncated_clip_duration = 0.185
mid_clip_position = int(0.5 * sr * clip_duration)
truncated_clip_length = int(sr * truncated_clip_duration)
# The window is centered on the middle of the clip: half of the truncated
# length on each side of `mid_clip_position`.
half_truncated_clip_length = int(0.5 * truncated_clip_length)
clip_start = mid_clip_position - half_truncated_clip_length
clip_stop = mid_clip_position + half_truncated_clip_length

# --- Start banner ------------------------------------------------------------
print("{} Start".format(datetime.datetime.now()))
print("")

def _concatenate_clips(in_dir, names, out_path, start, stop, samplerate):
    """Concatenate the [start, stop) window of several clips into one WAV file.

    Parameters
    ----------
    in_dir : str
        Directory containing the clips.
    names : list of str
        File names of the clips, in the order they should appear in the output.
    out_path : str
        Path of the WAV file to write.
    start, stop : int
        First (inclusive) and last (exclusive) frame kept from every clip.
    samplerate : int
        Sample rate handed to ``sf.write`` for the output file.

    Returns
    -------
    bool
        True if a file was written, False if `names` was empty (nothing is
        written in that case, because ``np.concatenate`` rejects an empty list).

    Raises
    ------
    ValueError
        If a clip is too short to provide the full window. Failing loudly keeps
        every clip at the same length inside the concatenated file.
    """
    expected_length = stop - start
    windows = []
    for name in names:
        clip_path = os.path.join(in_dir, name)
        # Let soundfile read only the window of interest instead of decoding
        # the whole clip and indexing it afterwards.
        window, _ = sf.read(clip_path, start=start, stop=stop)
        if len(window) != expected_length:
            raise ValueError(
                "Clip " + clip_path + " yielded " + str(len(window)) +
                " frames, expected " + str(expected_length))
        windows.append(window)
    if not windows:
        return False
    sf.write(out_path, np.concatenate(windows), samplerate)
    return True


# Position, inside a clip's file name, of the character this notebook uses to
# tell the two classes apart: "0" is treated as negative, "1" as positive.
label_position = 23
label_suffixes = (("0", "negatives"), ("1", "positives"))

for unit in units:
    unit_str = "unit" + str(unit).zfill(2)
    in_unit_dir = os.path.join(original_clips_dir, unit_str)
    # Sorting makes the order of clips in the output deterministic.
    clip_names = sorted(os.listdir(in_unit_dir))
    print(str(datetime.datetime.now()) + " Unit " + str(unit).zfill(2))

    for label, suffix in label_suffixes:
        # Entries too short to carry a label character (for instance stray
        # hidden files) are skipped rather than raising IndexError.
        label_names = [
            name for name in clip_names
            if len(name) > label_position and name[label_position] == label]
        output_str = unit_str + "_" + suffix + ".wav"
        output_path = os.path.join(concatenated_dir, output_str)
        written = _concatenate_clips(
            in_unit_dir, label_names, output_path, clip_start, clip_stop, sr)
        if written:
            print(str(datetime.datetime.now()) + " Finished " + suffix)
        else:
            print(str(datetime.datetime.now()) + " No " + suffix +
                  " found, nothing written")
    print("")

2017-07-21 14:59:18.118875 Start

2017-07-21 14:59:18.126544 Unit 01
2017-07-21 14:59:21.832480 Finished negatives
2017-07-21 14:59:25.588164 Finished positives

2017-07-21 14:59:25.599505 Unit 02
