In [21]:
import datetime
import h5py
import librosa
import numpy as np
import os
import pandas as pd
import soundfile as sf
import sys
import time

sys.path.append('../src') #                                    DISABLE
import localmodule



# Define constants.
# NOTE(review): lines tagged ENABLE / DISABLE appear to mark what should be
# switched on / off when this notebook is turned into a command-line script
# -- TODO confirm against the conversion procedure.
data_dir = localmodule.get_data_dir()
dataset_name = localmodule.get_dataset_name()
# The unit identifier is the first argument: read from the command line in the
# commented-out variant, hard-coded to "unit01" in the active one.
# args = sys.argv[1:]                                           ENABLE
args = ["unit01"] #                                             DISABLE
unit_str = args[0]
logmelspec_settings = localmodule.get_logmelspec_settings()
# Number of clips grouped into one chunk when iterating over the recording.
n_clips_per_chunk = 1000


# Print header: start timestamp, job description, and library versions.
# start_time is kept (as whole seconds since the epoch) for later use.
start_time = int(time.time())
header_lines = [
    str(datetime.datetime.now()) + " Start.",
    "Computing log-mel-spectrograms (logmelspec) for full " + dataset_name + ".",
    "Unit: " + unit_str + ".",
    "",
    "h5py version: {:s}".format(h5py.__version__),
    "librosa version: {:s}".format(librosa.__version__),
    "numpy version: {:s}".format(np.__version__),
    "pandas version: {:s}".format(pd.__version__),
    "soundfile version: {:s}".format(sf.__version__),
    "",
]
# A single print of the joined lines emits the same text as one print per line.
print("\n".join(header_lines))


# Create HDF5 container of logmelspecs.
# The container lives at <data_dir>/<dataset_name>_full_logmelspec/<unit>.hdf5.
full_logmelspec_name = "_".join([dataset_name, "full_logmelspec"])
full_logmelspec_dir = os.path.join(data_dir, full_logmelspec_name)
os.makedirs(full_logmelspec_dir, exist_ok=True)
out_name = unit_str
out_path = os.path.join(full_logmelspec_dir, out_name + ".hdf5")
# Start from a fresh file when re-running interactively. os.remove replaces the
# former shell "rm" call: it is portable, safe for paths containing spaces or
# shell metacharacters, and silent when there is nothing to delete.
if os.path.exists(out_path): #                                   DISABLE
    os.remove(out_path) #                                        DISABLE
# Pass the mode explicitly: "a" (read/write, create if missing) was the implicit
# default in h5py 2.x, but h5py >= 3.0 defaults to read-only, which would fail
# here because the file has to be created and written.
out_file = h5py.File(out_path, "a")


def _select_unit_row(table, unit, table_path):
    """Return the first row of `table` whose "Unit" column equals `unit`.

    Parameters
    ----------
    table : pandas.DataFrame
        Table with a "Unit" column.
    unit : str
        Unit identifier to look up.
    table_path : str
        Path the table was read from; only used in the error message.

    Raises
    ------
    IndexError
        If no row matches. This is the exception type a bare `.iloc[0]` on an
        empty selection raises, but with a message naming the unit and file.
    """
    matches = table.loc[table["Unit"] == unit]
    if matches.empty:
        raise IndexError(
            "Unit {!r} not found in column 'Unit' of {}".format(unit, table_path))
    return matches.iloc[0]


# Load GPS coordinates.
gps_name = "_".join([dataset_name, "gps-coordinates.csv"])
gps_path = os.path.join(data_dir, gps_name)
gps_df = pd.read_csv(gps_path)
gps_row = _select_unit_row(gps_df, unit_str, gps_path)


# Load UTC starting times.
utc_name = "_".join([dataset_name, "utc-start-times.csv"])
utc_path = os.path.join(data_dir, utc_name)
utc_df = pd.read_csv(utc_path)
utc_row = _select_unit_row(utc_df, unit_str, utc_path)


# Copy over metadata: dataset name, unit, and UTC start time at the root of
# the container; GPS coordinates and logmelspec settings in their own groups.
out_file["dataset_name"] = localmodule.get_dataset_name()
out_file["unit"] = unit_str
out_file["utc_start_time"] = utc_row["UTC"]

# GPS coordinates: (name in the HDF5 group, column name in the CSV row).
gps_group = out_file.create_group("gps_coordinates")
for gps_key, gps_column in (("latitude", "Latitude"), ("longitude", "Longitude")):
    gps_group[gps_key] = gps_row[gps_column]

# Logmelspec settings are stored under the same keys as in the settings dict.
settings_group = out_file.create_group("logmelspec_settings")
for settings_key in (
        "fmax", "fmin", "hop_length", "n_fft",
        "n_mels", "sr", "win_length", "window"):
    settings_group[settings_key] = logmelspec_settings[settings_key]

2017-08-06 18:01:33.096693 Start.
Computing log-mel-spectrograms (logmelspec) for full BirdVox-70k.
Unit: unit01.

h5py version: 2.6.0
librosa version: 0.5.1
numpy version: 1.13.1
pandas version: 0.20.3
soundfile version: 0.9.0



In [22]:
# Create the "logmelspec" group of the output container; it is meant to hold
# the log-mel-spectrograms themselves.
lms_group = out_file.create_group(name="logmelspec")

In [23]:
# Open the FLAC recording of the current unit, located at
# <data_dir>/<dataset_name>_full-audio/<unit>.flac.
recordings_name = "_".join((dataset_name, "full-audio"))
recordings_dir = os.path.join(data_dir, recordings_name)
recording_name = "".join((unit_str, ".flac"))
recording_path = os.path.join(recordings_dir, recording_name)
full_audio = sf.SoundFile(recording_path)

# Length of the recording as reported by len() on the SoundFile object
# (presumably the number of frames -- TODO confirm for the installed version).
full_audio_length = len(full_audio)

In [92]:
# Sample rates and hop length needed to place clips on the audio time axis.
sample_rate = localmodule.get_sample_rate()
lms_sample_rate = logmelspec_settings["sr"]
lms_hop_length = logmelspec_settings["hop_length"]

# Padding added on either side of a chunk, converted from seconds to samples.
padding_duration = 0.5 # in seconds
padding_length = int(np.round(padding_duration * sample_rate))

# Spacing between consecutive clip positions, in fractional audio samples:
# 64 logmelspec hops rescaled from the logmelspec rate to the audio rate.
# NOTE(review): 64 is a magic constant (presumably 2048 / hop_length) -- confirm
# and consider naming it.
sample_float_step = 64 * lms_hop_length * sample_rate / lms_sample_rate

# Clip positions start half a step into the recording and end before the last
# half step; they are generated as floats, then rounded to integer samples.
half_step = np.round(0.5 * sample_float_step)
sample_start = half_step
sample_stop = full_audio_length - half_step
sample_range = np.round(
    np.arange(sample_start, sample_stop, sample_float_step)).astype('int')

# Clips are processed in chunks of at most n_clips_per_chunk.
n_clips = len(sample_range)
n_chunks = int(np.ceil(n_clips / n_clips_per_chunk))

# for chunk_id in range(n_chunks):                                       ENABLE
chunk_id = 0 #                                                    DISABLE
# Clip indices [first_clip_id, last_clip_id) covered by this chunk; the last
# chunk may be shorter than n_clips_per_chunk.
first_clip_id = chunk_id * n_clips_per_chunk
last_clip_id = min((chunk_id+1) * n_clips_per_chunk, n_clips)
chunk_range = range(first_clip_id, last_clip_id)
chunk_sample_range = sample_range[chunk_range]
# Sample bounds of the chunk, widened by the padding on both sides.
# NOTE(review): chunk_start is negative for chunk 0 whenever padding_length
# exceeds half a step, and chunk_stop can likewise pass full_audio_length for
# the last chunk -- confirm the code reading the audio handles both cases.
chunk_start = chunk_sample_range[0] - padding_length
chunk_stop = chunk_sample_range[-1] + padding_length


In [94]:
# Scratch cell: display the stop sample computed for the current chunk.
chunk_stop

2240002

In [86]:
# Scratch cell: display the window length from the logmelspec settings.
logmelspec_settings["win_length"]

256

In [88]:
# Scratch arithmetic. NOTE(review): presumably the origin of the factor 64
# used in sample_float_step -- confirm, then fold into a named constant.
2048 / 32

64.0