# In [3]:
import os
import shutil

import numpy as np
import pandas as pd


# Root directory that holds the BirdVox datasets.
data_dir = '/beegfs/vl1019/BirdVox_datasets/'

# Input side: preliminary (ICASSP 2018) layout with its annotation and
# audio subdirectories.
icassp_dir = os.path.join(data_dir, 'BirdVox-full-night_icassp2018')
in_csv_dir, in_flac_dir = (
    os.path.join(icassp_dir, subdir)
    for subdir in ('BirdVox-70k_annotations', 'BirdVox-70k_full-audio'))

# Output side: BirdVox-full-night layout. Both output directories are
# created up front; already existing directories are left untouched.
full_night_dir = os.path.join(data_dir, "BirdVox-full-night")
out_csv_dir = os.path.join(full_night_dir, "BirdVox-full-night_csv-annotations")
out_flac_dir = os.path.join(full_night_dir, "BirdVox-full-night_flac-audio")
for out_dir in (out_csv_dir, out_flac_dir):
    os.makedirs(out_dir, exist_ok=True)

# Unit identifiers, zero-padded to two digits ("unit01", ..., "unit10").
units = ["unit{:02d}".format(n) for n in (1, 2, 3, 5, 7, 10)]

# Loop over units, i.e. sensors. For each unit: move its FLAC recording to
# the output directory under the new naming scheme, then write a simplified
# CSV holding one (center time, center frequency) pair per annotation.
for unit_str in units:
    # Move (not copy) the FLAC file. shutil.move does not go through a
    # shell, so the paths need no quoting, and a failed move raises instead
    # of being silently ignored as with os.system.
    in_flac_path = os.path.join(in_flac_dir, unit_str + ".flac")
    out_flac_path = os.path.join(out_flac_dir,
        "BirdVox-full-night_flac-audio_" + unit_str + ".flac")
    if os.path.exists(in_flac_path):
        shutil.move(in_flac_path, out_flac_path)
    elif not os.path.exists(out_flac_path):
        # Neither the source nor an already-moved destination exists.
        raise FileNotFoundError("FLAC file not found: " + in_flac_path)
    # Otherwise the file was already moved by an earlier run: nothing to do.

    # Create simplified CSV files.
    unit_name = unit_str + ".txt"
    unit_path = os.path.join(in_csv_dir, unit_name)
    # The separator must be passed by keyword: pandas >= 2.0 only accepts
    # the file path as a positional argument of read_csv.
    df_in = pd.read_csv(unit_path, sep='\t')
    if "Calls" in df_in:
        # Drop the rows whose "Calls" value is "alarm".
        df_in = df_in[df_in["Calls"] != "alarm"]

    # Midpoint of each annotation in time, as a fixed-width string with
    # three decimals.
    center_times = 0.5 * (df_in["Begin Time (s)"] + df_in["End Time (s)"])
    center_time_strs = ["{:9.3f}".format(t) for t in center_times]

    # Midpoint of each annotation in frequency, rounded to an integer and
    # formatted as a fixed-width string.
    center_freqs = 0.5 * (df_in["Low Freq (Hz)"] + df_in["High Freq (Hz)"])
    center_freq_strs = ["{:5d}".format(int(np.round(f))) for f in center_freqs]

    # `columns` pins the column order of the output file.
    df_out = pd.DataFrame({"Time (s)": center_time_strs, "Freq (Hz)": center_freq_strs},
        columns=["Time (s)", "Freq (Hz)"])
    out_csv_path = os.path.join(out_csv_dir,
        "BirdVox-full-night_csv-annotations_" + unit_str + ".csv")
    df_out.to_csv(out_csv_path, index=False)
    
    
# The preliminary version of the dataset had latitude and longitude mixed
# up. Rewrite the GPS table with the two columns exchanged.
in_gps_path = os.path.join(icassp_dir, "BirdVox-70k_gps-coordinates.csv")
out_gps_path = os.path.join(full_night_dir, "BirdVox-full-night_gps-coordinates.csv")

df_gps_in = pd.read_csv(in_gps_path)
gps_columns = ["Unit", "Latitude", "Longitude"]
swapped_coordinates = {
    "Unit": units,
    # Each output column reads from the opposite input column.
    "Latitude": df_gps_in["Longitude"],
    "Longitude": df_gps_in["Latitude"],
}
df_gps_out = pd.DataFrame(swapped_coordinates, columns=gps_columns)
df_gps_out.to_csv(out_gps_path, index=False)


# Move (not copy) the UTC start times file to the output directory under
# its new name. shutil.move does not go through a shell, so the paths need
# no quoting, and a failed move raises instead of being silently ignored.
in_utc_path = os.path.join(icassp_dir, "BirdVox-70k_utc-start-times.csv")
out_utc_path = os.path.join(full_night_dir,
    "BirdVox-full-night_utc-start-times.csv")
if os.path.exists(in_utc_path):
    shutil.move(in_utc_path, out_utc_path)
elif not os.path.exists(out_utc_path):
    # Neither the source nor an already-moved destination exists.
    raise FileNotFoundError("UTC start times file not found: " + in_utc_path)
# Otherwise the file was already moved by an earlier run: nothing to do.