In [129]:
from scipy.io import wavfile
import numpy as np
import os
from glob import glob
import pandas as pd
import matplotlib.pyplot as plt
import scipy as sp
import h5py

In [22]:

# Root folder of the raw data and the experiment (sub-folder) to load.
dataset_path = "../datasets/tdoa_20201016/data"
experiment = "music_0014"
experiment_dir = os.path.join(dataset_path, experiment)

# Every wav file in the experiment folder counts as one track; the tracks are
# expected to be named "Track 1.wav", "Track 2.wav", ...
n_audio_files = len(glob(os.path.join(experiment_dir, "*.wav")))

recordings = []
for track_number in range(1, n_audio_files + 1):
    track_file = os.path.join(experiment_dir, "Track " + str(track_number) + ".wav")
    # fs is overwritten on every iteration, so it ends up holding the sample
    # rate of the last track that was read.
    fs, samples = wavfile.read(track_file)
    recordings.append(samples)
# np.stack requires all tracks to have the same shape; axis 0 indexes the tracks.
sounds = np.stack(recordings)

# Read ground truth. Note: the time column is 0 when the audio recordings started.
df = pd.read_csv(os.path.join(experiment_dir, "gt_positions.csv"))

dims = ["x", "y", "z"]
time = df["time"]
senders = df[["speaker" + "_" + dim for dim in dims]].to_numpy()

# One fixed position per track/microphone: for every coordinate, take the median
# over the non-null entries of the corresponding "mic<k>_<dim>" column.
receivers = np.zeros((n_audio_files, 3))
for mic_idx in range(n_audio_files):
    for axis_idx, dim in enumerate(dims):
        mic_column = df["mic" + str(mic_idx + 1) + "_" + dim]
        receivers[mic_idx, axis_idx] = mic_column.dropna().median()



In [125]:
# --- settings ---
downsampling_factor = 6  # keep every 6th sample: 96 kHz -> 16 kHz, the rate the models are trained on
chunk_length = 10000     # samples per chunk, counted on the downsampled signal
max_freq = 2500          # number of rfft bins kept per chunk (a bin index, NOT a frequency in Hz)

# --- constants derived from the loaded recordings ---
speed_of_sound = 343  # presumably in m/s -- TODO confirm
n_mics = sounds.shape[0]
n_pairs = (n_mics * (n_mics - 1)) // 2  # number of unordered microphone pairs
# Only whole chunks are used; the remainder of the recording is dropped.
n_chunks = sounds.shape[1] // downsampling_factor // chunk_length
# NOTE(review): chunk_length counts downsampled samples, whereas fs is the rate
# returned by wavfile.read (before downsampling) -- confirm this is the intended scale.
max_diff_meters = (chunk_length * speed_of_sound) / (2 * fs)

# Divide sound into chunks: keep every downsampling_factor-th sample (plain
# decimation, no low-pass filter is applied here), drop the tail that does not
# fill a whole chunk, then split each microphone's signal into n_chunks rows.
decimated = sounds[:, ::downsampling_factor]
chunks = decimated[:, : n_chunks * chunk_length].reshape(n_mics, n_chunks, chunk_length)

# Start time of every chunk, expressed through the original sample rate fs.
chunk_duration = downsampling_factor * chunk_length / fs
chunk_times = np.arange(n_chunks) * chunk_duration

# Time sync ground truth to chunks: linearly interpolate each sender coordinate
# at the chunk start times. Result has one row per chunk, one column per coordinate.
sender_position_at_chunk = np.stack(
    [np.interp(chunk_times, time, coordinate) for coordinate in senders.T]
).T

# Create all microphone pairs.
#
# For every unordered pair (mic1, mic2) with mic1 < mic2 and every chunk, the
# input features are stacked along axis 2 as
#   0: real part of mic1's spectrum      1: real part of mic2's spectrum
#   2: imaginary part of mic1's spectrum 3: imaginary part of mic2's spectrum
# and the target is (distance sender->mic2) - (distance sender->mic1).

# Each microphone takes part in n_mics - 1 pairs, so its truncated spectrum and
# its distance to the sender are computed once here rather than once per pair.
spectra = np.stack(
    [sp.fft.rfft(chunks[mic])[:, :max_freq] for mic in range(n_mics)]
)  # (n_mics, n_chunks, max_freq), complex
sender_distances = np.stack(
    [np.linalg.norm(sender_position_at_chunk - receivers[mic], axis=1) for mic in range(n_mics)]
)  # (n_mics, n_chunks)

pairs = np.zeros((n_pairs, n_chunks, 4, max_freq))
pairs_gt = np.zeros((n_pairs, n_chunks))
counter = 0
# Iterate over the actual number of microphones instead of a hard-coded 12, so
# the cell also works for recordings with a different microphone count.
for mic1 in range(n_mics):
    for mic2 in range(mic1 + 1, n_mics):
        pairs[counter, :, 0] = spectra[mic1].real
        pairs[counter, :, 1] = spectra[mic2].real
        pairs[counter, :, 2] = spectra[mic1].imag
        pairs[counter, :, 3] = spectra[mic2].imag

        pairs_gt[counter] = sender_distances[mic2] - sender_distances[mic1]
        counter += 1

# Flatten (pair, chunk) into a single sample axis. The row count is spelled out
# because reshape(-1, ...) is ambiguous when there are no samples at all.
pairs = pairs.reshape(n_pairs * n_chunks, 4, max_freq)
pairs_gt = pairs_gt.reshape(n_pairs * n_chunks)

# Filter chunks: keep only the samples where neither the input features nor the
# ground-truth value contain a NaN.
keep_indx = [
    sample
    for sample in range(pairs.shape[0])
    if not (np.isnan(pairs[sample]).any() or np.isnan(pairs_gt[sample]).any())
]
pairs = pairs[keep_indx]
pairs_gt = pairs_gt[keep_indx]





In [134]:
# Store as dataset.
# NOTE: this rebinds dataset_path, which the loading cell uses for the raw data
# directory; from here on it is the path of the HDF5 output file.
dataset_path = "../datasets/music_0014_tdoa_test_data.hdf5"

with h5py.File(dataset_path, "w") as hdf5_file:
    # dtype "f" stores the values as single-precision floats.
    X = hdf5_file.create_dataset("input", pairs.shape, dtype="f")
    Y = hdf5_file.create_dataset("gt", pairs_gt.shape, dtype="f")

    # Write each array with one bulk assignment instead of issuing a separate
    # HDF5 write per sample. Skipped when there are no samples, which matches
    # the per-row loop doing nothing in that case.
    if pairs.shape[0] > 0:
        X[...] = pairs
        Y[...] = pairs_gt

    # Marker attribute stored on the "input" dataset.
    X.attrs.create("dataset_type", "unpackaged")
    




In [141]:
# Sanity check: reopen the file read-only and confirm the marker attribute on
# the "input" dataset round-tripped.
with h5py.File(dataset_path, "r") as stored_file:
    stored_type = stored_file["input"].attrs["dataset_type"]
    print(stored_type == "unpackaged")

True
