# Pre-processing the Data

This notebook prepares and featurizes the train and test splits of the Direction of Voice (DoV) dataset from FIGLAB.

Training Data:
- Room setup 1
- All utterance types
- All speakers except speakers 2 and 9

Test Data:
- Room setup 2
- All utterance types
- Only speakers 2 and 9

In [5]:
import os
import numpy as np
import pandas as pd
import librosa
import librosa.display

from IPython import display
from matplotlib import pyplot

In [6]:
# A simple wrapper class for (1-channel) audio data
# data is a 1-D NumPy array containing the data
# rate is a number expressing the samples per second
class Audio:
    """Thin wrapper around a single-channel audio clip.

    Attributes:
        data: 1-D NumPy array holding the samples.
        rate: Sampling rate, in samples per second.
    """

    def __init__(self, data, rate):
        self.data = data
        self.rate = rate

    def play(self):
        """Return an IPython audio player for the clip."""
        return display.Audio(self.data, rate=self.rate)

    def plot_wave(self):
        """Plot the time-domain waveform with librosa."""
        # Newer librosa releases provide `waveshow` in place of `waveplot`;
        # prefer it when available and fall back for older installs.
        plot = getattr(librosa.display, 'waveshow', None)
        if plot is None:
            plot = librosa.display.waveplot
        plot(self.data, sr=self.rate)

    def plot_spectrum(self):
        """Plot a dB-scaled magnitude spectrogram on a linear frequency axis."""
        # Window of 1/20 s (50 ms) worth of samples.
        n_fft = int(self.rate / 20)
        # Use one integer hop for both the STFT and the plot so the time
        # axis matches the frames that were actually computed.
        hop_length = n_fft // 4
        # `n_fft` is passed by keyword: newer librosa releases no longer
        # accept it positionally.
        spectrum = np.abs(librosa.stft(self.data, n_fft=n_fft,
                                       hop_length=hop_length))
        D = librosa.amplitude_to_db(spectrum, ref=np.max)
        librosa.display.specshow(D, y_axis='linear', sr=self.rate,
                                 hop_length=hop_length)

    @classmethod
    def fromfile(cls, fn):
        """Load the audio file `fn` at its native sampling rate (sr=None)."""
        return cls(*librosa.load(fn, sr=None))

In [12]:
# Credit to Yihui Xiong, 2017, 
# https://github.com/xiongyihui/tdoa/blob/master/gcc_phat.py

def gcc_phat(sig, refsig, fs=1, max_tau=None, interp=16):
    '''
    Compute the offset between the signal sig and the reference signal refsig
    using the Generalized Cross Correlation - Phase Transform (GCC-PHAT) method.

    Parameters:
        sig: 1-D NumPy array, the signal whose delay is estimated.
        refsig: 1-D NumPy array, the reference signal.
        fs: sampling rate; tau is expressed in units of 1 / fs.
        max_tau: optional bound on |tau|; restricts the search window to
            int(interp * fs * max_tau) lags on either side of zero.
        interp: interpolation factor applied to the cross-correlation.

    Returns:
        (tau, cc): tau is the estimated delay (positive when sig lags
        refsig) and cc is the cross-correlation restricted to the search
        window, ordered from lag -max_shift to lag +max_shift.
    '''

    # make sure the length for the FFT is larger or equal than len(sig) + len(refsig)
    n = sig.shape[0] + refsig.shape[0]

    # Generalized Cross Correlation Phase Transform
    SIG = np.fft.rfft(sig, n=n)
    REFSIG = np.fft.rfft(refsig, n=n)
    R = SIG * np.conj(REFSIG)

    # PHAT weighting keeps only the phase of each bin. Bins with zero
    # magnitude (e.g. digital silence) carry no phase information; divide
    # them by 1 so they contribute 0 instead of turning cc into NaN.
    magnitude = np.abs(R)
    magnitude[magnitude == 0] = 1.0

    cc = np.fft.irfft(R / magnitude, n=(interp * n))

    max_shift = int(interp * n / 2)
    if max_tau:
        max_shift = min(int(interp * fs * max_tau), max_shift)

    # Re-centre the circular correlation: negative lags first, then lags
    # 0..max_shift. The explicit start index (rather than cc[-max_shift:])
    # keeps the negative-lag part empty when max_shift is 0.
    cc = np.concatenate((cc[cc.shape[0] - max_shift:], cc[:max_shift + 1]))

    # find max cross correlation index
    shift = np.argmax(np.abs(cc)) - max_shift

    tau = shift / float(interp * fs)

    return tau, cc

In [31]:
def get_recordings(subject_nums, trial_num):
    """Enumerate the recording directories for the given subjects and trial.

    Parameters:
        subject_nums: iterable of subject identifiers; each is prefixed
            with 's' to form the subject directory name.
        trial_num: trial number appended to every trial name.

    Returns:
        (recording_paths, rel_recording_paths): two parallel lists. The
        first holds absolute directory paths under ./data/raw/; the second
        holds the matching (subject, trial, angle) name triples.
    """
    raw_root = os.path.join(os.path.abspath('.'), 'data/raw/')

    # One trial name per location tag.
    trial_names = []
    for tag in ('downstairs_nowall', 'downstairs_wall',
                'upstairs_nowall', 'upstairs_wall'):
        trial_names.append(f'{tag}_trial{trial_num}')

    # Directory names of the form <letter><step>_<digit>_<degrees>,
    # e.g. 'A0_1_0', 'B1_3_45', 'C2_5_90'.
    angle_names = []
    for letter, digit in (('A', '1'), ('B', '3'), ('C', '5')):
        for step in range(3):
            angle_names.append(
                letter + str(step) + '_' + digit + '_' + str(45 * step))

    recording_paths = []
    rel_recording_paths = []
    for num in subject_nums:
        subject = 's' + str(num)
        for trial in trial_names:
            trial_dir = os.path.join(subject, subject + '_' + trial)
            for angle_dir in angle_names:
                recording_paths.append(
                    os.path.join(raw_root, trial_dir, angle_dir))
                rel_recording_paths.append((subject, trial, angle_dir))

    return (recording_paths, rel_recording_paths)

In [34]:
# Training split: speakers 1-10 minus the held-out speakers 2 and 9, trial 1.
train_subjects = [str(n) for n in range(1, 11) if n not in (2, 9)]
(train_recording_paths,
 train_rel_recording_paths) = get_recordings(train_subjects, trial_num=1)

# Test split: only the held-out speakers 2 and 9, trial 2.
test_subjects = ['2', '9']
(test_recording_paths,
 test_rel_recording_paths) = get_recordings(test_subjects, trial_num=2)

In [35]:
# Column layout of the featurized table: four metadata columns, then three
# summary features per channel pair, then 23 raw GCC-PHAT values per pair.
_channel_pairs = [(a, b) for a in range(4) for b in range(a + 1, 4)]
_summary_cols = [f'gccphat_{a}_{b}_{stat}'
                 for a, b in _channel_pairs
                 for stat in ('maxshift', 'auc', 'peakval')]
_value_cols = [f'gccphatval_{a}_{b}_{lag}'
               for a, b in _channel_pairs
               for lag in range(23)]
col_names = ['sample', 'trial', 'angle_pos', 'dov'] + _summary_cols + _value_cols

In [44]:
def get_featurized_data(recording_paths, rel_recording_paths):
    """Build the GCC-PHAT feature table for a set of recording directories.

    For every directory and every direction of voice (0, 45, ..., 315
    degrees) the first four channel files are loaded and GCC-PHAT is computed
    for each of the six channel pairs.

    Parameters:
        recording_paths: list of directories containing the .wav files.
        rel_recording_paths: parallel list of (sample, trial, angle_pos)
            name triples used as metadata columns.

    Returns:
        A pandas DataFrame with one row per (directory, angle) and the
        columns listed in `col_names`.
    """
    # Rows are collected in a list and turned into a DataFrame once at the
    # end: DataFrame.append was removed in pandas 2.0 and copied the whole
    # frame on every call.
    data_rows = []
    for recording_path, rel_path in zip(recording_paths, rel_recording_paths):
        for angle in range(0, 360, 45):
            channel_recordings = [os.path.join(recording_path, f'recording{r}_{angle}_{i}.wav') for r in range(2) for i in range(1,5)]
            print(channel_recordings)
            # NOTE(review): only the first four files (recording0, channels
            # 1-4) are ever used below, so only those are loaded; the
            # recording1 files are listed but ignored - confirm that is intended.
            audio_files = [Audio.fromfile(r) for r in channel_recordings[:4]]
            four_channels = [a.data for a in audio_files]
            rate = audio_files[0].rate
            data_row = {
                'sample': rel_path[0],
                'trial': rel_path[1],
                'angle_pos': rel_path[2],
                'dov': angle
            }
            for i in range(4):
                for j in range(i+1,4):
                    # max_tau is presumably the largest possible
                    # inter-microphone delay in seconds - TODO confirm.
                    tau, cc = gcc_phat(four_channels[i], four_channels[j],
                                       fs=rate, max_tau=0.236 * 1e-3, interp=1)
                    # Index 11 is the centre (zero lag) of cc when it holds
                    # 23 samples, i.e. when max_shift is 11 - assumes the
                    # sample rate yields that window; TODO confirm.
                    data_row[f'gccphat_{i}_{j}_peakval'] = cc[11]
                    data_row[f'gccphat_{i}_{j}_auc'] = np.sum(cc)
                    data_row[f'gccphat_{i}_{j}_maxshift'] = tau
                    for k in range(23):
                        data_row[f'gccphatval_{i}_{j}_{k}'] = cc[k]
            data_rows.append(data_row)
    return pd.DataFrame(data_rows, columns=col_names)

In [40]:
# Featurize both splits; each call reads the .wav files under the listed
# recording directories and returns one DataFrame row per (directory, angle).
train_df = get_featurized_data(train_recording_paths, train_rel_recording_paths)
test_df = get_featurized_data(test_recording_paths, test_rel_recording_paths)



In [41]:
# Display the featurized training table (rendered cell output follows).
train_df

Unnamed: 0,sample,trial,angle_pos,dov,gccphat_0_1_maxshift,gccphat_0_1_auc,gccphat_0_1_peakval,gccphat_0_2_maxshift,gccphat_0_2_auc,gccphat_0_2_peakval,...,gccphatval_2_3_13,gccphatval_2_3_14,gccphatval_2_3_15,gccphatval_2_3_16,gccphatval_2_3_17,gccphatval_2_3_18,gccphatval_2_3_19,gccphatval_2_3_20,gccphatval_2_3_21,gccphatval_2_3_22
0,s1,downstairs_nowall_trial1,A0_1_0,0,0.000000,0.929901,0.918024,-0.000125,0.802662,-0.081439,...,0.000874,0.057902,0.000011,0.001541,-0.052348,0.000403,0.000849,0.023651,0.000048,0.001163
1,s1,downstairs_nowall_trial1,A0_1_0,45,0.000000,0.935565,0.861144,-0.000125,0.829486,-0.119773,...,0.000171,-0.050401,0.000652,0.000966,0.017136,0.000918,-0.000259,-0.035492,0.000443,0.000769
2,s1,downstairs_nowall_trial1,A0_1_0,90,0.000000,0.977008,0.580438,-0.000063,0.830345,0.062272,...,-0.000743,-0.007790,-0.000195,0.001031,0.055236,0.002341,-0.001265,-0.038105,-0.000551,0.000891
3,s1,downstairs_nowall_trial1,A0_1_0,135,0.000000,0.909018,0.372427,-0.000063,0.872800,0.160522,...,-0.001413,0.076345,-0.000457,0.001607,0.070554,0.003886,-0.000515,-0.046689,-0.001151,0.001109
4,s1,downstairs_nowall_trial1,A0_1_0,180,0.000000,0.897188,0.428891,-0.000063,0.823948,0.131519,...,-0.000334,0.157349,0.002033,0.001508,0.063899,-0.000289,0.000181,-0.036023,0.000971,0.000908
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2299,s10,upstairs_wall_trial1,C2_5_90,135,0.000063,0.812608,0.144442,0.000125,0.799162,0.163425,...,-0.001268,0.212161,-0.000079,0.004395,0.123927,0.001528,-0.001783,-0.000628,-0.000909,0.002311
2300,s10,upstairs_wall_trial1,C2_5_90,180,-0.000063,0.830793,0.129913,0.000125,0.827165,0.177814,...,0.003051,0.244270,0.001614,0.003537,0.125961,0.002035,0.001809,-0.020537,0.000163,0.000440
2301,s10,upstairs_wall_trial1,C2_5_90,225,0.000063,0.841011,0.106989,-0.000125,0.817855,0.114305,...,0.001666,0.232318,0.001054,0.003141,0.161984,0.003869,-0.001171,-0.018376,-0.000446,0.000373
2302,s10,upstairs_wall_trial1,C2_5_90,270,-0.000063,0.835862,0.125797,0.000125,0.805296,0.133513,...,0.001030,0.231768,0.001851,0.001683,0.138793,0.004217,-0.000450,-0.008545,-0.003352,0.001861


In [42]:
# Display the featurized test table (rendered cell output follows).
test_df

Unnamed: 0,sample,trial,angle_pos,dov,gccphat_0_1_maxshift,gccphat_0_1_auc,gccphat_0_1_peakval,gccphat_0_2_maxshift,gccphat_0_2_auc,gccphat_0_2_peakval,...,gccphatval_2_3_13,gccphatval_2_3_14,gccphatval_2_3_15,gccphatval_2_3_16,gccphatval_2_3_17,gccphatval_2_3_18,gccphatval_2_3_19,gccphatval_2_3_20,gccphatval_2_3_21,gccphatval_2_3_22
0,s2,downstairs_nowall_trial2,A0_1_0,0,0.000000,0.902283,0.832167,-0.000125,0.827140,-0.027040,...,0.000064,0.092199,0.000443,0.000384,-0.072380,7.316951e-04,-0.000119,0.036885,-0.000321,0.000839
1,s2,downstairs_nowall_trial2,A0_1_0,45,0.000000,0.896489,0.830898,-0.000125,0.814318,-0.094053,...,0.000500,-0.045744,0.000978,0.001385,0.026229,5.011663e-07,-0.000058,-0.049395,-0.000732,0.000847
2,s2,downstairs_nowall_trial2,A0_1_0,90,0.000000,0.891952,0.507027,-0.000063,0.784551,0.027260,...,-0.001323,-0.030926,-0.001853,0.003307,0.086846,3.034136e-03,-0.001703,-0.082512,0.000062,0.001197
3,s2,downstairs_nowall_trial2,A0_1_0,135,0.000000,0.911088,0.317716,-0.000063,0.858547,0.126692,...,0.000976,0.112668,0.001509,0.002284,0.083430,8.705843e-04,-0.001928,-0.076979,0.000095,-0.000177
4,s2,downstairs_nowall_trial2,A0_1_0,180,0.000000,0.862331,0.410316,-0.000125,0.891017,0.170486,...,-0.003373,0.179284,0.001355,0.004418,0.043853,1.890471e-03,-0.001668,-0.057378,-0.000405,0.002111
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
571,s9,upstairs_wall_trial2,C2_5_90,135,0.000063,0.810723,0.087034,0.000125,0.803437,0.171247,...,0.003042,0.194054,0.002628,0.000793,0.158648,3.243046e-03,-0.000390,-0.022326,0.001453,0.001150
572,s9,upstairs_wall_trial2,C2_5_90,180,0.000063,0.800126,0.112726,0.000125,0.747502,0.116681,...,0.002551,0.237360,0.002304,0.001574,0.143316,1.079771e-03,0.001595,-0.037601,0.000989,0.000004
573,s9,upstairs_wall_trial2,C2_5_90,225,0.000063,0.845584,0.121036,0.000125,0.805607,0.135754,...,0.002647,0.194526,0.001566,0.001928,0.149883,1.839737e-03,0.001114,-0.012097,0.001042,0.001085
574,s9,upstairs_wall_trial2,C2_5_90,270,0.000063,0.861259,0.125405,-0.000125,0.861033,0.155843,...,0.004018,0.241440,0.001979,0.001108,0.122062,4.829201e-04,0.002040,-0.009795,0.001470,0.000012


In [43]:
# Persist both splits as CSV (the row index is written as the first column).
# Create the output directory first: to_csv does not create missing parent
# directories and would otherwise fail on a fresh checkout.
os.makedirs('data/featurized', exist_ok=True)
train_df.to_csv('data/featurized/featurized_train_data.csv')
test_df.to_csv('data/featurized/featurized_test_data.csv')