# Generate .csv files for CNN

## Imports and Constants

In [4]:
import math
import os
from itertools import combinations

import numpy as np
from scipy.io import wavfile
import pandas as pd

from sklearn.preprocessing import OneHotEncoder

# Angular resolution to which the azimuth labels are rounded
RESOLUTION = 10

# Number of samples that make up one ML feature (one analysis frame).
# A power-of-two frame length keeps the Fourier transform fast.
# frame duration = frame_size / sample_rate = 2048 / 16 kHz = 128 ms
# (for comparison: the resolution of human hearing is about 10 ms)
SAMPLES = 2048

# Hop, in samples, between the starts of consecutive frames; together with
# SAMPLES it determines how much neighbouring features overlap
STEP = 1024

# Training rooms dimensions
ROOMS = {
    'small': np.array([4, 4, 3]),
    'medium': np.array([6, 6, 3]),
    'large': np.array([8, 8, 3]),
}

# Testing rooms dimensions
TEST_ROOMS = {
    'small': np.array([5, 5, 2]),
    'medium': np.array([7, 7, 2]),
    'large': np.array([9, 9, 2]),
}

# Directory holding the WAV recordings. Built with os.path.join so that the
# separator matches the host OS (a hard-coded backslash path only resolves on
# Windows); the trailing '' keeps the trailing separator of the original value.
AUDIO_PATH = os.path.join('..', 'data', 'half_circle', '0.001', '')

# Number of microphones
MICS_NUMBER = 2
# Number of distinct (unordered) microphone pairs
MIC_COMBS = math.comb(MICS_NUMBER, 2)

## Utils

In [5]:
def gcc_phat(x_1, x_2, FS=16000, interp=1, mic_distance=0.2, sound_speed=340):
    """Return the GCC-PHAT cross-correlation of two signals around zero lag.

    Both signals are transformed with a real FFT, each spectrum is normalised
    to unit magnitude (the PHAT weighting), and the inverse transform of
    X_1(f) * conj(X_2(f)) is trimmed to the lags that are physically possible
    for the given microphone spacing.
    See Knapp and Carter (1976) for reference.

    Parameters
    ----------
    x_1, x_2 : 1-D sequences with the samples of the two microphone signals.
    FS : sampling rate in Hz.
    interp : interpolation factor; the inverse FFT is evaluated on
        ``interp`` times as many points.
    mic_distance : distance between the two microphones in metres.
    sound_speed : assumed speed of sound in m/s.

    Returns
    -------
    numpy.ndarray with ``2 * max_len + 1`` values ordered from lag
    ``-max_len`` to ``+max_len``, where
    ``max_len = ceil(mic_distance / sound_speed * FS * interp)``, capped at
    half the length of the correlation for very short inputs.
    """
    # Length of the full linear cross-correlation, rounded up to an even number
    n = len(x_1) + len(x_2) - 1
    n += n % 2

    # Fourier transforms of the two signals
    X_1 = np.fft.rfft(x_1, n=n)
    X_2 = np.fft.rfft(x_2, n=n)

    # Normalize by the magnitude of FFT - because PHAT.
    # Bins with zero magnitude are left untouched to avoid dividing by zero.
    for spectrum in (X_1, X_2):
        magnitude = np.abs(spectrum)
        np.divide(spectrum, magnitude, out=spectrum, where=magnitude != 0)

    # GCC-PHAT = [X_1(f)X_2*(f)] / |X_1(f)X_2*(f)|
    CC = X_1 * np.conj(X_2)
    cc = np.fft.irfft(CC, n=n * interp)

    # Maximum delay between a pair of microphones,
    # expressed in a number of samples.
    max_len = math.ceil(mic_distance / sound_speed * FS * interp)

    # Never reach past half of the circular correlation: beyond that point the
    # negative- and positive-lag slices would overlap and duplicate samples.
    max_len = min(max_len, len(cc) // 2)

    # Trim the cc vector to only include a small number of samples around the
    # origin. The start index is written explicitly because cc[-0:] would
    # select the whole vector when max_len is 0.
    negative_lags = cc[len(cc) - max_len:]
    non_negative_lags = cc[:max_len + 1]

    # Return the cross correlation
    return np.concatenate((negative_lags, non_negative_lags))


def compute_gcc_matrix(observation, fs, interp=1):
    """Transform one multi-channel frame into its GCC-PHAT vectors.

    Parameters
    ----------
    observation : 2-D array indexed as ``observation[:, mic]``, i.e. one
        column per microphone.
    fs : sampling rate, forwarded to ``gcc_phat`` as ``FS``.
    interp : interpolation factor forwarded to ``gcc_phat``.

    Returns
    -------
    list with one GCC vector per unordered microphone pair. Pairs are
    generated with ``combinations(range(MICS_NUMBER), 2)``, the same order
    ``create_dataframe`` uses to name its columns.
    """
    # Compute GCC for every pair of microphones
    return [
        gcc_phat(observation[:, mic_1], observation[:, mic_2], FS=fs, interp=interp)
        for mic_1, mic_2 in combinations(range(MICS_NUMBER), 2)
    ]



def create_observations(wav_signals, fs, label, samples=1, step=1, resolution=RESOLUTION, interp=1):
    """Cut a recording into frames and turn every frame into GCC features.

    Frames of ``samples`` consecutive rows are taken every ``step`` rows;
    only complete frames are used. Each frame is passed to
    ``compute_gcc_matrix`` and paired with the same label: ``label`` rounded
    to the nearest multiple of ``resolution``, with 360 mapped to 0.

    Note: the rounding relies on Python's built-in ``round``, which rounds
    halves to the nearest even value (label 45 with resolution 10 gives 40,
    label 55 gives 60).

    Returns
    -------
    (X, y) : list of transformed frames and list of equally many labels.
    """
    # Quantise the label to the requested resolution
    frame_label = round(label / resolution) * resolution
    if frame_label == 360:
        frame_label = 0

    # Start offsets of every complete frame inside the signal
    frame_starts = range(0, len(wav_signals) - samples + 1, step)

    # Extract each frame and transform it into a GCC matrix
    X = [
        compute_gcc_matrix(
            np.array(wav_signals[start : start + samples]), fs, interp=interp
        )
        for start in frame_starts
    ]

    # Every frame of the recording shares the same label
    y = [frame_label] * len(X)

    return X, y



def create_dataframe(subset, samples=20, step=5, resolution=RESOLUTION, interp=1):
    """Build one feature DataFrame from all WAV files of a subset.

    Every regular file in ``AUDIO_PATH`` whose name contains ``subset`` and
    ends in ``wav`` is read, split into frames by ``create_observations`` and
    stored as rows of GCC values. File names are split on ``_``; field 2
    (0-based) is parsed as the integer label, field 4 as the integer ``dist``
    and field 6 is used verbatim as ``room``.

    Parameters
    ----------
    subset : substring selecting the files (e.g. 'train').
    samples, step, resolution, interp : forwarded to ``create_observations``.

    Returns
    -------
    pandas.DataFrame with one ``mics{a}{b}_{k}`` column per GCC value plus
    the ``dist``, ``room`` and ``label`` columns.

    Raises
    ------
    ValueError
        If no file of the subset produced at least one frame.
    """
    # Select the WAV files up front so the progress counter only counts files
    # that are actually processed; sort them because os.listdir returns
    # entries in arbitrary order and the row order should be reproducible.
    files = sorted(
        f for f in os.listdir(AUDIO_PATH)
        if subset in f
        and f.endswith('wav')
        and os.path.isfile(os.path.join(AUDIO_PATH, f))
    )

    dataframes = []

    # Loop through all WAVs
    for file_number, file in enumerate(files, start=1):
        print(f'{subset} file {file_number}/{len(files)}', end='\r')

        fs, wav_signals = wavfile.read(os.path.join(AUDIO_PATH, file))

        # Metadata encoded in the underscore-separated file name
        fields = file.split('_')
        label = int(fields[2])
        dist = int(fields[4])
        room = fields[6]

        # Create observations from a given WAV file
        X_temp, y_temp = create_observations(
            wav_signals, fs, label, samples, step, resolution, interp=interp)

        # A recording shorter than one frame yields no observations; skip it
        # instead of failing on the shape lookup below.
        if not X_temp:
            continue

        # X_temp has shape (frames, mic pairs, GCC length)
        gcc_length = np.shape(X_temp)[2]
        cols = [
            f'mics{mic_1+1}{mic_2+1}_{lag}'
            for mic_1, mic_2 in combinations(range(MICS_NUMBER), r=2)
            for lag in range(gcc_length)
        ]

        # Flatten the GCC vectors of all pairs into one row per frame
        df = pd.DataFrame(data=np.reshape(X_temp, (len(X_temp), -1)), columns=cols)
        df['dist'], df['room'] = dist, room

        # Add label column
        df['label'] = y_temp
        dataframes.append(df)

    if not dataframes:
        raise ValueError(
            f'No usable "{subset}" WAV files found in {AUDIO_PATH!r}')

    return pd.concat(dataframes, ignore_index=True)



def one_hot_encode(encoder, y_train, y_test):
    """One-hot encode training and testing labels with a shared encoder.

    Both label arrays are reshaped into single-column 2-D arrays. The encoder
    is fitted on the training labels only, and the object returned by
    ``encoder.fit`` is then used to transform both sets.

    Returns
    -------
    (y_train, y_test) : the transformed training and testing labels.
    """
    # The encoder works on column vectors
    train_column, test_column = (
        labels.reshape(-1, 1) for labels in (y_train, y_test)
    )

    # Fit on the training labels, then encode both sets with the fitted encoder
    fitted = encoder.fit(train_column)
    encoded_train = fitted.transform(train_column)
    encoded_test = fitted.transform(test_column)

    return encoded_train, encoded_test



def create_whole_dataset(df_train, df_test, encoder):
    """Extract feature and label arrays from the train and test dataframes.

    The feature arrays contain every column except 'dist', 'room' and
    'label'. The 'label' columns are one-hot encoded with ``one_hot_encode``
    before being returned.

    Returns
    -------
    (X_train, y_train, X_test, y_test)
    """
    # Columns that are not part of the feature vector
    non_feature_columns = ['dist', 'room', 'label']

    # Create train/test observations
    X_train, X_test = (
        frame.drop(columns=non_feature_columns).values
        for frame in (df_train, df_test)
    )

    # Create train/test labels
    train_labels = df_train['label'].values
    test_labels = df_test['label'].values
    y_train, y_test = one_hot_encode(encoder, train_labels, test_labels)

    return X_train, y_train, X_test, y_test

## Generate dataframes for Test and Train

In [6]:
# Create the output directory first: to_csv does not create missing folders,
# and failing only after the feature extraction would waste the whole run.
os.makedirs('../training_data', exist_ok=True)

# Build one dataframe of GCC features per subset
df_train = create_dataframe('train', samples=SAMPLES, step=STEP, resolution=RESOLUTION)
df_validation = create_dataframe('validation', samples=SAMPLES, step=STEP, resolution=RESOLUTION)
df_test = create_dataframe('test', samples=SAMPLES, step=STEP, resolution=RESOLUTION)

# Persist the subsets for the CNN training notebook
df_train.to_csv('../training_data/azimuth_train_dataset.csv')
df_validation.to_csv('../training_data/azimuth_validation_dataset.csv')
df_test.to_csv('../training_data/azimuth_test_dataset.csv')
print('Subsets csv generated!')

# Create numpy arrays with observations and one-hot labels
encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
X_train, y_train, X_test, y_test = create_whole_dataset(df_train, df_test, encoder)

# Sanity check of the resulting shapes and a preview of the training data
print(np.shape(X_train), np.shape(X_test), np.shape(y_train), np.shape(y_test))
pd.set_option('display.max_columns', 15)
df_train.head(5)

Subsets csv generated!
(57593, 21) (2166, 21) (57593, 19) (2166, 19)


Unnamed: 0,mics12_0,mics12_1,mics12_2,mics12_3,mics12_4,mics12_5,mics12_6,...,mics12_17,mics12_18,mics12_19,mics12_20,dist,room,label
0,0.036792,0.058609,0.04576,0.024997,-0.006422,0.051311,0.058871,...,-0.003219,-0.025594,0.003241,0.000271,100,large,0
1,0.067049,0.084604,0.015191,0.068866,0.054778,0.058599,0.082622,...,-0.016914,-0.031431,0.002551,-0.067137,100,large,0
2,0.126132,0.224345,0.046403,0.028237,0.02104,0.072457,0.125366,...,0.027638,0.01894,0.046008,0.0016,100,large,0
3,0.175253,0.254358,-0.059829,-0.002593,-0.039549,0.019071,0.220031,...,-0.024368,-0.033831,-0.009055,-0.010241,100,large,0
4,0.150964,0.253522,-0.054768,0.05243,-0.018582,0.051489,0.19852,...,0.008534,0.020387,0.036529,0.031182,100,large,0
