# Data Reader for Integration

This Jupyter Notebook integrates data by extracting audio from `.wav` files and localization information from `.csv` files. The processed data is saved into an `.h5` file, preparing it for the training phase.


### 1. Check the structure of the audio file

In [2]:
import wave
import numpy as np

def read_wav_file(file_path):
    """
    Read the samples of a WAV file into a NumPy array.
    Parameters:
        file_path: str
            Path of the WAV file to read.
    Returns:
        audio_data: np.ndarray
            Integer samples. Shape is (n_frames, n_channels) when the file
            has more than one channel, otherwise a 1-D array of samples.
    Raises:
        ValueError
            If the file's sample width is not 1, 2 or 4 bytes.
    """
    # Bytes-per-sample -> NumPy dtype. 8-bit WAV samples are unsigned, wider
    # ones are signed. Decoding every file as int16 would silently scramble
    # files stored with any other width.
    sample_dtypes = {1: np.uint8, 2: np.int16, 4: np.int32}

    with wave.open(file_path, 'rb') as wav_file:
        n_channels = wav_file.getnchannels()    # Number of channels
        sample_width = wav_file.getsampwidth()  # Bytes per sample
        n_frames = wav_file.getnframes()        # Total number of frames
        raw_data = wav_file.readframes(n_frames)  # Read all frames

    if sample_width not in sample_dtypes:
        raise ValueError(
            f"Unsupported sample width of {sample_width} bytes in {file_path!r}; "
            f"expected one of {sorted(sample_dtypes)}"
        )

    audio_data = np.frombuffer(raw_data, dtype=sample_dtypes[sample_width])

    # Samples are interleaved frame by frame; give each channel its own column.
    if n_channels > 1:
        audio_data = audio_data.reshape(-1, n_channels)

    return audio_data

In [2]:
if __name__ == "__main__":
    # Quick manual check of read_wav_file on a single recording.
    # The path below is an absolute path on the author's machine; replace it
    # with the location of a WAV file on your own system.
    wav_file_path = r'C:\Users\grizi\Desktop\TUD\year2\thesis\neural_network\DoA_Net\data\mic_dev\mic_dev\fold6_room1_mix087_ov2.wav'
    audio_data = read_wav_file(wav_file_path)
    # Print the shape of the loaded sample array.
    print(audio_data.shape)

FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\grizi\\Desktop\\TUD\\year2\\thesis\\neural_network\\DoA_Net\\data\\mic_dev\\mic_dev\\fold6_room1_mix087_ov2.wav'

### 2. Check the structure of the CSV file: read its contents into an array

In [1]:
import csv
import numpy as np

def read_csv_file_to_numpy(file_path):
    """
    Load a CSV file, treating its first line as the header.
    Parameters:
        file_path: str
            Path of the CSV file to read (opened as UTF-8 text).
    Returns:
        header: list
            Fields of the first line.
        data: np.ndarray
            Array built from the remaining rows; values are kept as the
            strings produced by csv.reader.
    """
    with open(file_path, mode='r', newline='', encoding='utf-8') as handle:
        reader = csv.reader(handle)

        # The first line is consumed as the header and reported.
        header = next(reader)
        print(f"Header: {header}")

        # Everything left in the reader is the data.
        data = np.array(list(reader))
        print(f"Data Shape: {data.shape}")

    return header, data

if __name__ == "__main__":
    # Quick manual check of read_csv_file_to_numpy on a single metadata file.
    # The path below is an absolute path on the author's machine; replace it
    # with the location of a CSV file on your own system.
    csv_file_path = r'C:\Users\grizi\Desktop\TUD\year2\thesis\neural_network\DoA_Net\data\metadata_dev\metadata_dev\fold1_room1_mix001_ov1.csv'
    header, data = read_csv_file_to_numpy(csv_file_path)
    
    # Print the first five data rows
    print(data[:5])

FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\grizi\\Desktop\\TUD\\year2\\thesis\\neural_network\\DoA_Net\\data\\metadata_dev\\metadata_dev\\fold1_room1_mix001_ov1.csv'

### 3. Select the recordings that contain only one sound source (`_ov1`)

First, split the audio files into training, validation, and test folders.

In [4]:
import os
import shutil

def split_ov1_files_into_folders(src_folder, train_folder, val_folder, test_folder):
    """
    Move every '*_ov1.wav' file from src_folder into a split folder.

    Files are routed by filename prefix: fold3-fold6 go to train_folder,
    fold2 to val_folder and fold1 to test_folder. Files that do not end with
    '_ov1.wav', or that start with none of these prefixes, are left in place.
    Parameters:
        src_folder: str
            Folder containing the WAV files to distribute.
        train_folder, val_folder, test_folder: str
            Destination folders; created if they do not exist.
    """
    # (filename prefixes, destination) pairs, checked in this order.
    routes = (
        (('fold3', 'fold4', 'fold5', 'fold6'), train_folder),
        (('fold2',), val_folder),
        (('fold1',), test_folder),
    )

    # exist_ok avoids the check-then-create race of testing os.path.exists first.
    for _, destination in routes:
        os.makedirs(destination, exist_ok=True)

    for filename in os.listdir(src_folder):
        if not filename.endswith('_ov1.wav'):
            continue
        for prefixes, destination in routes:
            if filename.startswith(prefixes):
                shutil.move(os.path.join(src_folder, filename),
                            os.path.join(destination, filename))
                break

if __name__ == "__main__":
    # All paths below are absolute paths on the author's machine; adapt them
    # to your own data layout before running.
    # Source folder path
    src_folder = r'C:\Users\grizi\Desktop\TUD\year2\thesis\neural_network\DoA_Net\data\mic_dev\mic_dev'
    # Destination folders path
    train_folder = r'C:\Users\grizi\Desktop\TUD\year2\thesis\neural_network\DoA_Net\data\ov1_mic_dev\train'
    val_folder = r'C:\Users\grizi\Desktop\TUD\year2\thesis\neural_network\DoA_Net\data\ov1_mic_dev\val'
    test_folder = r'C:\Users\grizi\Desktop\TUD\year2\thesis\neural_network\DoA_Net\data\ov1_mic_dev\test'
    
    # Split files (this MOVES the matching files out of src_folder)
    split_ov1_files_into_folders(src_folder, train_folder, val_folder, test_folder)

FileNotFoundError: [WinError 3] 系统找不到指定的路径。: 'C:\\Users\\grizi\\Desktop\\TUD\\year2\\thesis\\neural_network\\DoA_Net\\data\\mic_dev\\mic_dev'

Then split the label files into training, validation, and test sets in the same way.

In [17]:
import os
import shutil

def split_csv_files_into_folders(src_folder, train_folder, val_folder, test_folder):
    """
    Move every '*_ov1.csv' file from src_folder into a split folder.

    Files are routed by filename prefix: fold3-fold6 go to train_folder,
    fold2 to val_folder and fold1 to test_folder. Files that do not end with
    '_ov1.csv', or that start with none of these prefixes, are left in place.
    Parameters:
        src_folder: str
            Folder containing the CSV files to distribute.
        train_folder, val_folder, test_folder: str
            Destination folders; created if they do not exist.
    """
    # (filename prefixes, destination) pairs, checked in this order.
    routes = (
        (('fold3', 'fold4', 'fold5', 'fold6'), train_folder),
        (('fold2',), val_folder),
        (('fold1',), test_folder),
    )

    # exist_ok avoids the check-then-create race of testing os.path.exists first.
    for _, destination in routes:
        os.makedirs(destination, exist_ok=True)

    for filename in os.listdir(src_folder):
        # Only single-source ('_ov1') label files are moved, not every .csv.
        if not filename.endswith('_ov1.csv'):
            continue
        for prefixes, destination in routes:
            if filename.startswith(prefixes):
                shutil.move(os.path.join(src_folder, filename),
                            os.path.join(destination, filename))
                break

if __name__ == "__main__":
    # All paths below are absolute paths on the author's machine; adapt them
    # to your own data layout before running.
    # Source folder path
    src_folder = r'C:\Users\grizi\Desktop\TUD\year2\thesis\neural_network\DoA_Net\data\metadata_dev\metadata_dev'
    # Destination folders path
    train_folder = r'C:\Users\grizi\Desktop\TUD\year2\thesis\neural_network\DoA_Net\data\csv_dev\train'
    val_folder = r'C:\Users\grizi\Desktop\TUD\year2\thesis\neural_network\DoA_Net\data\csv_dev\val'
    test_folder = r'C:\Users\grizi\Desktop\TUD\year2\thesis\neural_network\DoA_Net\data\csv_dev\test'
    
    # Split files (this MOVES the matching files out of src_folder)
    split_csv_files_into_folders(src_folder, train_folder, val_folder, test_folder)

### 4. Align the audio data with the label data and save both in an `.h5` file

In [4]:
import os
import pandas as pd
import h5py

In [5]:


# Specify the folder path where the CSV files are located
folder_path = r"C:\Users\grizi\Desktop\TUD\year2\thesis\neural_network\DoA_Net\data\csv_dev\train"
# Specify the path where the .h5 file will be saved
h5_file_path = r"C:\Users\grizi\Desktop\TUD\year2\thesis\neural_network\DoA_Net\data\training_more.h5"

# Each label file has a WAV file with the same base name in the parallel
# 'ov1_mic_dev' tree. Only the folder name and the file extension are
# rewritten, so a stray "csv" elsewhere in the path is left untouched.
wav_folder = folder_path.replace('csv_dev', 'ov1_mic_dev')

# One label row covers 0.1 s of audio at 24 kHz, i.e. 2400 samples. Integer
# arithmetic keeps the sample offsets exact; int(index * 0.1 * 24000) goes
# through floating point before being truncated.
frame_len = 2400
n_channels = 4

label_chunks = []  # kept label rows, one array per CSV file
wav_data = []      # one (frame_len, n_channels) audio slice per kept label row

# Sorted so the row order of the resulting dataset is reproducible.
for file_name in sorted(os.listdir(folder_path)):
    if not file_name.endswith(".csv"):  # Ensure only .csv files are processed
        continue
    file_path = os.path.join(folder_path, file_name)

    # find the correlated wav file
    wav_path = os.path.join(wav_folder, os.path.splitext(file_name)[0] + '.wav')

    # get the audio data from the wav file
    audio_data = read_wav_file(wav_path)

    # Read the CSV file (no header row) into an array of label rows
    df = pd.read_csv(file_path, header=None)
    df_array = np.array(df)

    # get the related audio data with respect to the ground truth labels
    keep = np.zeros(len(df_array), dtype=bool)
    for row_number, row in enumerate(df_array):
        index = int(row[0])  # Assuming the first column contains the frame index
        start_idx = index * frame_len
        end_idx = start_idx + frame_len
        segment = audio_data[start_idx:end_idx, :]

        if segment.shape != (frame_len, n_channels):
            # Incomplete slice (e.g. the label runs past the end of the
            # recording): report it and drop both the audio and its label so
            # the two datasets stay aligned and the audio array stays
            # rectangular.
            print(index,start_idx,end_idx)
            print(segment.shape)
            continue

        wav_data.append(segment)
        keep[row_number] = True

    label_chunks.append(df_array[keep])

if not label_chunks:
    raise FileNotFoundError(f"No .csv label files found in {folder_path}")

# Concatenate once instead of growing the label array inside the loop.
dir_data = np.vstack(label_chunks)
wav_data = np.array(wav_data)
print(wav_data.shape)
print(dir_data.shape)

# Save the label data and the aligned audio slices to the h5 file
with h5py.File(h5_file_path, 'w') as h5file:
    h5file.create_dataset('label', data=dir_data)
    h5file.create_dataset('audio', data=wav_data)


(98396, 2400, 4)
(98396, 5)


Save the test set to an `.h5` file in the same way.

In [13]:
# Specify the folder path where the CSV files are located
folder_path = r"C:\Users\grizi\Desktop\TUD\year2\thesis\neural_network\DoA_Net\data\csv_dev\test"
# Specify the path where the .h5 file will be saved
h5_file_path = r"C:\Users\grizi\Desktop\TUD\year2\thesis\neural_network\DoA_Net\data\test.h5"

# Each label file has a WAV file with the same base name in the parallel
# 'ov1_mic_dev' tree. Only the folder name and the file extension are
# rewritten, so a stray "csv" elsewhere in the path is left untouched.
wav_folder = folder_path.replace('csv_dev', 'ov1_mic_dev')

# One label row covers 0.1 s of audio at 24 kHz, i.e. 2400 samples. Integer
# arithmetic keeps the sample offsets exact; int(index * 0.1 * 24000) goes
# through floating point before being truncated.
frame_len = 2400
n_channels = 4

label_chunks = []  # kept label rows, one array per CSV file
wav_data = []      # one (frame_len, n_channels) audio slice per kept label row

# Sorted so the row order of the resulting dataset is reproducible.
for file_name in sorted(os.listdir(folder_path)):
    if not file_name.endswith(".csv"):  # Ensure only .csv files are processed
        continue
    file_path = os.path.join(folder_path, file_name)

    # find the correlated wav file
    wav_path = os.path.join(wav_folder, os.path.splitext(file_name)[0] + '.wav')

    # get the audio data from the wav file
    audio_data = read_wav_file(wav_path)

    # Read the CSV file (no header row) into an array of label rows
    df = pd.read_csv(file_path, header=None)
    df_array = np.array(df)

    # get the related audio data with respect to the ground truth labels
    keep = np.zeros(len(df_array), dtype=bool)
    for row_number, row in enumerate(df_array):
        index = int(row[0])  # Assuming the first column contains the frame index
        start_idx = index * frame_len
        end_idx = start_idx + frame_len
        segment = audio_data[start_idx:end_idx, :]

        if segment.shape != (frame_len, n_channels):
            # Incomplete slice (e.g. the label runs past the end of the
            # recording): report it and drop both the audio and its label so
            # the two datasets stay aligned and the audio array stays
            # rectangular.
            print(index,start_idx,end_idx)
            print(segment.shape)
            continue

        wav_data.append(segment)
        keep[row_number] = True

    label_chunks.append(df_array[keep])

if not label_chunks:
    raise FileNotFoundError(f"No .csv label files found in {folder_path}")

# Concatenate once instead of growing the label array inside the loop.
dir_data = np.vstack(label_chunks)
wav_data = np.array(wav_data)
print(wav_data.shape)
print(dir_data.shape)

# Save the label data and the aligned audio slices to the h5 file
with h5py.File(h5_file_path, 'w') as h5file:
    h5file.create_dataset('label', data=dir_data)
    h5file.create_dataset('audio', data=wav_data)


(19151, 2400, 4)
(19151, 5)


Check the contents of the training and test datasets.

In [8]:
import h5py
import numpy as np

# Location of the training H5 file to inspect
h5_file_path = r"C:\Users\grizi\Desktop\TUD\year2\thesis\neural_network\DoA_Net\data\training_more.h5"

# Load every dataset of the file into memory and report its name and shape.
with h5py.File(h5_file_path, 'r') as h5file:
    for dataset_name, dataset in h5file.items():
        # 'audio' goes to audio_data; any other dataset overwrites label_data.
        if dataset_name != 'audio':
            label_data = np.array(dataset)
        else:
            audio_data = np.array(dataset)

        print(f"Dataset: {dataset_name}, Shape: {dataset.shape}")


Dataset: audio, Shape: (98396, 2400, 4)
Dataset: label, Shape: (98396, 5)


In [15]:
import h5py
import numpy as np

# Location of the test H5 file to inspect
h5_file_path = r"C:\Users\grizi\Desktop\TUD\year2\thesis\neural_network\DoA_Net\data\test.h5"

# Load every dataset of the file into memory and report its name and shape.
with h5py.File(h5_file_path, 'r') as h5file:
    for dataset_name, dataset in h5file.items():
        # 'audio' goes to audio_data; any other dataset overwrites label_data.
        if dataset_name != 'audio':
            label_data = np.array(dataset)
        else:
            audio_data = np.array(dataset)

        print(f"Dataset: {dataset_name}, Shape: {dataset.shape}")

Dataset: audio, Shape: (19151, 2400, 4)
Dataset: label, Shape: (19151, 5)


### 5. Extract GCC-PHAT features from the audio signals

Define a function that computes the GCC-PHAT (generalized cross-correlation with phase transform) of two signals.

In [1]:
from scipy.signal import fftconvolve

def gcc_phat(signal_i, signal_j, fs, max_tau=None, interp=1):
    """
    Compute the GCC-PHAT between two signals.
    Parameters:
        signal_i: np.ndarray
            Signal from microphone i.
        signal_j: np.ndarray
            Signal from microphone j.
        fs: int
            Sampling frequency of the signals.
        max_tau: float, optional
            Maximum delay (in seconds) to consider. When given (and non-zero),
            only the lags in [-max_tau, +max_tau) around zero are returned;
            the window is clamped to the available correlation length.
        interp: int, optional
            Accepted for interface compatibility; the current implementation
            does not use it.
    Returns:
        gcc: np.ndarray
            GCC-PHAT values, ordered from the most negative lag to the most
            positive one.
        tau: np.ndarray
            Time delays (in seconds) corresponding to the GCC-PHAT values,
            spaced exactly 1 / fs apart with the zero-lag bin at 0.
    """
    # Zero-pad to the next power of two that holds the full linear correlation.
    n = signal_i.shape[0] + signal_j.shape[0] - 1
    n_fft = 2 ** int(np.ceil(np.log2(n)))

    # Compute FFT of both signals
    X_i = np.fft.rfft(signal_i, n=n_fft)
    X_j = np.fft.rfft(signal_j, n=n_fft)

    # Cross-power spectrum with PHAT weighting: dividing by the magnitude
    # keeps only the phase. epsilon guards against division by zero.
    cross_power = X_i * np.conj(X_j)
    epsilon = 1e-10
    cross_power /= (np.abs(cross_power) + epsilon)

    # Compute inverse FFT to get GCC
    gcc = np.fft.irfft(cross_power, n=n_fft)

    # Rotate so that zero lag sits at index `centre` and negative lags precede it.
    centre = n_fft // 2
    gcc = np.roll(gcc, centre)

    # After the rotation, index k holds lag (k - centre) samples. Build the
    # axis from that relation; a linspace over n_fft points between the two
    # extremes would have a spacing different from 1 / fs.
    tau = (np.arange(n_fft) - centre) / fs

    # Limit to max_tau if specified
    if max_tau:
        # Clamp so the slice start can never become negative, which would
        # make Python index from the end of the array.
        max_shift = min(int(fs * max_tau), centre)
        gcc = gcc[centre - max_shift : centre + max_shift]
        tau = tau[centre - max_shift : centre + max_shift]

    return gcc, tau

In [16]:
from itertools import combinations

# Report the array that is actually processed below (audio_data, as loaded
# from the H5 file), not wav_data, which only exists if the dataset-building
# cell was run in the same session.
print(audio_data.shape)

fs = 24000        # sampling rate passed to gcc_phat
half_window = 25  # lags kept on each side of zero lag -> 51 values per pair

# Every unordered microphone pair (i < j), in the same order as two nested
# loops over the channel index; 4 channels give 6 pairs.
mic_pairs = list(combinations(range(audio_data.shape[2]), 2))

gcc_vectors_all = []
for k in range(audio_data.shape[0]):
    test_audio_data = audio_data[k, :, :]
    gcc_vectors = []
    for i, j in mic_pairs:
        signal_i = test_audio_data[:, i]  # first microphone of the pair
        signal_j = test_audio_data[:, j]  # second microphone of the pair

        # calculate GCC-PHAT and keep the window centred on zero lag
        gcc, _ = gcc_phat(signal_i, signal_j, fs)
        centre = len(gcc) // 2
        gcc_vectors.append(gcc[centre - half_window : centre + half_window + 1])

    # Transpose so each frame is (lags, microphone pairs)
    gcc_vectors_all.append(np.array(gcc_vectors).T)

gcc_vectors_all = np.array(gcc_vectors_all)
print(gcc_vectors_all.shape)

(19151, 2400, 4)
(19151, 51, 6)


Now we have already prepared all the data we need to train the model

In [17]:
# Open the H5 file in append mode and store the GCC-PHAT features next to the
# existing datasets.
# NOTE(review): h5_file_path is whatever the most recently executed cell set
# it to - confirm it points at the file the features were computed from.
with h5py.File(h5_file_path, 'a') as h5file:
    # create_dataset fails when the name already exists, so drop a previous
    # copy first; this makes the cell safe to re-run.
    if 'gcc_vectors' in h5file:
        del h5file['gcc_vectors']
    h5file.create_dataset('gcc_vectors', data=gcc_vectors_all)


In [18]:
# List the name and shape of every dataset stored in the H5 file.
with h5py.File(h5_file_path, 'r') as h5file:
    for dataset_name, dataset in h5file.items():
        print(f"Dataset: {dataset_name}, Shape: {dataset.shape}")

Dataset: audio, Shape: (19151, 2400, 4)
Dataset: gcc_vectors, Shape: (19151, 51, 6)
Dataset: label, Shape: (19151, 5)
