## Approach B

In [5]:
import os
import torchaudio
import pandas as pd
import torchaudio.transforms as T # Import the transforms module

def load_and_preprocess_audios_from_folder(folder_path, target_sr=16000):
    """
    Load and normalize all audio files in a folder using torchaudio, extracting accent and gender from filename,
    and transforming the waveform to a spectrogram.

    Args:
        folder_path (str): Path to folder containing .wav files
        target_sr (int): Sampling rate

    Returns:
        pd.DataFrame: DataFrame with columns ['file_path', 'waveform' (spectrogram), 'accent', 'gender']
    """
    data = []
    # Initialize the Spectrogram transform
    spectrogram_transform = T.Spectrogram()

    for fname in os.listdir(folder_path):
        if fname.endswith('.wav'):
            file_path = os.path.join(folder_path, fname)
            # Load audio
            waveform, sr = torchaudio.load(file_path)
            # Normalize amplitude
            waveform = waveform / waveform.abs().max()
            
            # Apply STFT
            spectrogram = spectrogram_transform(waveform)

            # Extract accent and gender
            accent = int(fname[0])  # 1-5
            gender = fname[1]       # 'm' or 'f'
            data.append({
                'file_path': file_path,
                'waveform': spectrogram, # Store the spectrogram
                'accent': accent,
                'gender': gender
            })
    return pd.DataFrame(data)

df = load_and_preprocess_audios_from_folder("/Users/larsheijnen/DL/Train")
print(df.head())
#Size first waveform
print(df['waveform'].iloc[0].shape)

                                 file_path  \
0  /Users/larsheijnen/DL/Train/2m_9039.wav   
1  /Users/larsheijnen/DL/Train/4f_1887.wav   
2  /Users/larsheijnen/DL/Train/4f_9571.wav   
3  /Users/larsheijnen/DL/Train/1m_3736.wav   
4  /Users/larsheijnen/DL/Train/1m_3078.wav   

                                            waveform  accent gender  
0  [[[tensor(8.2519e-05), tensor(2.3516e-07), ten...       2      m  
1  [[[tensor(7.0271e-05), tensor(6.2840e-05), ten...       4      f  
2  [[[tensor(8.9346e-05), tensor(5.5403e-05), ten...       4      f  
3  [[[tensor(0.0017), tensor(0.0002), tensor(4.98...       1      m  
4  [[[tensor(0.0008), tensor(0.0003), tensor(0.00...       1      m  
torch.Size([1, 201, 208])
