In [1]:
import os
import numpy as np
import pandas as pd
# `import ffmpeg` below is the ffmpeg-python bindings; the ffmpeg executable must also be installed on the system (https://ffmpeg.org/download.html), because it is what actually decodes the audio files.
import ffmpeg

## Extraction Functions

In [2]:
# NOTE(review): extract_audio_signal is defined again in the next cell of this
# notebook and that later definition is the one in effect after "Run All".
# Keep a single definition.
def extract_audio_signal(file_path, start_sample=31380, end_sample=100000):
    """
    Decodes an audio file with ffmpeg and returns a fixed-length window of it.

    The file is decoded to raw signed 16-bit little-endian PCM, the samples in
    ``[start_sample, end_sample)`` are selected, and the result is zero-padded
    at the end so that every returned signal has the same length.

    Parameters:
        file_path (str): Path to the audio file.
        start_sample (int): Index of the first sample to keep (non-negative).
        end_sample (int): Index one past the last sample to keep (non-negative).

    Returns:
        np.ndarray: 1-D ``int16`` array of length ``end_sample - start_sample``
        (0 if that difference is negative), or ``None`` if ffmpeg could not
        decode the file (the ffmpeg error output is printed in that case).

    Notes:
        ffmpeg keeps the channel count of the source file, so samples of a
        multi-channel recording arrive interleaved in the returned array.
    """
    # Every returned signal has exactly this many samples.
    target_len = max(end_sample - start_sample, 0)

    try:
        # Request headerless PCM ('s16le') rather than a WAV container: a WAV
        # stream starts with a RIFF header, and feeding those header bytes to
        # np.frombuffer turns them into bogus leading samples and shifts every
        # sample index used for cropping.
        out, _ = (
            ffmpeg
            .input(file_path)
            .output('pipe:', format='s16le', acodec='pcm_s16le')
            .run(capture_stdout=True, capture_stderr=True)
        )
    except ffmpeg.Error as e:
        # stderr holds ffmpeg's own diagnostics; fall back to the exception text.
        details = e.stderr.decode(errors='replace') if e.stderr else str(e)
        print(f"Error extracting audio signal from {file_path}: {details}")
        return None

    # np.frombuffer needs a whole number of 2-byte samples; ignore a stray byte.
    usable_bytes = len(out) - (len(out) % 2)
    signal = np.frombuffer(out[:usable_bytes], dtype='<i2')

    # Select the requested window (shorter than target_len if the file ends early).
    cropped = signal[start_sample:end_sample][:target_len]

    # Copy into a zero-initialised buffer: pads short signals and always hands
    # the caller a writable, native-endian int16 array.
    window = np.zeros(target_len, dtype=np.int16)
    window[:len(cropped)] = cropped
    return window

In [3]:
# NOTE(review): this cell re-defines extract_audio_signal, which is already
# defined in the previous cell; this later definition is the one in effect
# after "Run All". Keep a single definition.
def extract_audio_signal(file_path, start_sample=31380, end_sample=100000):
    """
    Decodes an audio file with ffmpeg and returns a fixed-length window of it.

    The file is decoded to raw signed 16-bit little-endian PCM, the samples in
    ``[start_sample, end_sample)`` are selected, and the result is zero-padded
    at the end so that every returned signal has the same length.

    Parameters:
        file_path (str): Path to the audio file.
        start_sample (int): Index of the first sample to keep (non-negative).
        end_sample (int): Index one past the last sample to keep (non-negative).

    Returns:
        np.ndarray: 1-D ``int16`` array of length ``end_sample - start_sample``
        (0 if that difference is negative), or ``None`` if ffmpeg could not
        decode the file (the ffmpeg error output is printed in that case).

    Notes:
        ffmpeg keeps the channel count of the source file, so samples of a
        multi-channel recording arrive interleaved in the returned array.
    """
    # Every returned signal has exactly this many samples.
    target_len = max(end_sample - start_sample, 0)

    try:
        # Request headerless PCM ('s16le') rather than a WAV container: a WAV
        # stream starts with a RIFF header, and feeding those header bytes to
        # np.frombuffer turns them into bogus leading samples and shifts every
        # sample index used for cropping.
        out, _ = (
            ffmpeg
            .input(file_path)
            .output('pipe:', format='s16le', acodec='pcm_s16le')
            .run(capture_stdout=True, capture_stderr=True)
        )
    except ffmpeg.Error as e:
        # stderr holds ffmpeg's own diagnostics; fall back to the exception text.
        details = e.stderr.decode(errors='replace') if e.stderr else str(e)
        print(f"Error extracting audio signal from {file_path}: {details}")
        return None

    # np.frombuffer needs a whole number of 2-byte samples; ignore a stray byte.
    usable_bytes = len(out) - (len(out) % 2)
    signal = np.frombuffer(out[:usable_bytes], dtype='<i2')

    # Select the requested window (shorter than target_len if the file ends early).
    cropped = signal[start_sample:end_sample][:target_len]

    # Copy into a zero-initialised buffer: pads short signals and always hands
    # the caller a writable, native-endian int16 array.
    window = np.zeros(target_len, dtype=np.int16)
    window[:len(cropped)] = cropped
    return window

In [4]:
def create_audio_dataset(base_path, class_dirs, start_sample=None, end_sample=None):
    """
    Builds a labelled dataset of audio signals from files grouped in class directories.

    Each entry of ``class_dirs`` is a sub-directory of ``base_path``; its position
    in the list is used as the integer label of every file it contains. Files are
    visited in sorted name order so the row order is reproducible. Files that
    ``extract_audio_signal`` cannot decode (it returns ``None``) are skipped.

    Parameters:
        base_path (str): Path to the base directory containing class subdirectories.
        class_dirs (list): Class subdirectory names, each containing audio files.
        start_sample (int, optional): Forwarded to ``extract_audio_signal``; when
            ``None`` the extractor's own default is used.
        end_sample (int, optional): Forwarded to ``extract_audio_signal``; when
            ``None`` the extractor's own default is used.

    Returns:
        np.ndarray: A 2D array where each row is one audio signal
            (shape ``(0, 0)`` when no file could be extracted).
        np.ndarray: A 1D integer array with the class label of each row.
    """
    # Forward only the crop bounds the caller actually set, so the extractor's
    # defaults stay the single source of truth for the crop window.
    crop_kwargs = {}
    if start_sample is not None:
        crop_kwargs['start_sample'] = start_sample
    if end_sample is not None:
        crop_kwargs['end_sample'] = end_sample

    dataset = []
    labels = []

    for label, class_dir in enumerate(class_dirs):
        class_path = os.path.join(base_path, class_dir)
        # os.listdir also returns sub-directories; keep regular files only so the
        # printed count is accurate and ffmpeg is never pointed at a directory.
        audio_files = sorted(
            entry for entry in os.listdir(class_path)
            if os.path.isfile(os.path.join(class_path, entry))
        )
        print(f"Processing {len(audio_files)} files in {class_dir}...")
        for audio_file in audio_files:
            audio_path = os.path.join(class_path, audio_file)
            signal = extract_audio_signal(audio_path, **crop_kwargs)

            # None means the extractor failed (and already reported it): skip.
            if signal is not None:
                dataset.append(signal)
                labels.append(label)

    if not dataset:
        # np.array([]) would be 1-D float64; keep the documented 2-D/1-D contract.
        return np.empty((0, 0), dtype=np.int16), np.empty(0, dtype=int)

    return np.array(dataset), np.array(labels, dtype=int)

In [5]:
def save_audio_dataset_to_csv(signals, labels, output_file):
    """
    Writes the signals and their class labels to a single CSV file.

    The file has one row per signal: the sample values come first (columns
    named by their position) and the class label is stored in a final
    ``label`` column. The row index is not written.

    Parameters:
        signals (np.ndarray): A 2D numpy array where each row represents an audio signal.
        labels (np.ndarray): A 1D numpy array containing the class labels for each audio signal.
        output_file (str): The path to the output CSV file.
    """
    # Build the table in one expression: samples as columns, labels appended last.
    table = pd.DataFrame(signals).assign(label=labels)
    table.to_csv(output_file, index=False)

## Signal Extraction

In [6]:
base_path = './data/audios'  # base directory that holds one sub-directory per class
class_dirs = ['abrir', 'fechar', 'ligar']  # class sub-directories; each name's list index becomes its integer label

# Build the dataset: one row per successfully decoded file, plus the matching label vector
signals, labels = create_audio_dataset(base_path, class_dirs)

print(f"Dataset shape: {signals.shape}")  # (n_signals, signal_length); the recorded run gave (45, 68620)
print(f"Labels shape: {labels.shape}")    # (n_signals,); the recorded run gave (45,)

Processing 15 files in abrir...
Processing 15 files in fechar...
Processing 15 files in ligar...
Dataset shape: (45, 68620)
Labels shape: (45,)


In [7]:
# Destination CSV; the path is relative, so it resolves against the current working directory
output_file = "data/audio_signals_dataset.csv"

# Write the signals (one row each) together with a final 'label' column
save_audio_dataset_to_csv(signals, labels, output_file)

print(f"Dataset saved to {output_file}")

Dataset saved to data/audio_signals_dataset.csv
