# Raw audio data preprocessing

1.   Merging audio files in 1 destination folder (Merged audio files)
2.   Checking duration of audio files
3.   Standardizing audio parameters and saving to folder (Standardized)
4.   Cross check random samples and verify
5.   Check total audio samples and their length



In [None]:
# Attach Google Drive to the Colab runtime; the dataset paths used in this
# notebook all live under this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
import librosa
import soundfile as sf
import numpy as np

# **Merge the contents of two audio folders**

In [None]:
import shutil
import os

# Define the paths to the source folders and the destination folder.
# The folders are merged in the order listed below.
source_folder1 = '/content/drive/MyDrive/DL project/Fraiwan_Audio_Files'
source_folder2 = '/content/drive/MyDrive/DL project/ICBHI_audio_files'
destination_folder = '/content/drive/MyDrive/DL project/Merged and preprocessed audio/Merged audio files'

# Create the destination folder if it doesn't exist
os.makedirs(destination_folder, exist_ok=True)

# Maps every filename copied during this run to the folder it came from, so a
# name that exists in both datasets is reported instead of one recording being
# silently replaced by the other.
copied_from = {}

# Copy every file of each source folder into the single destination folder
for source_folder in (source_folder1, source_folder2):
    for filename in sorted(os.listdir(source_folder)):
        source_path = os.path.join(source_folder, filename)

        # shutil.copy only handles regular files; a sub-directory would raise
        # and abort the whole merge, so skip it and say so.
        if not os.path.isfile(source_path):
            print(f"Skipping non-file entry: {source_path}")
            continue

        # The later folder still wins (same as before), but the clash is now visible.
        if filename in copied_from:
            print(f"Warning: {filename} from {source_folder} overwrites the copy from {copied_from[filename]}")

        destination_path = os.path.join(destination_folder, filename)
        shutil.copy(source_path, destination_path)
        copied_from[filename] = source_folder

print("Dataset folders combined successfully!")

Dataset folders combined successfully!


# See the duration of audio files

In [None]:
import librosa
import os
import pandas as pd

# Define the path to the folder containing audio files
audio_folder = destination_folder

# Upper bounds (in seconds) of consecutive duration buckets and their labels.
# A file is counted in the FIRST bucket whose bound exceeds its duration, so the
# buckets are exclusive ranges: e.g. '<9s' holds files with 6 s <= duration < 9 s.
duration_ranges = [2, 5, 6, 9, 12, 15, 20, 25, 30, 50, 100, 150, 200, 250]
range_labels = ['<2s', '<5s', '<6s', '<9s', '<12s', '<15s', '<20s', '<25s', '<30s', '<50', '<100', '<150', '<200', '<250']

# Bucket for files at least as long as the largest bound; without it such
# files would match no bucket and silently vanish from the counts and the total.
overflow_label = f'>={duration_ranges[-1]}'

# Initialize a dictionary to store duration counts
duration_counts = {label: 0 for label in range_labels}
duration_counts[overflow_label] = 0

# Iterate through files in the folder
for filename in os.listdir(audio_folder):
    # Check if the file is an audio file (e.g., WAV, MP3)
    if filename.endswith(('.wav', '.mp3')):  # Add other audio extensions as needed
        # Construct the full file path
        file_path = os.path.join(audio_folder, filename)

        # Load the audio file using librosa
        try:
            audio_data, sample_rate = librosa.load(file_path, sr=None)  # Load with original sample rate
            duration = librosa.get_duration(y=audio_data, sr=sample_rate)

            # Find the appropriate duration range and increment the count
            for threshold, label in zip(duration_ranges, range_labels):
                if duration < threshold:
                    duration_counts[label] += 1
                    break
            else:
                # No bound exceeded the duration: count it in the overflow bucket
                duration_counts[overflow_label] += 1
        except Exception as e:
            print(f"Error processing file {filename}: {e}")

# Create a Pandas DataFrame for better visualization
df = pd.DataFrame(list(duration_counts.items()), columns=['Duration Range', 'Count'])

# Print the total number of audio samples.
# Files that failed to load (reported above) are not part of this total.
total_audio_samples = df['Count'].sum()
print(f"Total number of audio samples: {total_audio_samples}")

# Print the length-wise count (DataFrame)
print("\nLength-wise Count:")
print(df)

Total number of audio samples: 1234

Length-wise Count:
   Duration Range  Count
0             <2s      0
1             <5s      0
2             <6s      6
3             <9s      7
4            <12s     38
5            <15s    102
6            <20s    125
7            <25s    873
8            <30s     26
9             <50     31
10           <100     26
11           <150      0
12           <200      0
13           <250      0


**Standardize audio files with certain parameters**

In [None]:
# Standardize every audio file into a fixed-length 6 s clip, uniformly sampled
# at 44100 Hz, 16-bit PCM, stereo. Files longer than 6 s keep only their first
# 6 s; shorter files are zero-padded at the end.

# Define the path to the folder containing audio files
audio_folder = '/content/drive/MyDrive/DL project/Merged and preprocessed audio/Merged audio files'

# Define output folder for standardized clips
output_folder = '/content/drive/MyDrive/DL project/Merged and preprocessed audio/Standardized'

# Create output folder if it doesn't exist
os.makedirs(output_folder, exist_ok=True)

# Define target parameters
target_sr = 44100  # Sample rate (Hz)
target_duration = 6  # Duration in seconds
target_bit_depth = 16  # Bit depth
target_channels = 2  # Number of channels (stereo)

# Samples per channel in a standardized clip (same for every file)
target_samples = target_sr * target_duration

# Iterate through files in the folder
for filename in os.listdir(audio_folder):
    # Check if the file is an audio file (e.g., WAV, MP3)
    if filename.endswith(('.wav', '.mp3')):  # Add other audio extensions as needed
        # Construct the full file path
        file_path = os.path.join(audio_folder, filename)

        try:
            # Load with the target sample rate and keep the original channels
            audio_data, sr = librosa.load(file_path, sr=target_sr, mono=False)

            # Normalize to shape (channels, samples): mono audio is loaded as a
            # 1-D array, so give it an explicit channel axis first.
            audio_data = np.atleast_2d(audio_data)

            # Force the target channel count. The channel count is shape[0];
            # checking ndim here would never detect extra channels, because a
            # multi-channel array is always 2-D.
            if audio_data.shape[0] < target_channels:
                # Mono: duplicate the single channel
                audio_data = np.tile(audio_data[:1], (target_channels, 1))
            elif audio_data.shape[0] > target_channels:
                # More channels than wanted: keep only the first ones
                audio_data = audio_data[:target_channels, :]

            # Pad or trim audio to target duration
            num_samples = audio_data.shape[1]  # Samples per channel

            if num_samples < target_samples:
                # Pad the end with silence if shorter
                pad_width = target_samples - num_samples
                audio_data = np.pad(audio_data, pad_width=((0, 0), (0, pad_width)), mode='constant')
            elif num_samples > target_samples:
                # Trim if longer
                audio_data = audio_data[:, :target_samples]

            # Save the standardized clip; soundfile expects (samples, channels)
            output_filename = os.path.splitext(filename)[0] + '_standardized.wav'
            output_path = os.path.join(output_folder, output_filename)
            sf.write(output_path, audio_data.T, target_sr, subtype=f'PCM_{target_bit_depth}', format='WAV')

        except Exception as e:
            print(f"Error processing file {filename}: {e}")

print("Audio standardization complete!")

Audio standardization complete!


Verify standardized parameters of random samples

In [None]:
import random

# Spot-check a random sample of standardized clips: print the measured
# parameters of each one and compare them against the targets, so a mismatch
# is flagged explicitly instead of relying on someone reading every line.

# Define the path to the folder containing standardized audio files
standardized_folder = '/content/drive/MyDrive/DL project/Merged and preprocessed audio/Standardized'

# Define target parameters
target_sr = 44100  # Sample rate (Hz)
target_duration = 6  # Duration in seconds
target_bit_depth = 16  # Bit depth
target_channels = 2  # Number of channels (stereo)

# Get a list of all standardized audio files
audio_files = [f for f in os.listdir(standardized_folder) if f.endswith(('.wav', '.mp3'))]

# Select 30 random audio files (or all of them if there are fewer)
random_samples = random.sample(audio_files, min(30, len(audio_files)))

# Files whose parameters differ from the targets or that could not be read
failed_files = []

# Iterate through the random samples and verify features
for filename in random_samples:
    file_path = os.path.join(standardized_folder, filename)

    try:
        # Load audio file
        audio_data, sr = librosa.load(file_path, sr=None, mono=False)  # Load with original sample rate and channels

        # Measure features
        duration = librosa.get_duration(y=audio_data, sr=sr)
        channels = audio_data.shape[0] if audio_data.ndim > 1 else 1
        bit_depth = sf.info(file_path).subtype.split('_')[-1]  # Extract bit depth from subtype (a string)

        print(f"File: {filename}")
        print(f"  Duration: {duration:.2f} seconds (Target: {target_duration} seconds)")
        print(f"  Sample Rate: {sr} Hz (Target: {target_sr} Hz)")
        print(f"  Channels: {channels} (Target: {target_channels})")
        print(f"  Bit Depth: {bit_depth} (Target: {target_bit_depth})")

        # Compare against the targets; duration uses the same 2-decimal
        # precision as the printed value.
        matches_targets = (
            abs(duration - target_duration) < 0.01
            and sr == target_sr
            and channels == target_channels
            and bit_depth == str(target_bit_depth)
        )
        if not matches_targets:
            print("  MISMATCH: file does not meet the target parameters")
            failed_files.append(filename)
        print("-" * 20)

    except Exception as e:
        print(f"Error verifying file {filename}: {e}")
        failed_files.append(filename)

# Summarize so a single bad file cannot get lost in the per-file listing
if failed_files:
    print(f"{len(failed_files)} of {len(random_samples)} sampled files failed verification: {failed_files}")
else:
    print(f"All {len(random_samples)} sampled files match the target parameters.")

print("Verification complete!")

File: 178_1b6_Tc_mc_AKGC417L_standardized.wav
  Duration: 6.00 seconds (Target: 6 seconds)
  Sample Rate: 44100 Hz (Target: 44100 Hz)
  Channels: 2 (Target: 2)
  Bit Depth: 16 (Target: 16)
--------------------
File: 159_1b1_Ar_sc_Meditron_standardized.wav
  Duration: 6.00 seconds (Target: 6 seconds)
  Sample Rate: 44100 Hz (Target: 44100 Hz)
  Channels: 2 (Target: 2)
  Bit Depth: 16 (Target: 16)
--------------------
File: 219_2b1_Ar_mc_LittC2SE_standardized.wav
  Duration: 6.00 seconds (Target: 6 seconds)
  Sample Rate: 44100 Hz (Target: 44100 Hz)
  Channels: 2 (Target: 2)
  Bit Depth: 16 (Target: 16)
--------------------
File: 170_1b2_Al_mc_AKGC417L_standardized.wav
  Duration: 6.00 seconds (Target: 6 seconds)
  Sample Rate: 44100 Hz (Target: 44100 Hz)
  Channels: 2 (Target: 2)
  Bit Depth: 16 (Target: 16)
--------------------
File: 205_2b3_Al_mc_AKGC417L_standardized.wav
  Duration: 6.00 seconds (Target: 6 seconds)
  Sample Rate: 44100 Hz (Target: 44100 Hz)
  Channels: 2 (Target: 2)


Check the total number of audio samples and their duration

In [None]:
# Define the path to the folder containing audio files
audio_folder = '/content/drive/MyDrive/DL project/Merged and preprocessed audio/Standardized'

# Upper bounds (in seconds) of consecutive duration buckets and their labels.
# A file is counted in the FIRST bucket whose bound exceeds its duration, so the
# buckets are exclusive ranges: a clip of exactly 6 s is not '<6s' and lands in
# '<9s' (6 s <= duration < 9 s).
duration_ranges = [2, 5, 6, 9, 12, 15, 20, 25, 30, 50, 100, 150, 200, 250]
range_labels = ['<2s', '<5s', '<6s', '<9s', '<12s', '<15s', '<20s', '<25s', '<30s', '<50', '<100', '<150', '<200', '<250']

# Bucket for files at least as long as the largest bound; without it such
# files would match no bucket and silently vanish from the counts and the total.
overflow_label = f'>={duration_ranges[-1]}'

# Initialize a dictionary to store duration counts
duration_counts = {label: 0 for label in range_labels}
duration_counts[overflow_label] = 0

# Iterate through files in the folder
for filename in os.listdir(audio_folder):
    # Check if the file is an audio file (e.g., WAV, MP3)
    if filename.endswith(('.wav', '.mp3')):  # Add other audio extensions as needed
        # Construct the full file path
        file_path = os.path.join(audio_folder, filename)

        # Load the audio file using librosa
        try:
            audio_data, sample_rate = librosa.load(file_path, sr=None)  # Load with original sample rate
            duration = librosa.get_duration(y=audio_data, sr=sample_rate)

            # Find the appropriate duration range and increment the count
            for threshold, label in zip(duration_ranges, range_labels):
                if duration < threshold:
                    duration_counts[label] += 1
                    break
            else:
                # No bound exceeded the duration: count it in the overflow bucket
                duration_counts[overflow_label] += 1
        except Exception as e:
            print(f"Error processing file {filename}: {e}")

# Create a Pandas DataFrame for better visualization
df = pd.DataFrame(list(duration_counts.items()), columns=['Duration Range', 'Count'])

# Print the total number of audio samples.
# Files that failed to load (reported above) are not part of this total.
total_audio_samples = df['Count'].sum()
print(f"Total number of audio samples: {total_audio_samples}")

# Print the length-wise count (DataFrame)
print("\nLength-wise Count:")
print(df)

Total number of audio samples: 1234

Length-wise Count:
   Duration Range  Count
0             <2s      0
1             <5s      0
2             <6s      0
3             <9s   1234
4            <12s      0
5            <15s      0
6            <20s      0
7            <25s      0
8            <30s      0
9             <50      0
10           <100      0
11           <150      0
12           <200      0
13           <250      0
