In [1]:
import os
import random
import librosa
import numpy as np
import soundfile as sf
from pydub import AudioSegment
from pydub.playback import play
import matplotlib.pyplot as plt

In [2]:
def calculate_SNR(clean_signal, noisy_signal):
    # Ensure both signals have the same length
    min_length = min(len(clean_signal), len(noisy_signal))
    clean_signal = clean_signal[:min_length]
    noisy_signal = noisy_signal[:min_length]
    
    # Calculate energy
    clean_energy = np.sum(clean_signal ** 2)
    noise_energy = np.sum((clean_signal - noisy_signal) ** 2)
    
    # Calculate SNR
    snr = 10 * np.log10(clean_energy / noise_energy)
    return snr

In [3]:
# Initialize lists to store audio data and labels
audio_data = []  # Initialize audio data list
total_input_files = 0
total_output_files = 0

In [4]:
# Input and Output directory
input_directory = 'D:\\Sem4\\Data_298B_Project\\Input'
output_directory = 'D:\\Sem4\\Data_298B_Project\\Output'

In [5]:
# Iterate through each subfolder in the input directory
for subdir, dirs, files in os.walk(input_directory):
    input_files_count = 0
    output_files_count = 0
    for file in files:
        if file.endswith('.wav'):
            input_files_count += 1
            total_input_files += 1
            # Construct the full file path for input
            input_file_path = os.path.join(subdir, file)

            # Construct the corresponding output subdirectory path
            output_subdir_path = subdir.replace(input_directory, output_directory)
            if not os.path.exists(output_subdir_path):
                os.makedirs(output_subdir_path)
            # Load the audio file for Data Synthesis using pydub library
            audio_py = AudioSegment.from_file(input_file_path)
        
            min_duration_ms = 500  # Minimum duration in milliseconds
        
            if len(audio_py) < min_duration_ms:
                # Calculate how much silence needs to be added
                silence_duration = min_duration_ms - len(audio_py)
                # Create a silent audio segment of the required duration
                silence_segment = AudioSegment.silent(duration=silence_duration)
                # Append the silent segment to the audio to meet the minimum duration requirement
                audio_py = audio_py + silence_segment

            # Audio processing with pydub
            audio_faster = audio_py.speedup(playback_speed=1.5)
            audio_slower = audio_py.speedup(playback_speed=0.75)
            audio_reversed = audio_py.reverse()
            # Random pitch shift (within a range)
            octaves = random.uniform(-0.5, 0.5)
            new_sample_rate = int(audio_py.frame_rate * (2 ** octaves))
            audio_pitch_shifted = audio_py._spawn(audio_py.raw_data, overrides={'frame_rate': new_sample_rate})
            audio_pitch_shifted = audio_pitch_shifted.set_frame_rate(audio_py.frame_rate)

            # Load the audio file for Data Augmentation using librosa library
            audio_lr, sr = librosa.load(input_file_path, sr=None)

            # Audio processing with librosa
            audio_stretched = librosa.effects.time_stretch(audio_lr, rate=1.25)
            audio_shifted = librosa.effects.pitch_shift(audio_lr, sr=sr, n_steps=4)
            audio_noisy = audio_lr + 0.005 * np.random.randn(len(audio_lr))

            audio_data.append(audio_lr)

            # Extract the base name without the extension for the input file
            base_name = os.path.splitext(file)[0]
            
            # Define the output filenames based on the input filename
            output_paths = {
                'faster': os.path.join(output_subdir_path, f'{base_name}_faster.wav'),
                'slower': os.path.join(output_subdir_path, f'{base_name}_slower.wav'),
                'reversed': os.path.join(output_subdir_path, f'{base_name}_reversed.wav'),
                'pitch_shifted': os.path.join(output_subdir_path, f'{base_name}_pitch_shifted.wav'),
                'stretched': os.path.join(output_subdir_path, f'{base_name}_stretched.wav'),
                'shifted': os.path.join(output_subdir_path, f'{base_name}_shifted.wav'),
                'noisy': os.path.join(output_subdir_path, f'{base_name}_noisy.wav')
            }

            # Save the transformed audio to new WAV files
            audio_faster.export(output_paths['faster'], format="wav")
            audio_slower.export(output_paths['slower'], format="wav")
            audio_reversed.export(output_paths['reversed'], format="wav")
            audio_pitch_shifted.export(output_paths['pitch_shifted'], format="wav")
            sf.write(output_paths['stretched'], audio_stretched, sr)
            sf.write(output_paths['shifted'], audio_shifted, sr)
            sf.write(output_paths['noisy'], audio_noisy, sr)

            # Increment output files count for each file processed
            output_files_count += len(output_paths)
            total_output_files += len(output_paths)

            # Evaluate the augmented and synthesized audio files
            for key in output_paths:
                clean_audio, sr = librosa.load(input_file_path, sr=None)
                # Load augmented audio for SNR calculation
                augmented_audio, _ = librosa.load(output_paths[key], sr=sr)
                clean_len = len(clean_audio)
                augmented_len = len(augmented_audio)
                if augmented_len < clean_len:
                    augmented_audio = np.pad(augmented_audio, (0, clean_len - augmented_len), mode='constant')
                elif augmented_len > clean_len:
                    augmented_audio = augmented_audio[:clean_len]
                print("Length of clean audio:", len(clean_audio))
                print("Length of augmented audio:", len(augmented_audio))
                # Calculate SNR
                snr = calculate_SNR(clean_audio, augmented_audio)
                print(f"SNR for {key} file '{output_paths[key]}': {snr} dB")
                # Save the augmented audio to output directory only if SNR is greater than 0 dB
                if snr > 0:
                    output_file_path = output_paths[key]
                    sf.write(output_file_path, augmented_audio, sr)
                    print(f"Augmented audio saved to '{output_file_path}'")
                else:
                    print(f"SNR for {key} file '{output_paths[key]}' is below 0 dB. Augmented audio not saved.")

# Print the total number of files processed
print(f"Total files after data augmentation and synthesis: {total_input_files + total_output_files} ")

print("All files have been processed and saved to the output directory.")

Length of clean audio: 65441
Length of augmented audio: 65441
SNR for faster file 'D:\Sem4\Data_298B_Project\Output\African_Swine_Fever\sf_1_faster.wav': -2.459154725074768 dB
SNR for faster file 'D:\Sem4\Data_298B_Project\Output\African_Swine_Fever\sf_1_faster.wav' is below 0 dB. Augmented audio not saved.
Length of clean audio: 65441
Length of augmented audio: 65441
SNR for slower file 'D:\Sem4\Data_298B_Project\Output\African_Swine_Fever\sf_1_slower.wav': -1.2919066846370697 dB
SNR for slower file 'D:\Sem4\Data_298B_Project\Output\African_Swine_Fever\sf_1_slower.wav' is below 0 dB. Augmented audio not saved.
Length of clean audio: 65441
Length of augmented audio: 65441
SNR for reversed file 'D:\Sem4\Data_298B_Project\Output\African_Swine_Fever\sf_1_reversed.wav': -2.928105592727661 dB
SNR for reversed file 'D:\Sem4\Data_298B_Project\Output\African_Swine_Fever\sf_1_reversed.wav' is below 0 dB. Augmented audio not saved.
Length of clean audio: 65441
Length of augmented audio: 65441
SN

  snr = 10 * np.log10(clean_energy / noise_energy)


Length of clean audio: 3605
Length of augmented audio: 3605
SNR for pitch_shifted file 'D:\Sem4\Data_298B_Project\Output\Pneumonia\pnu_18_pitch_shifted.wav': -2.5299087166786194 dB
SNR for pitch_shifted file 'D:\Sem4\Data_298B_Project\Output\Pneumonia\pnu_18_pitch_shifted.wav' is below 0 dB. Augmented audio not saved.
Length of clean audio: 3605
Length of augmented audio: 3605
SNR for stretched file 'D:\Sem4\Data_298B_Project\Output\Pneumonia\pnu_18_stretched.wav': 9.13203239440918 dB
Augmented audio saved to 'D:\Sem4\Data_298B_Project\Output\Pneumonia\pnu_18_stretched.wav'
Length of clean audio: 3605
Length of augmented audio: 3605
SNR for shifted file 'D:\Sem4\Data_298B_Project\Output\Pneumonia\pnu_18_shifted.wav': -1.9895477592945099 dB
SNR for shifted file 'D:\Sem4\Data_298B_Project\Output\Pneumonia\pnu_18_shifted.wav' is below 0 dB. Augmented audio not saved.
Length of clean audio: 3605
Length of augmented audio: 3605
SNR for noisy file 'D:\Sem4\Data_298B_Project\Output\Pneumonia\