In [3]:
import os
import librosa
import soundfile as sf  # for saving audio files
import numpy as np

# Dataset locations on the mounted Drive: the first is read from, the second
# receives one sub-folder per (class, technique) pair.
original_data_path, augmented_data_path = (
    "/content/drive/MyDrive/2DCNN_MAJOR/CONVERTED_DATA",
    "/content/drive/MyDrive/2DCNN_MAJOR/NEW_DATA",
)

# Names of the augmentations to run; each name is also used as the suffix of
# the output folder and of the output file name.
augmentation_techniques = [
    "noise",
    "pitch_shift",
    "reverberation",
]

# Function to apply noise injection
def apply_noise_injection(audio, noise_level=0.005, rng=None):
    """Add zero-mean Gaussian noise to an audio signal.

    Args:
        audio: Array-like of samples. Any shape is accepted; the noise is
            drawn with the same shape, so multi-channel input works too.
        noise_level: Standard deviation of the added noise, expressed in the
            same units as the samples.
        rng: Optional ``numpy.random.Generator`` used to draw the noise, for
            reproducible augmentation. When omitted, NumPy's global random
            state is used.

    Returns:
        numpy.ndarray with the same shape as ``audio``. Floating-point input
        keeps its dtype; any other input is promoted to float64.
    """
    audio = np.asarray(audio)

    # Size the noise from the full shape rather than len(): len() only gives
    # the first dimension, which breaks broadcasting for 2-D input.
    if rng is None:
        noise = np.random.normal(0, noise_level, audio.shape)
    else:
        noise = rng.normal(0, noise_level, audio.shape)

    # Keep e.g. float32 audio as float32 instead of silently doubling its
    # size; integer input is left to promote to float so the noise survives.
    if np.issubdtype(audio.dtype, np.floating):
        noise = noise.astype(audio.dtype, copy=False)

    return audio + noise

# Function to apply pitch shift
def apply_pitch_shift(audio, sample_rate, pitch_shift_steps=2):
    """Return ``audio`` shifted in pitch by ``pitch_shift_steps`` steps.

    Delegates to ``librosa.effects.pitch_shift``; ``sample_rate`` is passed
    as its ``sr`` argument and ``pitch_shift_steps`` as ``n_steps``.
    """
    shift_options = {"sr": sample_rate, "n_steps": pitch_shift_steps}
    shifted_audio = librosa.effects.pitch_shift(audio, **shift_options)
    return shifted_audio

# Function to apply reverberation
def apply_reverberation(audio, room_scale=0.9, sample_rate=22050, wet_level=0.3):
    """Add synthetic reverberation to an audio signal.

    The signal is convolved with a synthetic impulse response (noise shaped
    by an exponential decay) and the result is blended with the dry signal.
    The impulse response is generated from a fixed seed, so the same input
    always produces the same output.

    Args:
        audio: Array-like of samples; the last axis is treated as time, so
            both 1-D and (channels, samples) input are accepted.
        room_scale: Room size in [0, 1] (values outside are clipped). It sets
            the length of the reverb tail, from 0.1 s at 0 to 1.0 s at 1.
        sample_rate: Sampling rate in Hz used to convert the tail length to
            samples. Callers that omit it get a tail sized for 22050 Hz.
        wet_level: Share of the reverberated signal in the mix, in [0, 1]
            (values outside are clipped).

    Returns:
        numpy.ndarray with the same shape as ``audio`` (the reverb tail that
        would extend past the end is cut off). Floating-point input keeps its
        dtype; any other input is returned as float64. Empty or 0-d input is
        returned unchanged.
    """
    audio = np.asarray(audio)
    n_samples = audio.shape[-1] if audio.ndim else 0
    if n_samples == 0:
        return audio

    room_scale = float(np.clip(room_scale, 0.0, 1.0))
    wet_level = float(np.clip(wet_level, 0.0, 1.0))

    # Map the room size to a tail duration and build the impulse response.
    decay_seconds = 0.1 + 0.9 * room_scale
    ir_len = max(2, int(decay_seconds * sample_rate))
    position = np.arange(ir_len) / ir_len
    tail = np.random.default_rng(0).standard_normal(ir_len)
    # exp(-6.9) ~= 0.001, i.e. the envelope has dropped by about 60 dB at the
    # end of the impulse response.
    impulse = tail * np.exp(-6.9 * position)
    # Unit energy keeps the wet signal at a level comparable to the dry one.
    impulse /= np.sqrt(np.sum(impulse ** 2))

    # FFT-based convolution: a direct convolution would be quadratic in the
    # signal length, which is impractical for whole recordings.
    full_len = n_samples + ir_len - 1
    n_fft = 1 << (full_len - 1).bit_length()
    spectrum = np.fft.rfft(audio, n_fft) * np.fft.rfft(impulse, n_fft)
    wet = np.fft.irfft(spectrum, n_fft)[..., :n_samples]

    mixed = (1.0 - wet_level) * audio + wet_level * wet

    # Never let the effect raise the peak level above that of the input, so
    # a signal that fit in range before still fits afterwards.
    in_peak = np.max(np.abs(audio))
    out_peak = np.max(np.abs(mixed))
    if out_peak > in_peak > 0:
        mixed *= in_peak / out_peak

    if np.issubdtype(audio.dtype, np.floating):
        mixed = mixed.astype(audio.dtype, copy=False)
    return mixed

# Iterate over classes (normal_augmented_data and pneumonia_augmented_data)
# and write one augmented copy of every .wav file per technique.

# Map each technique name to a callable taking (audio, sample_rate), so the
# per-file loop does not need an if/elif chain.
technique_functions = {
    "noise": lambda audio, sample_rate: apply_noise_injection(audio),
    "pitch_shift": lambda audio, sample_rate: apply_pitch_shift(audio, sample_rate),
    "reverberation": lambda audio, sample_rate: apply_reverberation(audio),
}

for class_name in ["normal_augmented_data", "pneumonia_augmented_data"]:
    class_original_path = os.path.join(original_data_path, class_name)

    # Create one output folder per technique and remember its path, so the
    # folder name is built in exactly one place.
    technique_folders = {}
    for technique in augmentation_techniques:
        technique_folder = os.path.join(augmented_data_path, class_name + "_" + technique)
        os.makedirs(technique_folder, exist_ok=True)
        technique_folders[technique] = technique_folder

    # sorted(): os.listdir returns entries in arbitrary order; a fixed order
    # makes reruns process files in the same sequence.
    for filename in sorted(os.listdir(class_original_path)):
        # Case-insensitive match so files named e.g. "x.WAV" are not skipped.
        if not filename.lower().endswith('.wav'):
            continue

        file_path = os.path.join(class_original_path, filename)

        # Load the original audio at its native sample rate
        audio, sample_rate = librosa.load(file_path, sr=None)

        # Apply and save each data augmentation technique
        for technique in augmentation_techniques:
            augment = technique_functions.get(technique)
            if augment is None:
                # Unknown technique name: nothing to compute or save.
                continue

            augmented_audio = augment(audio, sample_rate)
            if augmented_audio is None:
                continue

            # Save the augmented audio next to its siblings for this technique
            augmented_filename = os.path.splitext(filename)[0] + f"_{technique}.wav"
            augmented_file_path = os.path.join(technique_folders[technique], augmented_filename)
            sf.write(augmented_file_path, augmented_audio, sample_rate)


In [1]:
# Mount Google Drive at /content/drive. The dataset paths used by the
# augmentation cell live under /content/drive/MyDrive, so on a fresh runtime
# this cell has to be run before that one.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive
