In [1]:
import os
os.chdir("../")

In [2]:
%pwd

'e:\\MyOnlineCourses\\ML_Projects\\arabic-digits-recognition'

In [3]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class DataAugmentationConfig:
    root_dir: Path
    src_dst_path: str


In [4]:
from src.ard.constants import *
from src.ard.utils.help import read_yaml, create_directories

class ConfigurationManager:
    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):

        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config.artifacts_root])


    
    def get_data_augmentation_config(self) -> DataAugmentationConfig:
        config = self.config.data_augmentation

        create_directories([config.root_dir])

        data_augmentation_config = DataAugmentationConfig(
            root_dir=config.root_dir,
            src_dst_path=config.src_dst_path
        )

        return data_augmentation_config

In [5]:
import os
import numpy as np
import librosa
import soundfile as sf
from ard import logger
from tqdm import tqdm
import time

pnorm = False
rnorm = False
class DataAugmentation:
    def __init__(self, noise_level=0.001, config = DataAugmentationConfig):
        self.noise_level = noise_level
        self.config = config

    def add_noise(self, audio):
        """Add random noise to the audio signal."""
    
        noise = np.random.randn(len(audio))
        augmented_audio = audio + self.noise_level * noise
        return augmented_audio

    def peak_normalization(self, audio):
        """Normalize the audio signal to have a peak amplitude of 1.0."""
        peak = np.max(np.abs(audio))
        if peak > 0:
            normalized_audio = audio / peak
            pnorm = True
        else:
            normalized_audio = audio
            pnorm - False
        return normalized_audio
    
    import numpy as np

    def rms_normalization(self, audio, rms_level=-20):
        """
        Normalize the audio signal to a specified RMS level.

        Parameters:
            audio (np.ndarray): Input audio signal.
            rms_level (float): Desired RMS level in dB (default is -20 dB).

        Returns:
            np.ndarray: Normalized audio signal.
        """
        # Ensure audio is a NumPy array
        audio = np.asarray(audio)

        if len(audio) == 0:
            raise ValueError("Input audio array is empty.")

        # Calculate the target RMS value
        target_rms = 10 ** (rms_level / 20.0)

        # Calculate the current RMS of the audio
        current_rms = np.sqrt(np.mean(audio**2))

        if current_rms != 0:
            

            # Calculate the normalization factor
            normalization_factor = target_rms / current_rms

            # Normalize the audio
            normalized_audio = audio * normalization_factor

            # Clip the normalized audio to avoid distortion
            normalized_audio = np.clip(normalized_audio, -1.0, 1.0)
            rnorm = True
        else:
            rnorm=False

        return normalized_audio
 

    def fade_out(self, audio, fade_duration=1.0, sr=16000):
        """Apply a fade-out effect to the audio signal."""
        window = np.hamming(len(audio))
        augmented_sig = window * audio
        augmented_sig /= np.mean(np.abs(augmented_sig))
        #fade_samples = int(fade_duration * sr)
        #fade_out_curve = np.linspace(1, 0, fade_samples)
        #if fade_samples < len(audio):
        #    audio[-fade_samples:] *= fade_out_curve
        return augmented_sig

    def change_tone(self, audio, sr, n_steps=2):
        """Change the tone of the audio signal by shifting pitch."""
        return librosa.effects.pitch_shift(y=audio, sr=sr, n_steps=n_steps)

    def augment(self, audio_path, output_dir):
        """Load audio, apply augmentations, and save each result as a new file."""
        audio, sr = librosa.load(audio_path, sr=None)

        # Apply augmentations
        audio_noisy = self.add_noise(audio)
        audio_pnormalized = self.peak_normalization(audio)
        audio_rnormalized = self.rms_normalization(audio)
        audio_fade_out = self.fade_out(audio)
        audio_tone_changed = self.change_tone(audio, sr)

        # Create output directory if it doesn't exist
        sub_dir = os.path.dirname(audio_path)
        output_sub_dir = os.path.join(output_dir, os.path.basename(sub_dir))
        #os.makedirs(output_sub_dir, exist_ok=True)

        # Save the augmented audio files
        base_filename = os.path.splitext(os.path.basename(audio_path))[0]
        sf.write(os.path.join(output_sub_dir, f'{base_filename}_noisy.wav'), audio_noisy, sr)
        if pnorm:
            sf.write(os.path.join(output_sub_dir, f'{base_filename}_pnormalized.wav'), audio_pnormalized, sr)
        if rnorm:
            sf.write(os.path.join(output_sub_dir, f'{base_filename}_rnormalized.wav'), audio_rnormalized, sr)
        sf.write(os.path.join(output_sub_dir, f'{base_filename}_fade_out.wav'), audio_fade_out, sr)
        sf.write(os.path.join(output_sub_dir, f'{base_filename}_tone_changed.wav'), audio_tone_changed, sr)
        #logger.info(f"File {base_filename.split('/')[-1]} augmentations have been saved !!")
        
    def get_files(self):
        wav_files = []
        for root, _, files in os.walk(self.config.src_dst_path):
            for file in files:
                if file.endswith('.wav'):
                    wav_files.append(os.path.join(root, file))
                    
        return wav_files
        
    def load(self):
        """Process all WAV files in the input directory and its subdirectories."""
        # Gather all .wav files
        wav_files = self.get_files()   
        logger.info(f" Total original files : {len(wav_files)}")
        # Use tqdm to show progress while processing the files
        with tqdm(total=len(wav_files), colour="green", desc="Augmentation Process: ", 
                  bar_format="{l_bar}{bar} [time spent: {elapsed}]",
                  leave=True) as pbar:
                    for audio_path in wav_files:
                        self.augment(audio_path, self.config.src_dst_path)
                        pbar.update(1)
                        time.sleep(0.01)
        wav_files = self.get_files() 
        logger.info(f" Total files after Augmentation : {len(wav_files)}")
   
    

In [6]:
try:
    config = ConfigurationManager()
    data_augmentation_config = config.get_data_augmentation_config()
    data_augmentation = DataAugmentation(config=data_augmentation_config)
    dataset = data_augmentation.load()

except Exception as e:
    raise e

[2024-08-10 12:51:07,939: INFO: help: yaml file: config\config.yaml loaded successfully. Content size: 5]
[2024-08-10 12:51:07,951: INFO: help: yaml file: params.yaml loaded successfully. Content size: 7]
[2024-08-10 12:51:07,954: INFO: help: Total directories created: 1]
[2024-08-10 12:51:07,957: INFO: help: Total directories created: 1]
[2024-08-10 12:51:07,971: INFO: 2446048386:  Total original files : 402]


Augmentation Process: 100%|[32m██████████[0m [time spent: 09:29]

[2024-08-10 13:00:37,169: INFO: 2446048386:  Total files after Augmentation : 1608]



