In [None]:
import librosa
import numpy as np
import soundfile as sf
import os
import glob
from tqdm import tqdm
import random

In [None]:
def change_volume(y, factor):
    """Scale the signal ``y`` by the gain ``factor`` and return the product."""
    scaled = y * factor
    return scaled

def add_noise(y, noise_factor=0.005):
    """Return ``y`` with scaled standard-normal noise added.

    Args:
        y: Audio samples of any shape (1-D mono or multi-dimensional).
        noise_factor: Multiplier applied to the unit-variance noise.

    Returns:
        The sum of ``y`` and the scaled noise, with the same shape as ``y``.
    """
    # Draw one noise value per element. Sizing by the full shape rather than
    # len(y) keeps multi-dimensional input from failing to broadcast, and
    # produces the same random stream as before for 1-D input.
    noise = np.random.standard_normal(np.shape(y))
    return y + noise_factor * noise

def remove_parts(y, sr, remove_ratio=0.1):
    """Zero out one randomly placed contiguous chunk of the signal.

    Args:
        y: Audio samples; the chunk is taken along the first axis.
        sr: Sample rate. Unused by this function.
        remove_ratio: Fraction of the signal length to zero out, in [0, 1].

    Returns:
        A copy of ``y`` with ``int(len(y) * remove_ratio)`` consecutive
        samples set to zero. The input array is left untouched.

    Raises:
        ValueError: If ``remove_ratio`` is outside ``[0, 1]``.
    """
    if not 0 <= remove_ratio <= 1:
        raise ValueError(f"remove_ratio must be in [0, 1], got {remove_ratio}")

    # Work on a copy so callers do not have their signal silently modified.
    out = np.array(y, copy=True)
    y_len = len(out)
    part_len = int(y_len * remove_ratio)

    # randint's upper bound is exclusive; the +1 makes the last valid start
    # reachable and keeps remove_ratio == 1 (or empty input) from raising.
    start = np.random.randint(0, y_len - part_len + 1)
    out[start:start + part_len] = 0
    return out

def change_speed(y, speed_factor):
    """Time-stretch ``y`` with librosa.

    ``speed_factor`` is forwarded unchanged as the ``rate`` argument of
    ``librosa.effects.time_stretch``.
    """
    stretched = librosa.effects.time_stretch(y, rate=speed_factor)
    return stretched

def add_silence(y, sr, position='beginning', silence_duration_ms=100):
    """Pad a 1-D signal with zeros at its start or its end.

    Args:
        y: 1-D audio samples.
        sr: Sample rate in samples per second; converts the duration to a
            sample count.
        position: ``'beginning'`` to prepend the silence, ``'end'`` to
            append it.
        silence_duration_ms: Length of the silence in milliseconds.

    Returns:
        A new array holding the silence and ``y`` joined in the requested
        order.

    Raises:
        ValueError: If ``position`` is neither ``'beginning'`` nor ``'end'``.
    """
    # Reject unknown positions explicitly; falling through would return None
    # and only fail later, far from the cause.
    if position not in ('beginning', 'end'):
        raise ValueError(
            f"position must be 'beginning' or 'end', got {position!r}"
        )

    silence = np.zeros(int(sr * silence_duration_ms / 1000))
    if position == 'beginning':
        return np.concatenate([silence, y])
    return np.concatenate([y, silence])

def mask_time(y, sr, max_mask_pct=0.1, num_masks=1):
    """Zero out random spans of columns (time frames) in a 2-D spectrogram.

    Args:
        y: 2-D array; axis 1 is treated as the time axis.
        sr: Sample rate. Unused by this function.
        max_mask_pct: Fraction of the time axis covered by each mask, in
            [0, 1]. Every mask is ``int(n_frames * max_mask_pct)`` wide.
        num_masks: Number of masks to apply; masks may overlap.

    Returns:
        A masked copy of ``y``. The input array is left untouched.

    Raises:
        ValueError: If ``max_mask_pct`` is outside ``[0, 1]``.
    """
    if not 0 <= max_mask_pct <= 1:
        raise ValueError(f"max_mask_pct must be in [0, 1], got {max_mask_pct}")

    # Copy first: masking in place would leak this mask into every later
    # augmentation derived from the same spectrogram.
    masked = np.array(y, copy=True)
    len_spectro = masked.shape[1]
    mask_len = int(len_spectro * max_mask_pct)

    for _ in range(num_masks):
        # Exclusive upper bound, hence +1: the final start position stays
        # reachable and a mask covering the whole axis does not raise.
        mask_start = np.random.randint(0, len_spectro - mask_len + 1)
        masked[:, mask_start:mask_start + mask_len] = 0
    return masked

def mask_frequency(y, sr, max_mask_pct=0.1, num_masks=1):
    """Zero out random bands of rows (frequency channels) in a 2-D spectrogram.

    Args:
        y: 2-D array; axis 0 is treated as the frequency axis.
        sr: Sample rate. Unused by this function.
        max_mask_pct: Fraction of the frequency axis covered by each mask,
            in [0, 1]. Every mask is ``int(n_channels * max_mask_pct)`` tall.
        num_masks: Number of masks to apply; masks may overlap.

    Returns:
        A masked copy of ``y``. The input array is left untouched.

    Raises:
        ValueError: If ``max_mask_pct`` is outside ``[0, 1]``.
    """
    if not 0 <= max_mask_pct <= 1:
        raise ValueError(f"max_mask_pct must be in [0, 1], got {max_mask_pct}")

    # Copy first: masking in place would leak this mask into every later
    # augmentation derived from the same spectrogram.
    masked = np.array(y, copy=True)
    num_mel_channels = masked.shape[0]
    mask_len = int(num_mel_channels * max_mask_pct)

    for _ in range(num_masks):
        # Exclusive upper bound, hence +1: the final start position stays
        # reachable and a mask covering the whole axis does not raise.
        mask_start = np.random.randint(0, num_mel_channels - mask_len + 1)
        masked[mask_start:mask_start + mask_len, :] = 0
    return masked

def warp_time(y, sr, max_warp_factor=0.2):
    """Uniformly stretch or compress a 2-D spectrogram along its time axis.

    A warp factor is drawn uniformly from
    ``[1 - max_warp_factor, 1 + max_warp_factor]``. Output frame ``t`` is the
    linear interpolation of the input at position ``t * warp_factor``, so a
    factor above 1 speeds the content up and a factor below 1 slows it down.
    Positions past the last input frame repeat that frame.

    Args:
        y: 2-D array; axis 1 is treated as the time axis.
        sr: Sample rate. Unused by this function.
        max_warp_factor: Maximum relative deviation of the warp factor from 1.

    Returns:
        A new float array with the same shape as ``y``. The input is not
        modified.
    """
    len_spectro = y.shape[1]
    warp_factor = random.uniform(1 - max_warp_factor, 1 + max_warp_factor)
    warped_spectro = np.zeros((y.shape[0], len_spectro))
    if len_spectro == 0:
        # np.interp rejects empty sample grids; nothing to warp anyway.
        return warped_spectro

    time_steps = np.arange(len_spectro)
    # Where each output frame reads from in the input. np.interp needs its
    # sample grid (time_steps) to have the same length as the row values, so
    # the warp goes into the query positions instead of the grid.
    source_positions = np.clip(time_steps * warp_factor, 0, len_spectro - 1)

    for warped_row, row in zip(warped_spectro, y):
        warped_row[:] = np.interp(source_positions, time_steps, row)
    return warped_spectro

def save_audio(y, sr, filename):
    """Write the samples ``y`` to ``filename`` at sample rate ``sr`` via soundfile."""
    sf.write(file=filename, data=y, samplerate=sr)

def create_variations(audio_path):
    """Write augmented copies of one audio file next to the original.

    The file is loaded at its native sample rate. Each variation is saved in
    the source directory as ``<name>_<suffix><ext>``, where ``<name>`` is the
    original base name with spaces replaced by underscores. Suffixes written:
    ``high_volume``, ``low_volume``, ``noise``, ``parts_missing``, ``faster``,
    ``slower``, ``silence_begin``, ``silence_end``, ``time_masked``,
    ``freq_masked`` and ``time_warped``.

    Args:
        audio_path: Path of the audio file to augment.
    """
    y, sr = librosa.load(audio_path, sr=None)

    stem, ext = os.path.splitext(audio_path)
    base_filename = os.path.join(
        os.path.dirname(stem), os.path.basename(stem).replace(' ', '_')
    )

    def out_path(suffix):
        # Build the output file name for one variation.
        return f'{base_filename}_{suffix}{ext}'

    # --- Waveform-domain variations -------------------------------------
    save_audio(change_volume(y, 1.5), sr, out_path('high_volume'))
    save_audio(change_volume(y, 0.5), sr, out_path('low_volume'))
    save_audio(add_noise(y), sr, out_path('noise'))
    # Pass a copy so the loaded signal stays intact for later variations.
    save_audio(remove_parts(y.copy(), sr), sr, out_path('parts_missing'))
    save_audio(change_speed(y, 1.25), sr, out_path('faster'))
    save_audio(change_speed(y, 0.75), sr, out_path('slower'))
    save_audio(
        add_silence(y, sr, position='beginning', silence_duration_ms=100),
        sr,
        out_path('silence_begin'),
    )
    save_audio(
        add_silence(y, sr, position='end', silence_duration_ms=100),
        sr,
        out_path('silence_end'),
    )

    # --- Spectrogram-domain variations ----------------------------------
    # `y` must be passed by keyword: recent librosa releases make it
    # keyword-only. Masks are applied to the power mel spectrogram, where
    # zero means "no energy"; in dB relative to the peak, zero would be the
    # loudest value instead of silence.
    mel_spectro = librosa.feature.melspectrogram(y=y, sr=sr)

    def save_mel(mel, suffix):
        # A spectrogram is not a waveform, so invert it back to audio before
        # writing. `length` pins the result to the original sample count.
        audio = librosa.feature.inverse.mel_to_audio(mel, sr=sr, length=len(y))
        save_audio(audio, sr, out_path(suffix))

    # Each mask gets its own copy so one variation cannot bleed into the
    # next through a shared array.
    save_mel(mask_time(mel_spectro.copy(), sr), 'time_masked')
    save_mel(mask_frequency(mel_spectro.copy(), sr), 'freq_masked')
    save_mel(warp_time(mel_spectro, sr), 'time_warped')

def process_directory(directory_path):
    """Create variations for every ``*.mp3`` file directly inside a folder.

    Prints how many files matched, then runs ``create_variations`` on each
    one behind a tqdm progress bar.
    """
    pattern = os.path.join(directory_path, '*.mp3')
    mp3_files = glob.glob(pattern)
    print(f"Found {len(mp3_files)} files.")

    progress = tqdm(mp3_files, desc="Processing files")
    for path in progress:
        create_variations(path)


# Folder whose *.mp3 files are augmented; machine-specific, adjust as needed.
directory_path = '/Users/ciprian/Desktop/do you need water/'

# Run the batch job only when executed directly (script or notebook cell),
# not as a side effect of importing this file for its helper functions.
if __name__ == "__main__":
    process_directory(directory_path)