# In [None]:
import os
import random
import numpy as np
import soundfile as sf
import librosa
from scipy.io import wavfile
import shutil
import hashlib

# --- Configuration -----------------------------------------------------------
input_folder = ""  # Replace with the actual path to your input folder
output_folder = ""  # Replace with the actual path to your output folder

desired_num_audios = 10  # number of desired audios
random_seed = 42
# Seed both generators: the script draws from NumPy's global RNG as well as
# from the stdlib ``random`` module, so both must be seeded for repeatable runs.
np.random.seed(random_seed)
random.seed(random_seed)

crop_duration = 3  # Random cropping duration in seconds
noise_level = 0.05  # Standard deviation for random normal distribution noise
volume_factor_range = [0.8, 1.2]  # Range for volume adjustment
speed_factor_range = [0.8, 1.2]  # Range for speed perturbation
pitch_semitones_range = [-2, 2]  # Range for pitch variation

# Define a set to store unique hashes of generated audio files
generated_audio_hashes = set()

# --- Setup -------------------------------------------------------------------
# exist_ok=True replaces the racy "check, then create" sequence.
os.makedirs(output_folder, exist_ok=True)

# os.listdir() returns entries in arbitrary, filesystem-dependent order, which
# would make the seeded random.choice() pick different files on different
# machines; sorting restores reproducibility. Subdirectories are skipped
# because shutil.copy() cannot copy them.
audio_files = sorted(
    entry
    for entry in os.listdir(input_folder)
    if os.path.isfile(os.path.join(input_folder, entry))
)
num_input_audios = len(audio_files)

# Copy the original audios
for audio_file in audio_files:
    input_audio_path = os.path.join(input_folder, audio_file)
    output_audio_path = os.path.join(output_folder, audio_file)
    shutil.copy(input_audio_path, output_audio_path)

# Function to generate a unique hash for an audio file
def generate_audio_hash(audio_data):
    """Return the SHA-1 hex digest of the raw bytes of ``audio_data``.

    The digest is used only to detect byte-identical audio, not for security.

    Args:
        audio_data: A NumPy array of samples, or any bytes-like object.

    Returns:
        The 40-character hexadecimal SHA-1 digest of the underlying bytes.
    """
    if isinstance(audio_data, np.ndarray):
        # hashlib only accepts C-contiguous buffers; strided slices or
        # transposed views would otherwise raise. For arrays that are already
        # contiguous this is a no-op, so their digests are unchanged.
        audio_data = np.ascontiguousarray(audio_data)
    return hashlib.sha1(audio_data).hexdigest()

# Augment the audio files
#
# Each pass of the main loop picks a random input file, takes a random crop of
# `crop_duration` seconds and saves it; then, from that crop, it derives and
# saves one noise-injected, one volume-adjusted, one speed-perturbed and one
# pitch-shifted variant. Saving stops as soon as `desired_num_audios` files
# have been written, so the requested count is never exceeded.


def _inject_noise(audio, sample_rate):
    """Return `audio` plus zero-mean Gaussian noise with std `noise_level`."""
    return audio + np.random.normal(0, noise_level, len(audio))


def _adjust_volume(audio, sample_rate):
    """Return `audio` scaled by a factor drawn from `volume_factor_range`."""
    return audio * np.random.uniform(*volume_factor_range)


def _perturb_speed(audio, sample_rate):
    """Return `audio` time-stretched by a rate drawn from `speed_factor_range`."""
    speed_factor = np.random.uniform(*speed_factor_range)
    # `rate` is keyword-only in librosa >= 0.10; the keyword form also works
    # on older releases.
    return librosa.effects.time_stretch(audio, rate=speed_factor)


def _shift_pitch(audio, sample_rate):
    """Return `audio` shifted by semitones drawn from `pitch_semitones_range`."""
    pitch_semitones = np.random.uniform(*pitch_semitones_range)
    # `sr` and `n_steps` are keyword-only in librosa >= 0.10.
    return librosa.effects.pitch_shift(audio, sr=sample_rate, n_steps=pitch_semitones)


def _save_unique_audio(samples, sample_rate, prefix, label, index):
    """Write `samples` as a float32 WAV unless identical audio was saved before.

    Args:
        samples: Audio samples to write.
        sample_rate: Sample rate passed to the WAV writer.
        prefix: File-name prefix; the file is named ``{prefix}_{index}.wav``.
        label: Human-readable augmentation name used in the progress message.
        index: 1-based ordinal of this file among the generated audios.

    Returns:
        True if the file was written, False if its hash was already recorded
        in `generated_audio_hashes`.
    """
    # Hash exactly the float32 data that goes to disk so that de-duplication
    # reflects the file contents rather than an intermediate float64 array.
    samples = np.ascontiguousarray(samples, dtype=np.float32)
    audio_hash = generate_audio_hash(samples)
    if audio_hash in generated_audio_hashes:
        return False
    output_audio_path = os.path.join(output_folder, f"{prefix}_{index}.wav")
    wavfile.write(output_audio_path, sample_rate, samples)
    print(f"{label} audio {index}/{desired_num_audios} saved: {output_audio_path}")
    generated_audio_hashes.add(audio_hash)
    return True


# Applied in this order to every fresh crop: (file prefix, label, transform).
_augmentations = (
    ("noise_injection", "Noise injection", _inject_noise),
    ("volume_adjustment", "Volume adjustment", _adjust_volume),
    ("speed_perturbation", "Speed perturbation", _perturb_speed),
    ("pitch_variation", "Pitch variation", _shift_pitch),
)

# Upper bound on back-to-back duplicate crops before giving up; without it the
# loop could spin forever once every possible crop has already been saved.
max_consecutive_duplicate_crops = 100

if desired_num_audios > 0 and not audio_files:
    raise ValueError(f"No input audio files found in {input_folder!r}")

generated_audios = 0
consecutive_duplicate_crops = 0
while generated_audios < desired_num_audios:
    input_audio_file = random.choice(audio_files)
    input_audio_path = os.path.join(input_folder, input_audio_file)

    # sr=None keeps the file's native sample rate.
    original_audio, sr = librosa.load(input_audio_path, sr=None)

    # Random cropping. Clamp the latest start to 0 so that clips shorter than
    # the crop window are used whole instead of yielding a negative start
    # index (which would slice from the end of the array or return nothing).
    crop_num_samples = int(crop_duration * sr)
    max_crop_start = max(0, len(original_audio) - crop_num_samples)
    crop_start = int(np.random.uniform(0, max_crop_start))
    cropped_audio = original_audio[crop_start:crop_start + crop_num_samples]

    # A crop that was already saved is skipped together with its variants.
    if not _save_unique_audio(
        cropped_audio, sr, "random_cropping", "Random cropping", generated_audios + 1
    ):
        consecutive_duplicate_crops += 1
        if consecutive_duplicate_crops >= max_consecutive_duplicate_crops:
            raise RuntimeError(
                f"Gave up after {consecutive_duplicate_crops} consecutive duplicate "
                f"crops; only {generated_audios}/{desired_num_audios} audios generated"
            )
        continue
    consecutive_duplicate_crops = 0
    generated_audios += 1

    for prefix, label, augment in _augmentations:
        # Stop mid-pass once the target is reached so the count is exact.
        if generated_audios >= desired_num_audios:
            break
        augmented_audio = augment(cropped_audio, sr)
        if _save_unique_audio(augmented_audio, sr, prefix, label, generated_audios + 1):
            generated_audios += 1
