# Creating Synthetic Overlapping Audio

## Jessica Stinson
## s224576666

In [14]:
# Suppress warnings 
import warnings
warnings.filterwarnings("ignore")

# Set TensorFlow environment
import os
os.environ['PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION'] = 'python'

import soundfile as sf 
from pathlib import Path
import librosa
import numpy as np
import random 
import tensorflow as tf

# Notebook-wide configuration. The two paths are machine-specific absolute
# Windows paths; edit them before running on another machine.
SC = {
    # Folder that is scanned for the source recordings
    'AUDIO_DATA_DIRECTORY': r"C:\Project-Echo\src\Prototypes\data\Data Files\Bucket 3",
    
    'AUDIO_SAMPLE_RATE': 48000, # sample rate used when loading and writing audio
    'AUDIO_CLIP_DURATION': 5, # seconds

    # Folder that receives the generated clips and the label array
    'OUTPUT_DIR': r"C:\Project-Echo\src\Prototypes\engine\Working with overlapping audio\Synthetic Dataset"
}

audio_dir = Path(SC['AUDIO_DATA_DIRECTORY'])
output_dir = Path(SC['OUTPUT_DIR'])
synthetic_size = 500  # number of synthetic clips to generate
SR = SC['AUDIO_SAMPLE_RATE']
duration = SC['AUDIO_CLIP_DURATION']

# The source recordings for each synthetic clip are drawn with the `random`
# module; seeding it here makes a Restart & Run All reproduce the same dataset.
SEED = 42
random.seed(SEED)

In [15]:
# Function to index directories and extract class names and file paths
def index_directory(directory, file_types=('.ogg', '.mp3', '.wav', '.flac')):
    """Index an audio dataset laid out as one sub-directory per class.

    Args:
        directory: Root folder; each immediate sub-directory is one class.
        file_types: File extensions (including the leading dot) to accept.
            Matching is case-insensitive, so '.wav' also accepts 'clip.WAV'.

    Returns:
        A tuple ``(audio_files, labels, class_names)`` where ``audio_files`` is
        a list of file paths as strings, ``labels`` is a parallel list of
        integer indices into ``class_names``, and ``class_names`` is the sorted
        list of sub-directory names. All three are empty when ``directory``
        contains no sub-directories.
    """
    root = Path(directory)

    # A lone string would otherwise be iterated character by character.
    if isinstance(file_types, str):
        file_types = (file_types,)
    # Lower-case both sides so upper-case extensions are not silently skipped.
    wanted_suffixes = {suffix.lower() for suffix in file_types}

    audio_files = []
    labels = []
    class_names = sorted(entry.name for entry in root.glob('*') if entry.is_dir())
    for label, class_name in enumerate(class_names):
        # Sort the recursive listing so the result does not depend on the
        # order in which the filesystem happens to enumerate entries.
        for file_path in sorted((root / class_name).glob('**/*')):
            # is_file() keeps out directories whose name ends in an audio suffix.
            if file_path.is_file() and file_path.suffix.lower() in wanted_suffixes:
                audio_files.append(str(file_path))
                labels.append(label)  # Store the integer label instead of the class name
    return audio_files, labels, class_names

In [16]:
# Load original audio dataset 
audio_files, labels, class_names = index_directory(audio_dir)

# Fail fast with a clear message: a wrong or empty data directory would
# otherwise only surface later as an unrelated error when sampling files.
if not audio_files:
    raise FileNotFoundError(f"No audio files found under {audio_dir}")

In [25]:
# Build the synthetic dataset: every clip is the sum of several randomly chosen
# recordings, labelled with a multi-hot vector over class_names. Recordings
# drawn from the same class set the same entry, so a vector may hold fewer
# ones than there are sources.
n_sources = 3                        # recordings mixed into each synthetic clip
clip_samples = int(duration * SR)    # fixed clip length, in samples

if len(audio_files) < n_sources:
    raise ValueError(
        f"Need at least {n_sources} audio files to build a mixture, found {len(audio_files)}"
    )

# sf.write does not create missing folders.
output_dir.mkdir(parents=True, exist_ok=True)

synthetic_labels = []

for i in range(synthetic_size):
    indices = random.sample(range(len(audio_files)), n_sources)

    synthetic_audio = np.zeros(clip_samples, dtype = np.float32)
    label_vector = np.zeros(len(class_names), dtype = np.float32)

    for idx in indices:
        path = audio_files[idx]
        label = labels[idx]

        # Only the first `duration` seconds are kept, so do not decode the rest.
        audio, _ = librosa.load(path, sr = SR, duration = duration)
        # Pad or trim so every source matches the mixture length exactly.
        audio = librosa.util.fix_length(audio, size = clip_samples)
        synthetic_audio += audio
        label_vector[label] = 1.0

    # Summing several sources can push samples outside [-1, 1], which would be
    # clipped when written as integer PCM WAV. Rescale only when needed so
    # mixtures that already fit keep their original level.
    peak = np.max(np.abs(synthetic_audio))
    if peak > 1.0:
        synthetic_audio /= peak

    file_name = f"synthetic_{i + 1}.wav"
    sf.write(str(output_dir / file_name), synthetic_audio, SR)

    synthetic_labels.append(label_vector)
    
    if i == 0 or (i + 1) % 100 == 0 or i == synthetic_size - 1:
        print(f"Generated {i + 1}/{synthetic_size} synthetic samples")

# Row k of the saved array is the label vector for synthetic_{k + 1}.wav.
np.save(output_dir / "synthetic_labels.npy", np.array(synthetic_labels))
print("Synthetic dataset generation complete.")

Generated 1/500 synthetic samples
Generated 100/500 synthetic samples
Generated 200/500 synthetic samples
Generated 300/500 synthetic samples
Generated 400/500 synthetic samples
Generated 500/500 synthetic samples
Synthetic dataset generation complete.
