In [None]:
import os
import random
import librosa
import numpy as np
import soundfile as sf
import pandas as pd
from datasets import load_dataset
from tqdm import tqdm
from datasets import DownloadConfig


# Build a small dataset of 3-instrument mixtures in which exactly one
# instrument has been pitch-shifted ("out of tune"), and write the mixtures
# plus a labels.csv describing them.

# Load NSynth dataset (Hugging Face version)
# NOTE: this call downloads the complete dataset archives (roughly 25 GB in
# total) before the 300-example subset below is taken, so the Hugging Face
# cache directory needs that much free disk space; otherwise it fails with
# "OSError: [Errno 28] No space left on device".
# NOTE(review): `streaming=True` followed by `.take(300)` may avoid the full
# download if this dataset's loading script supports streaming -- TODO verify.
dataset = load_dataset("jg583/NSynth", split="train", trust_remote_code=True)
dataset = dataset.select(range(300))

# Output folder
OUTPUT_PATH = "mixed_out_of_tune_samples"
os.makedirs(OUTPUT_PATH, exist_ok=True)

# Parameters
sample_rate = 16000
duration_sec = 4
pitch_shift_range = [-2, -1, 1, 2]  # in semitones
mix_size = 3  # number of instruments to mix
total_samples = 50  # how many mixed outputs you want

# Length (in audio samples) that every clip is padded/truncated to.
num_audio_samples = sample_rate * duration_sec

metadata = []

for i in tqdm(range(total_samples)):
    # Randomly pick `mix_size` unique examples. Sampling indices (instead of
    # `list(dataset)`) avoids decoding all 300 clips on every iteration; only
    # the chosen rows are fetched.
    chosen_indices = random.sample(range(len(dataset)), mix_size)
    chosen_samples = [dataset[j] for j in chosen_indices]
    out_of_tune_idx = random.randint(0, mix_size - 1)
    pitch_shift = random.choice(pitch_shift_range)

    mixture = np.zeros(num_audio_samples)
    out_of_tune_instrument = None
    in_tune_instruments = []

    for idx, sample in enumerate(chosen_samples):
        # presumably the decoded audio is already at `sample_rate` (16 kHz);
        # verify against the dataset's audio feature if that is in doubt.
        y = np.array(sample["audio"]["array"], dtype=np.float32)
        y = librosa.util.fix_length(y, size=num_audio_samples)

        instrument_name = sample["instrument"]

        if idx == out_of_tune_idx:
            # `sr` must be passed by keyword: librosa >= 0.10 made it
            # keyword-only, and the keyword form also works on older releases.
            y = librosa.effects.pitch_shift(y, sr=sample_rate, n_steps=pitch_shift)
            out_of_tune_instrument = instrument_name
        else:
            in_tune_instruments.append(instrument_name)

        mixture += y

    # Normalize audio to (just under) unit peak; the epsilon guards against
    # division by zero for an all-silent mixture.
    mixture /= np.max(np.abs(mixture)) + 1e-6

    filename = f"mix_{i}.wav"
    sf.write(os.path.join(OUTPUT_PATH, filename), mixture, sample_rate)

    metadata.append(
        {
            'out_of_tune': out_of_tune_instrument,
            'in_tune': in_tune_instruments,
            'filename': filename,
        }
    )

# Save labels. The column order is pinned so the CSV layout does not depend on
# which position happened to be out of tune in the first mixture.
df = pd.DataFrame(metadata, columns=['out_of_tune', 'in_tune', 'filename'])
df.to_csv(os.path.join(OUTPUT_PATH, "labels.csv"), index=False)



Downloading data:   0%|          | 0.00/23.8G [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/1.07G [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/350M [00:00<?, ?B/s]

OSError: [Errno 28] No space left on device