In [None]:
import os
import random
import librosa
import numpy as np
import soundfile as sf
import pandas as pd
import deeplake
import pickle
from tqdm import tqdm
from multiprocessing import Pool, cpu_count

# CONFIGURATION
OUTPUT_PATH = "mixed_out_of_tune_samples"   # Folder receiving the .wav mixes and label CSVs
USE_MULTIPROCESSING = False                 # Set to True to enable multiprocessing
SAVE_EVERY = 10000                          # Save metadata every N samples
TOTAL_SAMPLES = 100_000                     # Number of mixtures to generate
SAMPLE_RATE = 16000                         # Sample rate (Hz) used for processing and writing
DURATION_SEC = 4                            # Length of every mixture, in seconds
PITCH_SHIFT_RANGE = [-2, -1, 1, 2]          # Candidate detuning amounts, in semitones
MIX_SIZE = 3                                # Number of source samples summed per mixture
FILTERED_CACHE = "filtered_samples.pkl"     # Pickle cache of the filtered sample index

# INSTRUMENT MAP
# Instrument id -> readable name; only ids listed here are used for mixing.
INSTRUMENT_MAP = {
    0: 'string_bass',
    1: 'bass_guitar',
    18: 'flute',
    20: 'trumpet',
    21: 'trombone',
    22: 'tuba',
    24: 'guitar_acoustic',
    26: 'guitar_electric',
    47: 'viola',
    48: 'violin',
    50: 'saxophone',
    54: 'oboe',
    55: 'bassoon',
}
# Set of the ids above, for O(1) membership tests while filtering.
target_instruments = set(INSTRUMENT_MAP)

# CREATE OUTPUT FOLDER
# A regular file sitting at OUTPUT_PATH is deleted first so the directory can be created.
if os.path.isfile(OUTPUT_PATH):
    os.remove(OUTPUT_PATH)
os.makedirs(OUTPUT_PATH, exist_ok=True)

# LOAD DATASET
ds = deeplake.load("hub://activeloop/nsynth-train")

# FILTER TARGET INSTRUMENTS
# filtered_samples is a list of (dataset index, instrument id) pairs restricted to
# target_instruments. It is computed once by scanning the dataset and then reused
# from the pickle cache on later runs.
if not os.path.exists(FILTERED_CACHE):
    print("Filtering target instruments...")
    instrument_ids = (
        int(record['instrument'].numpy())
        for record in tqdm(ds, total=len(ds))
    )
    filtered_samples = [
        (position, instrument_id)
        for position, instrument_id in enumerate(instrument_ids)
        if instrument_id in target_instruments
    ]
    with open(FILTERED_CACHE, "wb") as cache_file:
        pickle.dump(filtered_samples, cache_file)
else:
    with open(FILTERED_CACHE, "rb") as cache_file:
        filtered_samples = pickle.load(cache_file)

# PITCH SHIFT FUNCTION
def simple_pitch_shift(y, semitones, sr):
    """Shift the pitch of ``y`` by ``semitones`` while keeping its length.

    The signal is first time-stretched by the pitch ratio (duration changes,
    pitch does not) and then resampled so the duration returns to the original
    and the pitch moves by the requested interval.

    Args:
        y: 1-D audio signal.
        semitones: Signed shift in semitones (positive raises the pitch).
        sr: Sample rate of ``y`` in Hz.

    Returns:
        The shifted signal, padded or trimmed to ``len(y)`` samples.
    """
    rate = 2 ** (semitones / 12)
    # Stretch by the pitch ratio: the result is `rate` times as long as y, same pitch.
    y_stretched = librosa.effects.time_stretch(y, rate=1 / rate)
    # Treat the stretched audio as if it were sampled at sr * rate and bring it back
    # to sr. Resampling from sr to sr (as before) is a no-op and leaves the pitch
    # untouched; this step is what actually scales the frequencies by `rate`.
    y_shifted = librosa.resample(y_stretched, orig_sr=sr * rate, target_sr=sr)
    # Guard against off-by-a-few-samples rounding from stretch + resample.
    return librosa.util.fix_length(y_shifted, size=len(y))

# SAMPLE GENERATION FUNCTION
def generate_sample(i):
    """Build one mixture with a single detuned source and write it to disk.

    MIX_SIZE entries are drawn from ``filtered_samples``; one of them is
    pitch-shifted by a random amount from PITCH_SHIFT_RANGE, all are summed,
    the sum is peak-normalized and written to ``OUTPUT_PATH/mix_{i}.wav``.

    Args:
        i: Index used to name the output file.

    Returns:
        dict with keys ``out_of_tune`` (instrument name of the detuned source),
        ``in_tune`` (list of the other instrument names), ``filename`` and
        ``instruments_all`` (all instrument names in mixing order).
    """
    chosen = random.sample(filtered_samples, MIX_SIZE)
    out_of_tune_idx = random.randint(0, MIX_SIZE - 1)
    pitch_shift = random.choice(PITCH_SHIFT_RANGE)

    n_samples = SAMPLE_RATE * DURATION_SEC
    mixture = np.zeros((n_samples,), dtype=np.float32)
    # Create both keys up front, in a fixed order. Previously the insertion order
    # depended on which position was detuned, so label CSVs written from these
    # dicts could end up with different column orders from one part to the next.
    label = {'out_of_tune': None, 'in_tune': []}
    all_instruments = []

    for idx, (sample_idx, instrument_id) in enumerate(chosen):
        instrument_name = INSTRUMENT_MAP.get(instrument_id, str(instrument_id))
        all_instruments.append(instrument_name)

        sample = ds[sample_idx]
        y = np.array(sample['audios'], dtype=np.float32).flatten()
        # Pad or trim every source to the common mixture length before summing.
        y = librosa.util.fix_length(y, size=n_samples)

        if idx == out_of_tune_idx:
            y = simple_pitch_shift(y, pitch_shift, SAMPLE_RATE)
            label['out_of_tune'] = instrument_name
        else:
            label['in_tune'].append(instrument_name)

        mixture += y

    # Normalize to unit peak; the epsilon avoids dividing by zero on silent mixes.
    mixture /= np.max(np.abs(mixture) + 1e-6)

    # Generate filename
    filename = f"mix_{i}.wav"
    file_path = os.path.normpath(os.path.join(OUTPUT_PATH, filename))
    sf.write(file_path, mixture, SAMPLE_RATE)

    # Metadata
    label['filename'] = filename
    label['instruments_all'] = all_instruments
    return label

# GENERATION LOOP
def _generate_sample_safe(i):
    """Call ``generate_sample(i)`` and return ``(label, error)`` instead of raising.

    Exactly one element of the pair is None. The error is returned as a string
    so the result can always be sent back from a worker process.
    """
    try:
        return generate_sample(i), None
    except Exception as e:  # best-effort: one bad sample must not abort the whole run
        return None, str(e)


# Both modes feed the same checkpointing loop below; only the source of results differs.
if USE_MULTIPROCESSING:
    print(f"Using multiprocessing with {cpu_count()} cores...")
    pool = Pool(cpu_count())
    results = pool.imap(_generate_sample_safe, range(TOTAL_SAMPLES))
else:
    print("Generating samples (single-core)...")
    pool = None
    results = map(_generate_sample_safe, range(TOTAL_SAMPLES))

try:
    metadata = []
    for i, (label, error) in enumerate(tqdm(results, total=TOTAL_SAMPLES)):
        if error is None:
            metadata.append(label)
        else:
            print(f"Error on sample {i}: {error}")
        # Flush accumulated labels to their own CSV part and start a new batch.
        # i is zero-based, so the first part covers sample indices 0..SAVE_EVERY
        # inclusive (one more than the later parts).
        if i > 0 and i % SAVE_EVERY == 0:
            pd.DataFrame(metadata).to_csv(os.path.join(OUTPUT_PATH, f"labels_part_{i}.csv"), index=False)
            metadata = []

    # Whatever is left after the last checkpoint goes into the final part.
    if metadata:
        pd.DataFrame(metadata).to_csv(os.path.join(OUTPUT_PATH, "labels_part_final.csv"), index=False)
finally:
    # Same cleanup the `with Pool(...)` form performs on exit.
    if pool is not None:
        pool.terminate()

print("All samples generated.")




Opening dataset in read-only mode as you don't have write permissions.


|

This dataset can be visualized in Jupyter Notebook by ds.visualize() or at https://app.activeloop.ai/activeloop/nsynth-train



 

hub://activeloop/nsynth-train loaded successfully.





Generating samples (single-core)...


  "cipher": algorithms.TripleDES,
  "class": algorithms.Blowfish,
  "class": algorithms.TripleDES,
  0%|          | 179/100000 [01:42<11:44:20,  2.36it/s]