-

Opening dataset in read-only mode as you don't have write permissions.


\

This dataset can be visualized in Jupyter Notebook by ds.visualize() or at https://app.activeloop.ai/activeloop/nsynth-train



\

hub://activeloop/nsynth-train loaded successfully.



 

In [1]:
import os
import random
import librosa
import numpy as np
import soundfile as sf
import pandas as pd
from tqdm import tqdm
import deeplake

# Load NSynth from DeepLake
ds = deeplake.load("hub://activeloop/nsynth-train")

# Output folder
OUTPUT_PATH = "mixed_out_of_tune_samples"
os.makedirs(OUTPUT_PATH, exist_ok=True)

# Parameters
sample_rate = 16000
duration_sec = 4
pitch_shift_range = [-2, -1, 1, 2]  # in semitonesA
mix_size = 3
total_samples = 1000

# Memory-friendly pitch shift function
def simple_pitch_shift(y, semitones, sr):
    rate = 2 ** (semitones / 12)
    y_stretched = librosa.effects.time_stretch(y, rate=1 / rate)
    y_shifted = librosa.resample(y_stretched, orig_sr=sr, target_sr=sr)
    # Use keyword arguments to ensure compatibility
    return librosa.util.fix_length(data=y_shifted, size=len(y))

metadata = []

for i in tqdm(range(total_samples)):
    # Pick mix_size random samples
    chosen_samples = random.sample(list(ds), mix_size)
    out_of_tune_idx = random.randint(0, mix_size - 1)
    pitch_shift = random.choice(pitch_shift_range)

    # Initialize mixture once per outer iteration
    mixture = np.zeros((sample_rate * duration_sec,), dtype=np.float32)
    label = {}

    for idx, sample in enumerate(chosen_samples):
        # Get the audio array from the sample
        y = np.array(sample["audios"], dtype=np.float32)
        
        # Flatten to 1D if needed and fix the length to the specified duration
        if y.ndim > 1:
            y = y.flatten()
        y = librosa.util.fix_length(y, size=sample_rate * duration_sec)
        
        # Convert instrument value to string.
        # Here, instrument is stored as a tensor integer so we convert it.
        instrument_val = sample["instrument"].numpy()
        if instrument_val.ndim == 0:
            instrument_val = instrument_val.item()
        instrument_name = str(instrument_val)
        
        # Apply pitch shift to one sample and note the instrument
        if idx == out_of_tune_idx:
            y = simple_pitch_shift(y, pitch_shift, sample_rate)
            label['out_of_tune'] = instrument_name
        else:
            label.setdefault('in_tune', []).append(instrument_name)
        
        # Add the audio sample to the mixture
        mixture += y

    # Normalize the mixed audio
    mixture /= np.max(np.abs(mixture) + 1e-6)

    # Write the output file
    filename = f"mix_{i}.wav"
    sf.write(os.path.join(OUTPUT_PATH, filename), mixture, sample_rate)

    label['filename'] = filename
    metadata.append(label)

# Save metadata to CSV
df = pd.DataFrame(metadata)
df.to_csv(os.path.join(OUTPUT_PATH, "labels.csv"), index=False)



|

Opening dataset in read-only mode as you don't have write permissions.


\

This dataset can be visualized in Jupyter Notebook by ds.visualize() or at https://app.activeloop.ai/activeloop/nsynth-train



|

hub://activeloop/nsynth-train loaded successfully.



  "cipher": algorithms.TripleDES,
  "class": algorithms.Blowfish,
  "class": algorithms.TripleDES,
100%|██████████| 1000/1000 [5:25:15<00:00, 19.52s/it] 
