In [7]:
from pathlib import Path
import librosa
import soundfile as sf
import numpy as np
import os

def get_all_wav_files(directory):
    """Recursively collect the paths of all WAV files under ``directory``.

    Args:
        directory: Root folder to search; every sub-folder is walked as well.

    Returns:
        list[str]: One path per file whose name ends in ``.wav`` (any letter
        case), built by joining the containing folder and the file name.
        The list is empty when nothing matches or the folder does not exist
        (``os.walk`` yields nothing in that case).
    """
    return [
        # os.path.join avoids the doubled separator a plain '+' produces when
        # the folder already ends with one (e.g. "../data/").
        os.path.join(dirpath, filename)
        for dirpath, _, filenames in os.walk(directory)
        for filename in filenames
        # Compare in lower case so "TRACK.WAV" is not silently skipped.
        if filename.lower().endswith('.wav')
    ]

# Playback-rate multipliers used for the time-stretch augmentations:
#   slow -> 0.9 (90% of the original speed)
#   fast -> 1.1 (110% of the original speed)
AUG_FACTORS = dict(slow=0.9, fast=1.1)

def _channels_last(samples):
    """Return ``samples`` in the layout ``sf.write`` expects.

    Stereo audio is handled channels-first, (n_channels, n_samples), inside
    ``pitch_shift_wavs``; soundfile wants (n_samples, n_channels). Mono (1-D)
    arrays are returned unchanged.
    """
    return samples.T if samples.ndim == 2 else samples


def pitch_shift_wavs(wav_path, n_steps) -> None:
    """Write pitch-shifted and time-stretched copies of one WAV file beside it.

    Four files are created in the same folder as ``wav_path``:

    * ``<stem>_p<n_steps>.wav`` - pitch shifted up by ``n_steps``
    * ``<stem>_m<n_steps>.wav`` - pitch shifted down by ``n_steps``
    * ``<stem>_t<pct>.wav`` for ``AUG_FACTORS['fast']`` and
      ``AUG_FACTORS['slow']``, where ``pct`` is the rate as a rounded
      percentage (``_t110`` and ``_t90`` for rates 1.1 and 0.9).

    Args:
        wav_path: Location of the source WAV file (``str`` or ``Path``).
        n_steps: Shift size handed to ``librosa.effects.pitch_shift`` as
            ``+n_steps`` and ``-n_steps`` (semitones with librosa's default
            of 12 bins per octave).

    Returns:
        None. A progress line is printed on success. Any exception is reported
        with ``print`` and not re-raised, so a batch run carries on with the
        next file.
    """
    try:
        wav_path = Path(wav_path)

        # Load audio at its native sample rate, keeping mono/stereo as stored.
        y, sr = librosa.load(wav_path, sr=None, mono=False)

        def shifted(steps):
            # Stereo is shifted one channel at a time and re-stacked
            # channels-first; mono is shifted directly.
            if y.ndim == 2:
                return np.vstack(
                    [librosa.effects.pitch_shift(ch, sr=sr, n_steps=steps) for ch in y]
                )
            return librosa.effects.pitch_shift(y, sr=sr, n_steps=steps)

        # Look the rates up by key rather than by position in the dict, and
        # derive the file names from them so the two can never disagree.
        fast_rate = AUG_FACTORS['fast']
        slow_rate = AUG_FACTORS['slow']

        base = wav_path.stem
        up_path     = wav_path.with_name(f"{base}_p{n_steps}.wav")
        down_path   = wav_path.with_name(f"{base}_m{n_steps}.wav")
        strech_path = wav_path.with_name(f"{base}_t{round(fast_rate * 100)}.wav")
        slow_path   = wav_path.with_name(f"{base}_t{round(slow_rate * 100)}.wav")

        # Every signal is computed before anything is written, so a failing
        # effect does not leave a partial set of files behind.
        outputs = [
            (up_path,     shifted(+n_steps)),
            (down_path,   shifted(-n_steps)),
            (strech_path, librosa.effects.time_stretch(y, rate=fast_rate)),
            (slow_path,   librosa.effects.time_stretch(y, rate=slow_rate)),
        ]

        for out_path, samples in outputs:
            # Transpose stereo for soundfile; this now also covers the
            # time-stretched signals, which were previously written as-is.
            sf.write(out_path, _channels_last(samples), sr)

        print(f"✓ {wav_path.name}  →  {up_path.name}, {down_path.name} , {strech_path.name} , {slow_path.name} ")
    except Exception as e:
        # Report the file this call was given (not a global loop variable)
        # together with the reason for the failure.
        print(f"Could not augment {wav_path}: {e}")

# Augment every track under ../data/.
# Files whose stem already ends in an augmentation suffix are skipped, so
# re-running this cell does not augment the augmented copies again.
N_STEPS = 2
# "_t110" / "_t90" are the names pitch_shift_wavs gives the time-stretched copies.
AUGMENTED_SUFFIXES = (f"_p{N_STEPS}", f"_m{N_STEPS}", "_t110", "_t90")

files = get_all_wav_files("../data/")
for file in files:
    if Path(file).stem.endswith(AUGMENTED_SUFFIXES):
        continue
    pitch_shift_wavs(file, N_STEPS)

✓ blues.00000.wav  →  blues.00000_p2.wav, blues.00000_m2.wav , blues.00000_t110.wav , blues.00000_t90.wav 
✓ blues.00001.wav  →  blues.00001_p2.wav, blues.00001_m2.wav , blues.00001_t110.wav , blues.00001_t90.wav 
✓ blues.00002.wav  →  blues.00002_p2.wav, blues.00002_m2.wav , blues.00002_t110.wav , blues.00002_t90.wav 
✓ blues.00003.wav  →  blues.00003_p2.wav, blues.00003_m2.wav , blues.00003_t110.wav , blues.00003_t90.wav 
✓ blues.00004.wav  →  blues.00004_p2.wav, blues.00004_m2.wav , blues.00004_t110.wav , blues.00004_t90.wav 
✓ blues.00005.wav  →  blues.00005_p2.wav, blues.00005_m2.wav , blues.00005_t110.wav , blues.00005_t90.wav 
✓ blues.00006.wav  →  blues.00006_p2.wav, blues.00006_m2.wav , blues.00006_t110.wav , blues.00006_t90.wav 
✓ blues.00007.wav  →  blues.00007_p2.wav, blues.00007_m2.wav , blues.00007_t110.wav , blues.00007_t90.wav 
✓ blues.00008.wav  →  blues.00008_p2.wav, blues.00008_m2.wav , blues.00008_t110.wav , blues.00008_t90.wav 
✓ blues.00009.wav  →  blues.00009_p2.

  y, sr = librosa.load(wav_path, sr=None, mono=False)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


✓ jazz.00055.wav  →  jazz.00055_p2.wav, jazz.00055_m2.wav , jazz.00055_t110.wav , jazz.00055_t90.wav 
✓ jazz.00056.wav  →  jazz.00056_p2.wav, jazz.00056_m2.wav , jazz.00056_t110.wav , jazz.00056_t90.wav 
✓ jazz.00057.wav  →  jazz.00057_p2.wav, jazz.00057_m2.wav , jazz.00057_t110.wav , jazz.00057_t90.wav 
✓ jazz.00058.wav  →  jazz.00058_p2.wav, jazz.00058_m2.wav , jazz.00058_t110.wav , jazz.00058_t90.wav 
✓ jazz.00059.wav  →  jazz.00059_p2.wav, jazz.00059_m2.wav , jazz.00059_t110.wav , jazz.00059_t90.wav 
✓ jazz.00060.wav  →  jazz.00060_p2.wav, jazz.00060_m2.wav , jazz.00060_t110.wav , jazz.00060_t90.wav 
✓ jazz.00061.wav  →  jazz.00061_p2.wav, jazz.00061_m2.wav , jazz.00061_t110.wav , jazz.00061_t90.wav 
✓ jazz.00062.wav  →  jazz.00062_p2.wav, jazz.00062_m2.wav , jazz.00062_t110.wav , jazz.00062_t90.wav 
✓ jazz.00063.wav  →  jazz.00063_p2.wav, jazz.00063_m2.wav , jazz.00063_t110.wav , jazz.00063_t90.wav 
✓ jazz.00064.wav  →  jazz.00064_p2.wav, jazz.00064_m2.wav , jazz.00064_t110.wav , 

## Delete Augmented Tracks

In [6]:
# Remove the augmented copies produced by pitch_shift_wavs.
# Only the end of the file stem is tested, so a track is never deleted merely
# because "p2", "m2", "t90" or "t110" occurs somewhere else in its path
# (for example in a folder name).
files = get_all_wav_files("../data/")
for file in files:
    if Path(file).stem.endswith(("_p2", "_m2", "_t90", "_t110")):
        os.remove(file)