In [1]:
# preprocess MUSDB18-HQ data

import glob
import os
import librosa
import soundfile as sf
from shutil import copyfile
from tqdm import tqdm
from tqdm.contrib.concurrent import process_map

# Root of the raw MUSDB18 dataset. preprocess() globs the `train/` and `test/`
# sub-directories below it, one directory per track.
MUSDB18_DIR = "/project/asc2022/plus/DeepMDX/data/raw/musdb18"

# Base output directory; the two derived directories below are created by
# preprocess() if they do not exist yet.
OUTPUT_DIR = "/project/asc2022/plus/DeepMDX/data"
# Receives one `<track>_instruments.wav` per track (bass + drums + other).
OUTPUT_INSTRUMENTS_DIR = f"{OUTPUT_DIR}/instruments"
# Receives one `<track>_mixture.wav` per track (copy of the track's mixture.wav).
OUTPUT_MIXTURES_DIR = f"{OUTPUT_DIR}/mixtures"

def mix_instruments(wavs_dir, output_path):
    """Sum the non-vocal stems of one track and write them as a 16-bit WAV.

    Loads ``bass.wav``, ``drums.wav`` and ``other.wav`` from ``wavs_dir``,
    adds them sample-wise and writes the result to ``output_path``.

    The stems are loaded with ``sr=None`` and ``mono=False`` so they keep
    their native sample rate and channel layout. With its default arguments
    ``librosa.load`` resamples to 22050 Hz and down-mixes to mono, which
    would make the instrument mix inconsistent with the mixture files that
    are copied verbatim elsewhere in this notebook.

    Args:
        wavs_dir: Directory containing the three stem files.
        output_path: Destination path of the mixed WAV file.
    """
    stem_names = ["bass", "drums", "other"]
    stems = []
    sample_rate = None
    for stem_name in stem_names:
        stem_path = f"{wavs_dir}/{stem_name}.wav"
        audio, sample_rate = librosa.load(stem_path, sr=None, mono=False)
        stems.append(audio)
    mix = sum(stems)
    # librosa returns multi-channel audio as (channels, samples) while
    # soundfile expects (frames, channels); .T is a no-op for 1-D mono audio.
    sf.write(output_path, mix.T, sample_rate, "PCM_16")
    
def copy_mixture(wav_path):
    """Copy one track's mixture file into ``OUTPUT_MIXTURES_DIR``.

    The track name is the path component directly before the file name in
    ``wav_path`` (split on ``/``); the copy is stored as
    ``<track>_mixture.wav``.

    Args:
        wav_path: Path to a track's mixture WAV file.
    """
    path_parts = wav_path.split("/")
    track_name = path_parts[-2]
    destination = f"{OUTPUT_MIXTURES_DIR}/{track_name}_mixture.wav"
    copyfile(wav_path, destination)

def preprocess():
    """Build the instrument-only and mixture WAV sets from MUSDB18.

    For every track directory under ``MUSDB18_DIR/train`` and
    ``MUSDB18_DIR/test``:

    * the bass, drums and other stems are mixed by ``mix_instruments`` into
      ``OUTPUT_INSTRUMENTS_DIR/<track>_instruments.wav``;
    * ``mixture.wav`` is copied by ``copy_mixture`` into
      ``OUTPUT_MIXTURES_DIR``.

    Both steps are distributed over worker processes with ``process_map``.
    """
    # exist_ok=True replaces the check-then-create pattern, which can fail if
    # the directory appears between the check and the makedirs call.
    os.makedirs(OUTPUT_INSTRUMENTS_DIR, exist_ok=True)
    os.makedirs(OUTPUT_MIXTURES_DIR, exist_ok=True)

    print("Mixing instruments...")
    candidates = glob.glob(f"{MUSDB18_DIR}/train/*") + glob.glob(f"{MUSDB18_DIR}/test/*")
    # Skip stray files next to the track directories; they hold no stems and
    # would only make a worker fail.
    music_dirs = [d for d in candidates if os.path.isdir(d)]
    output_paths = [
        f"{OUTPUT_INSTRUMENTS_DIR}/{os.path.basename(d)}_instruments.wav"
        for d in music_dirs
    ]
    process_map(mix_instruments, music_dirs, output_paths)

    print("Copying mixtures...")
    mixture_wavs = glob.glob(f"{MUSDB18_DIR}/train/*/mixture.wav") + glob.glob(f"{MUSDB18_DIR}/test/*/mixture.wav")
    process_map(copy_mixture, mixture_wavs)

# Run the whole preprocessing pipeline when this cell is executed.
preprocess()

Mixing instruments...


  0%|          | 0/150 [00:00<?, ?it/s]

Copying mixtures...


  0%|          | 0/150 [00:00<?, ?it/s]