# Open-Unmix: Audio Source Separation

In [2]:
# Imports and setup

import os

import numpy as np
# Compatibility shim for musdb / museval: NumPy 2.0 removed the `np.float_`
# alias. It used to be an alias of float64 (double precision), so restore it
# as float64 -- binding it to float32 would silently lower the precision of
# anything those libraries compute with it. Only patch when it is missing so
# an older NumPy keeps its real alias.
if not hasattr(np, "float_"):
    np.float_ = np.float64
import musdb
import museval
import torch
import torchaudio

from openunmix.predict import separate

In [3]:
# Device configuration
# Use the GPU when PyTorch reports one through the torch.cuda API, otherwise
# fall back to the CPU so the notebook still runs end to end.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(torch.cuda.is_available())
if device.type == "cuda":
    # Querying the device name raises when no GPU is present, so only ask
    # for it when one is actually available.
    print(torch.cuda.get_device_name(0))
else:
    print("No GPU detected - running on CPU")

True
AMD Radeon RX 9070 XT


In [4]:
# Paths
# All inputs and outputs live under one base folder (here: the working directory).
path_to_folder = "."
musdb_root, estimates_path, output_path = (
    os.path.join(path_to_folder, leaf)
    for leaf in ("musdb18", "estimates", "output")
)

# Create the two writable folders up front; already-existing ones are left as they are.
os.makedirs(output_path, exist_ok=True)
os.makedirs(estimates_path, exist_ok=True)
!ls

estimates  MIR.zip  musdb18  music_separator.ipynb  output


In [5]:
# Load MUSDB dataset
# Open the dataset rooted at `musdb_root`, letting musdb download it if needed.
# NOTE(review): per the musdb docs `download=True` fetches the short preview
# excerpts rather than the full dataset -- confirm this is what is wanted.
mus = musdb.DB(root=musdb_root, download=True)

In [6]:
# Separate sources, evaluate them, and save the estimates
#
# For every track in `mus`:
#   1. run Open-Unmix to get a "vocals" estimate plus the residual,
#   2. score the estimates with museval (results are written under `output_path`),
#   3. write each estimate as a WAV file to `<estimates_path>/<subset>/`.
for track in mus:
    print(f"[→] Separating: {track.name}")
    audio = torch.tensor(track.audio.T).float()  # shape (2, samples)
    rate = track.rate
    subset = track.subset

    # NOTE(review): `separate` is called without a pre-built `separator`, so
    # the model is presumably re-loaded for every track -- confirm, and
    # consider loading it once outside the loop if this becomes a bottleneck.
    estimates = separate(
        audio=audio,
        rate=rate,
        targets=["vocals"],
        residual=True,
        device=device,
    )

    # Everything that is not vocals is reported as "accompaniment".
    estimates["accompaniment"] = estimates.pop("residual")

    # Bring every estimate to the CPU exactly once. Only the leading
    # (presumably batch) axis is dropped, so a size-1 channel axis would
    # survive instead of being collapsed by a bare squeeze().
    channels_first = {
        target: estimate.squeeze(0).detach().cpu()  # shape (2, samples)
        for target, estimate in estimates.items()
    }

    # museval is fed (samples, 2) numpy arrays.
    cpu_estimates = {
        target: waveform.numpy().T
        for target, waveform in channels_first.items()
    }

    # Use the configured output folder rather than a hard-coded "./output"
    # so that changing `path_to_folder` moves the scores as well.
    scores = museval.eval_mus_track(
        track, cpu_estimates, output_dir=output_path
    )

    print(scores)

    # Create subdirectory for the subset if it doesn't exist
    subset_path = os.path.join(estimates_path, subset)
    os.makedirs(subset_path, exist_ok=True)

    # Save the channels-first CPU tensors directly; no extra round trip
    # through numpy is needed.
    for target, waveform in channels_first.items():
        file_name = f"{track.name} - {target}.wav"
        out_path = os.path.join(subset_path, file_name)
        torchaudio.save(out_path, waveform, rate)


[→] Separating: A Classic Education - NightOwl
Downloading: "https://zenodo.org/records/5069601/files/vocals-bccbd9aa.pth" to /home/teraflops/.cache/torch/hub/checkpoints/vocals-bccbd9aa.pth


100%|██████████| 108M/108M [00:33<00:00, 3.37MB/s] 


vocals          ==> SDR:   3.938  SIR:   5.421  ISR:  10.877  SAR:   6.708  
accompaniment   ==> SDR:  12.263  SIR:  18.199  ISR:  15.721  SAR:  14.834  

[→] Separating: ANiMAL - Clinic A
vocals          ==> SDR:   5.252  SIR:   8.576  ISR:  14.217  SAR:   7.013  
accompaniment   ==> SDR:  13.004  SIR:  22.343  ISR:  17.363  SAR:  14.997  

[→] Separating: ANiMAL - Easy Tiger
vocals          ==> SDR:   6.201  SIR:  12.574  ISR:  11.155  SAR:   6.372  
accompaniment   ==> SDR:  14.227  SIR:  20.120  ISR:  22.557  SAR:  15.535  

[→] Separating: ANiMAL - Rockshow
vocals          ==> SDR:   4.100  SIR:   6.684  ISR:  16.811  SAR:   6.979  
accompaniment   ==> SDR:  10.070  SIR:  20.800  ISR:  13.238  SAR:  11.581  

[→] Separating: Actions - Devil's Words
vocals          ==> SDR:   8.947  SIR:  12.121  ISR:  18.633  SAR:  10.098  
accompaniment   ==> SDR:  10.118  SIR:  19.151  ISR:  12.868  SAR:  10.232  

[→] Separating: Actions - One Minute Smile
vocals          ==> SDR:   8.739  SIR: