# Hybrid Demucs from Colab

This notebook runs the Demucs source separation model (https://github.com/facebookresearch/demucs/).
It only performs separation with pre-trained models; it does not support training!

In [None]:
# Install the `demucs` package with pip, upgrading it (-U) if it is already present.
!python3 -m pip install -U demucs

In [None]:
# Please BE VERY CAREFUL: this will link your entire drive.
# Do not edit any code except the cell that says 'Customize the following options',
# or you might mess up your files.
# IF YOU DO NOT WANT TO LINK DRIVE, please see below for an alternative!
from google.colab import drive
drive.mount('/gdrive')  # the drive content is then reachable under /gdrive

In [None]:
# Customize the following options!
model = "htdemucs"  # name of the pre-trained model, passed to Demucs with `-n`.
# We will look for all those file types. Entries must be lower case and
# written without the leading dot, since that is how suffixes are compared.
extensions = ["wav"]
# When not None, the value is passed to Demucs as `--two-stems=<value>`.
two_stems = None

# Options for the output audio.
mp3 = False
# Bitrate passed as `--mp3-bitrate`, only used if 'mp3' is True.
# `separate` reads this name, so it must be defined for mp3 output to work.
mp3_rate = 320
float32 = False  # output as float 32 wavs, unused if 'mp3' is True.
int24 = False    # output as int24 wavs, unused if 'mp3' is True.
# You cannot set both `float32 = True` and `int24 = True` !!

In [None]:
#@title Useful functions, don't forget to execute
import io
from pathlib import Path
import select
from shutil import rmtree
import subprocess as sp
import sys
from typing import Dict, Tuple, Optional, IO

from google.colab import files

def find_files(in_path):
    """Return the entries directly inside `in_path` with an accepted extension.

    The folder is not searched recursively. An entry is kept when its
    suffix, lower-cased and without the leading dot, appears in the
    module-level `extensions` list.

    Args:
        in_path: folder to scan (anything accepted by `pathlib.Path`).

    Returns:
        A list of `Path` objects, in the order the folder yields them.
    """
    return [
        entry
        for entry in Path(in_path).iterdir()
        if entry.suffix.lower().lstrip(".") in extensions
    ]

def copy_process_streams(process: sp.Popen):
    """Echo a child process's stdout and stderr to our own streams as they arrive.

    Blocks until both pipes of `process` have reached end-of-file. Output is
    forwarded chunk by chunk, so progress messages show up while the child is
    still running.

    Args:
        process: a `Popen` object created with `stdout=sp.PIPE` and
            `stderr=sp.PIPE`; both streams must be present.
    """
    import codecs  # local import: only this function needs it

    def raw(stream: Optional[IO[bytes]]) -> IO[bytes]:
        # Use the unbuffered stream underneath the buffered pipe wrapper, so a
        # read returns whatever is available instead of waiting for a full buffer.
        assert stream is not None
        if isinstance(stream, io.BufferedIOBase):
            stream = stream.raw
        return stream

    p_stdout, p_stderr = raw(process.stdout), raw(process.stderr)
    # One incremental decoder per stream: a chunk boundary can fall in the
    # middle of a multi-byte UTF-8 character, and decoding each chunk on its
    # own would then raise UnicodeDecodeError. Invalid bytes are replaced
    # rather than aborting the echo loop.
    new_decoder = codecs.getincrementaldecoder("utf-8")
    stream_by_fd: Dict[int, Tuple[IO[bytes], IO[str], codecs.IncrementalDecoder]] = {
        p_stdout.fileno(): (p_stdout, sys.stdout, new_decoder(errors="replace")),
        p_stderr.fileno(): (p_stderr, sys.stderr, new_decoder(errors="replace")),
    }
    fds = list(stream_by_fd.keys())

    while fds:
        # `select` syscall will wait until one of the file descriptors has content.
        ready, _, _ = select.select(fds, [], [])
        for fd in ready:
            p_stream, std, decoder = stream_by_fd[fd]
            raw_buf = p_stream.read(2 ** 16)
            at_eof = not raw_buf
            # At end-of-file, `final=True` flushes any bytes still pending
            # inside the decoder.
            text = decoder.decode(raw_buf, final=at_eof)
            if text:
                std.write(text)
                std.flush()
            if at_eof:
                # This pipe is closed on the child's side; stop watching it.
                fds.remove(fd)

def separate(inp=None, outp=None):
    """Run the Demucs separation command on every matching file of a folder.

    The command line is built from the module-level options (`model`, `mp3`,
    `float32`, `int24`, `two_stems`), the files are collected with
    `find_files`, and the child process output is echoed live.

    Args:
        inp: folder containing the audio files. When falsy, the module-level
            `in_path` is used instead (it must then be defined by the notebook).
        outp: folder given to Demucs with `-o`. When falsy, the module-level
            `out_path` is used instead (it must then be defined by the notebook).

    Returns:
        None. A message is printed when no file matches or when the command
        exits with a non-zero status.
    """
    inp = inp or in_path
    outp = outp or out_path
    cmd = ["python3", "-m", "demucs.separate", "-o", str(outp), "-n", model]
    if mp3:
        cmd.append("--mp3")
        # `mp3_rate` is optional: when the notebook does not define it, the
        # flag is omitted and the Demucs command uses its own default bitrate.
        bitrate = globals().get("mp3_rate")
        if bitrate is not None:
            cmd.append(f"--mp3-bitrate={bitrate}")
    if float32:
        cmd += ["--float32"]
    if int24:
        cmd += ["--int24"]
    if two_stems is not None:
        cmd += [f"--two-stems={two_stems}"]
    # Named `audio_files` so the `files` module imported from google.colab
    # is not shadowed inside this function.
    audio_files = [str(f) for f in find_files(inp)]
    if not audio_files:
        # Report the folder that was actually searched.
        print(f"No valid audio files in {inp}")
        return
    print("Going to separate the files:")
    print('\n'.join(audio_files))
    print("With command: ", " ".join(cmd))
    # The context manager closes both pipes and waits for the process to end.
    with sp.Popen(cmd + audio_files, stdout=sp.PIPE, stderr=sp.PIPE) as p:
        copy_process_streams(p)
    if p.returncode != 0:
        print("Command failed, something went wrong.")


def from_upload():
    """Separate files uploaded through the browser instead of read from the drive.

    Both working folders ('tmp_in' for the uploads, 'separated' for the
    results) are deleted if present and created again, so every call starts
    from empty folders. Each uploaded file is written into 'tmp_in', then
    `separate` is run on that folder.
    """
    upload_dir = Path('tmp_in')
    result_dir = Path('separated')

    # Reset the input folder first, then the output folder.
    for folder in (upload_dir, result_dir):
        if folder.exists():
            rmtree(folder)
        folder.mkdir()

    # `files.upload()` yields a mapping of file name to file content (bytes).
    for name, content in files.upload().items():
        (upload_dir / name).write_bytes(content)
    separate(upload_dir, result_dir)


In [None]:
# Each entry is an (input folder, output folder) pair on the mounted drive;
# the folders are processed one after the other, in the order listed.
separation_jobs = [
    # Full EgoMusic and MUSDB for SDR comparison between data sets
    ('/gdrive/MyDrive/egomusic_separation/full_musdb/', '/gdrive/MyDrive/egomusic_separation/estimated_full_musdb/'),
    ('/gdrive/MyDrive/egomusic_separation/full_egomusic/', '/gdrive/MyDrive/egomusic_separation/estimated_full_egomusic/'),
    # Segmented EgoMusic separation for VISQOL analysis
    ('/gdrive/MyDrive/egomusic_separation/clean/', '/gdrive/MyDrive/egomusic_separation/estimated_clean/'),
    ('/gdrive/MyDrive/egomusic_separation/near/', '/gdrive/MyDrive/egomusic_separation/estimated_near/'),
    ('/gdrive/MyDrive/egomusic_separation/mid/', '/gdrive/MyDrive/egomusic_separation/estimated_mid/'),
    ('/gdrive/MyDrive/egomusic_separation/far/', '/gdrive/MyDrive/egomusic_separation/estimated_far/'),
    ('/gdrive/MyDrive/egomusic_separation/static/', '/gdrive/MyDrive/egomusic_separation/estimated_static/'),
]

for source_dir, target_dir in separation_jobs:
    separate(inp=source_dir, outp=target_dir)