# Extracting vocals from songs using Demucs and transcribing them with Whisper

In [1]:
import torch
from demucs import pretrained
from demucs.apply import apply_model
import torchaudio
import os


def convert_to_stereo(input_file, target_samplerate):
    """Load an audio file as a two-channel waveform at the requested sample rate.

    Args:
        input_file: Path of the audio file; passed straight to ``torchaudio.load``.
        target_samplerate: Sample rate the returned waveform should have.

    Returns:
        The loaded waveform with exactly two rows along dimension 0 (treated
        here as the channel axis), resampled when the file's own rate differs
        from ``target_samplerate``.
    """
    wav, sr = torchaudio.load(input_file, normalize=True)

    if sr != target_samplerate:
        resample = torchaudio.transforms.Resample(orig_freq=sr, new_freq=target_samplerate)
        wav = resample(wav)

    channels = wav.shape[0]
    if channels == 1:
        # Mono: duplicate the single channel so both stereo channels match.
        wav = torch.cat([wav, wav], dim=0)
    elif channels > 2:
        # Multichannel input used to be returned untouched even though this
        # function promises stereo. Keep the first two channels.
        # NOTE(review): Demucs' own channel converter appears to make the same
        # choice (first N channels) — confirm if a proper downmix is preferred.
        wav = wav[:2]
    return wav


def extract_vocals(input_file, output_folder, model=None, device=None):
    """Separate the vocal stem of a song with Demucs and save it as a WAV file.

    Args:
        input_file: Path of the song to process.
        output_folder: Directory that receives ``<input name>_vocals.wav``.
            It is created if it does not exist yet.
        model: Optional, already loaded Demucs model. When omitted, the
            pretrained ``'mdx_extra'`` model is loaded on every call (the
            original behaviour); pass a model to reuse it across songs.
        device: Optional device to run the separation on. Defaults to
            ``'cuda'`` when CUDA is available and to ``'cpu'`` otherwise.

    Returns:
        None. The path of the written file is printed.
    """
    if device is None:
        # Fall back to the CPU instead of crashing on machines without CUDA.
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
    if model is None:
        model = pretrained.get_model('mdx_extra')
    model.to(device)

    wav = convert_to_stereo(input_file, model.samplerate)

    # wav[None] adds a leading batch dimension of size 1.
    sources = apply_model(model, wav[None], device=device)
    # Look the stem up by name instead of hard-coding index 3 (the position of
    # the vocals for 'mdx_extra'), so other models with a different source
    # order keep working.
    vocals = sources[0, model.sources.index('vocals')].cpu()

    # splitext removes whatever extension the input has, and only at the end
    # of the name; replace('.mp3', '') handled .mp3 only and matched anywhere.
    stem = os.path.splitext(os.path.basename(input_file))[0]
    os.makedirs(output_folder, exist_ok=True)
    output_path = os.path.join(output_folder, f"{stem}_vocals.wav")
    torchaudio.save(output_path, vocals, sample_rate=model.samplerate)
    print(f"Vocal part saved: {output_path}")

In [2]:
# Destination directory for the separated vocal tracks.
output_folder = "songs/output_vocals"
os.makedirs(output_folder, exist_ok=True)

# Songs to process.
input_file_1 = "songs/В самое сердце.mp3"
input_file_2 = "songs/Counting Stars.mp3"

# Run the vocal extraction for each song in turn.
for song_path in (input_file_1, input_file_2):
    extract_vocals(song_path, output_folder)

Vocal part saved: songs/output_vocals\В самое сердце_vocals.wav
Vocal part saved: songs/output_vocals\Counting Stars_vocals.wav


In [3]:
import whisper

# Load the Whisper "medium" checkpoint once and reuse it for every file.
model = whisper.load_model("medium")

# os.listdir returns every entry of the folder in arbitrary order. Keep only
# the .wav files and sort them so the transcription order is reproducible and
# stray non-audio files are not handed to Whisper.
files = sorted(
    name for name in os.listdir(output_folder) if name.lower().endswith(".wav")
)
for file in files:
    input_file = os.path.join(output_folder, file)
    print(f"Transcribing {input_file}")
    print(model.transcribe(input_file)['text'])
    print('------------------------')

Transcribing songs/output_vocals\Counting Stars_vocals.wav
 Lately I've been, I've been losing sleep Dreaming about the things that we could be But baby I've been, I've been praying hard Said no more counting dollars, we'll be counting stars Yeah, we'll be counting stars I see this life like a swinging vine Swing my heart across the line And my face is flashing signs Seek it out and ye shall find the old But I'm not that old young But I'm not that bold And I don't think the world is sold I'm just doing what we're told And I feel something so right By doing the wrong thing And I feel something so wrong By doing the right thing I could lie, could lie, could lie Everything that kills me makes me feel alive But baby I've been, I've been losing sleep Dreaming about the things that we could be But baby I've been, I've been praying hard Said no more counting dollars, we'll be counting stars But baby I've been, I've been losing sleep Dreaming about the things that we could be But baby I've bee