# Separating A Cappella Songs into Their Individual Vocal Tracks Using Spleeter

In [None]:
# Code source credit: https://github.com/deezer/spleeter/wiki/2.-Getting-started

In [None]:
# import necessary libraries
import librosa
import soundfile as sf
import numpy as np
import os
from IPython.display import Audio
import random


In [None]:
# get duration of a song
def get_duration(filename):
    """Return the duration of an audio file in seconds.

    Args:
        filename: Path to an audio file that ``librosa.load`` can read.

    Returns:
        The duration in seconds, as computed by ``librosa.get_duration``
        from the decoded samples and their sampling rate.
    """
    # sr=None keeps the file's native sampling rate. The librosa default
    # resamples the whole signal to 22050 Hz, which is slow and only
    # approximates the true length of the recording.
    samples, sample_rate = librosa.load(filename, sr=None)
    return librosa.get_duration(y=samples, sr=sample_rate)

# Add to csv file
def add_to_csv(mix_path, alto_path, bass_path, lead_vocal_path, soprano_path, tenor_path, vocal_percussion_path, csv_path):
    """Append one song's row to a dataset CSV file.

    The row contains the seven audio paths, in parameter order, followed by
    the duration of the mixture in seconds (see ``get_duration``).

    Args:
        mix_path: Path to the full mixture; also used to measure duration.
        alto_path: Path to the alto stem.
        bass_path: Path to the bass stem.
        lead_vocal_path: Path to the lead vocal stem.
        soprano_path: Path to the soprano stem.
        tenor_path: Path to the tenor stem.
        vocal_percussion_path: Path to the vocal percussion stem.
        csv_path: CSV file to append to; created if it does not exist.
    """
    # Function-local import so this block does not depend on the notebook's
    # import cell being edited.
    import csv

    duration = get_duration(mix_path)
    row = [
        mix_path,
        alto_path,
        bass_path,
        lead_vocal_path,
        soprano_path,
        tenor_path,
        vocal_percussion_path,
        str(duration),
    ]
    with open(csv_path, 'a') as f:
        # csv.writer quotes fields containing commas or quotes, which plain
        # string concatenation would silently turn into a malformed row.
        # lineterminator='\n' keeps the line endings of the previous output.
        csv.writer(f, lineterminator='\n').writerow(row)

# Put audio tracks from dataset into training csv file
def create_training_file(dataset_dir='Dataset/Jacapella', csv_path='spleeter/configs/jacapella_train.csv'):
    """Write the training CSV listing every song found in the dataset tree.

    The dataset is walked as ``<dataset_dir>/<genre>/<song>/``. For each song
    directory one row is appended through ``add_to_csv`` with the paths of
    ``mixture.wav``, ``alto.wav``, ``bass.wav``, ``lead_vocal.wav``,
    ``soprano.wav``, ``tenor.wav`` and ``vocal_percussion.wav``.

    If ``csv_path`` already exists nothing is written, so an existing
    train/validation split is not disturbed.

    Args:
        dataset_dir: Root directory of the dataset (genre folders inside).
        csv_path: Destination CSV file.
    """
    header = 'mix_path,alto_path,bass_path,lead_vocal_path,soprano_path,tenor_path,vocal_percussion_path,duration\n'
    stems = ('mixture', 'alto', 'bass', 'lead_vocal', 'soprano', 'tenor', 'vocal_percussion')

    if not os.path.exists(csv_path):
        # Create the output directory up front instead of failing on open().
        csv_dir = os.path.dirname(csv_path)
        if csv_dir:
            os.makedirs(csv_dir, exist_ok=True)

        with open(csv_path, 'w') as f:
            f.write(header)

        # os.listdir order is arbitrary; sorting makes the CSV reproducible.
        for genre in sorted(os.listdir(dataset_dir)):
            # Paths are joined with '/' (not os.path.join) so the CSV content
            # is the same on every platform.
            genre_dir = dataset_dir + '/' + genre
            if not os.path.isdir(genre_dir):
                continue
            for song in sorted(os.listdir(genre_dir)):
                song_dir = genre_dir + '/' + song
                if not os.path.isdir(song_dir):
                    continue
                stem_paths = [song_dir + '/' + stem + '.wav' for stem in stems]
                add_to_csv(*stem_paths, csv_path)
    print("Training file created")

# sample 5 random songs from training csv to validation csv
def create_validation_file(train_csv='spleeter/configs/jacapella_train.csv',
                           validation_csv='spleeter/configs/jacapella_validation.csv',
                           n_validation=5):
    """Move a random sample of songs from the training CSV to a validation CSV.

    Up to ``n_validation`` data rows are picked at random from ``train_csv``,
    written to ``validation_csv`` under the standard column heading, and
    removed from ``train_csv`` so that the two sets do not overlap.

    If ``validation_csv`` already exists nothing is changed, so repeated
    calls do not keep shrinking the training set.

    Args:
        train_csv: Existing training CSV whose first line is a header.
        validation_csv: Validation CSV to create.
        n_validation: Number of rows to hold out for validation.

    Raises:
        FileNotFoundError: If ``train_csv`` does not exist. The training
            file is read before anything is written, so no half-initialised
            validation file is left behind in that case.
    """
    header = 'mix_path,alto_path,bass_path,lead_vocal_path,soprano_path,tenor_path,vocal_percussion_path,duration\n'

    if not os.path.exists(validation_csv):
        with open(train_csv, 'r') as f:
            all_lines = f.readlines()
        train_header, rows = all_lines[:1], all_lines[1:]
        # A final row without a trailing newline would otherwise be glued to
        # whatever line is written after it.
        rows = [row if row.endswith('\n') else row + '\n' for row in rows]

        # Choose by index rather than by value so duplicate rows are handled
        # correctly and the remaining training rows keep their order.
        order = list(range(len(rows)))
        random.shuffle(order)
        picked = order[:n_validation]
        picked_set = set(picked)

        with open(validation_csv, 'w') as f:
            f.write(header)
            f.writelines(rows[i] for i in picked)

        # Rewrite the training CSV without the held-out rows; dropping them
        # only from the in-memory list would leave them in the training set.
        with open(train_csv, 'w') as f:
            f.writelines(train_header)
            f.writelines(row for i, row in enumerate(rows) if i not in picked_set)
    print("Validation file created")



In [None]:
# Train Spleeter
def audio_train_spleeter():
    """Prepare the CSV files that Spleeter training reads.

    Runs ``create_training_file`` first and ``create_validation_file``
    second; the order matters because the validation rows are sampled from
    the training CSV.
    """
    create_training_file()
    create_validation_file()


In [None]:
# Build the training and validation CSVs (each is skipped if its file already exists).
audio_train_spleeter()

In [None]:
# Train a model using the jacapella config via the Spleeter CLI (IPython shell escape).
# NOTE(review): -d is presumably the root directory that the audio paths in the
# CSVs are resolved against. '/spleeter' is an absolute path, while the CSVs hold
# relative 'Dataset/Jacapella/...' paths — confirm the data really lives there.
!spleeter train -p spleeter/configs/jacapella_config.json -d '/spleeter'


In [None]:
# Separate one example mixture with the jacapella config; -o names the output directory ('output').
!spleeter separate -p spleeter/configs/jacapella_config.json -o output Dataset/Jacapella/popular/akatonbo/mixture.wav