In [1]:
import json
import os
import subprocess
import sys

import IPython.display as ipd
import librosa
import pandas as pd
import soundfile as sf
from tqdm.notebook import tqdm

# Register tqdm with pandas so that `Series.progress_apply` (used for the batch
# separation run further down) is available.
tqdm.pandas()

In [None]:
# Fetch the google-research/sound-separation repository into ./sound-separation;
# separate_sound() runs its models/tools/process_wav.py script from there.
# NOTE(review): the clone is not pinned to a commit or tag — consider pinning for reproducibility.
!git clone https://github.com/google-research/sound-separation.git

In [None]:
# Download the pre-trained bird MixIT checkpoints with gsutil. This copies the
# `bird_mixit_model_checkpoints` directory into the current folder, which is
# where separate_sound() expects to find it.
!gsutil -m cp -r gs://gresearch/sound_separation/bird_mixit_model_checkpoints .

In [2]:
def _convert_audio(f_path, target_ext):
    """Re-encode ``f_path`` into a sibling file whose extension is ``target_ext``.

    Only the *final* extension of the file name is swapped, so dots or the
    text ".ogg"/".wav" elsewhere in the name are left alone.

    Args:
        f_path: Path of the audio file to read.
        target_ext: Extension of the file to write, including the leading dot
            and in lower case (e.g. ``".wav"``).

    Returns:
        str: Path of the written file, located in the same directory as
        ``f_path``. If ``f_path`` already carries ``target_ext`` (compared
        case-insensitively) nothing is written and ``f_path`` itself is
        returned, so the source is never overwritten in place.
    """
    directory, file_name = os.path.split(f_path)
    stem, current_ext = os.path.splitext(file_name)

    if current_ext.lower() == target_ext:
        # Source and destination would be the same file; re-encoding it onto
        # itself is at best a no-op and at worst lossy, so leave it untouched.
        return f_path

    new_f_path = os.path.join(directory, stem + target_ext)
    data, samplerate = sf.read(f_path)
    # No explicit format is passed, so the output encoding is left to
    # soundfile to derive from the new file name.
    sf.write(new_f_path, data, samplerate)
    return new_f_path


def ogg_to_wav(f_path):
    """Write a ``.wav`` copy of ``f_path`` next to it and return the new path.

    Args:
        f_path: Path of the source audio file (normally ``*.ogg``).

    Returns:
        str: Path of the ``.wav`` file, or ``f_path`` unchanged when it is
        already a ``.wav`` file.
    """
    return _convert_audio(f_path, ".wav")


def wav_to_ogg(f_path):
    """Write an ``.ogg`` copy of ``f_path`` next to it and return the new path.

    Args:
        f_path: Path of the source audio file (normally ``*.wav``).

    Returns:
        str: Path of the ``.ogg`` file, or ``f_path`` unchanged when it is
        already an ``.ogg`` file.
    """
    return _convert_audio(f_path, ".ogg")

In [3]:
def separate_sound(wav_path, n_outputs):
    """Run the bird MixIT separation script on a single WAV file.

    Invokes ``sound-separation/models/tools/process_wav.py`` with the
    ``output_sources4`` checkpoint from ``bird_mixit_model_checkpoints``
    (both paths are relative to the current working directory).

    Args:
        wav_path: Path of the WAV file to separate.
        n_outputs: Value passed to the script as ``--num_sources``; also the
            number of paths returned.

    Returns:
        list[str]: ``<wav_path without extension>_source{i}.wav`` for each
        ``i`` in ``range(n_outputs)`` — the file names the script is expected
        to produce for the ``--output`` path given here.

    Raises:
        subprocess.CalledProcessError: If the script exits with a non-zero
            status, instead of silently returning paths to files that were
            never written.
    """
    # Strip only the final extension so that dots elsewhere in the file name
    # (or ".wav" inside a directory name) cannot desynchronise the --output
    # argument from the paths returned below.
    root, _ = os.path.splitext(wav_path)
    output_path = root + ".wav"

    # NOTE(review): the model directory and checkpoint are fixed to the
    # 4-source model; passing n_outputs != 4 presumably mismatches it — confirm.
    command = [
        # Run the script with the kernel's own interpreter rather than
        # whichever `python` happens to be first on PATH.
        sys.executable,
        "sound-separation/models/tools/process_wav.py",
        "--model_dir", "bird_mixit_model_checkpoints/output_sources4",
        "--checkpoint", "bird_mixit_model_checkpoints/output_sources4/model.ckpt-3223090",
        "--input", wav_path,
        "--output", output_path,
        "--num_sources", str(n_outputs),
    ]
    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact.
    subprocess.run(command, check=True)

    return [f"{root}_source{i}.wav" for i in range(n_outputs)]

In [4]:
def exec_sound_separation(f_path, n_outputs, display_audio=False):
    """Separate one recording into ``n_outputs`` tracks.

    The recording is converted to WAV with ``ogg_to_wav``, passed to
    ``separate_sound``, and the intermediate WAV is deleted afterwards.

    Args:
        f_path: Path of the recording to process.
        n_outputs: Number of separated tracks to request.
        display_audio: When true, render audio players for the original
            recording and for every separated track.

    Returns:
        list[str]: The paths returned by ``separate_sound``.
    """
    wav_path = ogg_to_wav(f_path)
    # The converter can hand back the input path itself (e.g. when the input
    # does not have the extension it rewrites). In that case the "temporary"
    # WAV *is* the user's original file and must not be deleted.
    wav_is_temporary = os.path.abspath(wav_path) != os.path.abspath(f_path)

    try:
        separated_f_paths = separate_sound(wav_path, n_outputs)
    finally:
        # Clean up even when separation fails, so a failed run does not leave
        # the intermediate WAV behind.
        if wav_is_temporary and os.path.exists(wav_path):
            os.remove(wav_path)

    if display_audio:
        print("\n... ORIGINAL AUDIO ...\n")
        ipd.display(ipd.Audio(f_path))

        print(f"\n... {n_outputs} SEPARATED AUDIO TRACKS ...\n")
        for separated_path in separated_f_paths:
            ipd.display(ipd.Audio(separated_path))

    return separated_f_paths

In [5]:
# Smoke test on a single recording: split it into 4 tracks and render audio
# players for the original and each separated track.
# NOTE(review): this path is rooted at ./dataset/, whereas the batch run below
# builds its paths from Train_DIR ('../train_audio/') — confirm which layout is current.
exec_sound_separation('./dataset/train_audio/yefcan/XC207920.ogg', n_outputs=4, display_audio=True)


... ORIGINAL AUDIO ...



Instructions for updating:
non-resource variables are not supported in the long term
2022-04-30 20:48:57.795592: W tensorflow/core/common_runtime/graph_constructor.cc:1511] Importing a graph with a lower producer version 837 into an existing graph with producer version 987. Shape inference will have run different parts of the graph with different producer versions.
2022-04-30 20:48:59.808196: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX AVX2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-04-30 20:49:00.429321: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 2153 MB memory:  -> device: 0, name: NVIDIA GeForce GTX 1650 Ti, pci bus id: 0000:01:00.0, compute capability: 7.5



... 4 SEPARATED AUDIO TRACKS ...



['./dataset/train_audio/yefcan\\XC207920_source0.wav',
 './dataset/train_audio/yefcan\\XC207920_source1.wav',
 './dataset/train_audio/yefcan\\XC207920_source2.wav',
 './dataset/train_audio/yefcan\\XC207920_source3.wav']

In [6]:
# Input locations, relative to the notebook's working directory.
# NOTE: despite the `_DIR` suffix, the first two are file paths, not directories.
# CSV with one row per training recording (read into `train_df`).
Train_Metadata_DIR = '../train_metadata.csv'
# JSON file with the labels of the scored species (read into `scored_birds`).
Scored_Bird_DIR = '../scored_birds.json'
# Prefix prepended to each metadata `filename` to build the audio file path.
Train_DIR = '../train_audio/'

In [7]:
# Load the per-recording metadata and the labels of the scored species.
train_df = pd.read_csv(Train_Metadata_DIR)

with open(Scored_Bird_DIR) as sbfile:
    scored_birds = json.load(sbfile)

# Number of training recordings per primary label.
bird_training_sample = train_df['primary_label'].value_counts()

# Show the counts for the scored species only.
print(
    'Number of scored birds training files \n',
    bird_training_sample.loc[scored_birds],
)

Number of scored birds training files 
 akiapo      14
aniani      12
apapan      47
barpet      15
crehon       2
elepai      14
ercfra       6
hawama      21
hawcre      20
hawgoo       9
hawhaw       3
hawpet1      3
houfin     322
iiwi        37
jabwar      78
maupar       1
omao        21
puaioh       3
skylar     500
warwhe1     71
yefcan      67
Name: primary_label, dtype: int64


In [8]:
# Keep only the recordings whose primary label is a scored species and add the
# full path of each audio file as a `dir` column. Doing both in one chain
# (filter -> assign) yields a fresh frame, so no column is ever set on a
# filtered slice — the pattern that triggers pandas' SettingWithCopyWarning.
train_df = (
    train_df[train_df['primary_label'].isin(scored_birds)]
    .assign(dir=lambda kept: Train_DIR + kept['filename'])
)
train_df.head()

Unnamed: 0,primary_label,secondary_labels,type,latitude,longitude,scientific_name,common_name,author,license,rating,time,url,filename,dir
32,akiapo,"['apapan', 'hawama', 'iiwi']",['song'],19.6294,-155.3615,Hemignathus wilsoni,Akiapolaau,Brooks Rownd,Creative Commons Attribution-NonCommercial-Sha...,4.5,12:31,https://www.xeno-canto.org/122399,akiapo/XC122399.ogg,../train_audio/akiapo/XC122399.ogg
33,akiapo,"['apapan', 'iiwi', 'warwhe1']",['call'],19.6294,-155.3615,Hemignathus wilsoni,Akiapolaau,Brooks Rownd,Creative Commons Attribution-NonCommercial-Sha...,4.5,17:46,https://www.xeno-canto.org/122401,akiapo/XC122401.ogg,../train_audio/akiapo/XC122401.ogg
34,akiapo,['apapan'],['song'],19.6642,-155.3896,Hemignathus wilsoni,Akiapolaau,Brooks Rownd,Creative Commons Attribution-NonCommercial-Sha...,4.5,16:22,https://www.xeno-canto.org/122693,akiapo/XC122693.ogg,../train_audio/akiapo/XC122693.ogg
35,akiapo,"['apapan', 'elepai', 'hawama', 'iiwi', 'omao',...",['call'],19.6642,-155.3896,Hemignathus wilsoni,Akiapolaau,Brooks Rownd,Creative Commons Attribution-NonCommercial-Sha...,3.5,15:58,https://www.xeno-canto.org/124705,akiapo/XC124705.ogg,../train_audio/akiapo/XC124705.ogg
36,akiapo,"['apapan', 'hawama', 'iiwi', 'omao', 'warwhe1']",['song'],19.6334,-155.3753,Hemignathus wilsoni,Akiapolaau,Brooks Rownd,Creative Commons Attribution-NonCommercial-Sha...,2.5,14:43,https://www.xeno-canto.org/124801,akiapo/XC124801.ogg,../train_audio/akiapo/XC124801.ogg


In [9]:
# Number of training recordings left after keeping only the scored species
# (counts non-null `primary_label` values).
train_df.primary_label.count()

1266

In [None]:
# Run the separation on every remaining recording, 4 tracks each, with a tqdm
# progress bar. The per-file lists of separated paths are the cell's output and
# are not stored in a variable.
# NOTE(review): this launches one model process per recording — expect a long run.
train_df['dir'].progress_apply(exec_sound_separation, n_outputs=4, display_audio=False)

In [10]:
# Save the filtered metadata, without the helper `dir` column and without the
# index, to the current folder.
# NOTE(review): the separated track paths are not recorded in this CSV —
# presumably they are derived from `filename` by naming convention; confirm.
train_df.drop(columns="dir", errors='ignore').to_csv("train_metadata_sound_separation.csv", index=False)