In [1]:
import os
import sys
import time
from joblib import Parallel, delayed

import numpy as np
import librosa
import soundfile as sf
import pandas as pd

Import of 'jit' requested from: 'numba.decorators', please update to use 'numba.core.decorators' or pin to Numba version 0.48.0. This alias will not be present in Numba version 0.50.0.[0m
  from numba.decorators import jit as optional_jit


In [2]:
# Root folders: one sub-directory per bird species under each of them.
DATA_DIR_MP3 = "audiodata/mp3"
DATA_DIR_WAV = "audiodata/wav"

# Target sample rate (Hz) passed to the conversion functions.
SR = 14000

# Keep only real sub-directories: os.listdir also returns stray files
# (e.g. .DS_Store, desktop.ini) that would later break os.listdir(bird_dir).
# os.listdir gives no ordering guarantee, so sort for reproducible runs.
BIRDS_DIR_LIST = sorted(
    entry for entry in os.listdir(DATA_DIR_MP3)
    if os.path.isdir(os.path.join(DATA_DIR_MP3, entry))
)

# Create directories for wav files.
# exist_ok=True makes this cell idempotent and avoids the check-then-create race.
os.makedirs(DATA_DIR_WAV, exist_ok=True)

for bird in BIRDS_DIR_LIST:
    data_dir_bird = f"{DATA_DIR_WAV}/{bird}"
    os.makedirs(data_dir_bird, exist_ok=True)

In [21]:
def convert_mp3_to_wav(filename, birdName, sr):
    """
    Convert one mp3 recording to a single-channel wav file.

    The source is read from ``DATA_DIR_MP3/birdName/filename`` and the result
    is written to ``DATA_DIR_WAV/birdName/<stem>.wav``.

    Parameters
    ----------
    filename : str
        Name of the file inside the bird's mp3 directory. Files whose
        extension is not ``.mp3`` (compared case-insensitively) are ignored.
    birdName : str
        Name of the bird sub-directory, used under both data roots.
    sr : int or None
        Sample rate handed to ``librosa.load``. The wav is written with the
        sample rate that ``librosa.load`` reports back.

    Returns
    -------
    None
        Nothing is returned. The call is a no-op when the file is not an mp3
        or when the target wav already exists.
    """
    # Check the extension first so that non-mp3 names are never sliced or
    # turned into paths.
    stem, ext = os.path.splitext(filename)
    if ext.lower() != ".mp3":
        return

    bird_filename_mp3 = f"{DATA_DIR_MP3}/{birdName}/{filename}"
    bird_filename_wav = f"{DATA_DIR_WAV}/{birdName}/{stem}.wav"

    # Already converted on a previous run.
    if os.path.isfile(bird_filename_wav):
        return

    bird_array, loaded_sr = librosa.load(bird_filename_mp3, mono=True, sr=sr)

    # Write to a temporary name and rename at the end: an interrupted write
    # must not leave a truncated wav that the isfile() guard above would
    # treat as finished on the next run.
    partial_wav = f"{bird_filename_wav}.part"
    try:
        # The ".part" suffix hides the extension, so the format is explicit.
        sf.write(partial_wav, bird_array, loaded_sr, format="WAV")
        os.replace(partial_wav, bird_filename_wav)
    finally:
        # Only still present if the write or the rename failed.
        if os.path.exists(partial_wav):
            os.remove(partial_wav)

In [22]:
def conversion_function(birdslist, sr, do_print=False, n_jobs=-1):
    """
    Convert the mp3 files of every bird in ``birdslist`` to wav.

    For each bird, the regular files found in ``DATA_DIR_MP3/<bird>`` are
    handed to ``convert_mp3_to_wav`` through a joblib ``Parallel`` call.

    Parameters
    ----------
    birdslist : iterable of str
        Names of the bird sub-directories under ``DATA_DIR_MP3``.
    sr : int
        Sample rate forwarded to ``convert_mp3_to_wav``.
    do_print : bool, default False
        When True, print the number of files and the elapsed time per bird.
    n_jobs : int, default -1
        Forwarded to ``joblib.Parallel``. Pass ``n_jobs=1`` to run
        sequentially, which is the simplest way to find a corrupted file:
        the traceback then comes straight from the failing call.

    Returns
    -------
    None
    """
    import warnings

    for birdName in birdslist:
        bird_dir = f"{DATA_DIR_MP3}/{birdName}"

        # A stray file in the mp3 root (or a mistyped name) must not abort
        # the whole run; report it and move on to the next bird.
        if not os.path.isdir(bird_dir):
            warnings.warn(f"Skipping '{birdName}': '{bird_dir}' is not a directory")
            continue

        birdsaudiolist = [f for f in os.listdir(bird_dir) if os.path.isfile(os.path.join(bird_dir, f))]

        if do_print:
            # Monotonic clock: not affected by system clock adjustments.
            start_time = time.perf_counter()
            print(f"Converting {len(birdsaudiolist)} audio files of {birdName}")

        Parallel(n_jobs=n_jobs, verbose=1)(
            delayed(convert_mp3_to_wav)(birdaudio, birdName, sr)
            for birdaudio in birdsaudiolist
        )

        if do_print:
            print(f"Conversion duration: {time.perf_counter()-start_time}s\n")

In [23]:
%%time
# Convert every bird directory listed in BIRDS_DIR_LIST at the SR sample rate,
# printing per-bird file counts and durations. convert_mp3_to_wav skips files
# whose wav already exists, so re-running this cell only processes new files.
conversion_function(BIRDS_DIR_LIST, SR, do_print=True)

Converting 334 audio files of carduelis_carduelis


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  68 tasks      | elapsed:    2.0s
[Parallel(n_jobs=4)]: Done 334 out of 334 | elapsed:    2.2s finished


Conversion duration: 2.281167984008789s

Converting 167 audio files of columba_palumbus


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done 130 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 167 out of 167 | elapsed:    0.1s finished


Conversion duration: 0.1570119857788086s

Converting 424 audio files of cyanistes_caeruleus


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done 312 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 424 out of 424 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done 312 tasks      | elapsed:    0.0s


Conversion duration: 0.1980137825012207s

Converting 686 audio files of erithacus_rubecula


[Parallel(n_jobs=4)]: Done 686 out of 686 | elapsed:    0.1s finished


Conversion duration: 0.2130134105682373s

Converting 949 audio files of fringilla_coelebs


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done 312 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 949 out of 949 | elapsed:    0.2s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Conversion duration: 0.31902313232421875s

Converting 227 audio files of hirundo_rustica


[Parallel(n_jobs=4)]: Done 190 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 227 out of 227 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done 130 tasks      | elapsed:    0.0s


Conversion duration: 0.15501141548156738s

Converting 170 audio files of linaria_cannabina


[Parallel(n_jobs=4)]: Done 170 out of 170 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Conversion duration: 0.16301369667053223s

Converting 1082 audio files of parus_major


[Parallel(n_jobs=4)]: Done 312 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 1082 out of 1082 | elapsed:    0.2s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done 190 tasks      | elapsed:    0.0s


Conversion duration: 0.3200228214263916s

Converting 223 audio files of passer_domesticus


[Parallel(n_jobs=4)]: Done 223 out of 223 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Conversion duration: 0.1960158348083496s

Converting 749 audio files of phylloscopus_collybita


[Parallel(n_jobs=4)]: Done 312 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 749 out of 749 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Conversion duration: 0.21501493453979492s

Converting 286 audio files of regulus_regulus


[Parallel(n_jobs=4)]: Done 250 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 286 out of 286 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Conversion duration: 0.17101359367370605s

Converting 196 audio files of sturnus_vulgaris
Conversion duration: 0.12901043891906738s



[Parallel(n_jobs=4)]: Done 196 out of 196 | elapsed:    0.0s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done 312 tasks      | elapsed:    0.0s


Converting 645 audio files of turdus_merula
Conversion duration: 106.53695821762085s

Wall time: 1min 51s


[Parallel(n_jobs=4)]: Done 645 out of 645 | elapsed:  1.8min finished
