In [1]:
import numpy as np
import pydub
import requests
import os
import json
import noisereduce as nr
import librosa
import librosa.display
from scipy.io import wavfile
import matplotlib.pyplot as plt
from PreprocessingFunctions import *

#### Download select mp3 files for local machine testing

In [4]:
# Download the mp3 recordings referenced by the per-species JSON metadata files
# in ./data/costa_rica and store them under PATH as "<json file stem>_<index>.mp3".
PATH = "./data/costa_rica/mp3/"
SPECIES_JSON_DIR = "./data/costa_rica"
REQUEST_TIMEOUT_S = 60  # seconds; keeps one stalled download from hanging the whole cell

# Make sure the download target exists before the first write.
os.makedirs(PATH, exist_ok=True)

for filename in os.listdir(SPECIES_JSON_DIR):
    if not filename.endswith(".json"):
        continue

    # Read the metadata and close the JSON file before starting any network I/O.
    with open(SPECIES_JSON_DIR + "/" + filename, 'r') as speciesFile:
        species_data = json.load(speciesFile)

    for i, d in enumerate(species_data):
        # Keep only records whose file name ends in ".mp3", whose "smp" field is "48000"
        # and whose "q" field is not "E" (presumably sample rate and quality rating --
        # TODO confirm against the metadata source).
        if not (d["file-name"].endswith(".mp3") and d["smp"] == "48000" and d["q"] != "E"):
            continue

        mp3_url = d["file"]
        try:
            r = requests.get(mp3_url, allow_redirects=True, timeout=REQUEST_TIMEOUT_S)
            r.raise_for_status()
        except requests.RequestException as err:
            # Skip this recording instead of saving an HTTP error page as an .mp3.
            print(f"Skipping {mp3_url}: {err}")
            continue

        # `i` is the record's index in the JSON list, not a running count of downloads.
        # The context manager closes the file handle, which the bare open().write() did not.
        with open(PATH + filename[:-5] + f"_{i}.mp3", 'wb') as mp3_file:
            mp3_file.write(r.content)

#### Make data windows for Costa Rica data

In [None]:
PATH_TO_COSTA_MP3 = "./data/costa_rica/mp3"

# Turn every downloaded mp3 into a set of windows and save each window as its own mp3.
for filename in os.listdir(PATH_TO_COSTA_MP3):
    if not filename.endswith(".mp3"):
        continue

    sr, numpy_audio_array = mp3_to_np(f"{PATH_TO_COSTA_MP3}/{filename}")

    # A 2-D array is averaged over axis 1 (presumably the channel axis -- TODO confirm
    # against mp3_to_np) so that a 1-D signal is handed to the window extraction.
    if numpy_audio_array.ndim == 2:
        numpy_audio_array = np.mean(numpy_audio_array, axis=1)

    windows = extract_best_windows(numpy_audio_array, sr)
    stem = filename[:-4]  # file name without the ".mp3" extension
    for i, window in enumerate(windows):
        np_to_mp3(f"./data/costa_rica/mp3_windows/{stem}_{i}.mp3", sr, window)

#### Sort Costa Rica data into training and validation splits. Testing data will come from real recordings.

In [14]:
import random
import shutil

# Copy every class folder under SPEC_PATH into a train and a validation tree,
# putting TRAIN_RATIO of each class's files into train and the rest into validation.
SPEC_PATH = "data/costa_rica/denoised_spectrogram"
TRAIN_PATH = "data/costa_rica/train_dn"
VAL_PATH = "data/costa_rica/validation_dn"
TRAIN_RATIO = .9
SPLIT_SEED = 42  # fixed seed: re-running the cell reproduces the same split
CHECKPOINT_SUFFIX = ".ipynb_checkpoints"

# A dedicated generator keeps the split reproducible without touching the global
# `random` state. The cell only ever adds files, so an unseeded re-run would copy
# some files into BOTH train and validation.
split_rng = random.Random(SPLIT_SEED)

# sorted(): os.listdir order is arbitrary, and the seeded shuffle is only
# reproducible when it starts from the same ordering every time.
for directory in sorted(os.listdir(SPEC_PATH)):
    class_dir = SPEC_PATH + "/" + directory

    # Skip Jupyter checkpoint folders (they would become a bogus class) and stray
    # files (os.listdir on them would raise).
    if directory.endswith(CHECKPOINT_SUFFIX) or not os.path.isdir(class_dir):
        continue

    train_dest = TRAIN_PATH + "/" + directory
    val_dest = VAL_PATH + "/" + directory
    # makedirs also creates TRAIN_PATH / VAL_PATH themselves when they are missing.
    os.makedirs(train_dest, exist_ok=True)
    os.makedirs(val_dest, exist_ok=True)

    # Drop checkpoint entries BEFORE splitting so they do not skew the train ratio.
    file_list = sorted(
        name for name in os.listdir(class_dir) if not name.endswith(CHECKPOINT_SUFFIX)
    )
    split_rng.shuffle(file_list)

    t_idx = int(len(file_list) * TRAIN_RATIO)
    train_files = file_list[:t_idx]
    val_files = file_list[t_idx:]

    for filename in train_files:
        shutil.copy(class_dir + "/" + filename, train_dest)
    for filename in val_files:
        shutil.copy(class_dir + "/" + filename, val_dest)
            

### Denoise all wav files

In [16]:
# Denoise every .wav file in WAV_PATH and write it to DEST_PATH as "<name>dn.wav".
# Alternative configuration for the full data set:
#WAV_PATH = "data/costa_rica/wav"
#DEST_PATH = "data/costa_rica/denoised_wav"
WAV_PATH = "data/costa_rica/test_wav"
DEST_PATH = "data/costa_rica/test_wav"
DENOISED_SUFFIX = "dn.wav"

# When source and destination are the same folder, outputs of an earlier run are
# listed as inputs again; without a guard each re-run would create "*dndn.wav" files.
writes_in_place = os.path.abspath(WAV_PATH) == os.path.abspath(DEST_PATH)

os.makedirs(DEST_PATH, exist_ok=True)

for filename in os.listdir(WAV_PATH):
    if not filename.endswith(".wav"):
        continue
    if writes_in_place and filename.endswith(DENOISED_SUFFIX):
        # Already a denoised output of this cell; do not denoise it a second time.
        continue

    # load data (sr=None keeps the file's own sampling rate instead of resampling)
    data, sr = librosa.load(WAV_PATH + "/" + filename, sr=None)
    # perform noise reduction
    reduced_noise = nr.reduce_noise(y=data, sr=sr)
    wavfile.write(DEST_PATH + "/" + filename[:-4] + DENOISED_SUFFIX, sr, reduced_noise)
        

### Make denoised spectrograms

In [17]:
# Export spectrograms for every entry found in wav_path.
# Alternative configuration for the full data set:
#   wav_path = "data/costa_rica/denoised_wav"
#   export_path = "data/costa_rica/denoised_spectrogram/"
wav_path = "data/costa_rica/test_wav"
export_path = "data/costa_rica/test/"
audio_clips = os.listdir(wav_path)

# The helper receives the directory with a trailing slash.
get_spectro_from_wav(
    audio_clips=audio_clips,
    wav_path=f"{wav_path}/",
    export_path=export_path,
    make_subdirs=True,
)

  3.2392138e-05  2.6903126e-05], sr=48000 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  Z = librosa.feature.melspectrogram(x,sr)


0.5


In [4]:
# Pass one denoised recording to convert_long_wav_to_spectro_windows; going by its
# name it splits the recording into windows and exports spectrograms to export_path.
# NOTE(review): window_size=5 is presumably in seconds -- confirm in PreprocessingFunctions.
file_name_root = "Bird1dn"
wav_file_path = "data/costa_rica/test_wav/Bird_1dn.wav"
export_path = "data/costa_rica/test/"

convert_long_wav_to_spectro_windows(
    wav_file_path,
    export_path,
    file_name_root,
    window_size=5,
)

In [4]:
# Single-file run of the wav -> mp3 conversion helper on one recording.
wavPath = "data/costa_rica/wav/Broad-billed_Motmot_22.wav"
newmp3testpath = "data/costa_rica/testmp3/Broad-billed_Motmot_22.mp3"

convert_wav_to_mp3(wavPath, newmp3testpath)


In [5]:
# Round-trip one denoised wav through mp3, cut it into windows, save the windows as
# wav files and export spectrograms for them.
ROUNDTRIP_DIR = "data/costa_rica/test/wav"  # holds the intermediate mp3 and the window wavs
mp3_path = ROUNDTRIP_DIR + "/mp3version.mp3"
wav_file_path = "data/costa_rica/test_wav/Bird_1dn.wav"

os.makedirs(ROUNDTRIP_DIR, exist_ok=True)

convert_wav_to_mp3(wav_file_path, mp3_path)
sr, audio_array = mp3_to_np(mp3_path)
# Same 2-D -> 1-D averaging as the Costa Rica windowing cell, so both cells hand
# extract_best_windows the same kind of input.
if len(audio_array.shape) == 2:
    audio_array = np.mean(audio_array, axis=1)
windows = extract_best_windows(audio_array, sr)
for i, window in enumerate(windows):
    np_to_wav(ROUNDTRIP_DIR + f"/wavtest_{i}.wav", sr, window)

wav_path = ROUNDTRIP_DIR
# The folder also contains mp3version.mp3 written above; pass only the .wav files on.
audio_clips = [clip for clip in os.listdir(wav_path) if clip.endswith(".wav")]
export_path = "data/costa_rica/test/spectrogram/"

get_spectro_from_wav(audio_clips=audio_clips, wav_path=wav_path+"/", export_path=export_path)
        