In [187]:
import math
import os
from glob import glob

import librosa
import numpy as np
import pandas as pd
import soundfile as sf

Resample to 22050 Hz, 24-bit depth

In [188]:
# Re-encode every source recording as a 24-bit PCM WAV at a fixed sample rate.
# The target rate is passed explicitly so it does not depend on librosa's
# default value for `sr`.
TARGET_SR = 22050  # Hz

audio_files = glob("fishsounds/dataset/*.wav")

output_dir = r"fishsounds/resampled_22050Hz_24bit/"
# Make sure the destination exists before the first write.
os.makedirs(output_dir, exist_ok=True)
for f in audio_files:
    signal, rate = librosa.load(f, sr=TARGET_SR)
    print("-----------------------------------------------------------")
    # Take the file name from the path itself instead of a hard-coded
    # character offset, so renaming the input folder cannot corrupt it.
    f_name = os.path.basename(f)
    print(f'y: {signal[:10]}')
    print(f'shape signal: {signal.shape}')
    print(f'sampling rate: {rate}')
    sf.write(os.path.join(output_dir, f_name), signal, rate, subtype='PCM_24')

-----------------------------------------------------------
y: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
shape signal: (121986,)
sampling rate: 22050
-----------------------------------------------------------
y: [0.01893576 0.02372942 0.02165109 0.0204875  0.02056332 0.02249832
 0.02578675 0.02844589 0.02929114 0.02844568]
shape signal: (74353,)
sampling rate: 22050
-----------------------------------------------------------
y: [-0.08920449 -0.10867348 -0.09398311 -0.08309154 -0.07639719 -0.07498258
 -0.07630886 -0.07474533 -0.07172927 -0.07381283]
shape signal: (119511,)
sampling rate: 22050
-----------------------------------------------------------
y: [0.00734051 0.01321094 0.00982868 0.01036316 0.00984962 0.01100795
 0.01066968 0.01017946 0.01080279 0.01039888]
shape signal: (520260,)
sampling rate: 22050
-----------------------------------------------------------
y: [-8.2873739e-06 -9.5795840e-06 -3.9143488e-06 -2.8246548e-05
 -1.5269266e-05 -3.8075726e-05 -2.5225803e-05 -5.0035305e-05
 -2

Apply a low-pass filter with a 1250 Hz cutoff

In [190]:
def running_mean(x, windowSize):
    """Return the moving average of ``x`` over a sliding window.

    The average is computed with the cumulative-sum trick: the sum of each
    window is the difference of two entries of the running total.  Only
    full windows are produced, so the result has
    ``len(x) - windowSize + 1`` elements (empty when the window is longer
    than the input).  Multi-dimensional input is flattened first.

    Parameters
    ----------
    x : array_like
        Input samples.
    windowSize : int
        Number of consecutive samples averaged per output value; must be
        at least 1.

    Returns
    -------
    numpy.ndarray
        The averaged samples.  Floating-point (or complex) input keeps its
        dtype; integer and boolean input yields ``float64``.

    Raises
    ------
    ValueError
        If ``windowSize`` is smaller than 1.
    """
    if windowSize < 1:
        raise ValueError("windowSize must be a positive integer")

    samples = np.asarray(x)
    # Accumulate in at least double precision: a running total kept in
    # float32 loses low-order bits as it grows, and the subtraction below
    # would turn that loss into visible error on long signals.
    acc_dtype = np.result_type(samples.dtype, np.float64)
    cumsum = np.cumsum(np.insert(samples.ravel().astype(acc_dtype), 0, 0))
    averaged = (cumsum[windowSize:] - cumsum[:-windowSize]) / windowSize

    # Hand back the caller's precision so downstream dtypes are unchanged.
    if np.issubdtype(samples.dtype, np.inexact):
        averaged = averaged.astype(samples.dtype, copy=False)
    return averaged
    
def lowpassfilter(signal, cutOffFrequency, sampleRate):
    """Low-pass ``signal`` with a single moving-average pass.

    The window length is derived from the cutoff-to-sample-rate ratio using
    the approximation discussed at
    http://dsp.stackexchange.com/questions/9966/what-is-the-cut-off-frequency-of-a-moving-average-filter
    and is printed on every call.

    Because ``running_mean`` only returns full windows, the output is
    ``N - 1`` samples shorter than the input, where ``N`` is the window size.

    Parameters
    ----------
    signal : array_like
        Samples to filter.
    cutOffFrequency : float
        Desired cutoff, in the same unit as ``sampleRate``; must be positive.
    sampleRate : float
        Sampling rate of ``signal``; must be positive.

    Returns
    -------
    numpy.ndarray
        The smoothed samples.

    Raises
    ------
    ValueError
        If ``cutOffFrequency`` or ``sampleRate`` is not strictly positive.
    """
    # A zero ratio divides by zero and a negative one produces a negative
    # window size, so reject both up front with a clear message.
    if cutOffFrequency <= 0 or sampleRate <= 0:
        raise ValueError("cutOffFrequency and sampleRate must be positive")

    freqRatio = cutOffFrequency / sampleRate
    # For any positive ratio the quotient below is > 1, so N is at least 1.
    N = int(math.sqrt(0.196196 + freqRatio**2) / freqRatio)
    print("Window Size for "+ str(cutOffFrequency) + "Hz is "+ str(N))
    return running_mean(signal, N)

# Smooth every resampled recording with repeated moving-average passes and
# store the result as 24-bit PCM.
CUTOFF_HZ = 1250     # cutoff handed to each moving-average pass
FILTER_PASSES = 6    # number of times the filter is applied in sequence

audio_files = glob("fishsounds/resampled_22050Hz_24bit/*.wav")
print("Dataset Size: "+ str(len(audio_files)))

output_dir = r"fishsounds/filtered_lpf_1k/"
# Make sure the destination exists before the first write.
os.makedirs(output_dir, exist_ok=True)
for f in audio_files:
    signal, rate = librosa.load(f)
    print("-----------------------------------------------------------")
    # Take the file name from the path itself instead of a hard-coded
    # character offset, so renaming the input folder cannot corrupt it.
    f_name = os.path.basename(f)
    print(f'y: {signal[:10]}')
    print(f'shape signal: {signal.shape}')
    print(f'sampling rate: {rate}')
    # Each pass shortens the signal slightly, because the moving average
    # only emits full windows.
    for _ in range(FILTER_PASSES):
        signal = lowpassfilter(signal, CUTOFF_HZ, rate)
    sf.write(os.path.join(output_dir, f_name), signal, rate, subtype='PCM_24')

Dataset Size: 109
-----------------------------------------------------------
y: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
shape signal: (121986,)
sampling rate: 22050
Window Size for 1250Hz is 7
Window Size for 1250Hz is 7
Window Size for 1250Hz is 7
Window Size for 1250Hz is 7
Window Size for 1250Hz is 7
Window Size for 1250Hz is 7
-----------------------------------------------------------
y: [0.01893568 0.02372932 0.02165103 0.02048743 0.02056324 0.02249825
 0.02578664 0.02844584 0.02929103 0.0284456 ]
shape signal: (74353,)
sampling rate: 22050
Window Size for 1250Hz is 7
Window Size for 1250Hz is 7
Window Size for 1250Hz is 7
Window Size for 1250Hz is 7
Window Size for 1250Hz is 7
Window Size for 1250Hz is 7
-----------------------------------------------------------
y: [-0.08920455 -0.10867357 -0.09398317 -0.08309162 -0.0763973  -0.07498264
 -0.07630897 -0.07474542 -0.0717293  -0.07381284]
shape signal: (119511,)
sampling rate: 22050
Window Size for 1250Hz is 7
Window Size for 1250Hz is 7