In [None]:
%matplotlib inline
import numpy as np
from pydub import AudioSegment
import matplotlib.pyplot as plt

# Dataset root directory and the clip to analyse, given relative to that root.
dataset_path = '../dataset/speech_commands_v0.02/'
wavefile = 'up/004ae714_nohash_0.wav'

# The full path is the plain concatenation of the two strings above
# (dataset_path already ends with a slash); the file is decoded as WAV.
sounds = AudioSegment.from_file(f'{dataset_path}{wavefile}', format='wav')

In [None]:
# Report the basic properties of the loaded clip, one per line.
audio_summary = [
    f'channel: {sounds.channels}',
    f'frame rate: {sounds.frame_rate}',
    f'duration: {sounds.duration_seconds} s',
    f'sample width: {sounds.sample_width}',
]
print(*audio_summary, sep='\n')

In [None]:
# Keep every `channels`-th sample, starting from the first one. pydub
# serialises multi-channel audio sample by sample, so this selects the
# first channel (and is a no-op stride of 1 for mono clips).
raw_samples = sounds.get_array_of_samples()
sig = np.array(raw_samples)[::sounds.channels]

# Time axis: one instant per kept sample over [t_start, t_end), end excluded.
t_start, t_end = 0.0, sounds.duration_seconds
dt = 1.0 / sounds.frame_rate  # sampling interval in seconds
time = np.linspace(t_start, t_end, num=len(sig), endpoint=False)

In [None]:
# Discrete Fourier transform of the signal
dft_x = np.fft.fft(sig)                # complex spectrum, one value per bin
dft_n = len(sig)                       # number of samples (= number of bins)
dft_f = np.fft.fftfreq(n=dft_n, d=dt)  # signed frequency of each bin; Hz because dt is in seconds

In [None]:
# Band-pass filter: an ideal (brick-wall) filter applied in the frequency
# domain by zeroing every DFT bin outside the pass band.
bpf_cutoff_low = 5.0e+2    # lower edge of the pass band, same unit as dft_f
bpf_cutoff_high = 10.0e+2  # upper edge of the pass band, same unit as dft_f

# Keep only the bins with cutoff_low <= |f| <= cutoff_high. Comparing |f|
# handles positive and negative frequencies symmetrically and also rejects
# the f == 0 (DC) bin, which strict "0 < |f| < low" tests would let through.
bpf_abs_f = np.abs(dft_f)
bpf_x = dft_x.copy()
bpf_x[(bpf_abs_f < bpf_cutoff_low) | (bpf_abs_f > bpf_cutoff_high)] = 0.0

# The mask is symmetric in f, so for a real input signal the imaginary part
# of the inverse transform is only round-off and can be dropped.
bpf_sig = np.real(np.fft.ifft(bpf_x))

# Convert to the signed integer type that matches the source sample width
# (e.g. 2 bytes -> int16) so the raw bytes agree with `sample_width` below.
# Round to nearest instead of truncating, and clip so that filter overshoot
# saturates instead of wrapping around.
bpf_dtype = np.dtype(f'int{8 * sounds.sample_width}')
bpf_limits = np.iinfo(bpf_dtype)
bpf_pcm = np.clip(np.rint(bpf_sig), bpf_limits.min, bpf_limits.max).astype(bpf_dtype)

bpf_sounds = AudioSegment(bpf_pcm.tobytes(),
                          sample_width = sounds.sample_width,
                          frame_rate = sounds.frame_rate,
                          channels = 1)
# export() returns the still-open output file object; close it so the handle
# is not leaked (this also keeps its repr out of the cell output).
bpf_sounds.export('BPFed.wav', format='wav').close()

In [None]:
# Plot the original signal (black) against the band-pass filtered one (orange):
# waveform in the top panel, one-sided amplitude spectrum in the bottom panel.
fig, (ax01, ax02) = plt.subplots(nrows = 2, figsize = (6, 8))

ax01.set_title('Waveform')
ax01.set_xlim(t_start, t_end)
ax01.set_xlabel('Time (s)')
ax01.set_ylabel('X')
ax01.plot(time, sig, color = 'black', label = 'original')
ax01.plot(time, bpf_sig, color = 'orange', label = 'band-pass filtered')
ax01.legend()

# The first dft_n//2 entries of the fftfreq layout are the non-negative
# frequencies, so only that half of each spectrum is drawn.
dft_half = slice(0, dft_n//2)
ax02.set_title('Amplitude spectrum')
ax02.set_xlim(0, 2000)
ax02.set_xlabel('Frequency (Hz)')
ax02.set_ylabel('|X|/N')
ax02.plot(dft_f[dft_half], np.abs(dft_x[dft_half])/dft_n, color = 'black', label = 'original')
ax02.plot(dft_f[dft_half], np.abs(bpf_x[dft_half])/dft_n, color = 'orange', label = 'band-pass filtered')
ax02.legend()

# Space the panels so titles and axis labels do not overlap; ending on a call
# that returns None also keeps the Line2D repr out of the cell output.
fig.tight_layout()