In [1]:
%matplotlib notebook
from scipy.io import wavfile
from scipy.io.wavfile import write
import scipy.fftpack as dsp
import scipy.signal as sg
import numpy as np
import matplotlib.pyplot as plt

In [2]:
def down_sample (data, fs, desired_fs):
    dec_ratio = np.round(fs / desired_fs)
    offset = np.ceil(140. / dec_ratio) * dec_ratio
    start_pad = data[0] * np.ones(int(offset), dtype=np.float32)
    end_pad = data[-1] * np.ones(int(offset), dtype=np.float32)
    data = np.concatenate((start_pad, data, end_pad), axis=0)

    try:
        out = sg.decimate(data, int(dec_ratio), 3, zero_phase=True)
    except:
        out = sg.decimate(data, int(dec_ratio), 3)
    actual_fs = fs / dec_ratio
    out_final = out[int(offset / dec_ratio):-int(offset / dec_ratio)]
    out_final = out_final - np.mean(out_final)
    actual_fs = int(np.round(actual_fs))
    return out_final, actual_fs

In [3]:
def convert_data(data):
    datamono = np.mean(data, axis=1)
    maxenergy = np.argmax(datamono)
    normdatamono = datamono/datamono[maxenergy]  
    return normdatamono

In [4]:
def find_voice(data_sampled, threshold):
    
    window = np.zeros(100)
    j = 100
    k = 0
    flag = 0
    
    for i in range (int(np.ceil(len(data_sampled)/80))):
        window = data_sampled[k:j]
        k += 80
        j += 80       
        energy = np.abs(window)
        avg_energy = np.mean(energy)    
        if (avg_energy > threshold) and (flag == 0):
            flag = 1
            start = k           
        if avg_energy > threshold:
            flag = 1              
        if (avg_energy < threshold) and (flag == 1):
            flag = 2
            end = j
    
    VAD_data = data_sampled[start:end]
    return VAD_data

In [5]:
def fs_padding(VAD_Final, VAD_data):
    
    padding = len(VAD_Final) - len(VAD_data)
    front = int(np.round(padding/2))
                                   
    for i in range (front):
        VAD_Final[i] = 0
                                   
    for i in range (front, (len(VAD_data) + front)):
        VAD_Final[i] = VAD_data[i-front]
    
    return VAD_Final

In [6]:
def denoise(data_sampled, cut_off):
    fft_out = dsp.fft(data_sampled)
    denoised = np.zeros(len(fft_out))
    denoised = denoised.astype('complex128')
    for i in range (len(fft_out)):
        if (np.abs(fft_out[i]) > cut_off):
            denoised[i] = fft_out[i]
        else:
            denoised[i] = 0
            
    data_sampled_temp = dsp.ifft(denoised)
    data_sampled = data_sampled_temp.real
    
    print(fft_out)
    
    return data_sampled, fft_out, denoised

In [7]:
import os
arr = os.listdir('own')

In [8]:
for i in arr:
    
    filename = 'own/'+i
    desired_fs = 16000
    cut_off = 0
    VAD_Final = np.zeros(desired_fs)
    fs, data = wavfile.read(filename)

    data = convert_data(data)
    data, actual_fs = down_sample(data, fs, desired_fs)

    threshold = np.mean(np.abs(data))

    data_sampled, fft_out, denoised = denoise(data, cut_off)

    VAD_data = find_voice(data_sampled, threshold)
    VAD_Final = fs_padding(VAD_Final, VAD_data)

    write('owntest/'+i, actual_fs, VAD_Final)

    '''write("voicepart.wav", actual_fs, VAD_Final)

    f1, ax1 = plt.subplots()
    ax1.plot(data)
    ax1.set_title('Before PreProcessing', fontsize=12)
    ax1.set_xlabel('Samples', fontsize=12)
    ax1.set_ylabel('Amplitude', fontsize=12)

    f2, ax2 = plt.subplots()
    ax2.plot(VAD_Final)
    ax2.set_title('After PreProcessing', fontsize=12)
    ax2.set_xlabel('Samples', fontsize=12)
    ax2.set_ylabel('Amplitude', fontsize=12)

    f3, ax3 = plt.subplots()
    ax3.plot(np.abs(fft_out))
    ax3.set_title('Before Denoising', fontsize=12)
    ax3.set_xlabel('Frequency', fontsize=12)
    ax3.set_ylabel('Amplitude', fontsize=12)
    f4, ax4 = plt.subplots()

    ax4.plot(np.abs(denoised))
    ax4.set_title('After Denoising', fontsize=12)
    ax4.set_xlabel('Frequency', fontsize=12)
    ax4.set_ylabel('Amplitude', fontsize=12)

    f1.savefig('BPProces.png')
    f2.savefig('APProces.png')
    f3.savefig('FFTBD.png')
    f4.savefig('FFTAD.png')'''

[ 1.55431223e-14+0.j          1.31738428e+00+0.69147242j
  9.66659205e-01+0.20325987j ... -7.09109765e-01-0.85748388j
  9.66659205e-01-0.20325987j  1.31738428e+00-0.69147242j]
[-1.73194792e-14+0.j          2.70460917e+00-0.17053172j
  1.50453365e-01-0.12882581j ... -2.41539471e+00-0.35322918j
  1.50453365e-01+0.12882581j  2.70460917e+00+0.17053172j]
[-7.10542736e-15 +0.j          8.26712094e+00+15.59469964j
  6.75867722e+00-16.44876242j ... -1.38962684e+01 -5.34218678j
  6.75867722e+00+16.44876242j  8.26712094e+00-15.59469964j]
[-1.02140518e-14+0.j          4.93498958e+00+0.39636828j
 -4.90003589e+00-0.90423034j ...  4.87419073e+00-1.22659565j
 -4.90003589e+00+0.90423034j  4.93498958e+00-0.39636828j]
[ 1.17683641e-14+0.j          5.72716321e-01-5.23277933j
  6.65809131e+00+0.4539998j  ... -6.40434053e-01-5.89413109j
  6.65809131e+00-0.4539998j   5.72716321e-01+5.23277933j]
[-1.64313008e-14+0.j          1.26143821e+01-1.68145492j
 -1.10115097e+01+2.58469738j ...  9.38890785e+00+2.858291

[2.70894418e-14+0.j         1.37097454e+00-0.25377228j
 3.05112569e-01-0.48858689j ... 1.12416943e+00-0.3732142j
 3.05112569e-01+0.48858689j 1.37097454e+00+0.25377228j]
[ 6.21724894e-15+0.j          1.09821889e+00-0.02282945j
 -2.26713983e-01+0.21886851j ...  6.58535816e-01+0.92125898j
 -2.26713983e-01-0.21886851j  1.09821889e+00+0.02282945j]
[-6.21724894e-15+0.j          1.62583650e-01+0.15880106j
  1.04717117e+00+0.61298373j ... -2.91763908e-01+0.4454511j
  1.04717117e+00-0.61298373j  1.62583650e-01-0.15880106j]
[-3.24185123e-14+0.j          1.93481300e+00+0.23163549j
 -6.13875758e-01-0.65145368j ...  8.00252467e-01+0.06689341j
 -6.13875758e-01+0.65145368j  1.93481300e+00-0.23163549j]
[-2.48689958e-14+0.j          3.03219282e-01-0.21128435j
  3.90900674e-01-0.22713625j ...  1.30991136e+00-0.15819929j
  3.90900674e-01+0.22713625j  3.03219282e-01+0.21128435j]
[ 8.88178420e-15+0.j          5.07736599e-01-0.71079721j
  5.83196697e-01+0.3227364j  ... -2.03159934e-01-0.01082109j
  5.831966

In [9]:
np.max(data_sampled)

1.0013876076033699