# End Pointing of Speech signal

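* Based on the paper by Rabiner and Sambur, ["An Algorithm for Determining the Endpoints of Isolated Utterances"](http://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=6778857) (Bell System Technical Journal, 1975).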

In [190]:
%matplotlib inline
from scipy import signal
from scipy.io.wavfile import write,read
import matplotlib.pyplot as plt
import math
import numpy as np
import IPython
from numpy import convolve
from scipy.signal import hamming
from scipy.ndimage.interpolation import shift
from scipy.linalg import toeplitz,inv
from scipy.fftpack import fft,fftshift,ifft

# Returns ndarray corresponding to voiced part in filename wav file
def endpointer(filename,Fs,glob_count=1):
    Ts=1/Fs
    read_wav = read(filename+'.wav')
    inp=np.array(read_wav[1],dtype='float64')
    plt.rcParams["figure.figsize"] = (18,10)
    #plt.title("Raw Input")
    #plt.plot(inp,'blue')
    
    #------------------------------------------------------------------------------
    #Compute Avg magnitude (Short Time), with window size N/20 (0-9 spoken twice)
    W=50
    pad_zeros=np.zeros(W)
    zp_inp_t=np.append(pad_zeros,inp)
    zp_inp=np.append(zp_inp_t,pad_zeros)
    st_avg=[np.sum(abs(zp_inp[i-W:i+W])) for i in range(W,zp_inp.size-W)]
    #plt.plot(st_avg,'red')

    #Compute ZCR
    st_zcr=[np.where(np.diff(np.signbit(inp[i-W:i+W])))[0].size for i in range(W,zp_inp.size-W)]
    #plt.plot(st_zcr,'green')
    
    #Compute IZCT assuming 100ms silence in start
    num_samp_in_sil=int(Fs*0.01)
    sil_inp=inp[::-1][0:num_samp_in_sil]
    sil_inp_t=np.append(pad_zeros,sil_inp)
    sil_inp_wpad=np.append(sil_inp_t,pad_zeros)
    #Compute ZCR
    sil_zcr=[np.where(np.diff(np.signbit(sil_inp_wpad[i-W:i+W])))[0].size for i in range(W,sil_inp_wpad.size-W)]
    sil_avg=[np.sum(np.abs(sil_inp_wpad[i-W:i+W])) for i in range(W,sil_inp_wpad.size-W)]
    
    
    sigma_izc=np.std(sil_zcr)
    mean_izc=np.amax(sil_zcr)
    IF=25
    
    IZCT=min(IF,mean_izc+(2*sigma_izc))
    #print (mean_izc+(2*sigma_izc))
    
    #Compute Peak Energy Imax and Silence Energy Imin
    Imx=np.amax(st_avg)
    Imn=np.mean(sil_avg)
    #print(Imn)
    #Compute ITL and ITU
    I1=0.03*(Imx-Imn)+Imn
    I2=4*Imn
    ITL=2250
    #print(ITL)
    ITU=10*ITL
    
    #Search Fwd
    i=0
    while(True):
        #print(i)
        if(i==0):
            start_index=num_samp_in_sil+1
        zero_st,zero_fi=search(st_avg,st_zcr,start_index,IZCT,ITU,ITL)
        #print (zero_fi)
        if(zero_st==-1):
            start_index=zero_fi
            continue
        print (zero_st)
        print (zero_fi)
        IPython.display.Audio(np.array(inp[zero_st:zero_fi]),rate=8000)
        
        filename="./Processed_Data/"+str(int(i/2))+"/"+str(glob_count+(i%2))+".wav"
        print (filename)
        write(filename, 8000, np.array(inp[zero_st:zero_fi]).astype(np.dtype('i2')))
        start_index=zero_fi
        i=i+1
        if(i==20):
            break
        #plt.plot(inp[zero_st:zero_fi])
        
endpointer('./Raw_Data/Digits_male_8Khz/zero_to_nine_Hitesh2',8000,5)
endpointer('./Raw_Data/Digits_male_8Khz/zero_to_nine_Hitesh1',8000,7)


5849
10082
./Processed_Data/0/5.wav
14788
17440
./Processed_Data/0/6.wav
24426
27454
./Processed_Data/1/5.wav
32710
37543
./Processed_Data/1/6.wav
38139
42318
./Processed_Data/2/5.wav
43491
47455
./Processed_Data/2/6.wav
52422
55798
./Processed_Data/3/5.wav
62135
66486
./Processed_Data/3/6.wav
71041
74398
./Processed_Data/4/5.wav
81271
85433
./Processed_Data/4/6.wav
90970
93433
./Processed_Data/5/5.wav
101937
105515
./Processed_Data/5/6.wav
109835
114765
./Processed_Data/6/5.wav
126863
128654
./Processed_Data/6/6.wav
141020
145242
./Processed_Data/7/5.wav
150740
153417
./Processed_Data/7/6.wav
160121
162313
./Processed_Data/8/5.wav
168256
171564
./Processed_Data/8/6.wav
176872
181277
./Processed_Data/9/5.wav
185896
190444
./Processed_Data/9/6.wav
5952
11281
./Processed_Data/0/7.wav
14596
19111
./Processed_Data/0/8.wav
26893
32676
./Processed_Data/1/7.wav
36524
40316
./Processed_Data/1/8.wav
50576
55501
./Processed_Data/2/7.wav
60872
63608
./Processed_Data/2/8.wav
74079
80615
./Processe

In [188]:
def search(st_avg,st_zcr,start_index,IZCT,ITU,ITL):
    N1=0
    N2=0
    m=start_index    
    #Fwd Search
    while(True):
        flag=0
        contender=0
        i=0
        #print(st_avg[m]-ITL)
        if(st_avg[m] >= ITL):
            i=m
            while(True):
                if(st_avg[i]<ITL):
                    flag=1
                    break
                if(st_avg[i]>=ITU):
                    contender=i
                    if(i==m):
                        contender=contender-1
                    flag=2
                    break
                else: 
                    i=i+1
            
            if(flag==2):              
                M1=0
                last_index=0
                for j in range(1,26):
                    if(st_zcr[contender-j]>IZCT):
                        M1=M1+1
                        last_index=j
                if (M1 > 2):
                    N1=contender-last_index
                    break
                else:
                    N1=contender
                    break
        if(flag==1):
            m=i+1
            continue
        if(flag==0):
            m=m+1
            continue

                
    #Bkwd Search
    m=N1
    while(True):
        flag=0
        contender=0
        i=0
        if(st_avg[m] <= ITU):
            i=m
            while(True):
                if(st_avg[i]>ITU):
                    flag=1
                    break
                if(st_avg[i]<=ITL):
                    contender=i
                    if(i==m):
                        contender=contender+1
                    flag=2
                    break
                else: 
                    i=i+1
            
            if(flag==2):
                M1=0
                last_index=0
                if(contender>=len(st_zcr)-25):
                    N2=contender
                    break
                for j in range(1,26):
                    if(st_zcr[contender+j]>IZCT):
                        M1=M1+1
                        last_index=j
                if (M1 > 2):
                    N2=contender+last_index
                    break
                else:
                    N2=contender
                    break
                
        if(flag==1):
            m=i+1
        if(flag==0):
            m=m+1
    
    if((N2-N1)<1600):
        return -1,N2+1
    return N1,N2
            
    
 