In [None]:
import librosa
import librosa.display
import numpy as np
from numpy import inf
import IPython.display as ipd
import matplotlib.pyplot as plt
from pydub import AudioSegment
%matplotlib inline
import os
import scipy

# Per-noise-environment statistics, keyed by the name of the noise type.
# NOTE(review): presumably the mean of some noise feature measured on a noise
# database; the feature and its units are not shown in this file - confirm.
noiseDB_mean = {
    "Train": 0.2737,
    "Airport": 0.2157,
    "Street":0.2836,
    "Exhibition": 0.2671,
    "Station": 0.1778,
    "Restaurant": 0.1865,
    "Car": 0.2611,
    "Babble": 0.2983,
}

# Variance counterpart of the table above, with the same keys.
# The arithmetic mean of these eight values is 0.082475, which is the literal
# threshold that checkpartByVariance() divides by.
noiseDB_var = {
    "Train": 0.0708,
    "Airport": 0.1116,
    "Street": 0.0688,
    "Exhibition": 0.0674,
    "Station": 0.0685,
    "Restaurant": 0.1066,
    "Car": 0.0770,
    "Babble": 0.0891,
}

# Both tables bundled as [means, variances].
noiseDB = [
    noiseDB_mean,
    noiseDB_var,
]

def _get_frame(frame_sample_ms=20, shift_sample_ms=10):
    """
    input: frame_sample_ms(samples of window, ms단위, int)
           shift_sample_ms(samples of shift, ms단위, int)
    todo: frame과 hop_frame의 길이 안의 samples를 계산
    return: frame_len, frame_shift 
            자료형은 int
    """
    frame_len = int(frame_sample_ms*fs/1000) # frame_len = len_sample(ms) 만큼의 samples
    frame_shift = int(shift_sample_ms*fs/1000)
    return frame_len, frame_shift

"""
Entropy
"""
def _get_entropy(stft):
    """
    stft : numpy_array
           stft = librosa.stft(wavefile)
    stft된 배열의 entropy 배열을 구하는 것.
    크기는 stft와 동일\n"
    """
    Y = np.abs(stft)
    sqr_Y = Y ** 2
    div_Y = sqr_Y / np.sum(sqr_Y, axis=0)
    oneofpart = div_Y * np.log2(div_Y) * (-1)
    entropy = np.sum(oneofpart, axis=0)
    return entropy

def get_entropy_element(wave):
    """Compute the per-frame spectral entropy of a waveform.

    The waveform is transformed with ``librosa.stft`` using the window and
    hop sizes from ``_get_frame()``; each frame's normalised power spectrum
    is reduced to its base-2 entropy.

    Args:
        wave: 1-D audio signal accepted by ``librosa.stft``.

    Returns:
        1-D array, one value per STFT frame. ``+inf`` entries are set to 0
        and the remaining non-finite values go through ``np.nan_to_num``.
    """
    win_length, hop_length = _get_frame()
    spectrum = librosa.stft(wave, hop_length=hop_length, win_length=win_length)
    power = np.abs(spectrum) ** 2
    prob = power / power.sum(axis=0)
    entropy = -np.sum(prob * np.log2(prob), axis=0)
    entropy[np.isposinf(entropy)] = 0
    return np.nan_to_num(entropy)

"""
Harmonics
"""
def _get_hpeak(stft):
    """
    hpeak와 fk를 리턴하는 함수
    hpeak는 peak된 배열
    """
#     stft, _ = librosa.decompose.hpss(stft)
    hpeak = np.zeros_like(stft)
    hpeak = np.abs(hpeak)
    fk = np.zeros((stft.shape[1],))
    for i in range(stft.shape[1]): # 열기준 세로뭉텅이 자르기
        abs_stft = np.abs(stft[:, i])
        flag = 0
        for idx, elm in enumerate(abs_stft):
            if (idx == 0) or (idx == stft.shape[0] - 1):
                pass
            else:
                if (abs_stft[idx] > abs_stft[idx-1]) and (abs_stft[idx] > abs_stft[idx+1]):
                    hpeak[idx, i] = elm
                    if flag == 0:
                        fk[i,] = idx
                        flag = 1
                else:
                    pass
    return hpeak, fk

def _get_column_harmonic(column_hpeak):
    idx_col = np.where(column_hpeak != 0.0)[0]
    N = idx_col.shape[0]
    if N == 0:
        return 0
    mean_col = 0.0
    var_col = 0.0
    dist_col = 0.0
    tmp = 0.0
    for idx in idx_col:
        tmp = column_hpeak[idx]
        mean_col += tmp
    mean_col = mean_col/N
    tmp = 0.0
    for idx in idx_col:
        tmp = (column_hpeak[idx]-mean_col)**2
        var_col += tmp
    var_col = var_col/N
    tmp = 0.0
    for idx, elm in enumerate(idx_col):
        if idx != 0:
            tmp = (np.abs(column_hpeak[idx_col[idx]] - column_hpeak[idx_col[idx-1]]))/N
        else:
            tmp = column_hpeak[idx] / N
        dist_col += tmp
    return dist_col

def _get_all_harmonics(hpeak):
    blankarray = np.zeros((hpeak.shape[1],))
    for i in range(hpeak.shape[1]):
        blankarray[i] = _get_column_harmonic(hpeak[:,i])
    harmonics = blankarray
    return harmonics

def get_harmonics_element(wave):
    """Compute the per-frame harmonic measure of a waveform.

    Runs ``librosa.stft`` with the window/hop from ``_get_frame()``, keeps
    the spectral peaks via ``_get_hpeak`` and reduces each frame with
    ``_get_all_harmonics``.

    Args:
        wave: 1-D audio signal accepted by ``librosa.stft``.

    Returns:
        1-D array with one harmonic value per STFT frame.
    """
    win_length, hop_length = _get_frame()
    spectrum = librosa.stft(wave, hop_length=hop_length, win_length=win_length)
    peaks, _ = _get_hpeak(stft=spectrum)
    return _get_all_harmonics(hpeak=peaks)

"""
SEM
"""
def ftobark(stft, fs):
    """Map every STFT frequency bin to a Bark band number.

    The bin centre frequencies are converted with
    ``13*atan(0.00076 f) + 3.5*atan((f/7500)^2)``, truncated to an integer
    and shifted by one. Bins below 20 Hz are assigned band 0.

    Args:
        stft: 2-D STFT matrix; only ``stft.shape[0]`` (the number of
            frequency bins) is used.
        fs: Sampling rate in Hz.

    Returns:
        1-D integer array of length ``stft.shape[0]`` holding the band number
        of each bin.
    """
    n_bins = stft.shape[0]
    # A real FFT of size n_fft yields n_fft // 2 + 1 bins, so a K-row STFT
    # comes from n_fft = 2 * (K - 1). The previous 2 * K - 1 produced the
    # right bin count but frequencies that stop short of fs / 2.
    n_fft = max(2 * (n_bins - 1), 1)
    freqs = np.fft.rfftfreq(n_fft, d=1.0 / fs)
    bark = 13 * np.arctan(0.00076 * freqs) + 3.5 * np.arctan((freqs / 7500) ** 2)
    bark_bins = bark.astype(int) + 1
    bark_bins[freqs < 20] = 0
    return bark_bins

def _make_mu(stft, fs):
    """Return, per frame, the summed magnitude divided by the band count.

    The band count is the band number of the last bin from ``ftobark`` plus
    one.

    Args:
        stft: 2-D STFT matrix (bins x frames).
        fs: Sampling rate in Hz, forwarded to ``ftobark``.

    Returns:
        1-D array with one value per frame.
    """
    magnitude = np.abs(stft)
    band_count = ftobark(stft, fs)[-1] + 1
    return magnitude.sum(axis=0) / band_count

def _get_column_B(stft,stft_column, fs):
    """Sum the magnitudes of one STFT frame inside every Bark band.

    Args:
        stft: Full STFT matrix; used only to derive the bin-to-band map via
            ``ftobark``.
        stft_column: One column (frame) of ``stft``.
        fs: Sampling rate in Hz.

    Returns:
        1-D float array whose length is the band number of the last bin plus
        one; entry ``b`` is the sum of ``|stft_column|`` over bins in band
        ``b``.
    """
    band_of_bin = ftobark(stft, fs)
    band_count = band_of_bin[-1] + 1
    magnitude = np.abs(stft_column)
    # Python's sum() adds the bins one by one in ascending order, starting
    # from 0.0.
    energies = [sum(magnitude[band_of_bin == band], 0.0) for band in range(band_count)]
    return np.array(energies, dtype=float)

def _get_all_column_B(stft, fs):
    """Build the Bark-band energy matrix for every frame of ``stft``.

    Each frame is reduced with ``_get_column_B`` and the results become the
    columns of the output. The columns are stacked once at the end instead
    of growing the matrix with ``np.hstack`` on every iteration (quadratic
    copying), and an STFT with zero frames now yields an empty matrix rather
    than an ``UnboundLocalError``.

    Args:
        stft: 2-D STFT matrix (bins x frames).
        fs: Sampling rate in Hz.

    Returns:
        2-D float array of shape ``(n_bands, n_frames)``.
    """
    n_frames = stft.shape[1]
    columns = [_get_column_B(stft, stft[:, i], fs) for i in range(n_frames)]
    if not columns:
        n_bands = ftobark(stft, fs)[-1] + 1
        return np.zeros((n_bands, 0))
    return np.stack(columns, axis=1)

def _get_Pmin(B):
    """
    B는 3.9의 B
    """
    return np.min(B, axis=0)

def _get_PSR(B, Pmin, mu):
    PSR = (B - Pmin)/mu
    return PSR

def _get_Bbar(B, PSR):
    Bbar = B * PSR
    return Bbar

def _get_muhat(Bbar):
    muhat = np.mean(Bbar, axis=0)
    return muhat

def _get_sigmahat(muhat, Bbar):
    sigmahat = np.sqrt(np.mean((Bbar - muhat)**2, axis=0))
    return sigmahat

def _get_Bhat(Bbar, muhat, sigmahat):
    Bhat = Bbar*((Bbar - muhat)/sigmahat)
    Bhat = np.nan_to_num(Bhat)
    return Bhat

def get_sem_element(wave, fs):
    """Compute the per-frame mean and spread of the weighted Bark energies.

    Pipeline: STFT -> Bark-band energies ``B`` -> ``PSR = (B - min B) / mu``
    -> ``Bbar = B * PSR`` -> column-wise mean and RMS deviation of ``Bbar``.

    Args:
        wave: 1-D audio signal accepted by ``librosa.stft``.
        fs: Sampling rate in Hz used for the Bark mapping.

    Returns:
        ``(muhat, sigmahat)``: two 1-D arrays with one value per STFT frame,
        both cleaned with ``np.nan_to_num``.
    """
    win_length, hop_length = _get_frame()
    spectrum = librosa.stft(wave, hop_length=hop_length, win_length=win_length)
    bands = _get_all_column_B(spectrum, fs)
    floor = _get_Pmin(bands)
    mu = _make_mu(fs=fs, stft=spectrum)
    weighted = _get_Bbar(bands, _get_PSR(bands, floor, mu))
    muhat = _get_muhat(weighted)
    # The spread is computed from the raw mean, before NaNs are replaced.
    sigmahat = _get_sigmahat(muhat, weighted)
    return np.nan_to_num(muhat), np.nan_to_num(sigmahat)

"""
Short-term energy
"""
def shortTermEnergy(stft):
    """Return the mean squared magnitude of every STFT frame.

    Args:
        stft: 2-D array (bins x frames).

    Returns:
        1-D array: ``sum(|stft|**2)`` over the bins of each column, divided
        by the number of bins.
    """
    n_bins = stft.shape[0]
    power = np.abs(stft) ** 2
    return power.sum(axis=0) / n_bins

"""
Zero-crossing rate
"""
def get_ZCR(wave, pad=0):
    """Return the zero-crossing rate of ``wave + pad`` for every frame.

    Args:
        wave: 1-D audio signal.
        pad: Constant added to the signal before counting crossings.

    Returns:
        1-D array: the first row of
        ``librosa.feature.zero_crossing_rate`` computed with the frame and
        hop lengths from ``_get_frame()``.
    """
    win_length, hop_length = _get_frame()
    rates = librosa.feature.zero_crossing_rate(
        wave + pad, frame_length=win_length, hop_length=hop_length
    )
    return rates[0]

def checkpartByVariance(wave):
    """Split the STFT frames into speech+noise and noise-only by variance.

    The variance of each frame's magnitude spectrum is divided by 0.082475
    (the arithmetic mean of the ``noiseDB_var`` values); a ratio above 1
    marks the frame as speech+noise.

    Args:
        wave: 1-D audio signal accepted by ``librosa.stft``.

    Returns:
        ``(h1, h0)``: index arrays of the speech+noise frames (ratio > 1)
        and of the noise frames (ratio <= 1).
    """
    win_length, hop_length = _get_frame()
    spectrum = librosa.stft(wave, hop_length=hop_length, win_length=win_length)
    frame_variance = np.var(np.abs(spectrum), axis=0)
    gamma = frame_variance / 0.082475
    speech_frames = np.where(gamma > 1.0)[0]
    noise_frames = np.where(gamma <= 1.0)[0]
    return speech_frames, noise_frames

def _get_normEH(wave):
    """Return the harmonics-to-entropy ratio of each frame, scaled by its maximum.

    Entropy values that are NaN become 0 through ``np.nan_to_num``, and every
    zero entropy is then replaced by 5 before the division.

    Args:
        wave: 1-D audio signal accepted by ``librosa.stft``.

    Returns:
        1-D array with one value per STFT frame, divided by its own maximum.
    """
    win_length, hop_length = _get_frame()
    spectrum = librosa.stft(wave, hop_length=hop_length, win_length=win_length)
    entropy = np.nan_to_num(_get_entropy(stft=spectrum))
    entropy[entropy == 0] = 5
    ratio = get_harmonics_element(wave=wave) / entropy
    return ratio / np.max(ratio)

def get_wave_point(wave, cl=0.03, cutEH=0.95, cutN=0.95):
    """Build a per-frame 0/1 speech mask from the EH and SEM features.

    Frames are first selected with the normalised harmonics/entropy ratio
    and then deselected where the normalised SEM feature is too low.
    Reads the module-level ``fs`` for the SEM computation.

    Args:
        wave: 1-D audio signal.
        cl: Step between the 20 levels against which ``normEH`` is floored.
        cutEH: Offset added before flooring the accumulated EH score.
        cutN: Offset added before flooring the normalised SEM feature.

    Returns:
        1-D array with one entry per frame: the floored EH score minus 1
        wherever that score is 1 and the floored SEM feature is 0.
    """
    normEH = _get_normEH(wave)
    sem_a, sem_b = get_sem_element(fs=fs, wave=wave)
    sem_total = sem_a + sem_b
    normN = sem_total / np.max(sem_total)

    score = np.zeros_like(normEH)
    for level in range(20):
        score += np.floor(normEH + (1 - cl * level))
    score = score / np.max(score)

    floor_EH = np.floor(score + cutEH)
    floor_N = np.floor(normN + cutN)

    # Frames accepted by EH but rejected by the SEM feature are removed.
    veto = np.zeros_like(floor_N)
    veto[(floor_EH == 1) & (floor_N == 0)] = 1
    return floor_EH - veto

def check_speech(wavepointarray, frameoffset=5):
    """Detect speech segments in a 0/1 frame mask and pad them.

    A segment starts at a frame equal to 1 whose third successor is 1 and
    whose two frames in between are not both 0; it ends at a frame equal to
    0 whose third successor is 0 and whose two frames in between are not
    both 1. In the last four frames any 0 closes an open segment.

    Every detected segment ``(start, end)`` is then widened by
    ``frameoffset`` frames on both sides, clamped to the array bounds. The
    left clamp previously used a hard-coded 5, so with ``frameoffset > 5``
    the slice start could go negative and the padding was silently skipped.

    Args:
        wavepointarray: 1-D NumPy array of 0/1 values, typically the output
            of ``get_wave_point``. It is modified in place (the last entry
            is forced to 0 before detection) and also returned.
        frameoffset: Number of frames added before and after each segment.

    Returns:
        The same array object with the padded segments set to 1.

    Raises:
        ValueError: If no speech segment is found. (The original raised a
            plain string, which Python rejects with a ``TypeError``.)
    """
    wavepointarray[-1] = 0  # guarantees that an open segment gets closed
    n_frames = wavepointarray.shape[0]
    in_speech = False
    startpoint = 0
    detectset = []
    for idx in range(n_frames):
        if idx < n_frames - 4:
            if not in_speech:
                if (wavepointarray[idx] == 1
                        and (wavepointarray[idx + 1] + wavepointarray[idx + 2]) != 0
                        and wavepointarray[idx + 3] == 1):
                    in_speech = True
                    startpoint = idx
            elif (wavepointarray[idx] == 0
                    and (wavepointarray[idx + 1] + wavepointarray[idx + 2]) != 2
                    and wavepointarray[idx + 3] == 0):
                in_speech = False
                detectset.append((startpoint, idx))
        elif in_speech and wavepointarray[idx] == 0:
            # Too close to the end to look ahead: close on the first 0.
            in_speech = False
            detectset.append((startpoint, idx))

    if not detectset:
        raise ValueError("Can't detect speech part.")

    for start, end in detectset:
        lo = max(start - frameoffset, 0)
        hi = min(end + frameoffset, n_frames)
        wavepointarray[lo:hi] = 1
    return wavepointarray

def check_final(wave, cl=0.05, cutEH=0.98, cutN=0.98,frameoffset=5):
    """Produce the padded speech mask for a waveform.

    Steps: build the feature mask with ``get_wave_point``, clear every frame
    that ``checkpartByVariance`` classifies as noise, then detect and pad
    the speech segments with ``check_speech``.

    Args:
        wave: 1-D audio signal.
        cl, cutEH, cutN: Forwarded to ``get_wave_point``.
        frameoffset: Forwarded to ``check_speech``.

    Returns:
        1-D 0/1 array with one entry per frame.
    """
    mask = get_wave_point(wave, cl, cutEH, cutN)
    _, noise_frames = checkpartByVariance(wave=wave)
    mask[noise_frames] = 0
    print("Checking the speech")
    return check_speech(wavepointarray=mask, frameoffset=frameoffset)

def checkWavepoint_by_strength(wave, num=2):
    """Run ``get_wave_point`` with one of three preset threshold pairs.

    ``cl`` is always 0.05. The presets are:

    * 1: cutEH=0.99, cutN=0.98 (for clean speech)
    * 2: cutEH=0.97, cutN=0.95
    * 3: cutEH=0.95, cutN=0.93 (for noisy speech)

    Args:
        wave: 1-D audio signal.
        num: Preset number, one of 1, 2 or 3.

    Returns:
        The mask returned by ``get_wave_point``.

    Raises:
        ValueError: If ``num`` is not 1, 2 or 3. (The original raised a plain
            string, which Python rejects with a ``TypeError``.)
    """
    presets = {
        1: (0.99, 0.98),
        2: (0.97, 0.95),
        3: (0.95, 0.93),
    }
    if num not in presets:
        raise ValueError("입력 숫자를 1, 2, 3 중에 넣어주세요. ex)깔끔한 음성=>1, 노이즈가 많은 음성=>3")
    cutEH, cutN = presets[num]
    return get_wave_point(cl=0.05, cutEH=cutEH, cutN=cutN, wave=wave)

def returnFinalBridgeArray(wave, num=2, frameoffset=10, step=20):
    """Compute the denoised speech mask and its bridged version.

    ``check_final`` is run with ``cl=0.05`` and one of three preset
    threshold pairs (1: cutEH=0.99/cutN=0.98, 2: 0.97/0.95, 3: 0.95/0.93);
    the result is cleaned with ``denoisingAfterFinalArray`` and short gaps
    are filled with ``makeBridge``.

    Args:
        wave: 1-D audio signal.
        num: Preset number, one of 1, 2 or 3.
        frameoffset: Padding in frames, forwarded to ``check_final``.
        step: Maximum gap length in frames, forwarded to ``makeBridge``.

    Returns:
        ``(FBA, denoisingAF)``: the bridged mask and the denoised mask.

    Raises:
        ValueError: If ``num`` is not 1, 2 or 3. (The original raised a plain
            string, which Python rejects with a ``TypeError``.)
    """
    presets = {
        1: (0.99, 0.98),
        2: (0.97, 0.95),
        3: (0.95, 0.93),
    }
    if num not in presets:
        raise ValueError("입력 숫자를 1, 2, 3 중에 넣어주세요. ex)깔끔한 음성=>1, 노이즈가 많은 음성=>3")
    cutEH, cutN = presets[num]
    finalarray2 = check_final(wave, cl=0.05, cutEH=cutEH, cutN=cutN, frameoffset=frameoffset)
    denoisingAF = denoisingAfterFinalArray(finalarray=finalarray2)
    FBA = makeBridge(dAF=denoisingAF, step=step)
    return FBA, denoisingAF

# (1/fs)*final.shape[0]*frame_shift
def pointset(finalarray):
    """List the ``(start, end)`` index pairs of the runs of 1 in a mask.

    ``start`` is the index of the first 1 of a run and ``end`` the index of
    the first 0 that follows it.

    Args:
        finalarray: Mutable sequence of 0/1 values. Its last element is set
            to 0 in place so that a trailing run is always closed.

    Returns:
        List of ``(start, end)`` tuples in order of appearance.
    """
    finalarray[-1] = 0
    segments = []
    run_start = None
    for frame, value in enumerate(finalarray):
        if run_start is None:
            if value == 1:
                run_start = frame
        elif value == 0:
            segments.append((run_start, frame))
            run_start = None
    return segments

def _audioseg(startframe, endframe, frame_shift, fs):
    """Cut the span between two frame indices out of the source WAV file.

    Frame indices are converted to milliseconds as
    ``frame * (1/fs) * frame_shift * 1000``. The file is read from the
    module-level ``f_path``.

    Args:
        startframe: Index of the first frame of the span.
        endframe: Index of the frame at which the span ends.
        frame_shift: Hop size in samples.
        fs: Sampling rate in Hz.

    Returns:
        The ``AudioSegment`` slice between the two positions.
    """
    begin_ms = startframe * (1/fs) * frame_shift * 1000
    finish_ms = endframe * (1/fs) * frame_shift * 1000
    source = AudioSegment.from_wav(f_path)
    return source[begin_ms: finish_ms]

def makeWavSegFile(newAudio,dirname="shinminadir", i=0):
    """Export one audio segment as ``newmina<i>.wav`` inside ``dirname``.

    The output directory is created if it does not exist yet. The previous
    bare ``except:`` hid every error of the directory creation; an existing
    directory is now the only condition that is ignored.

    Args:
        newAudio: Object with an ``export(path, format=...)`` method, e.g. a
            pydub ``AudioSegment``.
        dirname: Name of the output directory below the hard-coded base path.
        i: Number used in the output file name.

    Returns:
        Whatever ``newAudio.export`` returns.
    """
    # NOTE(review): the base directory is a machine-specific absolute path.
    export_path = 'C:\\Users\\jmlik\\Desktop\\Work\\mywork\\simplevad\\auditok_eg\\'+dirname
    os.makedirs(export_path, exist_ok=True)
    return newAudio.export(export_path+'\\newmina' +str(i)+ '.wav', format="wav")

    
def makeWavFile(pointset, frame_shift, fs):
    """Export every ``(start, end)`` frame span as its own WAV file.

    Args:
        pointset: Iterable of ``(startframe, endframe)`` pairs.
        frame_shift: Hop size in samples.
        fs: Sampling rate in Hz.

    Returns:
        The string ``"Finish"``.
    """
    for order, span in enumerate(pointset):
        clip = _audioseg(span[0], span[1], frame_shift, fs)
        makeWavSegFile(clip, i=order)
    return "Finish"

"""
noise 종류는 010, 0110, 01110
앞뒤중 하나가 노이즈값에 3프레임 건너서 스피치가 나올 때 (ex. 111100010001111)
이는 speech 뭉텅이에 합쳐주고 (앞에 합치는 것을 우선순위로 잡는다.)
그 외의 경우는 노이즈 제거
"""
def denoisingAfterFinalArray(finalarray):
    """Remove or absorb isolated 1-, 2- and 3-frame bursts in a speech mask.

    A burst is a run of one to three 1s with a 0 on both sides (patterns
    ``010``, ``0110``, ``01110``). If a block of four consecutive 1s lies
    within one to three frames before or after the burst, the gap between
    them is filled with 1s; the checks alternate before/after with
    increasing distance, so the nearer block wins and the preceding side
    wins ties. Otherwise the burst is cleared.

    The first seven frames are handled separately because there is not
    enough room before them: only following blocks are searched, and frame 0
    is always cleared. The last ten frames are not examined.

    Fix: the ``0110`` tests for ``idx < 6`` and ``idx == 6`` compared
    ``finalarray[idx+1]`` with both 1 and 0, which can never be true, so
    those branches were dead. They now test ``finalarray[idx+2] == 0`` like
    the main branch.

    Args:
        finalarray: 1-D NumPy 0/1 array (the padded mask). It is modified in
            place while being scanned from left to right.

    Returns:
        The same array object.
    """
    for idx, elm in enumerate(finalarray):
        # Main region: room for 7 frames before and 10 frames after.
        if (idx>=7) and (idx <= finalarray.shape[0]-1-10):
            if (finalarray[idx] == 1 and # 010
                finalarray[idx-1] == 0 and
                finalarray[idx+1] == 0):
                if np.sum(finalarray[idx-5:idx-1])==4:
                    finalarray[idx-5:idx] = 1
                elif np.sum(finalarray[idx+2:idx+6])==4:
                    finalarray[idx:idx+6] = 1
                elif np.sum(finalarray[idx-6:idx-2])==4:
                    finalarray[idx-6:idx] = 1
                elif np.sum(finalarray[idx+3:idx+7])==4:
                    finalarray[idx:idx+7] = 1
                elif np.sum(finalarray[idx-7:idx-3])==4:
                    finalarray[idx-7:idx] = 1
                elif np.sum(finalarray[idx+4:idx+8])==4:
                    finalarray[idx:idx+8] = 1
                else:
                    finalarray[idx] = 0

            elif (finalarray[idx] == 1 and # 0110
                finalarray[idx-1] == 0 and
                finalarray[idx+1] == 1 and
                finalarray[idx+2] == 0):
                if np.sum(finalarray[idx-5:idx-1])==4:
                    finalarray[idx-5:idx] = 1
                elif np.sum(finalarray[idx+3:idx+7])==4:
                    finalarray[idx:idx+7] = 1
                elif np.sum(finalarray[idx-6:idx-2])==4:
                    finalarray[idx-6:idx] = 1
                elif np.sum(finalarray[idx+4:idx+8])==4:
                    finalarray[idx:idx+8] = 1
                elif np.sum(finalarray[idx-7:idx-3])==4:
                    finalarray[idx-7:idx] = 1
                elif np.sum(finalarray[idx+5:idx+9])==4:
                    finalarray[idx:idx+9] = 1
                else:
                    finalarray[idx] = 0
                    finalarray[idx+1] = 0

            elif (finalarray[idx] == 1 and # 01110
                finalarray[idx-1] == 0 and
                finalarray[idx+1] == 1 and
                finalarray[idx+2] == 1 and
                finalarray[idx+3] == 0):
                if np.sum(finalarray[idx-5:idx-1])==4:
                    finalarray[idx-5:idx] = 1
                elif np.sum(finalarray[idx+4:idx+8])==4:
                    finalarray[idx:idx+8] = 1
                elif np.sum(finalarray[idx-6:idx-2])==4:
                    finalarray[idx-6:idx] = 1
                elif np.sum(finalarray[idx+5:idx+9])==4:
                    finalarray[idx:idx+9] = 1
                elif np.sum(finalarray[idx-7:idx-3])==4:
                    finalarray[idx-7:idx] = 1
                elif np.sum(finalarray[idx+6:idx+10])==4:
                    finalarray[idx:idx+10] = 1
                else:
                    finalarray[idx] = 0
                    finalarray[idx+1] = 0
                    finalarray[idx+2] = 0
        # Head of the array: only blocks that follow the burst are searched.
        if idx<7:
            if idx==0:
                finalarray[idx]=0
            if 0<idx<6:
                if (finalarray[idx] == 1 and # 010
                finalarray[idx-1] == 0 and
                finalarray[idx+1] == 0):
                    if np.sum(finalarray[idx+1:idx+5])==4:
                        finalarray[idx:idx+5]=1
                    elif np.sum(finalarray[idx+2:idx+6])==4:
                        finalarray[idx:idx+6]=1
                    elif np.sum(finalarray[idx+3:idx+7])==4:
                        finalarray[idx:idx+7]=1
                    else:
                        finalarray[idx]=0

                elif (finalarray[idx] == 1 and # 0110
                finalarray[idx-1] == 0 and
                finalarray[idx+1] == 1 and
                finalarray[idx+2] == 0):  # fixed: was finalarray[idx+1] == 0
                    if np.sum(finalarray[idx+3:idx+7])==4:
                        finalarray[idx:idx+7] = 1
                    elif np.sum(finalarray[idx+4:idx+8])==4:
                        finalarray[idx:idx+8] = 1
                    elif np.sum(finalarray[idx+5:idx+9])==4:
                        finalarray[idx:idx+9] = 1
                    else:
                        finalarray[idx] = 0
                        finalarray[idx+1] = 0

                elif (finalarray[idx] == 1 and # 01110
                finalarray[idx-1] == 0 and
                finalarray[idx+1] == 1 and
                finalarray[idx+2] == 1 and
                finalarray[idx+3] == 0):
                    if np.sum(finalarray[idx+4:idx+8])==4:
                        finalarray[idx:idx+8] = 1
                    elif np.sum(finalarray[idx+5:idx+9])==4:
                        finalarray[idx:idx+9] = 1
                    elif np.sum(finalarray[idx+6:idx+10])==4:
                        finalarray[idx:idx+10] = 1
                    else:
                        finalarray[idx] = 0
                        finalarray[idx+1] = 0
                        finalarray[idx+2] = 0

            if idx==6:
                # NOTE(review): when frames 1..4 are all 1, the burst start
                # and the frame before it are cleared instead of merged -
                # confirm that this is intended.
                if (finalarray[idx] == 1 and # 010
                finalarray[idx-1] == 0 and
                finalarray[idx+1] == 0):
                    if np.sum(finalarray[1:5])==4:
                        finalarray[idx-1:idx+1]=0
                    elif np.sum(finalarray[idx+1:idx+5])==4:
                        finalarray[idx:idx+5]=1
                    elif np.sum(finalarray[idx+2:idx+6])==4:
                        finalarray[idx:idx+6]=1
                    elif np.sum(finalarray[idx+3:idx+7])==4:
                        finalarray[idx:idx+7]=1
                    else:
                        finalarray[idx]=0

                elif (finalarray[idx] == 1 and # 0110
                finalarray[idx-1] == 0 and
                finalarray[idx+1] == 1 and
                finalarray[idx+2] == 0):  # fixed: was finalarray[idx+1] == 0
                    if np.sum(finalarray[1:5])==4:
                        finalarray[idx-1:idx+1]=0
                    elif np.sum(finalarray[idx+3:idx+7])==4:
                        finalarray[idx:idx+7] = 1
                    elif np.sum(finalarray[idx+4:idx+8])==4:
                        finalarray[idx:idx+8] = 1
                    elif np.sum(finalarray[idx+5:idx+9])==4:
                        finalarray[idx:idx+9] = 1
                    else:
                        finalarray[idx] = 0
                        finalarray[idx+1] = 0

                elif (finalarray[idx] == 1 and # 01110
                finalarray[idx-1] == 0 and
                finalarray[idx+1] == 1 and
                finalarray[idx+2] == 1 and
                finalarray[idx+3] == 0):
                    if np.sum(finalarray[1:5])==4:
                        finalarray[idx-1:idx+1]=0
                    elif np.sum(finalarray[idx+4:idx+8])==4:
                        finalarray[idx:idx+8] = 1
                    elif np.sum(finalarray[idx+5:idx+9])==4:
                        finalarray[idx:idx+9] = 1
                    elif np.sum(finalarray[idx+6:idx+10])==4:
                        finalarray[idx:idx+10] = 1
                    else:
                        finalarray[idx] = 0
                        finalarray[idx+1] = 0
                        finalarray[idx+2] = 0
    return finalarray

def get_speech_section(daf, fs, frame_shift):
    """Describe every run of 1s in a mask in seconds, samples and frames.

    Args:
        daf: Mutable sequence of 0/1 values; its last and first elements are
            set to 0 in place before scanning.
        fs: Sampling rate in Hz.
        frame_shift: Hop size in samples.

    Returns:
        ``(Isspeech, Tdspeech, Fdspeech)``: three lists with one dict per
        run, each with keys ``'startTime'``, ``'endTime'`` and ``'word'``
        (always ``""``). The boundaries are the first and last frame of the
        run, expressed in seconds (``frame * frame_shift / fs``), in samples
        (``frame * frame_shift``) and as frame indices respectively.
    """
    daf[-1] = 0
    daf[0] = 0
    Isspeech, Tdspeech, Fdspeech = [], [], []
    run_start = None
    for idx, elm in enumerate(daf):
        if run_start is None:
            if elm == 1:
                run_start = idx
            continue
        if elm == 1:
            continue
        # First non-1 frame after a run: the run ended on the previous frame.
        run_end = idx - 1
        Isspeech.append({'startTime': (1/fs)*run_start*frame_shift,
                         'endTime': (1/fs)*run_end*frame_shift,
                         'word': ""})
        Tdspeech.append({'startTime': run_start*frame_shift,
                         'endTime': run_end*frame_shift,
                         'word': ""})
        Fdspeech.append({'startTime': run_start,
                         'endTime': run_end,
                         'word': ""})
        run_start = None
    return Isspeech, Tdspeech, Fdspeech

#finalarray, daf, isspeech
def makeForm(wave, fs, Isspeech,step):
    """Concatenate the speech spans of a waveform, each followed by silence.

    Args:
        wave: 1-D audio signal.
        fs: Sampling rate in Hz.
        Isspeech: Iterable of dicts with ``"startTime"`` and ``"endTime"``
            in seconds.
        step: Length in seconds of the block of zeros appended after every
            span.

    Returns:
        1-D float array; empty when ``Isspeech`` is empty.
    """
    pieces = [np.arange(0, dtype=float)]
    for span in Isspeech:
        first = int(span["startTime"]*fs)
        last = int(span["endTime"]*fs) + 1
        pieces.append(wave[first:last])
        pieces.append(np.zeros((int(fs*step),)))
    return np.hstack(pieces)

def makeBridge(dAF, step):
    """Fill short gaps between speech runs of a 0/1 mask.

    A gap is a stretch that begins right after a frame equal to 1 and ends
    right before the next frame equal to 1. Gaps of at most ``step`` frames
    are set to 1; longer ones are left alone.

    Args:
        dAF: 1-D NumPy mask, e.g. from ``denoisingAfterFinalArray``. It is
            not modified.
        step: Longest gap, in frames, that is bridged.

    Returns:
        A copy of ``dAF`` with the short gaps filled.
    """
    bridged = dAF.copy()
    gaps = []
    gap_start = None
    for idx in range(bridged.shape[0] - 1):
        following = bridged[idx + 1]
        if gap_start is None:
            if bridged[idx] == 1 and following == 0:
                gap_start = idx + 1
        elif following == 1:
            gaps.append((gap_start, idx))
            gap_start = None
    for first, last in gaps:
        if (last - first + 1) <= step:
            bridged[first:last + 1] = 1
    return bridged

def breakBridge(bridgeArray, DAF, fs, frame_shift):
    """Placeholder for cutting the bridged mask to a reference duration.

    Not implemented: the arguments are ignored and ``None`` is returned.
    """
    return None


def extractIndexOfBridge(bridgeArray, max_sec, min_sec, fs, frame_shift):
    """Describe the speech runs of a mask and classify them by duration.

    A run starts at a frame equal to 1 and is closed by the first following
    frame equal to 0. Its duration for classification is
    ``(end - start + 1) * frame_shift / fs`` seconds, where ``end`` is the
    index of the closing 0. A run that is never closed is not reported.

    Args:
        bridgeArray: Iterable of 0/1 frame values.
        max_sec: Runs longer than this get status 1.
        min_sec: Runs shorter than this get status -1.
        fs: Sampling rate in Hz.
        frame_shift: Hop size in samples.

    Returns:
        ``(segments, blocks)``.

        ``segments`` is a list of dicts with keys ``startTime``, ``endTime``,
        ``runTime`` (seconds), ``startFrame``, ``endFrame`` and ``status``
        (1 too long, 0 within limits, -1 too short). Status-1 entries also
        carry ``divideNum``.

        ``blocks`` is a list of dicts (``startblock``, ``endblock``,
        ``blockStatus``) that partitions the positions in ``segments``
        around each status-1 entry; ``endblock`` -1 marks an open end.
    """
    segments = []
    run_start = None
    for idx, elm in enumerate(bridgeArray):
        if run_start is None:
            if elm == 1:
                run_start = idx
            continue
        if not elm == 0:
            continue
        total_time = (idx - run_start + 1)*frame_shift*(1/fs)
        if total_time > max_sec:
            status = 1
        elif total_time >= min_sec and total_time <= max_sec:
            status = 0
        elif total_time < min_sec:
            status = -1
        else:
            # Not comparable (e.g. NaN): leave the run open.
            continue
        start_time = run_start*frame_shift*(1/fs)
        end_time = idx*frame_shift*(1/fs)
        entry = {
            'startTime': start_time,
            'endTime': end_time,
            'runTime': end_time - start_time,
        }
        if status == 1:
            # NOTE(review): a remainder is stored as the "number of pieces";
            # this looks like it was meant to be a division - confirm.
            entry['divideNum'] = int((end_time - start_time) % 4)
        entry['startFrame'] = run_start
        entry['endFrame'] = idx
        entry['status'] = status
        segments.append(entry)
        run_start = None

    blocks = [{'startblock': 0, 'endblock': -1, 'blockStatus': 0}]
    block_start = 0
    last_pos = len(segments) - 1
    for pos, seg in enumerate(segments):
        if seg['status'] != 1:
            continue
        # Close the open block in front of the long segment, then add the
        # long segment as its own block.
        blocks.pop()
        blocks.append({'startblock': block_start, 'endblock': pos, 'blockStatus': 0})
        blocks.append({'startblock': pos, 'endblock': pos + 1, 'blockStatus': 1})
        if pos != last_pos:
            block_start = pos + 1
            blocks.append({'startblock': block_start, 'endblock': -1, 'blockStatus': 0})
    return segments, blocks

def _stackWithRightSide(i, EIB, min_sec=2, max_sec=5):
    toadd = {}
    toadd['startTime']=EIB[i]['startTime']
    toadd['endTime']=EIB[i+1]['endTime']
    toadd['runTime']=EIB[i+1]['endTime']-EIB[i]['startTime']
    toadd['startFrame']=EIB[i]['startFrame']
    toadd['endFrame']=EIB[i+1]['endFrame']
    if toadd['runTime']>=min_sec and toadd['runTime']<=max_sec:
        toadd['status']=0
    elif toadd['runTime'] > max_sec:
        toadd['status']=1
    else:
        toadd['status']=-1
    del EIB[i+1]
    EIB[i]=toadd
    return EIB

def _stackWithLeftSide(i, EIB, min_sec=2, max_sec=5):
    toadd = {}
    toadd['startTime']=EIB[i-1]['startTime']
    toadd['endTime']=EIB[i]['endTime']
    toadd['runTime']=EIB[i]['endTime']-EIB[i-1]['startTime']
    toadd['startFrame']=EIB[i-1]['startFrame']
    toadd['endFrame']=EIB[i]['endFrame']
    if toadd['runTime']>=min_sec and toadd['runTime']<=max_sec:
        toadd['status']=0
    elif toadd['runTime'] > max_sec:
        toadd['status']=1
    else:
        toadd['status']=-1
    del EIB[i]
    EIB[i-1]=toadd
    return EIB

def _divideForOvermaxsec(i, EIB, DAF, min_sec=2, max_sec=5):
    """Split an over-long segment along the gaps of the un-bridged mask.

    The frames covered by ``EIB[i]`` are re-analysed on ``DAF`` with
    ``extractIndexOfBridge``, the resulting pieces are shifted back to
    absolute times/frames and merged with ``_stackWithSide``.

    Reads the module-level ``frame_shift`` and ``fs``.

    Fixes:
        * ``endFrame`` of each piece was shifted by the segment's
          ``endFrame`` instead of its ``startFrame``.
        * The slice of ``DAF`` now includes the frame at ``endFrame`` (the 0
          that closes the segment). Without it the last run in the slice was
          never closed and was therefore dropped.

    Args:
        i: Position in ``EIB`` of the segment to split.
        EIB: List of segment dicts from ``extractIndexOfBridge``.
        DAF: Denoised (un-bridged) 0/1 mask.
        min_sec: Lower duration limit in seconds.
        max_sec: Upper duration limit in seconds.

    Returns:
        ``(returnEIB, EIB_part_Stacked, length)``: ``EIB`` with entry ``i``
        replaced by the pieces, the pieces alone, and their count.
    """
    startFrame = EIB[i]['startFrame']
    endFrame = EIB[i]['endFrame']
    startTime = EIB[i]['startTime']
    EIB_part, _ = extractIndexOfBridge(
        bridgeArray=DAF[startFrame:endFrame + 1],
        frame_shift=frame_shift,
        fs=fs,
        max_sec=max_sec,
        min_sec=min_sec,
    )
    # Positions inside the slice are relative to the segment's start.
    for part in EIB_part:
        part['startTime'] += startTime
        part['endTime'] += startTime
        part['startFrame'] += startFrame
        part['endFrame'] += startFrame

    EIB_part_Stacked = _stackWithSide(EIB=EIB_part, DAF=DAF, min_sec=min_sec, max_sec=max_sec)
    length = len(EIB_part_Stacked)
    returnEIB = EIB[:i] + EIB_part_Stacked + EIB[i+1:]

    return returnEIB, EIB_part_Stacked, length

def _underdivide(EIB_part_Stacked, DAF, min_sec, max_sec):
    """Keep splitting entries with status 1 until none is left to process.

    Walks the list from the front; every entry whose ``status`` is 1 is
    handed to ``_divideForOvermaxsec`` and the working list is replaced by
    that call's second return value.

    Args:
        EIB_part_Stacked: List of segment dicts.
        DAF: Denoised 0/1 mask, forwarded to ``_divideForOvermaxsec``.
        min_sec: Lower duration limit in seconds.
        max_sec: Upper duration limit in seconds.

    Returns:
        The list that remains after the walk.
    """
    idx = 0
    lenStacked = len(EIB_part_Stacked)
    while idx<lenStacked:
        if EIB_part_Stacked[idx]['status']==1:
            # NOTE(review): the second return value holds only the pieces of
            # entry `idx`; the other entries of the list are dropped here.
            # The first return value keeps them - confirm which was intended.
            _,EIB_part_Stacked, length = _divideForOvermaxsec(i=idx, EIB=EIB_part_Stacked,DAF=DAF, min_sec=min_sec, max_sec=max_sec)
            lenStacked = len(EIB_part_Stacked)
            # A single piece means the entry could not be split: move on.
            # Otherwise the same position is examined again.
            if length==1:
                idx+=1
                
            else:
                pass
            
        else:
            idx+=1
    
    return EIB_part_Stacked

def _totalDivide(EIB, DAF, i, max_sec=5, min_sec=2):
    """Replace segment ``i`` by its fully subdivided pieces.

    The segment is split with ``_divideForOvermaxsec`` and the pieces are
    refined with ``_underdivide``.

    Args:
        EIB: List of segment dicts; not modified.
        DAF: Denoised 0/1 mask.
        i: Position of the segment to split.
        max_sec: Upper duration limit in seconds.
        min_sec: Lower duration limit in seconds.

    Returns:
        ``(new_list, length)``: a new list with entry ``i`` replaced by the
        pieces, and the number of pieces.
    """
    pieces = _divideForOvermaxsec(DAF=DAF, EIB=EIB, i=i, max_sec=max_sec, min_sec=min_sec)[1]
    pieces = _underdivide(DAF=DAF, EIB_part_Stacked=pieces, max_sec=max_sec, min_sec=min_sec)
    return EIB[:i] + pieces + EIB[i+1:], len(pieces)

def _stackWithSide(EIB, DAF, min_sec=2, max_sec=5):
    i = 0
    lenEIB = len(EIB)
    while i < lenEIB:
        if lenEIB == 1:
            """원소가 하나뿐이라면"""
            i+=1
        else:
            if i==0:
                if EIB[i]['status']==-1:
                    """stack with right side"""
                    EIB_copy = EIB
                    EIB = _stackWithRightSide(i=i, EIB=EIB_copy)
                    lenEIB = len(EIB)
                    if EIB[i]['runTime']>=min_sec:
                        i+=1
                    else:
                        pass
                elif EIB[i]['status']==1:
                    """stack after divide"""
                    EIB, length = _totalDivide(DAF=daf, EIB=EIB, i=i, min_sec=min_sec, max_sec=max_sec)
                    lenEIB = len(EIB)
                    i+=length
                else:
                    i+=1

            elif i==len(EIB)-1:
                if EIB[i]['status']==-1:
                    """stack with left side"""
                    EIB_copy = EIB
                    EIB = _stackWithLeftSide(i=i, EIB=EIB_copy)
                    lenEIB = len(EIB)
                    if EIB[i-1]['runTime']>=min_sec:
                        i+=1
                    else:
                        pass
                elif EIB[i]['status']==1:
                    """stack after divide"""
                    EIB, length = _totalDivide(DAF=daf, EIB=EIB, i=i, min_sec=min_sec, max_sec=max_sec)
                    lenEIB = len(EIB)
                    i+=length
                
                else:
                    i+=1                    
            else:
                if EIB[i]['status']==-1:
                    """비교 후 stack with shorter one"""
                    if EIB[i+1]['runTime'] >= EIB[i-1]['runTime']:
                        EIB = _stackWithLeftSide(i=i, EIB=EIB)
                        lenEIB = len(EIB)
                        if EIB[i]['runTime']>=min_sec:
                            i+=1
                        else:
                            pass
                    else:
                        EIB = _stackWithRightSide(i=i, EIB=EIB)
                        lenEIB = len(EIB)
                        if EIB[i]['runTime']>=min_sec:
                            i+=1
                        else:
                            pass
                elif EIB[i]['status']==1:
                    """stack after divide"""
                    EIB, length = _totalDivide(DAF=DAF, EIB=EIB, i=i, min_sec=min_sec, max_sec=max_sec)
                    lenEIB = len(EIB)
                    i+=length
                else:
                    i+=1
    return EIB

def extractTime(sws):
    """Reduce segment dicts to their start and end times, rounded to 2 decimals.

    Args:
        sws: Iterable of dicts with ``'startTime'`` and ``'endTime'``.

    Returns:
        List of dicts holding only ``'startTime'`` and ``'endTime'``.
    """
    return [
        {
            'startTime': round(seg['startTime'], 2),
            'endTime': round(seg['endTime'], 2),
        }
        for seg in sws
    ]

"""End to End"""
def allInOne(wavFilePath, level=2, max_sec=5, min_sec=2, step=20, frameoffset=10):
    """Run the whole pipeline on a WAV file and return the speech intervals.

    Steps: load the file with ``librosa.load``, build the bridged and
    denoised masks, extract and classify the segments, merge/split them to
    respect the duration limits and keep the rounded times.

    Args:
        wavFilePath: Path of the audio file.
        level: Threshold preset (1, 2 or 3) for ``returnFinalBridgeArray``.
        max_sec: Upper segment duration limit in seconds.
        min_sec: Lower segment duration limit in seconds.
        step: Longest gap, in frames, that is bridged.
        frameoffset: Padding in frames around detected speech.

    Returns:
        List of dicts with ``'startTime'`` and ``'endTime'`` in seconds,
        rounded to two decimals.
    """
    # The hop size comes from _get_frame(), which is evaluated before the
    # file is loaded.
    _, frame_shift = _get_frame()
    wave, fs = librosa.load(wavFilePath)
    bridged, denoised = returnFinalBridgeArray(
        frameoffset=frameoffset, num=level, step=step, wave=wave
    )
    segments, _ = extractIndexOfBridge(
        bridgeArray=bridged, frame_shift=frame_shift, fs=fs,
        max_sec=max_sec, min_sec=min_sec,
    )
    stacked = _stackWithSide(DAF=denoised, EIB=segments, max_sec=max_sec, min_sec=min_sec)
    return extractTime(stacked)