2개의 MFCC 값에 DP matching을 수행하여 얼라인먼트를 추정한다

In [None]:
def dp_matching(feature_1, feature_2):
    """Align two feature sequences with DP matching.

    Args:
        feature_1: First feature array, shape (nframes_1, num_dims).
        feature_2: Second feature array, shape (nframes_2, num_dims).

    Returns:
        total_cost: Accumulated cost of the cheapest path, normalised by
            ``nframes_1 + nframes_2``.
        min_path: Frame correspondence along the cheapest path, as a list
            of ``[n, m]`` index pairs ordered from ``[0, 0]`` to
            ``[nframes_1 - 1, nframes_2 - 1]``.

    Raises:
        ValueError: If the feature dimensions differ or either input has
            no frames.
    """
    feature_1 = np.asarray(feature_1)
    feature_2 = np.asarray(feature_2)

    # Extract the number of frames and the number of dimensions
    (nframes_1, num_dims_1) = np.shape(feature_1)
    (nframes_2, num_dims_2) = np.shape(feature_2)
    if num_dims_1 != num_dims_2:
        raise ValueError('Dimensions of each feature should be identical')
    if nframes_1 == 0 or nframes_2 == 0:
        raise ValueError('Each feature must contain at least one frame')

    # Distance (local cost) matrix: squared Euclidean distance between
    # every frame of feature_1 and every frame of feature_2, one row at a time
    distance = np.zeros((nframes_1, nframes_2))
    for n in range(nframes_1):
        distance[n] = np.sum((feature_2 - feature_1[n]) ** 2, axis=1)

    # Accumulated cost matrix
    cost = np.zeros((nframes_1, nframes_2))
    # Matrix recording which move reached each cell
    # 0: vertical move, 1: diagonal move, 2: horizontal move
    track = np.zeros((nframes_1, nframes_2), np.int16)

    # Cost at the starting point
    cost[0, 0] = distance[0, 0]

    # Column 0: can only be reached by a vertical (downward) move
    for n in range(1, nframes_1):
        cost[n, 0] = cost[n - 1, 0] + distance[n, 0]
        track[n, 0] = 0

    # Row 0: can only be reached by a horizontal (rightward) move
    for m in range(1, nframes_2):
        cost[0, m] = cost[0, m - 1] + distance[0, m]
        track[0, m] = 2

    # Everywhere else: take the cheapest of vertical / diagonal / horizontal
    for n in range(1, nframes_1):
        for m in range(1, nframes_2):
            vertical = cost[n - 1, m] + distance[n, m]
            # A diagonal move advances both sequences, so it is weighted x2
            diagonal = cost[n - 1, m - 1] + 2 * distance[n, m]
            horizontal = cost[n, m - 1] + distance[n, m]

            # Pick the move with the smallest accumulated cost
            candidate = [vertical, diagonal, horizontal]
            transition = np.argmin(candidate)

            # Record the accumulated cost and the chosen move
            cost[n, m] = candidate[transition]
            track[n, m] = transition

    # The total cost is the value in the last row / last column,
    # normalised by the number of frames of both sequences
    total_cost = cost[-1, -1] / (nframes_1 + nframes_2)

    # Back track: follow the recorded moves from the end to the start
    n = nframes_1 - 1
    m = nframes_2 - 1
    min_path = [[n, m]]
    while n > 0 or m > 0:
        move = track[n, m]
        if move == 0:
            # Vertical move
            n -= 1
        elif move == 1:
            # Diagonal move
            n -= 1
            m -= 1
        else:
            # Horizontal move
            m -= 1
        min_path.append([n, m])

    # The path was collected end-to-start; put it in forward order
    min_path.reverse()

    return total_cost, min_path

if __name__ == '__main__':
    # Paths of the MFCC files to align
    mfcc_file_1 = './mfcc/REPEAT500_set1_009.bin'
    mfcc_file_2 = './mfcc/REPEAT500_set2_009.bin'

    # File the alignment result is written to
    result = './alignment.txt'

    # Number of MFCC dimensions
    num_dims = 13
    # Read the raw float32 feature data from the feature files
    mfcc_1 = np.fromfile(mfcc_file_1, dtype=np.float32)
    mfcc_2 = np.fromfile(mfcc_file_2, dtype=np.float32)
    # Convert the flat data to (number of frames x number of dimensions)
    mfcc_1 = mfcc_1.reshape(-1, num_dims)
    mfcc_2 = mfcc_2.reshape(-1, num_dims)

    # Run DP matching
    total_cost, min_path = dp_matching(mfcc_1, mfcc_2)

    # Write the alignment (frame correspondence), one "n m" pair per line
    with open(result, mode='w') as f:
        for p in min_path:
            f.write('%d %d\n' % (p[0], p[1]))

In [None]:
def dp_matching(feature_1, feature_2):
    """Align two feature sequences with DP matching.

    Args:
        feature_1: First feature array, shape (nframes_1, num_dims).
        feature_2: Second feature array, shape (nframes_2, num_dims).

    Returns:
        total_cost: Accumulated cost of the cheapest path, normalised by
            ``nframes_1 + nframes_2``.
        min_path: Frame correspondence along the cheapest path, as a list
            of ``[n, m]`` index pairs ordered from ``[0, 0]`` to
            ``[nframes_1 - 1, nframes_2 - 1]``.

    Raises:
        ValueError: If the feature dimensions differ or either input has
            no frames.
    """
    feature_1 = np.asarray(feature_1)
    feature_2 = np.asarray(feature_2)

    (nframes_1, num_dims1) = np.shape(feature_1)
    (nframes_2, num_dims2) = np.shape(feature_2)
    if num_dims1 != num_dims2:
        raise ValueError('Dimensions of each feature should be identical')
    if nframes_1 == 0 or nframes_2 == 0:
        raise ValueError('Each feature must contain at least one frame')

    # Local cost: squared Euclidean distance between every pair of frames,
    # computed one row (one frame of feature_1) at a time
    distance = np.zeros((nframes_1, nframes_2))
    for n in range(nframes_1):
        distance[n] = np.sum((feature_2 - feature_1[n]) ** 2, axis=1)

    # cost: accumulated cost; track: move used to reach each cell
    # (0: vertical, 1: diagonal, 2: horizontal)
    cost = np.zeros((nframes_1, nframes_2))
    track = np.zeros((nframes_1, nframes_2), dtype=np.int16)

    cost[0, 0] = distance[0, 0]
    # First column is reachable only by vertical moves
    for n in range(1, nframes_1):
        cost[n, 0] = cost[n - 1, 0] + distance[n, 0]
        track[n, 0] = 0
    # First row is reachable only by horizontal moves
    for m in range(1, nframes_2):
        cost[0, m] = cost[0, m - 1] + distance[0, m]
        track[0, m] = 2
    for n in range(1, nframes_1):
        for m in range(1, nframes_2):
            vertical = cost[n - 1, m] + distance[n, m]
            # Diagonal moves carry twice the local cost
            diagonal = cost[n - 1, m - 1] + 2 * distance[n, m]
            horizontal = cost[n, m - 1] + distance[n, m]

            candidate = [vertical, diagonal, horizontal]
            transition = np.argmin(candidate)

            cost[n, m] = candidate[transition]
            track[n, m] = transition

    total_cost = cost[-1, -1] / (nframes_1 + nframes_2)

    # Back track from the last cell to [0, 0]. A vertical move must step
    # to the previous row rather than stop, otherwise the path is truncated.
    n = nframes_1 - 1
    m = nframes_2 - 1
    min_path = [[n, m]]
    while n > 0 or m > 0:
        move = track[n, m]
        if move == 0:
            n -= 1
        elif move == 1:
            n -= 1
            m -= 1
        else:
            m -= 1
        min_path.append([n, m])
    # Collected end-to-start; return it in forward order
    min_path.reverse()

    return total_cost, min_path


if __name__ == '__main__':
    # Input MFCC feature files and the file the alignment is written to
    mfcc_file_1 = './mfcc/REPEAT500_set1_009.bin'
    mfcc_file_2 = './mfcc/REPEAT500_set2_009.bin'
    result = './alignment.txt'

    # Number of values per frame in the feature files
    num_dims = 13

    # Load both files as flat float32 arrays ...
    mfcc_1 = np.fromfile(mfcc_file_1, dtype=np.float32)
    mfcc_2 = np.fromfile(mfcc_file_2, dtype=np.float32)
    # ... then fold each into rows of num_dims values
    mfcc_1 = np.reshape(mfcc_1, (-1, num_dims))
    mfcc_2 = np.reshape(mfcc_2, (-1, num_dims))

    total_cost, min_path = dp_matching(mfcc_1, mfcc_2)

    # One "n m" index pair per line
    with open(result, mode='w') as f:
        f.writelines('%d %d\n' % (pair[0], pair[1]) for pair in min_path)

In [None]:
if __name__ == '__main__':
    # Inline DP matching of two MFCC files; writes the frame alignment to
    # a text file.
    mfcc1_path = 'mfcc1_path.txt'
    mfcc2_path = 'mfcc2_path.txt'

    # np.fromfile returns a flat array, so it has to be folded into
    # (frames, dims) before the frame count can be read from the shape.
    # NOTE(review): 13 is assumed to be the MFCC dimensionality of these
    # files - confirm against how they were produced.
    num_dims = 13
    mfcc1 = np.fromfile(mfcc1_path, dtype=np.float32).reshape(-1, num_dims)
    mfcc2 = np.fromfile(mfcc2_path, dtype=np.float32).reshape(-1, num_dims)

    num_frames_1 = mfcc1.shape[0]
    num_frames_2 = mfcc2.shape[0]
    if num_frames_1 == 0 or num_frames_2 == 0:
        raise ValueError('Each feature must contain at least one frame')

    # Local cost: squared Euclidean distance between every pair of frames
    distance_matrix = np.zeros((num_frames_1, num_frames_2))
    for n in range(num_frames_1):
        distance_matrix[n] = np.sum((mfcc2 - mfcc1[n]) ** 2, axis=1)

    # Accumulated cost, and the move used to reach each cell
    # 0: horizontal move, 1: vertical move, 2: diagonal move
    cost_matrix = np.zeros((num_frames_1, num_frames_2))
    track = np.zeros((num_frames_1, num_frames_2), dtype=np.int16)

    cost_matrix[0, 0] = distance_matrix[0, 0]

    # Boundary loops start at 1: starting at 0 would read index -1
    # (the last row/column) and overwrite the start cell.
    # First column: reachable only by vertical moves
    for n in range(1, num_frames_1):
        cost_matrix[n, 0] = cost_matrix[n - 1, 0] + distance_matrix[n, 0]
        track[n, 0] = 1
    # First row: reachable only by horizontal moves
    for m in range(1, num_frames_2):
        cost_matrix[0, m] = cost_matrix[0, m - 1] + distance_matrix[0, m]
        track[0, m] = 0

    for n in range(1, num_frames_1):
        for m in range(1, num_frames_2):
            horizontal = cost_matrix[n, m - 1] + distance_matrix[n, m]
            vertical = cost_matrix[n - 1, m] + distance_matrix[n, m]
            # Diagonal moves carry twice the local cost
            diagonal = cost_matrix[n - 1, m - 1] + 2 * distance_matrix[n, m]

            candidate = [horizontal, vertical, diagonal]
            transition = np.argmin(candidate)

            cost_matrix[n, m] = candidate[transition]
            track[n, m] = transition

    # Normalise by the total number of frames of both sequences
    total_cost = cost_matrix[-1, -1] / (num_frames_1 + num_frames_2)

    # Back track from the last cell (last valid index, not the length)
    n = num_frames_1 - 1
    m = num_frames_2 - 1
    min_path = [(n, m)]
    while (n, m) != (0, 0):
        move = track[n, m]
        if move == 0:
            m -= 1
        elif move == 1:
            n -= 1
        else:
            n -= 1
            m -= 1
        min_path.append((n, m))
    # Collected end-to-start; put it in forward order
    min_path.reverse()

    out_file = 'DPmatchingresltfile'
    with open(out_file, mode='w') as f_out:
        # Header line, then one "n m" index pair per line
        f_out.write('DP Matching: %s %s\n' % (mfcc1_path, mfcc2_path))
        for n, m in min_path:
            f_out.write('%d %d\n' % (n, m))
            