In [7]:
import sounddevice as sd 
import soundfile as sf
import tkinter as tk
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
from matplotlib.figure import Figure
import os, glob
import os.path
import librosa
from math import *
import sys
from dtw import dtw
from numpy.linalg import norm

In [None]:
class Application(tk.Frame):
    def __init__(self, master=None):
        super().__init__(master)
        self.master = master
        self.pack()
        self.draw_graph()
        self.create_widgets()

    def create_widgets(self):
        self.record_button = tk.Button(self, text="녹음 시작", command=self.Voice_rec)
        self.record_button.pack(side="top")
        
        self.dtw_button = tk.Button(self, text="DTW 알고리즘", command=self.dtw_analysis)
        self.dtw_button.pack(side="bottom")
        
        self.cnn_button = tk.Button(self, text="CNN 알고리즘", command=self.cnn_analysis)
        self.cnn_button.pack(side="bottom")
        

    def Voice_rec(self): 
        fs = 48000

        # seconds 
        duration = 5
        myrecording = sd.rec(int(duration * fs),  
                             samplerate=fs, channels=2) 
        sd.wait() 

        # Save as FLAC file at correct sampling rate 
        return sf.write('my_Audio_file.wav', myrecording, fs) 
    
    def dtw_analysis(self):
        base_path = "./data/base/0_jackson_0.wav"
        test_path = "./data/test/0_nicolas_37.wav"
        loss_list = get_loss_dtw(base_path, test_path)
        print("DTW algorithm")
        self.draw_graph(data=loss_list)
        
    def cnn_analysis(self):
        print("CNN algorithm")
    
    def draw_graph(self,data = None):
        if data == None:
            x=np.arange(1, 10, 1)
            y=2*x**2
        else:
            x = data
            y = range(len(data))
        
        fig = Figure(figsize=(10, 7), dpi=100)  #그리프 그릴 창 생성
        fig.add_subplot(1,1,1).plot(x, y)#창에 그래프 하나 추가
        canvas = FigureCanvasTkAgg(fig, master=self.master)
        canvas.draw()
        canvas.get_tk_widget().pack()
    
root = tk.Tk()
root.geometry("800x500")
app = Application(master=root)
app.mainloop()

In dtw function
[0.6226668953895569, 0.8163523077964783, 0.6573701500892639, 0.6833181381225586, 0.5678728818893433, 0.5458644032478333, 0.6413932740688324, 0.6482222229242325, 0.9795098900794983, 0.8415072560310364, 1.0107145309448242, 0.9143742322921753, 1.142136812210083, 1.1306138634681702, 1.2508413791656494]
DTW algorithm


Exception in Tkinter callback
Traceback (most recent call last):
  File "C:\Users\UOK\anaconda3\envs\tensor2\lib\tkinter\__init__.py", line 1705, in __call__
    return self.func(*args)
  File "<ipython-input-11-e68bd2e4350d>", line 37, in dtw_analysis
    self.draw_graph(data=loss_data)
NameError: name 'loss_data' is not defined


In [10]:
base_path = "./data/base/0_jackson_0.wav"
test_path = "./data/test/0_nicolas_37.wav"
loss_list = get_loss_dtw(base_path, test_path)
loss_list

In dtw function
[0.6226668953895569, 0.8163523077964783, 0.6573701500892639, 0.6833181381225586, 0.5678728818893433, 0.5458644032478333, 0.6413932740688324, 0.6482222229242325, 0.9795098900794983, 0.8415072560310364, 1.0107145309448242, 0.9143742322921753, 1.142136812210083, 1.1306138634681702, 1.2508413791656494]


[0.6226668953895569,
 0.8163523077964783,
 0.6573701500892639,
 0.6833181381225586,
 0.5678728818893433,
 0.5458644032478333,
 0.6413932740688324,
 0.6482222229242325,
 0.9795098900794983,
 0.8415072560310364,
 1.0107145309448242,
 0.9143742322921753,
 1.142136812210083,
 1.1306138634681702,
 1.2508413791656494]

In [5]:
def get_loss_dtw(base_path, test_path): 
    #targerdir = r"./숫자 발음 데이터/test/wav"
    #files = os.listdir(targerdir)
    y_base, sr_base = librosa.load(base_path)
    y_test, sr_test = librosa.load(test_path)
    mfcc1 = to_mfcc(y_base,sr_base)
    mfcc2 = to_mfcc(y_test,sr_test)
    
    dist, cost, acc_cost, path = dtw(mfcc1.T, mfcc2.T, dist=lambda x, y: norm(x - y, ord=1))
    audio_loss = loss(cost, path)

    base_idx = path[0]
    test_idx = path[1]
    loss_list = []
    temp = 0
    count = 1
    for i in range(len(base_idx)):
        x_idx = base_idx[i]
        y_idx = test_idx[i]
        temp += cost[x_idx][y_idx]
        if y_idx == test_idx[i-1]:
            count +=1
        else:
            loss_list.append(temp/count)
            temp = 0
            count = 1
    print("In dtw function")
    print(loss_list)
    return loss_list

In [9]:
def loss(cost, path):
    path_length = len(path[0])
    cost_sum = 0
    for i in range(path_length):
        x_idx = path[0][i]
        y_idx = path[1][i]
        cost_sum += cost[x_idx][y_idx]**2
    return cost_sum/path_length

In [3]:
def _normalize(S):
    min_level_db= -100
    return np.clip((S - min_level_db) / -min_level_db, 0, 1)

# mfcc 변환 함수
# mfcc값을 리턴

def to_mfcc(y, sr):
    S = librosa.core.stft(y, n_fft=1024, hop_length=512, win_length=1024)
    D = np.abs(S)**2
    mel_basis = librosa.filters.mel(sr, 1024, n_mels=40)
    mel_S = np.dot(mel_basis, D)
    log_mel_S = librosa.power_to_db(mel_S)
    S = librosa.feature.melspectrogram(y, sr=sr, n_mels = 128)
    log_S = librosa.power_to_db(S, ref=np.max)
    mfcc = librosa.feature.mfcc(S=log_mel_S, n_mfcc=32)
    mfcc = mfcc.astype(np.float32)
    mfcc = _normalize(mfcc)
    
    return mfcc