In [1]:
import sounddevice as sd 
import soundfile as sf
import tkinter as tk
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
from matplotlib.figure import Figure
import os, glob
import os.path
import librosa
from math import *
import sys
from dtw import dtw
from numpy.linalg import norm

In [2]:
class Algorithm():
    """Feature extraction and DTW-based comparison of two audio recordings."""

    def __init__(self):
        super().__init__()
        print('Algorithm class init')

    def _normalize(self, S):
        """Linearly rescale ``S`` from ``[-100, 0]`` onto ``[0, 1]``.

        Values outside that range are clipped to 0 or 1.
        """
        min_level_db = -100
        return np.clip((S - min_level_db) / -min_level_db, 0, 1)

    def to_mfcc(self, y, sr):
        """Compute normalized MFCC features for an audio signal.

        Args:
            y: audio samples (as returned by ``librosa.load``).
            sr: sampling rate of ``y``.

        Returns:
            float32 array of 32 MFCCs per STFT frame, passed through
            ``_normalize`` (presumably shaped (32, n_frames) -- confirm
            against the librosa version in use).
        """
        # Power spectrogram -> 40-band mel spectrogram -> dB scale.
        power_spec = np.abs(
            librosa.stft(y, n_fft=1024, hop_length=512, win_length=1024)
        ) ** 2
        # Keyword arguments: newer librosa releases no longer accept these
        # parameters positionally.
        mel_basis = librosa.filters.mel(sr=sr, n_fft=1024, n_mels=40)
        log_mel = librosa.power_to_db(np.dot(mel_basis, power_spec))

        mfcc = librosa.feature.mfcc(S=log_mel, n_mfcc=32).astype(np.float32)
        return self._normalize(mfcc)

    def loss(self, cost, path):
        """Mean of the squared local costs along a warping path.

        Args:
            cost: 2-D cost matrix indexable as ``cost[i][j]``.
            path: pair ``(row_indices, col_indices)`` of equal length.

        Raises:
            ZeroDivisionError: if the path is empty.
        """
        path_length = len(path[0])
        squared_sum = sum(cost[i][j] ** 2 for i, j in zip(path[0], path[1]))
        return squared_sum / path_length

    def _mean_cost_per_test_frame(self, cost, path):
        """Average the local cost over each run of equal test-frame indices.

        Consecutive path steps that map to the same test index (``path[1]``)
        form one group; the result holds one mean cost per group, in path
        order, including the final group.
        """
        losses = []
        total = 0.0
        count = 0
        previous_idx = None
        for base_idx, test_idx in zip(path[0], path[1]):
            # A change of test index closes the current group *before* the
            # new step's cost is accumulated.
            if count and test_idx != previous_idx:
                losses.append(total / count)
                total, count = 0.0, 0
            total += cost[base_idx][test_idx]
            count += 1
            previous_idx = test_idx
        if count:
            losses.append(total / count)
        return losses

    def get_loss_dtw(self, base_path, test_path):
        """Align two audio files with DTW and return per-test-frame losses.

        Args:
            base_path: path of the reference recording.
            test_path: path of the recording to evaluate.

        Returns:
            list with the mean local DTW cost (L1 distance between MFCC
            frames) for every test frame visited by the warping path.
        """
        y_base, sr_base = librosa.load(base_path)
        y_test, sr_test = librosa.load(test_path)
        mfcc_base = self.to_mfcc(y_base, sr_base)
        mfcc_test = self.to_mfcc(y_test, sr_test)

        # dtw returns (distance, cost matrix, accumulated cost, path); only
        # the local cost matrix and the path are needed here.
        _, cost, _, path = dtw(
            mfcc_base.T, mfcc_test.T, dist=lambda x, y: norm(x - y, ord=1)
        )
        return self._mean_cost_per_test_frame(cost, path)

In [14]:
class Application(tk.Frame):
    """Tk frame that records a short clip and plots its DTW loss curve."""

    # Reference recording the user's clip is compared against.
    BASE_WAV_PATH = "./data/base/0_jackson_0.wav"
    # Where the microphone recording is stored and later read back from.
    TEST_WAV_PATH = "./data/test/test.wav"
    SAMPLE_RATE = 48000      # Hz
    RECORD_SECONDS = 2

    def __init__(self, master=None):
        super().__init__(master)
        self.master = master
        self.pack()
        self.draw_graph()
        self.create_widgets()
        self.api = Algorithm()
        print('Application class init')

    def create_widgets(self):
        """Create the record / DTW / CNN buttons and pack them into the frame."""
        self.record_button = tk.Button(self, text="녹음 시작", command=self.Voice_rec)
        self.record_button.pack(side="top")

        self.dtw_button = tk.Button(self, text="DTW 알고리즘", command=self.dtw_analysis)
        self.dtw_button.pack(side="bottom")

        self.cnn_button = tk.Button(self, text="CNN 알고리즘", command=self.cnn_analysis)
        self.cnn_button.pack(side="bottom")

    def Voice_rec(self):
        """Record a stereo clip from the default input and save it as a WAV file.

        The call blocks (``sd.wait()``) until the recording has finished.
        """
        fs = self.SAMPLE_RATE
        recording = sd.rec(int(self.RECORD_SECONDS * fs),
                           samplerate=fs, channels=2)
        sd.wait()

        # Make sure the target directory exists; otherwise the write fails.
        out_dir = os.path.dirname(self.TEST_WAV_PATH)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

        # Save as WAV at the recording's sampling rate.
        return sf.write(self.TEST_WAV_PATH, recording, fs)

    def dtw_analysis(self):
        """Compare the recorded clip with the reference clip and plot the loss."""
        if not os.path.isfile(self.TEST_WAV_PATH):
            # Nothing has been recorded yet; report instead of raising inside
            # the Tk callback.
            print("No recording found at " + self.TEST_WAV_PATH)
            return
        loss_list = self.api.get_loss_dtw(self.BASE_WAV_PATH, self.TEST_WAV_PATH)
        print("DTW algorithm")
        self.draw_graph(test_data=loss_list)

    def cnn_analysis(self):
        """Placeholder for the CNN-based analysis; only prints a message."""
        print("CNN algorithm")

    def draw_graph(self, base_data=None, test_data=None):
        """Draw ``test_data`` against its index on the embedded figure.

        Args:
            base_data: accepted but not used.
            test_data: sequence of loss values. When ``None`` the figure and
                its Tk canvas are created and an empty plot is drawn;
                otherwise the existing figure is cleared and redrawn.
        """
        # Identity check: "== None" is evaluated element-wise on NumPy arrays
        # and cannot be used as a condition.
        if test_data is None:
            values = []
            indices = []
            # Create the figure the graph is drawn on and embed it in the window.
            self.fig = Figure(figsize=(10, 7), dpi=100)
            self.canvas = FigureCanvasTkAgg(self.fig, master=self.master)
            self.canvas.get_tk_widget().pack()
        else:
            self.fig.clear()
            values = test_data
            indices = range(len(test_data))

        # Add a single axes and reuse it; a second add_subplot call can create
        # a new, empty axes instead of returning the first one.
        ax = self.fig.add_subplot(1, 1, 1)
        ax.plot(indices, values)
        ax.set_ylim([0, 2])
        self.canvas.draw()
    
# Build the main window, attach the application frame to it, and hand
# control to Tk's event loop (blocks until the window is closed).
root = tk.Tk()
root.geometry("800x500")
root.title("영어 발음 분석")
app = Application(root)
app.mainloop()



Algorithm class init
Application class init
DTW algorithm
