In [64]:
import librosa
from fastdtw import fastdtw
import numpy as np
import IPython.display as ipd

In [198]:
def feature_extraction(audio):
    y , sr = librosa.load(audio, sr=48000)
    y, _ = librosa.effects.trim(y=y, top_db=30)
    y = librosa.effects.preemphasis(y=y)
    D = np.abs(librosa.stft(y))**2
    S = librosa.feature.melspectrogram(S=D, sr=sr, n_mels=20)
    # mfccs = librosa.feature.mfcc(S=S)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13, window='hamming', htk=True, hop_length=100, n_fft=1024, norm='ortho', fmax=8000)
    delta_mfccs = librosa.feature.delta(mfccs, order=1)
    delta2_mfccs = librosa.feature.delta(mfccs, order=2)
    comprehensive_mfccs = np.concatenate((mfccs, delta_mfccs, delta2_mfccs))
    comprehensive_mfccs = librosa.util.normalize(comprehensive_mfccs)
    mfccs = librosa.util.normalize(mfccs)
    # return y, sr, comprehensive_mfccs
    return y, sr, mfccs

In [199]:
audio1 = "../Dataset/01.Ha\'/Ha\'_M1_1.wav"
audio2 = "../Dataset/01.Ha\'/Ha\'_M1_2.wav"

In [200]:
ipd.Audio(audio1)

In [201]:
ipd.Audio(audio2)

In [202]:
y1,sr1,mfccs1 = feature_extraction(audio1)
y2,sr2,mfccs2 = feature_extraction(audio2)

In [203]:
print(y1.shape, sr1, mfccs1.shape)

(519168,) 48000 (13, 5192)


In [204]:
print(y2.shape, sr2, mfccs2.shape)

(522752,) 48000 (13, 5228)


In [205]:
distance, path = fastdtw(mfccs1.T, mfccs2.T)

In [206]:
print(distance)

1773.2320833678596


# DTW

In [207]:
audio_path1 = "../Dataset/01.Ha\'/Ha\'_M1_1.wav"
audio_path2 = "../Dataset/01.Ha\'/Ha\'_M1_2.wav"

y1,sr1,mfccs1 = feature_extraction(audio_path1)
y2,sr2,mfccs2 = feature_extraction(audio_path2)

distance, path = fastdtw(mfccs1.T, mfccs2.T)

print(f'Distance = {distance}')


Distance = 1773.2320833678596


In [208]:
# bacaan sama orang beda

from fastdtw import fastdtw

# Path ke dua file audio
audio_path1 = '../Dataset/01.Ha\'/Ha\'_RY1_1.wav'
audio_path2 = '../Dataset/01.Ha\'/Ha\'_M1_1.wav'

y1,sr1,mfccs1 = feature_extraction(audio_path1)
y2,sr2,mfccs2 = feature_extraction(audio_path2)

distance, path = fastdtw(mfccs1.T, mfccs2.T)

print(f'Distance = {distance}')

Distance = 3235.6557250927926


In [209]:
# bacaan beda orang sama

from fastdtw import fastdtw

# Path ke dua file audio
audio_path1 = '../Dataset/01.Ha\'/Ha\'_M1_1.wav'
audio_path2 = '../Dataset/02.Kha\'/Kha\'_M1_1.wav'

y1,sr1,mfccs1 = feature_extraction(audio_path1)
y2,sr2,mfccs2 = feature_extraction(audio_path2)

distance, path = fastdtw(mfccs1.T, mfccs2.T)

print(f'Distance = {distance}')

Distance = 1962.2749300332127


In [180]:
# bacaan beda orang beda

from fastdtw import fastdtw

# Path ke dua file audio
audio_path1 = '../Dataset/01.Ha\'/Ha\'_RY1_1.wav'
audio_path2 = '../Dataset/02.Kha\'/Kha\'_M1_1.wav'

y1,sr1,mfccs1 = feature_extraction(audio_path1)
y2,sr2,mfccs2 = feature_extraction(audio_path2)

distance, path = fastdtw(mfccs1.T, mfccs2.T)

print(f'Distance = {distance}')

Distance = 461.2164386919271


In [13]:
from python_speech_features import mfcc
from python_speech_features import delta
from fastdtw import fastdtw
import scipy.io.wavfile as wav
import numpy as np

def extraction(audio):
    rate, sig = wav.read(audio)
    mfcc_feat = mfcc(sig,
                samplerate=16000,
                winlen=0.025,
                winstep=0.01,
                numcep=13,
                nfilt=26,
                preemph=0.95,
                ceplifter=22,
                winfunc=np.hamming)
    # mfcc_feat = delta(mfcc_feat, 1)
    # mfcc_feat = delta(mfcc_feat, 2)
    return np.mean(mfcc_feat, axis=0)



audio_path1 = "../Dataset/01.Ha\'/Ha\'_M1_1.wav"
audio_path2 = "../Dataset/01.Ha\'/Ha\'_M1_2.wav"
audio_path3 = '../Dataset/02.Kha\'/Kha\'_M1_1.wav'
audio_path4 = '../Dataset/02.Kha\'/Kha\'_RY1_1.wav'
audio_path5 = "../Dataset/01.Ha\'/Ha\'_RY1_1.wav"


mfccs1 = extraction(audio_path1)
mfccs2 = extraction(audio_path2)
mfccs3 = extraction(audio_path3)
mfccs4 = extraction(audio_path4)
mfccs5 = extraction(audio_path5)
                                        # Bacaan, Orang
distance, path = fastdtw(mfccs1, mfccs2) # sama sama
distance1, path1 = fastdtw(mfccs1, mfccs5) # sama, beda
distance2, path2 = fastdtw(mfccs1, mfccs3) # beda, sama
distance3, path3 = fastdtw(mfccs1, mfccs4) # beda, beda


print(f'Distance = {distance}')
print(f'Distance = {distance1}')
print(f'Distance = {distance2}')
print(f'Distance = {distance3}')



  rate, sig = wav.read(audio)


Distance = 12.924885158142164
Distance = 183.32743529301706
Distance = 40.407571162398476
Distance = 185.78969781709233


In [7]:
from python_speech_features import mfcc
from fastdtw import fastdtw
import scipy.io.wavfile as wav
import numpy as np

def extraction(audio):
    rate, sig = wav.read(audio)
    mfcc_feat = mfcc(sig,
                samplerate=16000,
                winlen=0.025,
                winstep=0.01,
                numcep=13,
                nfilt=20,
                preemph=0.97,
                ceplifter=0,
                winfunc=np.hamming)
    return mfcc_feat.T



audio_path1 = '../Dataset/01.Ha\'/Ha\'_RY1_1.wav'
audio_path2 = '../Dataset/01.Ha\'/Ha\'_M1_1.wav'

mfccs1 = extraction(audio_path1)
mfccs2 = extraction(audio_path2)

distance, path = fastdtw(mfccs1.T, mfccs2.T)

print(f'Distance = {distance}')


  rate, sig = wav.read(audio)


Distance = 215651.9036543797


In [9]:
from python_speech_features import mfcc
from fastdtw import fastdtw
import scipy.io.wavfile as wav
import numpy as np

def extraction(audio):
    rate, sig = wav.read(audio)
    mfcc_feat = mfcc(sig,
                samplerate=16000,
                winlen=0.025,
                winstep=0.01,
                numcep=13,
                nfilt=20,
                preemph=0.97,
                ceplifter=0,
                winfunc=np.hamming)
    return mfcc_feat.T



audio_path1 = '../Dataset/01.Ha\'/Ha\'_M1_1.wav'
audio_path2 = '../Dataset/02.Kha\'/Kha\'_M1_1.wav'

mfccs1 = extraction(audio_path1)
mfccs2 = extraction(audio_path2)

distance, path = fastdtw(mfccs1.T, mfccs2.T)

print(f'Distance = {distance}')


Distance = 99796.40426798534


In [8]:
from python_speech_features import mfcc
from fastdtw import fastdtw
import scipy.io.wavfile as wav
import numpy as np

def extraction(audio):
    rate, sig = wav.read(audio)
    mfcc_feat = mfcc(sig,
                samplerate=16000,
                winlen=0.025,
                winstep=0.01,
                numcep=13,
                nfilt=20,
                preemph=0.97,
                ceplifter=0,
                winfunc=np.hamming)
    return mfcc_feat.T



audio_path1 = '../Dataset/01.Ha\'/Ha\'_RY1_1.wav'
audio_path2 = '../Dataset/02.Kha\'/Kha\'_M1_1.wav'

mfccs1 = extraction(audio_path1)
mfccs2 = extraction(audio_path2)

distance, path = fastdtw(mfccs1.T, mfccs2.T)

print(f'Distance = {distance}')


  rate, sig = wav.read(audio)


Distance = 225065.86131696092
