In [10]:
from python_speech_features import mfcc
# from python_speech_features import delta
from fastdtw import fastdtw
# import scipy.io.wavfile as wav
import numpy as np
from scipy.spatial.distance import cosine
import librosa
import noisereduce as nr
import os

def preprocessing(audio):
    """Load an audio file and clean it up before feature extraction.

    The file is loaded through ``librosa.load`` with a requested rate of
    16000, passed through ``librosa.effects.trim`` (``top_db=20``) and then
    through ``noisereduce.reduce_noise``. No pre-emphasis is applied here.

    Args:
        audio: Path of the audio file to load.

    Returns:
        Tuple ``(signal, sample_rate)``: the processed signal and the rate
        reported by ``librosa.load``.
    """
    raw_signal, sample_rate = librosa.load(audio, sr=16000)
    # trim() returns (signal, interval); only the signal is needed.
    trimmed_signal = librosa.effects.trim(raw_signal, top_db=20)[0]
    denoised_signal = nr.reduce_noise(trimmed_signal, sample_rate)
    return denoised_signal, sample_rate

def extraction(audio):
    """Compute MFCC features for one audio file.

    The file is first cleaned by ``preprocessing``; the resulting signal is
    handed to ``python_speech_features.mfcc``. Delta features and feature
    normalization were experimented with and are currently disabled.

    Args:
        audio: Path of the audio file.

    Returns:
        The value returned by ``mfcc`` for the cleaned signal (13 cepstral
        coefficients requested, 25 ms window, 10 ms step).
    """
    sig, rate = preprocessing(audio)
    mfcc_feat = mfcc(
        sig,
        # Use the rate reported by preprocessing instead of repeating the
        # literal 16000, so the framing stays correct if the loader's target
        # rate is ever changed.
        samplerate=rate,
        winlen=0.025,
        winstep=0.01,
        numcep=13,
        nfilt=26,
        preemph=0.97,
        ceplifter=22,
        winfunc=np.hamming,
    )
    return mfcc_feat



def calculate_dtw(mfccs1, mfccs2):
    """Return the FastDTW distance between two feature sequences.

    Args:
        mfccs1: First sequence, forwarded to ``fastdtw`` unchanged.
        mfccs2: Second sequence, forwarded to ``fastdtw`` unchanged.

    Returns:
        The first element of the ``fastdtw`` result (the distance). The
        second element is discarded. SciPy's ``cosine`` is passed as the
        ``dist`` callable.
    """
    result = fastdtw(mfccs1, mfccs2, dist=cosine)
    return result[0]

def main(folder1, folder2, output_file):
    """Write the DTW distance of every file pair across two folders.

    Every entry of ``folder1`` is compared with every entry of ``folder2``.
    Each result is printed and written to ``output_file`` as one line.

    Args:
        folder1: Directory holding the first set of audio files.
        folder2: Directory holding the second set of audio files.
        output_file: Path of the text report. It is opened in ``'w'`` mode,
            so an existing file is overwritten.
    """
    # os.listdir makes no ordering promise; sorting keeps the report stable
    # between runs and machines.
    files1 = sorted(os.listdir(folder1))
    files2 = sorted(os.listdir(folder2))

    # Feature extraction is the expensive step, so each path is processed
    # once and reused for every pair it takes part in. This also covers the
    # case where both arguments name the same directory.
    features_by_path = {}

    def features_for(path):
        if path not in features_by_path:
            features_by_path[path] = extraction(path)
        return features_by_path[path]

    with open(output_file, 'w') as f:
        for file1 in files1:
            mfccs1 = features_for(os.path.join(folder1, file1))
            for file2 in files2:
                mfccs2 = features_for(os.path.join(folder2, file2))

                dtw_distance = calculate_dtw(mfccs1, mfccs2)

                result_line = f"Jarak DTW antara {file1} dan {file2}: {dtw_distance}\n"
                # print() appends its own newline, so the console output is
                # double-spaced while the file gets one line per pair.
                print(result_line)
                f.write(result_line)



# Driver for this cell: compare every file in the "01.Ha'" folder with every
# file in the "02.Kha'" folder and write the report to perbandingan2.txt.
# The \' escapes are redundant inside double quotes; they yield a plain '.
folder1 = "../Dataset/01.Ha\'/"
folder2 = "../Dataset/02.Kha\'/"
output_file = '../hasil_perbandingan/perbandingan2.txt'

main(folder1, folder2, output_file)

Jarak DTW antara Ha'_F1_1.wav dan Kha'_F1_1.wav: 601.4208405757324

Jarak DTW antara Ha'_F1_1.wav dan Kha'_F1_2.wav: 609.2450204492603

Jarak DTW antara Ha'_F1_1.wav dan Kha'_F1_3.wav: 612.6831183955919

Jarak DTW antara Ha'_F1_1.wav dan Kha'_F1_Alt_1.wav: 720.1257814335573

Jarak DTW antara Ha'_F1_1.wav dan Kha'_F1_Alt_2.wav: 782.1401587120358

Jarak DTW antara Ha'_F1_1.wav dan Kha'_F1_Alt_3.wav: 715.9567349796249

Jarak DTW antara Ha'_F1_1.wav dan Kha'_F2_1.wav: 735.9269738766882

Jarak DTW antara Ha'_F1_1.wav dan Kha'_F2_2.wav: 708.1678924777162

Jarak DTW antara Ha'_F1_1.wav dan Kha'_F2_3.wav: 745.8841131768972

Jarak DTW antara Ha'_F1_1.wav dan Kha'_M1_1.wav: 811.6475291156479

Jarak DTW antara Ha'_F1_1.wav dan Kha'_M1_2.wav: 822.7836524092078

Jarak DTW antara Ha'_F1_1.wav dan Kha'_M1_3.wav: 808.7180181809936

Jarak DTW antara Ha'_F1_1.wav dan Kha'_M1_Alt_1.wav: 826.5644581154173

Jarak DTW antara Ha'_F1_1.wav dan Kha'_M1_Alt_2.wav: 832.1332537096429

Jarak DTW antara Ha'_F1_1.wa

In [2]:
import librosa
import matplotlib.pyplot as plt
import numpy as np
import IPython.display as ipd
import noisereduce as nr
from scipy.io import wavfile
from fastdtw import fastdtw
from scipy.spatial.distance import cosine
from scipy.spatial.distance import euclidean
from sklearn.preprocessing import normalize
import os

def preprocessing(audio):
    """Load an audio file and clean it up before feature extraction.

    Pipeline, in order: ``librosa.load`` with a requested rate of 16000,
    ``librosa.effects.trim`` (``top_db=20``), ``librosa.util.normalize``,
    ``noisereduce.reduce_noise`` and finally scikit-learn's ``normalize``
    applied to the signal as a single column. Pre-emphasis is not applied
    here.

    Args:
        audio: Path of the audio file to load.

    Returns:
        Tuple ``(signal, sample_rate)``: the processed 1-D signal and the
        rate reported by ``librosa.load``.
    """
    signal, sample_rate = librosa.load(audio, sr=16000)
    # trim() returns (signal, interval); only the signal is needed.
    signal = librosa.effects.trim(signal, top_db=20)[0]
    scaled = librosa.util.normalize(signal)
    denoised = nr.reduce_noise(scaled, sample_rate)
    # sklearn's normalize expects a 2-D array, hence the column view and the
    # ravel() back to 1-D afterwards.
    as_column = denoised[:, np.newaxis]
    unit_scaled = normalize(as_column, axis=0)
    return unit_scaled.ravel(), sample_rate

def extraction(audio):
    """Compute librosa MFCC features for one audio file.

    The file is cleaned by ``preprocessing``, pre-emphasized with
    ``librosa.effects.preemphasis`` and passed to ``librosa.feature.mfcc``.
    The result is run through ``librosa.util.normalize`` and transposed
    before being returned.

    Args:
        audio: Path of the audio file.

    Returns:
        The transposed, normalized MFCC matrix.
    """
    signal, sample_rate = preprocessing(audio)
    emphasized = librosa.effects.preemphasis(signal)

    mfcc_settings = {
        'sr': sample_rate,
        'n_mfcc': 13,
        'window': 'hamming',
        'win_length': 512,
        'htk': True,
        'hop_length': 256,
        'n_fft': 512,
        'norm': 'ortho',
        'n_mels': 20,
    }
    coefficients = librosa.feature.mfcc(y=emphasized, **mfcc_settings)

    # NOTE(review): normalize() is called with its default axis; confirm that
    # is the intended direction for a matrix that is transposed afterwards.
    coefficients = librosa.util.normalize(coefficients)
    return coefficients.T


def calculate_dtw(mfccs1, mfccs2):
    """Return the FastDTW distance between two feature sequences.

    Args:
        mfccs1: First sequence, forwarded to ``fastdtw`` unchanged.
        mfccs2: Second sequence, forwarded to ``fastdtw`` unchanged.

    Returns:
        The first element of the ``fastdtw`` result (the distance). The
        second element is discarded. SciPy's ``cosine`` is passed as the
        ``dist`` callable.
    """
    result = fastdtw(mfccs1, mfccs2, dist=cosine)
    return result[0]

def main(folder1, folder2, output_file):
    """Write the DTW distance of every file pair across two folders.

    Every entry of ``folder1`` is compared with every entry of ``folder2``.
    Each result is printed and written to ``output_file`` as one line.

    Args:
        folder1: Directory holding the first set of audio files.
        folder2: Directory holding the second set of audio files.
        output_file: Path of the text report. It is opened in ``'w'`` mode,
            so an existing file is overwritten.
    """
    # os.listdir makes no ordering promise; sorting keeps the report stable
    # between runs and machines.
    files1 = sorted(os.listdir(folder1))
    files2 = sorted(os.listdir(folder2))

    # Feature extraction is the expensive step, so each path is processed
    # once and reused for every pair it takes part in. This also covers the
    # case where both arguments name the same directory.
    features_by_path = {}

    def features_for(path):
        if path not in features_by_path:
            features_by_path[path] = extraction(path)
        return features_by_path[path]

    with open(output_file, 'w') as f:
        for file1 in files1:
            mfccs1 = features_for(os.path.join(folder1, file1))
            for file2 in files2:
                mfccs2 = features_for(os.path.join(folder2, file2))

                dtw_distance = calculate_dtw(mfccs1, mfccs2)

                result_line = f"Jarak DTW antara {file1} dan {file2}: {dtw_distance}\n"
                # print() appends its own newline, so the console output is
                # double-spaced while the file gets one line per pair.
                print(result_line)
                f.write(result_line)



# Driver for this cell: both folders point at the same "03.Shad" directory,
# so every file is compared with every file of that directory, itself
# included. The report goes to tes.txt.
folder1 = "../Dataset/03.Shad/"
folder2 = "../Dataset/03.Shad/"
output_file = '../hasil_perbandingan/tes.txt'

main(folder1, folder2, output_file)

Jarak DTW antara Shad_F1_1.wav dan Shad_F1_1.wav: 0

Jarak DTW antara Shad_F1_1.wav dan Shad_F1_2.wav: 2.809840669805554

Jarak DTW antara Shad_F1_1.wav dan Shad_F1_3.wav: 2.9990355201752656

Jarak DTW antara Shad_F1_1.wav dan Shad_F1_Alt_1.wav: 21.551894474830227

Jarak DTW antara Shad_F1_1.wav dan Shad_F1_Alt_2.wav: 19.815002681718337

Jarak DTW antara Shad_F1_1.wav dan Shad_F1_Alt_3.wav: 18.679802330266973

Jarak DTW antara Shad_F1_1.wav dan Shad_F2_1.wav: 16.513081000156664

Jarak DTW antara Shad_F1_1.wav dan Shad_F2_2.wav: 13.251458776154887

Jarak DTW antara Shad_F1_1.wav dan Shad_F2_3.wav: 13.465650863690543



KeyboardInterrupt: 

In [8]:
# banding sample_dataset

import librosa
import matplotlib.pyplot as plt
import numpy as np
import IPython.display as ipd
import noisereduce as nr
from scipy.io import wavfile
from fastdtw import fastdtw
from scipy.spatial.distance import cosine
from scipy.spatial.distance import euclidean
from sklearn.preprocessing import normalize
import os

def preprocessing(audio):
    """Load an audio file and clean it up before feature extraction.

    Pipeline, in order: ``librosa.load`` with a requested rate of 16000,
    ``librosa.effects.trim`` (``top_db=20``), ``librosa.util.normalize``,
    ``noisereduce.reduce_noise`` and finally scikit-learn's ``normalize``
    applied to the signal as a single column. Pre-emphasis is not applied
    here.

    Args:
        audio: Path of the audio file to load.

    Returns:
        Tuple ``(signal, sample_rate)``: the processed 1-D signal and the
        rate reported by ``librosa.load``.
    """
    signal, sample_rate = librosa.load(audio, sr=16000)
    # trim() returns (signal, interval); only the signal is needed.
    signal = librosa.effects.trim(signal, top_db=20)[0]
    scaled = librosa.util.normalize(signal)
    denoised = nr.reduce_noise(scaled, sample_rate)
    # sklearn's normalize expects a 2-D array, hence the column view and the
    # ravel() back to 1-D afterwards.
    as_column = denoised[:, np.newaxis]
    unit_scaled = normalize(as_column, axis=0)
    return unit_scaled.ravel(), sample_rate

def extraction(audio):
    """Compute librosa MFCC features for one audio file.

    The file is cleaned by ``preprocessing``, pre-emphasized with
    ``librosa.effects.preemphasis`` and passed to ``librosa.feature.mfcc``.
    The result is run through ``librosa.util.normalize`` and transposed
    before being returned.

    Args:
        audio: Path of the audio file.

    Returns:
        The transposed, normalized MFCC matrix.
    """
    signal, sample_rate = preprocessing(audio)
    emphasized = librosa.effects.preemphasis(signal)

    mfcc_settings = {
        'sr': sample_rate,
        'n_mfcc': 13,
        'window': 'hamming',
        'htk': False,
        'hop_length': 256,
        'n_fft': 512,
        'norm': 'ortho',
        'n_mels': 20,
    }
    coefficients = librosa.feature.mfcc(y=emphasized, **mfcc_settings)

    # NOTE(review): normalize() is called with its default axis; confirm that
    # is the intended direction for a matrix that is transposed afterwards.
    coefficients = librosa.util.normalize(coefficients)
    return coefficients.T


def calculate_dtw(mfccs1, mfccs2):
    """Return the FastDTW distance between two feature sequences.

    Args:
        mfccs1: First sequence, forwarded to ``fastdtw`` unchanged.
        mfccs2: Second sequence, forwarded to ``fastdtw`` unchanged.

    Returns:
        The first element of the ``fastdtw`` result (the distance). The
        second element is discarded. SciPy's ``cosine`` is passed as the
        ``dist`` callable.
    """
    result = fastdtw(mfccs1, mfccs2, dist=cosine)
    return result[0]

def main(folder1, folder2, output_file):
    """Write the DTW distance of every file pair across two folders.

    Every entry of ``folder1`` is compared with every entry of ``folder2``.
    Each result is printed and written to ``output_file`` as one line.

    Args:
        folder1: Directory holding the first set of audio files.
        folder2: Directory holding the second set of audio files.
        output_file: Path of the text report. It is opened in ``'w'`` mode,
            so an existing file is overwritten.
    """
    # os.listdir makes no ordering promise; sorting keeps the report stable
    # between runs and machines.
    files1 = sorted(os.listdir(folder1))
    files2 = sorted(os.listdir(folder2))

    # Feature extraction is the expensive step, so each path is processed
    # once and reused for every pair it takes part in. This also covers the
    # case where both arguments name the same directory.
    features_by_path = {}

    def features_for(path):
        if path not in features_by_path:
            features_by_path[path] = extraction(path)
        return features_by_path[path]

    with open(output_file, 'w') as f:
        for file1 in files1:
            mfccs1 = features_for(os.path.join(folder1, file1))
            for file2 in files2:
                mfccs2 = features_for(os.path.join(folder2, file2))

                dtw_distance = calculate_dtw(mfccs1, mfccs2)

                result_line = f"Jarak DTW antara {file1} dan {file2}: {dtw_distance}\n"
                # print() appends its own newline, so the console output is
                # double-spaced while the file gets one line per pair.
                print(result_line)
                f.write(result_line)



# Driver for this cell: both folders point at the same sample_dataset/qa
# directory, so every file is compared with every file of that directory,
# itself included. The report goes to sample_qa-qa.txt.
folder1 = "../sample_dataset/qa"
folder2 = "../sample_dataset/qa"
output_file = '../hasil_perbandingan/sample_qa-qa.txt'

main(folder1, folder2, output_file)

Jarak DTW antara 21_qa_2_1.wav dan 21_qa_2_1.wav: 0

Jarak DTW antara 21_qa_2_1.wav dan 21_qa_2_2.wav: 0.04226984585037796

Jarak DTW antara 21_qa_2_1.wav dan 21_qa_2_3.wav: 0.05333054996598463

Jarak DTW antara 21_qa_2_1.wav dan 21_qa_2_4.wav: 0.09791763328736369

Jarak DTW antara 21_qa_2_1.wav dan 21_qo_1_1.wav: 0.4289234635171081

Jarak DTW antara 21_qa_2_1.wav dan 21_qo_1_2.wav: 0.3221370552186249

Jarak DTW antara 21_qa_2_1.wav dan 21_qo_1_3.wav: 0.3783781426439674

Jarak DTW antara 21_qa_2_1.wav dan 21_qo_1_4.wav: 0.5190813137822944

Jarak DTW antara 21_qa_2_1.wav dan 21_qo_4_1.wav: 0.41102906198881994

Jarak DTW antara 21_qa_2_1.wav dan 21_qo_4_2.wav: 0.3490508305175336

Jarak DTW antara 21_qa_2_1.wav dan 21_qo_4_3.wav: 0.3786294143337523

Jarak DTW antara 21_qa_2_1.wav dan 21_qo_4_4.wav: 0.3837835537101505

Jarak DTW antara 21_qa_2_1.wav dan 7_qho_5_1.wav: 0.5525860550428402

Jarak DTW antara 21_qa_2_1.wav dan 7_qho_5_2.wav: 0.5036680778229115

Jarak DTW antara 21_qa_2_1.wav da

In [37]:
from python_speech_features import mfcc
# from python_speech_features import delta
from fastdtw import fastdtw
# import scipy.io.wavfile as wav
import numpy as np
from scipy.spatial.distance import cosine
import librosa
import noisereduce as nr
import os

def preprocessing(audio):
    """Load an audio file and clean it up before feature extraction.

    Pipeline, in order: ``librosa.load`` with a requested rate of 16000,
    ``librosa.effects.trim`` (``top_db=20``), ``noisereduce.reduce_noise``
    and finally scikit-learn's ``normalize`` applied to the signal as a
    single column. Pre-emphasis is not applied here.

    Args:
        audio: Path of the audio file to load.

    Returns:
        Tuple ``(signal, sample_rate)``: the processed 1-D signal and the
        rate reported by ``librosa.load``.
    """
    # This cell's import block does not bring in `normalize`, so the function
    # only worked when another cell had already imported it into the kernel.
    # Importing it here makes the cell runnable on a fresh kernel.
    from sklearn.preprocessing import normalize

    y, sr = librosa.load(audio, sr=16000)
    y, _ = librosa.effects.trim(y, top_db=20)
    y = nr.reduce_noise(y, sr)
    # sklearn's normalize expects a 2-D array, hence the column view and the
    # ravel() back to 1-D afterwards.
    y = normalize(y[:, np.newaxis], axis=0).ravel()
    return y, sr

def extraction(audio):
    """Compute normalized MFCC features for one audio file.

    The file is first cleaned by ``preprocessing``; the resulting signal is
    handed to ``python_speech_features.mfcc`` and the features are then run
    through ``librosa.util.normalize``. Delta features were experimented with
    and are currently disabled.

    Args:
        audio: Path of the audio file.

    Returns:
        The normalized value returned by ``mfcc`` for the cleaned signal
        (12 cepstral coefficients requested, 25 ms window, 10 ms step).
    """
    sig, rate = preprocessing(audio)
    mfcc_feat = mfcc(
        sig,
        # Use the rate reported by preprocessing instead of repeating the
        # literal 16000, so the framing stays correct if the loader's target
        # rate is ever changed.
        samplerate=rate,
        winlen=0.025,
        winstep=0.01,
        numcep=12,
        nfilt=20,
        preemph=0.97,
        ceplifter=22,
        winfunc=np.hamming,
    )
    mfcc_feat = librosa.util.normalize(mfcc_feat)
    return mfcc_feat



def calculate_dtw(mfccs1, mfccs2):
    """Return the FastDTW distance between two feature sequences.

    Args:
        mfccs1: First sequence, forwarded to ``fastdtw`` unchanged.
        mfccs2: Second sequence, forwarded to ``fastdtw`` unchanged.

    Returns:
        The first element of the ``fastdtw`` result (the distance). The
        second element is discarded. SciPy's ``cosine`` is passed as the
        ``dist`` callable.
    """
    result = fastdtw(mfccs1, mfccs2, dist=cosine)
    return result[0]

def main(folder1, folder2, output_file):
    """Write the DTW distance of every file pair across two folders.

    Every entry of ``folder1`` is compared with every entry of ``folder2``.
    Each result is printed and written to ``output_file`` as one line.

    Args:
        folder1: Directory holding the first set of audio files.
        folder2: Directory holding the second set of audio files.
        output_file: Path of the text report. It is opened in ``'w'`` mode,
            so an existing file is overwritten.
    """
    # os.listdir makes no ordering promise; sorting keeps the report stable
    # between runs and machines.
    files1 = sorted(os.listdir(folder1))
    files2 = sorted(os.listdir(folder2))

    # Feature extraction is the expensive step, so each path is processed
    # once and reused for every pair it takes part in. This also covers the
    # case where both arguments name the same directory.
    features_by_path = {}

    def features_for(path):
        if path not in features_by_path:
            features_by_path[path] = extraction(path)
        return features_by_path[path]

    with open(output_file, 'w') as f:
        for file1 in files1:
            mfccs1 = features_for(os.path.join(folder1, file1))
            for file2 in files2:
                mfccs2 = features_for(os.path.join(folder2, file2))

                dtw_distance = calculate_dtw(mfccs1, mfccs2)

                result_line = f"Jarak DTW antara {file1} dan {file2}: {dtw_distance}\n"
                # print() appends its own newline, so the console output is
                # double-spaced while the file gets one line per pair.
                print(result_line)
                f.write(result_line)



# Driver for this cell: both folders point at the same sample_dataset/qa
# directory, so every file is compared with every file of that directory,
# itself included. The report goes to ps_sample_qa-qa.txt.
folder1 = "../sample_dataset/qa"
folder2 = "../sample_dataset/qa"
output_file = '../hasil_perbandingan/ps_sample_qa-qa.txt'

main(folder1, folder2, output_file)

Jarak DTW antara 21_qa_2_1.wav dan 21_qa_2_1.wav: 0

Jarak DTW antara 21_qa_2_1.wav dan 21_qa_2_2.wav: 3.474618118130489

Jarak DTW antara 21_qa_2_1.wav dan 21_qa_2_3.wav: 4.3047528342247245

Jarak DTW antara 21_qa_2_1.wav dan 21_qa_2_4.wav: 11.13968913063045

Jarak DTW antara 21_qa_2_1.wav dan 21_qo_1_1.wav: 21.776375213520765

Jarak DTW antara 21_qa_2_1.wav dan 21_qo_1_2.wav: 16.400970884294633

Jarak DTW antara 21_qa_2_1.wav dan 21_qo_1_3.wav: 18.96741399132261

Jarak DTW antara 21_qa_2_1.wav dan 21_qo_1_4.wav: 24.98459972565743

Jarak DTW antara 21_qa_2_1.wav dan 21_qo_4_1.wav: 20.027541144506667

Jarak DTW antara 21_qa_2_1.wav dan 21_qo_4_2.wav: 22.374729840762832

Jarak DTW antara 21_qa_2_1.wav dan 21_qo_4_3.wav: 23.56912741997396

Jarak DTW antara 21_qa_2_1.wav dan 21_qo_4_4.wav: 23.56522976742031

Jarak DTW antara 21_qa_2_1.wav dan 7_qho_5_1.wav: 20.362315225369247

Jarak DTW antara 21_qa_2_1.wav dan 7_qho_5_2.wav: 21.006938053105852

Jarak DTW antara 21_qa_2_1.wav dan 7_qho_5_

In [36]:
import librosa
import matplotlib.pyplot as plt
import numpy as np
import IPython.display as ipd
import noisereduce as nr
from scipy.io import wavfile
from fastdtw import fastdtw
from scipy.spatial.distance import cosine
from scipy.spatial.distance import euclidean
from sklearn.preprocessing import normalize
import os
from dtw import *

def preprocessing(audio):
    """Load an audio file and clean it up before feature extraction.

    Pipeline, in order: ``librosa.load`` with a requested rate of 16000,
    ``librosa.effects.trim`` (``top_db=20``), ``librosa.util.normalize``,
    ``noisereduce.reduce_noise`` and finally scikit-learn's ``normalize``
    applied to the signal as a single column. Pre-emphasis is not applied
    here.

    Args:
        audio: Path of the audio file to load.

    Returns:
        Tuple ``(signal, sample_rate)``: the processed 1-D signal and the
        rate reported by ``librosa.load``.
    """
    signal, sample_rate = librosa.load(audio, sr=16000)
    # trim() returns (signal, interval); only the signal is needed.
    signal = librosa.effects.trim(signal, top_db=20)[0]
    scaled = librosa.util.normalize(signal)
    denoised = nr.reduce_noise(scaled, sample_rate)
    # sklearn's normalize expects a 2-D array, hence the column view and the
    # ravel() back to 1-D afterwards.
    as_column = denoised[:, np.newaxis]
    unit_scaled = normalize(as_column, axis=0)
    return unit_scaled.ravel(), sample_rate

def extraction(audio):
    """Compute librosa MFCC features for one audio file.

    The file is cleaned by ``preprocessing``, pre-emphasized with
    ``librosa.effects.preemphasis`` and passed to ``librosa.feature.mfcc``.
    Feature normalization is disabled in this variant.

    Args:
        audio: Path of the audio file.

    Returns:
        The MFCC matrix transposed, so that the first axis of the returned
        array is the second axis of what ``librosa.feature.mfcc`` produced.
    """
    y, sr = preprocessing(audio)
    y = librosa.effects.preemphasis(y)
    mfccs = librosa.feature.mfcc(y=y,
                                 sr=sr,
                                 n_mfcc=13,
                                 window='hamming',
                                 htk=False,
                                 hop_length=256,
                                 n_fft=512,
                                 norm='ortho',
                                 n_mels=20)
    # Transpose like the other extraction variants in this notebook do. The
    # untransposed matrix has one row per coefficient, whose length depends
    # on the recording, so rows of two different files could not be compared
    # element-wise by the DTW step.
    return mfccs.T


def calculate_dtw(mfccs1, mfccs2):
    """Run ``dtw`` on two feature sequences and return its result as-is.

    Args:
        mfccs1: First sequence, forwarded to ``dtw`` unchanged.
        mfccs2: Second sequence, forwarded to ``dtw`` unchanged.

    Returns:
        Whatever ``dtw`` returns; nothing is extracted from it here.
    """
    # NOTE(review): `dtw` comes from `from dtw import *`, and the saved run of
    # this cell ended in "TypeError: 'str' object is not callable". Confirm
    # which dtw package is installed and whether it accepts a callable via
    # `dist=`; the caller also formats this return value as if it were a
    # plain distance.
    return dtw(mfccs1, mfccs2, dist=cosine)

def main(folder1, folder2, output_file):
    """Write the DTW result of every file pair across two folders.

    Every entry of ``folder1`` is compared with every entry of ``folder2``.
    Each result is printed and written to ``output_file`` as one line.

    Args:
        folder1: Directory holding the first set of audio files.
        folder2: Directory holding the second set of audio files.
        output_file: Path of the text report. It is opened in ``'w'`` mode,
            so an existing file is overwritten.
    """
    # os.listdir makes no ordering promise; sorting keeps the report stable
    # between runs and machines.
    files1 = sorted(os.listdir(folder1))
    files2 = sorted(os.listdir(folder2))

    # Feature extraction is the expensive step, so each path is processed
    # once and reused for every pair it takes part in. This also covers the
    # case where both arguments name the same directory.
    features_by_path = {}

    def features_for(path):
        if path not in features_by_path:
            features_by_path[path] = extraction(path)
        return features_by_path[path]

    with open(output_file, 'w') as f:
        for file1 in files1:
            mfccs1 = features_for(os.path.join(folder1, file1))
            for file2 in files2:
                mfccs2 = features_for(os.path.join(folder2, file2))

                dtw_distance = calculate_dtw(mfccs1, mfccs2)

                result_line = f"Jarak DTW antara {file1} dan {file2}: {dtw_distance}\n"
                # print() appends its own newline, so the console output is
                # double-spaced while the file gets one line per pair.
                print(result_line)
                f.write(result_line)



# Driver for this cell: both folders point at the same "01.Ha'" directory, so
# every file is compared with every file of that directory, itself included.
# The \' escapes are redundant inside double quotes; they yield a plain '.
# NOTE(review): the saved output of this cell is a TypeError, so no report
# from this configuration should be assumed to exist.
folder1 = "../Dataset/01.Ha\'/"
folder2 = "../Dataset/01.Ha\'/"
output_file = '../hasil_perbandingan/dtw-librosa_perbandingan1.txt'

main(folder1, folder2, output_file)

TypeError: 'str' object is not callable

In [None]:
import librosa
import matplotlib.pyplot as plt
import numpy as np
import IPython.display as ipd
import noisereduce as nr
from scipy.io import wavfile
from fastdtw import fastdtw
from scipy.spatial.distance import cosine
from scipy.spatial.distance import euclidean
from sklearn.preprocessing import normalize
import os


def preprocessing(audio):
    """Load an audio file and clean it up before feature extraction.

    Pipeline, in order: ``librosa.load`` with a requested rate of 16000,
    ``librosa.effects.trim`` (``top_db=20``), ``librosa.util.normalize``,
    ``noisereduce.reduce_noise`` and finally scikit-learn's ``normalize``
    applied to the signal as a single column. Pre-emphasis is not applied
    here.

    Args:
        audio: Path of the audio file to load.

    Returns:
        Tuple ``(signal, sample_rate)``: the processed 1-D signal and the
        rate reported by ``librosa.load``.
    """
    signal, sample_rate = librosa.load(audio, sr=16000)
    # trim() returns (signal, interval); only the signal is needed.
    signal = librosa.effects.trim(signal, top_db=20)[0]
    scaled = librosa.util.normalize(signal)
    denoised = nr.reduce_noise(scaled, sample_rate)
    # sklearn's normalize expects a 2-D array, hence the column view and the
    # ravel() back to 1-D afterwards.
    as_column = denoised[:, np.newaxis]
    unit_scaled = normalize(as_column, axis=0)
    return unit_scaled.ravel(), sample_rate

def extraction(audio):
    """Compute librosa MFCC features for one audio file.

    The file is cleaned by ``preprocessing``, pre-emphasized with
    ``librosa.effects.preemphasis`` and passed to ``librosa.feature.mfcc``.
    The result is run through ``librosa.util.normalize`` and transposed
    before being returned.

    Args:
        audio: Path of the audio file.

    Returns:
        The transposed, normalized MFCC matrix.
    """
    signal, sample_rate = preprocessing(audio)
    emphasized = librosa.effects.preemphasis(signal)

    mfcc_settings = {
        'sr': sample_rate,
        'n_mfcc': 13,
        'window': 'hamming',
        'htk': False,
        'hop_length': 256,
        'n_fft': 512,
        'norm': 'ortho',
        'n_mels': 20,
    }
    coefficients = librosa.feature.mfcc(y=emphasized, **mfcc_settings)

    # NOTE(review): normalize() is called with its default axis; confirm that
    # is the intended direction for a matrix that is transposed afterwards.
    coefficients = librosa.util.normalize(coefficients)
    return coefficients.T



# Extract features for one "M1" and one "F1" recording from the "01.Ha'"
# folder. The \' escapes are redundant inside double quotes; they yield a
# plain '.
audio_male = "../Dataset/01.Ha\'/Ha\'_M1_1.wav"
audio_female = "../Dataset/01.Ha\'/Ha\'_F1_1.wav"

mfcc_male = extraction(audio_male)
mfcc_female = extraction(audio_female)

# Create a 14x7 figure with a white background for the comparison plot.
fig = plt.figure(figsize=(14,7))
fig.patch.set_facecolor('white')
# NOTE(review): `plt.plot` is only referenced here, not called, so nothing is
# drawn; the cell looks unfinished or truncated. Confirm what was meant to be
# plotted from mfcc_male / mfcc_female.
plt.plot
