In [1]:
# 1. Import libraries yang diperlukan
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import librosa
import librosa.display
import os
from scipy.spatial.distance import cdist
import warnings
warnings.filterwarnings('ignore')

# Plot styling. Matplotlib 3.6 renamed its bundled 'seaborn-darkgrid' style sheet to
# 'seaborn-v0_8-darkgrid' and newer releases reject the old name, so pick whichever
# name this installation actually provides instead of hard-coding one of them.
for _style_name in ('seaborn-v0_8-darkgrid', 'seaborn-darkgrid'):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break
else:
    # Neither style sheet is installed: fall back to seaborn's own darkgrid theme.
    sns.set_style('darkgrid')
sns.set_palette('husl')
plt.rcParams['figure.figsize'] = (14, 6)
plt.rcParams['font.size'] = 10
# The check mark was stored as mojibake (UTF-8 bytes decoded as cp1252); restored here.
print('✓ Libraries imported successfully!')

ValueError: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject

In [None]:
# 2. Function for loading the audio dataset
def load_audio_dataset(dataset_path, ext=('.wav', '.mp3', '.m4a', '.flac'),
                       sr=22050, top_db=20, min_samples=1000):
    """Load every audio file found in the speaker sub-folders of ``dataset_path``.

    The expected layout is ``dataset_path/<speaker_folder>/<audio_file>``; the
    sub-folder name is used as the label. Entries directly inside
    ``dataset_path`` that are not directories are ignored. Folders and files
    are visited in sorted order so that the position of each recording in the
    returned lists is the same on every run and every platform.

    Parameters
    ----------
    dataset_path : str
        Root folder containing one sub-folder per speaker.
    ext : str or iterable of str
        File-name suffix(es) to accept, compared case-insensitively.
    sr : int or None
        Passed to ``librosa.load`` as its ``sr`` argument.
    top_db : float
        Passed to ``librosa.effects.trim`` as its ``top_db`` argument.
    min_samples : int
        Recordings whose trimmed length is not strictly greater than this
        many samples are dropped.

    Returns
    -------
    tuple of four lists (audio_data, labels, file_paths, sample_rates)
        Parallel lists: the trimmed signal, the speaker folder name, the
        audio file name (base name only, not the full path) and the sampling
        rate reported by ``librosa.load``.

    Notes
    -----
    A file that fails to load or trim is reported with ``print`` and skipped;
    it does not abort the scan. A missing ``dataset_path`` still raises the
    ``OSError`` coming from ``os.listdir``.
    """
    # str.endswith accepts only a str or a tuple, so normalise lists/sets/strings.
    # Suffixes are lower-cased because they are matched against the lower-cased name.
    raw_suffixes = (ext,) if isinstance(ext, str) else tuple(ext)
    suffixes = tuple(suffix.lower() for suffix in raw_suffixes)

    audio_data, labels, file_paths, sample_rates = [], [], [], []
    # os.listdir returns entries in arbitrary order; sort for reproducible indices.
    for speaker_folder in sorted(os.listdir(dataset_path)):
        speaker_path = os.path.join(dataset_path, speaker_folder)
        if not os.path.isdir(speaker_path):
            continue
        for audio_file in sorted(os.listdir(speaker_path)):
            if not audio_file.lower().endswith(suffixes):
                continue
            file_path = os.path.join(speaker_path, audio_file)
            try:
                y, file_sr = librosa.load(file_path, sr=sr)
                y, _ = librosa.effects.trim(y, top_db=top_db)
            except Exception as e:
                # Best effort: one unreadable file must not stop the whole scan.
                print(f'Error loading {audio_file}: {e}')
                continue
            if len(y) > min_samples:
                audio_data.append(y)
                labels.append(speaker_folder)
                file_paths.append(audio_file)
                sample_rates.append(file_sr)
    return audio_data, labels, file_paths, sample_rates


# Dataset location. Set the SPEAKER_DATASET_PATH environment variable to point the
# notebook at another copy of the data; the original author's folder stays the default
# so existing runs behave as before.
DATASET_PATH = os.environ.get(
    'SPEAKER_DATASET_PATH',
    r'c:/Users/achma/OneDrive/Documents/1Semester 5/PSD/speaker_datasets',
)
# Fail early with a message that names the folder and explains how to override it.
if not os.path.isdir(DATASET_PATH):
    raise FileNotFoundError(
        f'Dataset folder not found: {DATASET_PATH!r}. '
        'Set the SPEAKER_DATASET_PATH environment variable to the dataset root.'
    )
audio_data, labels, file_paths, sample_rates = load_audio_dataset(DATASET_PATH)
print(f'Total files: {len(audio_data)}')
# Sorted so the printed speaker list is identical from run to run (set order is not).
print(f'Speakers: {sorted(set(labels))}')

In [None]:
# 3. Audio feature extraction (MFCC)
def extract_mfcc_features(audio_data, sample_rates, n_mfcc=13):
    """Compute one MFCC matrix per recording.

    ``audio_data`` and ``sample_rates`` are walked in parallel with ``zip``,
    so the result has as many entries as the shorter of the two.

    Each entry is the transpose of what ``librosa.feature.mfcc`` returns for
    that signal, called with ``n_mfcc`` coefficients. The result is a plain
    list, in input order.
    """
    return [
        librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=n_mfcc).T
        for signal, rate in zip(audio_data, sample_rates)
    ]

# One MFCC matrix per loaded recording, in the same order as audio_data.
mfcc_features = extract_mfcc_features(
    audio_data=audio_data,
    sample_rates=sample_rates,
)
print(f'MFCC extracted for {len(mfcc_features)} files.')

In [None]:
# 4. DTW implementation (Euclidean frame distance by default)
def dtw_distance_matrix(mfcc_features, metric='euclidean'):
    """Build the symmetric matrix of pairwise DTW distances.

    Parameters
    ----------
    mfcc_features : sequence of 2-D arrays
        One feature matrix per recording. Each matrix is transposed before
        being handed to ``librosa.sequence.dtw`` as ``X`` / ``Y``.
    metric : str
        Frame-to-frame distance forwarded to ``librosa.sequence.dtw``.

    Returns
    -------
    numpy.ndarray of shape (n, n)
        Entry ``[i, j]`` is the last element of the accumulated cost matrix
        for recordings ``i`` and ``j``, i.e. the total cost of the optimal
        alignment. The value is not normalised by sequence length.

    Notes
    -----
    The diagonal is left at 0 without running DTW: aligning a sequence with
    itself costs nothing for any metric where d(x, x) = 0, which includes the
    Euclidean default. Only the upper triangle is computed and then mirrored.
    """
    n = len(mfcc_features)
    dtw_matrix = np.zeros((n, n))
    for i in range(n):
        # Start at i + 1: self-alignment is 0 by definition, so skip that DTW run.
        for j in range(i + 1, n):
            # backtrack=False: only the accumulated cost is needed here, so do not
            # spend time recovering a warping path. The call then returns the
            # accumulated cost matrix on its own rather than a (D, wp) tuple.
            accumulated_cost = librosa.sequence.dtw(
                X=mfcc_features[i].T,
                Y=mfcc_features[j].T,
                metric=metric,
                backtrack=False,
            )
            dist = accumulated_cost[-1, -1]
            dtw_matrix[i, j] = dist
            dtw_matrix[j, i] = dist
    return dtw_matrix

# Pairwise DTW distances between all extracted MFCC sequences.
dtw_matrix = dtw_distance_matrix(mfcc_features=mfcc_features)
print('DTW distance matrix calculated.')

In [None]:
# 5. Visualize the distribution of audio lengths (in samples, after trimming)
lengths = list(map(len, audio_data))
fig, ax = plt.subplots(figsize=(10, 4))
sns.histplot(lengths, bins=20, kde=True, color='skyblue', ax=ax)
ax.set_title('Distribusi Panjang Audio (samples)')
ax.set_xlabel('Jumlah Sample')
ax.set_ylabel('Frekuensi')
plt.show()

In [None]:
# 6. Visualize the DTW matrices for one example pair: the local cost matrix and the
#    accumulated (cumulative) cost matrix with the optimal warping path on top.
from librosa.sequence import dtw  # kept so the name `dtw` stays defined for any later cell
i, j = 0, 1  # example: the first two files

# Local cost: Euclidean distance between every frame of file i (rows) and every frame
# of file j (columns). mfcc_features entries are already frame-major, so no transpose.
C = cdist(mfcc_features[i], mfcc_features[j], metric='euclidean')
# Accumulated cost and warping path derived from that same local cost matrix, so the
# two panels are guaranteed to describe the same alignment.
D, wp = dtw(C=C)
path = np.asarray(wp)

fig, (ax_cost, ax_acc) = plt.subplots(1, 2, figsize=(12, 5))

# Left panel: the local cost matrix. Previously this panel re-plotted D, so both
# panels showed the accumulated matrix despite the 'Cost Matrix' title.
im_cost = ax_cost.imshow(C, origin='lower', aspect='auto', cmap='viridis')
ax_cost.set_title('DTW Cost Matrix')
ax_cost.set_xlabel(f'Frame (file {j})')
ax_cost.set_ylabel(f'Frame (file {i})')
fig.colorbar(im_cost, ax=ax_cost)

# Right panel: accumulated cost with the warping path (x = column index, y = row index).
im_acc = ax_acc.imshow(D, origin='lower', aspect='auto', cmap='plasma')
ax_acc.plot(path[:, 1], path[:, 0], 'w')
ax_acc.set_title('DTW Cumulative Matrix & Path')
ax_acc.set_xlabel(f'Frame (file {j})')
ax_acc.set_ylabel(f'Frame (file {i})')
fig.colorbar(im_acc, ax=ax_acc)

fig.tight_layout()
plt.show()

In [None]:
# 7. DTW distance between an existing ("old") recording and a new one.
# Example: file 0 plays the role of the old data and file 1 the new data.
old_file, new_file = 0, 1
dtw_dist = dtw_matrix[old_file, new_file]
print(f'Jarak DTW antara file 0 dan 1: {dtw_dist:.2f}')