# Dynamic Time Warping Analysis untuk Dataset Suara

Notebook ini akan menganalisis dataset suara menggunakan Dynamic Time Warping (DTW) untuk:
1. Menghitung jarak antar-file suara
2. Memvisualisasikan panjang sinyal audio
3. Menganalisis kemiripan (similarity) menggunakan DTW
4. Membuat matriks jarak dan heatmap

In [1]:
# Import required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import librosa
import librosa.display
import os
from scipy.spatial.distance import euclidean
from scipy.spatial.distance import cdist
import warnings
warnings.filterwarnings('ignore')

# Set plotting style: apply the 'seaborn-v0_8' matplotlib style sheet, then
# select seaborn's "husl" palette for subsequent plots.
# NOTE(review): the 'seaborn-v0_8' style name is presumably only available on
# newer matplotlib releases — confirm against the installed version.
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# DTW implementation
def dtw_distance(x, y, dist_func=euclidean):
    """Return the Dynamic Time Warping distance between two sequences.

    The accumulated cost of aligning ``x[i]`` with ``y[j]`` is the local
    distance ``dist_func(x[i], y[j])`` plus the cheapest accumulated cost among
    the three predecessor cells ``(i-1, j)``, ``(i, j-1)`` and ``(i-1, j-1)``.
    The value returned is the accumulated cost of the last cell, i.e. of
    aligning the complete sequences.

    Only two rows of the accumulated-cost table are kept in memory, so the
    memory footprint is O(len(y)) instead of O(len(x) * len(y)). The arithmetic
    performed for each cell is unchanged, so the result equals the one obtained
    from the full table.

    Args:
        x: First sequence; must support ``len()`` and integer indexing.
        y: Second sequence; must support ``len()`` and integer indexing.
        dist_func: Callable ``dist_func(a, b)`` giving the local distance
            between one element of ``x`` and one element of ``y``. Defaults to
            ``scipy.spatial.distance.euclidean``.

    Returns:
        The accumulated DTW cost as a NumPy float.

    Raises:
        IndexError: If ``x`` or ``y`` is empty.
    """
    N, M = len(x), len(y)

    # Row 0: the only way to reach (0, j) is by moving along y.
    prev_row = np.empty(M)
    prev_row[0] = dist_func(x[0], y[0])
    for j in range(1, M):
        prev_row[j] = dist_func(x[0], y[j]) + prev_row[j - 1]

    curr_row = np.empty(M)
    for i in range(1, N):
        # Column 0: the only way to reach (i, 0) is by moving along x.
        curr_row[0] = dist_func(x[i], y[0]) + prev_row[0]
        for j in range(1, M):
            choices = [prev_row[j], curr_row[j - 1], prev_row[j - 1]]
            curr_row[j] = dist_func(x[i], y[j]) + min(choices)
        # Reuse the two buffers instead of allocating a new row each time.
        prev_row, curr_row = curr_row, prev_row

    # After the final swap, prev_row holds the last completed row.
    return prev_row[M - 1]

def dtw_path(x, y, dist_func=euclidean):
    """Compute the DTW distance together with the warping path.

    Args:
        x: First sequence; must support ``len()`` and integer indexing.
        y: Second sequence; must support ``len()`` and integer indexing.
        dist_func: Callable ``dist_func(a, b)`` giving the local distance
            between one element of ``x`` and one element of ``y``.

    Returns:
        A tuple ``(distance, path)``. ``distance`` is the accumulated cost of
        the last cell of the cost table; ``path`` is a list of ``(i, j)`` index
        pairs running from ``(0, 0)`` to ``(len(x) - 1, len(y) - 1)``.
    """
    n_rows, n_cols = len(x), len(y)

    # Accumulated-cost table; every cell is overwritten below.
    acc = np.full((n_rows, n_cols), np.inf)
    acc[0, 0] = dist_func(x[0], y[0])

    # Borders can only be reached by walking straight along one sequence.
    for r in range(1, n_rows):
        acc[r, 0] = dist_func(x[r], y[0]) + acc[r - 1, 0]
    for c in range(1, n_cols):
        acc[0, c] = dist_func(x[0], y[c]) + acc[0, c - 1]

    # Interior: local distance plus the cheapest of the three predecessors.
    for r in range(1, n_rows):
        for c in range(1, n_cols):
            cheapest = min([acc[r - 1, c], acc[r, c - 1], acc[r - 1, c - 1]])
            acc[r, c] = dist_func(x[r], y[c]) + cheapest

    # Backtrack from the last cell to (0, 0). The order of `steps` fixes the
    # tie-break: step back in x first, then in y, then diagonally.
    steps = ((-1, 0), (0, -1), (-1, -1))
    r, c = n_rows - 1, n_cols - 1
    trail = [(r, c)]
    while r > 0 or c > 0:
        if r == 0:
            dr, dc = 0, -1
        elif c == 0:
            dr, dc = -1, 0
        else:
            neighbours = [acc[r + sr, c + sc] for sr, sc in steps]
            dr, dc = steps[np.argmin(neighbours)]
        r += dr
        c += dc
        trail.append((r, c))

    return acc[n_rows - 1, n_cols - 1], trail[::-1]

# Signal that the DTW helpers above have been defined.
print("DTW implementation ready!")

DTW implementation ready!


In [2]:
# Location of the speaker_datasets folder the audio is loaded from.
# NOTE(review): this is an absolute, machine-specific Windows path — adjust it
# before running the notebook on another machine.
DATASET_PATH = r"c:\Users\achma\OneDrive\Documents\1Semester 5\PSD\speaker_datasets"

def load_audio_dataset(dataset_path, sample_rate=22050, top_db=20,
                       max_files_per_speaker=10, min_samples=1000):
    """Load silence-trimmed audio clips from a folder-per-speaker dataset.

    Every sub-directory of ``dataset_path`` is treated as one speaker and its
    name is used as the label. Inside each speaker folder, files ending in
    ``.wav``, ``.mp3``, ``.m4a`` or ``.flac`` (case-insensitive) are loaded
    with librosa and trimmed with ``librosa.effects.trim``. Progress is
    reported with ``print``.

    Directory listings are sorted so that the subset selected by
    ``max_files_per_speaker`` is the same on every platform
    (``os.listdir`` itself returns entries in arbitrary order).

    Args:
        dataset_path: Directory containing one sub-directory per speaker.
        sample_rate: Value passed to ``librosa.load`` as ``sr``.
        top_db: Value passed to ``librosa.effects.trim`` as ``top_db``.
        max_files_per_speaker: Maximum number of audio files taken from each
            speaker folder; ``None`` takes all of them.
        min_samples: Clips whose trimmed length is not strictly greater than
            this many samples are skipped.

    Returns:
        A tuple ``(audio_data, labels, file_paths)`` of three parallel lists:
        the trimmed signal, the speaker folder name, and the audio file's
        base name (not its full path).
    """
    audio_extensions = ('.wav', '.mp3', '.m4a', '.flac')
    audio_data = []
    labels = []
    file_paths = []

    print("Loading audio files...")

    for speaker_folder in sorted(os.listdir(dataset_path)):
        speaker_path = os.path.join(dataset_path, speaker_folder)

        # Only directories represent speakers; ignore stray files.
        if not os.path.isdir(speaker_path):
            continue

        print(f"Processing speaker: {speaker_folder}")

        audio_files = sorted(
            f for f in os.listdir(speaker_path)
            if f.lower().endswith(audio_extensions)
        )
        print(f"  Found {len(audio_files)} audio files")

        for audio_file in audio_files[:max_files_per_speaker]:
            file_path = os.path.join(speaker_path, audio_file)

            try:
                y, _ = librosa.load(file_path, sr=sample_rate)
                y, _ = librosa.effects.trim(y, top_db=top_db)
            except Exception as e:
                # Deliberately best-effort: one unreadable file must not
                # abort loading the rest of the dataset.
                print(f"    ✗ Error loading {audio_file}: {e}")
                continue

            if len(y) <= min_samples:
                # Report the drop instead of discarding the clip silently.
                print(f"    - Skipped {audio_file}: only {len(y)} samples after trimming")
                continue

            audio_data.append(y)
            labels.append(speaker_folder)
            file_paths.append(audio_file)
            print(f"    ✓ Loaded {audio_file}: {len(y)} samples")

    return audio_data, labels, file_paths

# Run the loader on the configured dataset folder.
audio_data, labels, file_paths = load_audio_dataset(DATASET_PATH)

print(f"\nDataset loaded:")
print(f"Total files: {len(audio_data)}")
print(f"Speakers: {set(labels)}")

# Tracking table: one row per loaded clip, with its length in samples and in
# seconds (sample count divided by 22050).
df_audio = pd.DataFrame(dict(
    file_name=file_paths,
    speaker=labels,
    audio_length=list(map(len, audio_data)),
    duration_sec=[len(clip) / 22050 for clip in audio_data],
))

# Per-speaker file count plus mean / standard deviation of clip length.
print("\nDataset summary:")
print(
    df_audio
    .groupby('speaker')
    .agg({
        'file_name': 'count',
        'audio_length': ['mean', 'std'],
        'duration_sec': ['mean', 'std'],
    })
    .round(3)
)

Loading audio files...
Processing speaker: Harits
  Found 97 audio files
    ✓ Loaded Buka1.wav: 30720 samples
    ✓ Loaded Buka10.wav: 11776 samples
    ✓ Loaded Buka11.wav: 15360 samples
    ✓ Loaded Buka12.wav: 31232 samples
    ✓ Loaded Buka13.wav: 37888 samples
    ✓ Loaded Buka14.wav: 23552 samples
    ✓ Loaded Buka15.wav: 31744 samples
    ✓ Loaded Buka16.wav: 20992 samples
    ✓ Loaded Buka17.wav: 18432 samples
    ✓ Loaded Buka18.wav: 12288 samples
Processing speaker: Lutfi
  Found 100 audio files
    ✓ Loaded 6 Nov, 18.58​.wav: 19456 samples
    ✓ Loaded 6 Nov, 18.59​(10).wav: 23040 samples
    ✓ Loaded 6 Nov, 18.59​(12).wav: 25600 samples
    ✓ Loaded 6 Nov, 18.59​(13).wav: 26112 samples
    ✓ Loaded 6 Nov, 18.59​(14).wav: 25600 samples
    ✓ Loaded 6 Nov, 18.59​(2).wav: 24064 samples
    ✓ Loaded 6 Nov, 18.59​(3).wav: 22528 samples
    ✓ Loaded 6 Nov, 18.59​(4).wav: 22016 samples
    ✓ Loaded 6 Nov, 18.59​(5).wav: 23552 samples
    ✓ Loaded 6 Nov, 18.59​(6).wav: 25088 sampl