In [1]:
import os
import numpy as np
import librosa, noisereduce as nr
from sklearn.metrics import classification_report, confusion_matrix
import warnings
warnings.filterwarnings("ignore")

# Spoken-command classes; each name is also the sub-folder looked up under
# the train and test directories.
labels = ["maju", "mundur", "berhenti", "kiri", "kanan"]
train_data = "train"
test_data = "test"

# Experiment grid: the loop at the bottom evaluates every combination.
frame_lengths_ms = [20, 25, 30, 35]        # frame length (ms)
overlaps = [0.25, 0.50, 0.75]              # overlap between frames, as a fraction of the frame
sampling_rates = [8000, 16000, 40000]      # sampling rate (Hz)
show_detail = False                        # when True, print one result line per test file

# 1. File loading and preprocessing (low-pass filter, trim, normalise, denoise)
def safe_load(file_path, sr):
    """Load an audio file and return the preprocessed signal.

    The samples are low-pass filtered (zero-phase Butterworth, order 6),
    trimmed with ``librosa.effects.trim``, normalised with
    ``librosa.util.normalize`` and finally passed through
    ``noisereduce.reduce_noise``.

    Args:
        file_path: Path of the audio file to read.
        sr: Sampling rate handed to ``librosa.load``. ``None`` is forwarded
            unchanged, which asks librosa for the file's native rate.

    Returns:
        ``(y, sr_local)`` on success, where ``sr_local`` is the rate reported
        by ``librosa.load``. ``(None, None)`` if any step fails; the error is
        printed rather than raised so one bad file does not stop a whole run.
    """
    try:
        y, sr_local = librosa.load(file_path, sr=sr)

        from scipy.signal import butter, filtfilt

        nyquist = 0.5 * sr_local

        # Choose the cutoff from the rate the samples actually have
        # (sr_local), not the requested one: the two only differ when the
        # caller passes sr=None, and the filter below is designed at sr_local.
        if sr_local <= 8000:
            cutoff = 3800
        elif sr_local <= 16000:
            cutoff = 6000
        else:
            cutoff = 10000

        # butter() needs a normalised cutoff strictly below 1. The fixed
        # values above satisfy that for 8/16/40 kHz; for lower rates fall
        # back to 95% of Nyquist instead of failing.
        if cutoff >= nyquist:
            cutoff = 0.95 * nyquist

        # NOTE: this is low-pass smoothing, not a pre-emphasis filter.
        # filtfilt runs the filter forwards and backwards (zero phase).
        b, a = butter(6, cutoff / nyquist, btype='low', analog=False)
        y = filtfilt(b, a, y)

        # Remaining preprocessing, in the original order.
        y, _ = librosa.effects.trim(y)
        y = librosa.util.normalize(y)
        y = nr.reduce_noise(y=y, sr=sr_local)

        return y, sr_local

    except Exception as e:
        # Deliberate best effort: report the failure and let the caller
        # decide what to do with a missing signal.
        print(f"Tidak bisa baca {file_path}: {e}")
        return None, None


# 2. MFCC feature extraction
def get_mfcc(file_path, sr, frame_ms, overlap):
    """Return the MFCC sequence of one audio file, one row per frame.

    Args:
        file_path: Path of the audio file.
        sr: Sampling rate forwarded to ``safe_load``.
        frame_ms: Analysis frame length in milliseconds.
        overlap: Fraction of each frame shared with the next one
            (e.g. ``0.5`` for 50 %).

    Returns:
        The transposed output of ``librosa.feature.mfcc`` (13 coefficients
        per row). If the file could not be loaded, a single all-zero row of
        shape ``(1, 13)`` is returned instead.
    """
    n_mfcc = 13

    y, sr_local = safe_load(file_path, sr)
    if y is None:
        return np.zeros((1, n_mfcc))

    # Size the frame from the rate the samples actually have, which is the
    # same rate passed to librosa.feature.mfcc below.
    n_fft = int(sr_local * (frame_ms / 1000.0))
    # Hop is the non-overlapping part of the frame; never let it reach 0.
    hop_length = int(max(1, n_fft * (1 - overlap)))
    mfcc = librosa.feature.mfcc(
        y=y, sr=sr_local, n_mfcc=n_mfcc, n_fft=n_fft, hop_length=hop_length
    )
    return mfcc.T

# 3. Summarise an MFCC sequence by its mean and standard deviation
def get_feature_vector(mfcc):
    """Collapse ``mfcc`` along axis 0 into a single vector.

    Returns the axis-0 means followed by the axis-0 standard deviations,
    joined into one 1-D array.
    """
    summaries = [reducer(mfcc, axis=0) for reducer in (np.mean, np.std)]
    return np.concatenate(summaries)

# 4. Cosine similarity between two feature vectors
def cosine_similarity(a, b):
    """Return the cosine similarity of ``a`` and ``b``.

    A small constant (1e-8) is added to the product of the norms so that
    all-zero vectors give 0 instead of a division by zero.
    """
    dot_product = np.dot(a, b)
    norm_product = np.linalg.norm(a) * np.linalg.norm(b) + 1e-8
    return dot_product / norm_product

# 5. DTW distance between two MFCC sequences
def dtw_distance(mfcc1, mfcc2):
    """Return the accumulated DTW cost between two MFCC frame sequences.

    Both inputs are transposed before being handed to
    ``librosa.sequence.dtw``, and frames are compared with the Euclidean
    metric. The value returned is the last cell of the accumulated cost
    matrix; it is not normalised by path length.
    """
    # Only the final accumulated cost is needed, so skip the warping-path
    # backtracking; with backtrack=False librosa returns just the matrix.
    D = librosa.sequence.dtw(
        X=mfcc1.T, Y=mfcc2.T, metric="euclidean", backtrack=False
    )
    return D[-1, -1]

# 6. Build and collect the reference features of the training set
def build_train_features(sr, frame_ms, overlap):
    """Extract features for every ``.wav`` file under ``train_data/<label>``.

    Args:
        sr: Sampling rate forwarded to ``get_mfcc``.
        frame_ms: Frame length in milliseconds forwarded to ``get_mfcc``.
        overlap: Frame overlap fraction forwarded to ``get_mfcc``.

    Returns:
        Three parallel lists ``(train_mfccs, train_vectors, train_labels)``:
        the MFCC sequence of each file, its mean/std feature vector, and the
        label of the folder it came from.
    """
    train_mfccs = []      # MFCC sequences, used by DTW
    train_vectors = []    # mean/std vectors, used by cosine similarity
    train_labels = []

    for label in labels:
        folder = os.path.join(train_data, label)
        # isdir (not exists): a stray file named like a label must be
        # skipped, not passed to listdir.
        if not os.path.isdir(folder):
            continue
        # os.listdir returns entries in arbitrary order; sorting makes the
        # reference order, and therefore argmax/argmin tie-breaking in the
        # predictors, reproducible across runs and machines.
        for file in sorted(os.listdir(folder)):
            if not file.lower().endswith(".wav"):
                continue
            path = os.path.join(folder, file)
            mfcc = get_mfcc(path, sr, frame_ms, overlap)
            train_mfccs.append(mfcc)
            train_vectors.append(get_feature_vector(mfcc))
            train_labels.append(label)
    return train_mfccs, train_vectors, train_labels

# 7. Classify a test sample by cosine similarity against all training samples
def predict_cosine_all(mfcc_test, train_vectors, train_labels):
    """Compare against EVERY training file (not per-class averages).

    Returns the label of the training vector with the highest cosine
    similarity to the test sample's feature vector.
    """
    query = get_feature_vector(mfcc_test)
    scores = []
    for reference in train_vectors:
        scores.append(cosine_similarity(query, reference))
    winner = np.argmax(scores)
    return train_labels[winner]

# 8. Classify a test sample by DTW distance against all training samples
def predict_dtw_all(mfcc_test, train_mfccs, train_labels):
    """Compare against EVERY training file using the raw MFCC sequences.

    Returns the label of the training sequence with the smallest DTW
    distance to the test sequence.
    """
    costs = []
    for reference in train_mfccs:
        costs.append(dtw_distance(mfcc_test, reference))
    winner = np.argmin(costs)
    return train_labels[winner]

# 9. Experiment loop: evaluate both classifiers for every
#    (sampling rate, frame length, overlap) combination and print the results.
for sr in sampling_rates:
    print(f"===== SAMPLING RATE: {sr} Hz =====")

    for frame_ms in frame_lengths_ms:
        for overlap in overlaps:
            print(f"\n--- Frame: {frame_ms} ms | Overlap: {int(overlap*100)}% ---")

            # Reference features are rebuilt for each configuration because
            # they depend on sr, frame_ms and overlap.
            train_mfccs, train_vectors, train_labels = build_train_features(sr, frame_ms, overlap)
            # Parallel lists: ground truth and the prediction of each method.
            true_labels, pred_cosine_all, pred_dtw_all = [], [], []

            for label in labels:
                folder = os.path.join(test_data, label)
                if not os.path.exists(folder):
                    continue

                for file in os.listdir(folder):
                    if not file.lower().endswith(".wav"):
                        continue
                    path = os.path.join(folder, file)
                    # Test features use the same settings as the references.
                    mfcc_test = get_mfcc(path, sr, frame_ms, overlap)
                    # The enclosing folder name is the ground-truth label.
                    true_labels.append(label)

                    p_cosine = predict_cosine_all(mfcc_test, train_vectors, train_labels)
                    p_dtw = predict_dtw_all(mfcc_test, train_mfccs, train_labels)

                    pred_cosine_all.append(p_cosine)
                    pred_dtw_all.append(p_dtw)

                    if show_detail:
                        print(f"{file:20s} | Asli: {label:10s} | Cosine: {p_cosine:10s} | DTW: {p_dtw:10s}")

            # Per-class metrics and confusion matrix for each method; passing
            # labels=labels fixes the row/column order to the order of `labels`.
            print("\n=== METODE COSINE SIMILARITY ===")
            print(classification_report(true_labels, pred_cosine_all, labels=labels, zero_division=0))
            print("Confusion Matrix Cosine:\n", confusion_matrix(true_labels, pred_cosine_all, labels=labels))

            print("\n=== METODE DTW ===")
            print(classification_report(true_labels, pred_dtw_all, labels=labels, zero_division=0))
            print("Confusion Matrix DTW:\n", confusion_matrix(true_labels, pred_dtw_all, labels=labels))

===== SAMPLING RATE: 8000 Hz =====

--- Frame: 20 ms | Overlap: 25% ---

=== METODE COSINE SIMILARITY ===
              precision    recall  f1-score   support

        maju       0.75      0.60      0.67         5
      mundur       1.00      0.60      0.75         5
    berhenti       0.62      1.00      0.77         5
        kiri       0.67      0.80      0.73         5
       kanan       1.00      0.80      0.89         5

    accuracy                           0.76        25
   macro avg       0.81      0.76      0.76        25
weighted avg       0.81      0.76      0.76        25

Confusion Matrix Cosine:
 [[3 0 1 1 0]
 [1 3 1 0 0]
 [0 0 5 0 0]
 [0 0 1 4 0]
 [0 0 0 1 4]]

=== METODE DTW ===
              precision    recall  f1-score   support

        maju       1.00      1.00      1.00         5
      mundur       1.00      1.00      1.00         5
    berhenti       1.00      1.00      1.00         5
        kiri       1.00      1.00      1.00         5
       kanan       1.0