# **Speech Recognition**

In [11]:
# ===============================================================
#   SPEECH RECOGNITION PIPELINE
#   MULTI-LABEL: buka / tutup + speaker
#   Full features (156) → PCA → Classification
#   + Feature Selection (Information Gain)
# ===============================================================

import os
import numpy as np
import pandas as pd
import librosa
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.decomposition import PCA
from sklearn.feature_selection import mutual_info_classif
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
# -----------------------------------------------

from google.colab import drive
drive.mount('/content/drive')

DATASET_DIR = "/content/drive/MyDrive/11. Speech Recognition/dataset_renamed"  # ganti path sesuai lokasi dataset
LABEL_FILE   = "/content/drive/MyDrive/11. Speech Recognition/label.csv"


# ===============================================================
# 1. LOAD LABEL FILE
# ===============================================================
labels = pd.read_csv(LABEL_FILE)
print("Jumlah data:", len(labels))

# ===============================================================
# 2. FEATURE EXTRACTION (TOTAL ±156)
# ===============================================================
def extract_features(path):
    """Ekstraksi fitur statistical, temporal, spectral (±156 fitur)."""
    y, sr = librosa.load(path, sr=None)

    # ===== TEMPORAL FEATURES =====
    zcr = np.mean(librosa.feature.zero_crossing_rate(y))
    energy = np.mean(y ** 2)
    rms = np.mean(librosa.feature.rms(y=y))

    # ===== SPECTRAL FEATURES =====
    spec_centroid = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))
    spec_bw = np.mean(librosa.feature.spectral_bandwidth(y=y, sr=sr))
    spec_rolloff = np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr))
    spec_flux = np.mean(np.diff(np.abs(librosa.stft(y))).mean(axis=1))

    # ===== MFCC 20 + ΔMFCC 20 + ΔΔ MFCC 20 (total 60) =====
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)
    mfcc_mean = np.mean(mfcc, axis=1)

    mfcc_delta = librosa.feature.delta(mfcc)
    mfcc_delta_mean = np.mean(mfcc_delta, axis=1)

    mfcc_delta2 = librosa.feature.delta(mfcc, order=2)
    mfcc_delta2_mean = np.mean(mfcc_delta2, axis=1)

    # ===== CHROMA (12), MEL (40) =====
    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    chroma_mean = np.mean(chroma, axis=1)

    mel = librosa.feature.melspectrogram(y=y, sr=sr)
    mel_mean = np.mean(mel, axis=1)

    # ===== STATISTICAL FEATURES =====
    mean = np.mean(y)
    std = np.std(y)
    skew = pd.Series(y).skew()
    kurtosis = pd.Series(y).kurt()

    features = np.hstack([
        zcr, energy, rms,
        spec_centroid, spec_bw, spec_rolloff, spec_flux,
        mfcc_mean, mfcc_delta_mean, mfcc_delta2_mean,
        chroma_mean, mel_mean[:40],   # mel truncated to 40 for 156 total
        mean, std, skew, kurtosis
    ])

    return features


# ===============================================================
# 3. LOAD ALL FEATURES
# ===============================================================
from sklearn.preprocessing import LabelEncoder

X = []
y1 = []  # buka/tutup
y2 = []  # speaker

le_speaker = LabelEncoder()
labels['speaker_enc'] = le_speaker.fit_transform(labels['speaker'])  # tory/putra → 0/1

for i, row in labels.iterrows():
    file_path = os.path.join(DATASET_DIR, row['filename'])
    feats = extract_features(file_path)
    X.append(feats)

    y1.append(1 if row['command'] == "buka" else 0)
    y2.append(row['speaker_enc'])  # encode speaker menjadi angka

X = np.array(X)
y1 = np.array(y1)
y2 = np.array(y2)

print("Shape fitur:", X.shape)
print("Mapping speaker:", dict(zip(le_speaker.classes_, le_speaker.transform(le_speaker.classes_))))


# ===============================================================
# 4. SPLIT DATA
# ===============================================================
X_train, X_test, y_train_cmd, y_test_cmd = train_test_split(
    X, y1, test_size=0.2, random_state=42
)

# ===============================================================
# 5. NORMALISASI
# ===============================================================
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled  = scaler.transform(X_test)

# ===============================================================
# 6. EXPERIMENT 1 — FULL FEATURES → PCA → CLASSIFIER
# ===============================================================
print("\n=== EKSPERIMEN 1: PCA 100 components ===")

pca = PCA(n_components=100)
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)

clf1 = SVC(kernel="rbf")
clf1.fit(X_train_pca, y_train_cmd)
pred1 = clf1.predict(X_test_pca)

print("Akurasi:", accuracy_score(y_test_cmd, pred1))
print(classification_report(y_test_cmd, pred1))

# ===============================================================
# 7. FEATURE SELECTION (INFORMATION GAIN)
# ===============================================================
print("\n=== FEATURE SELECTION: Information Gain ===")

IG = mutual_info_classif(X_train_scaled, y_train_cmd)
ranking = np.argsort(IG)[::-1]  # urut dari IG tertinggi ke rendah

TOP_K = 30  # jumlah fitur terbaik (silakan ubah)
selected_idx = ranking[:TOP_K]

X_train_sel = X_train_scaled[:, selected_idx]
X_test_sel  = X_test_scaled[:, selected_idx]

# ===============================================================
# 8. EXPERIMENT 2 — IG SELECTED FEATURES → CLASSIFIER
# ===============================================================
print("\n=== EKSPERIMEN 2: Seleksi Fitur (IG top-30) ===")

clf2 = SVC(kernel="rbf")
clf2.fit(X_train_sel, y_train_cmd)
pred2 = clf2.predict(X_test_sel)

print("Akurasi:", accuracy_score(y_test_cmd, pred2))
print(classification_report(y_test_cmd, pred2))



Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Jumlah data: 201
Shape fitur: (201, 123)
Mapping speaker: {'putra': np.int64(0), 'tory': np.int64(1)}

=== EKSPERIMEN 1: PCA 100 components ===
Akurasi: 0.975609756097561
              precision    recall  f1-score   support

           0       0.95      1.00      0.97        19
           1       1.00      0.95      0.98        22

    accuracy                           0.98        41
   macro avg       0.97      0.98      0.98        41
weighted avg       0.98      0.98      0.98        41


=== FEATURE SELECTION: Information Gain ===

=== EKSPERIMEN 2: Seleksi Fitur (IG top-30) ===
Akurasi: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      1.00      1.00        22

    accuracy                           1.00        41
   macro avg       1.00      1.00      1.00     