<a href="https://colab.research.google.com/github/Aisyaululasmi/Assignment/blob/main/PAP_Tugas2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# === Script Konversi M4A ke WAV di Google Colab ===

import os
import glob
import subprocess

# --- Mount Google Drive (use this when the dataset lives on Drive) ---
from google.colab import drive
drive.mount('/content/drive')
# Adjust these paths to match your own Drive layout.
# INPUT_ROOT_DIR is scanned for one sub-folder per command containing *.m4a
# files; OUTPUT_ROOT_DIR receives the mirrored sub-folders with *.wav files.
INPUT_ROOT_DIR = "/content/drive/MyDrive/dataset_perintah_m4a"
OUTPUT_ROOT_DIR = "/content/drive/MyDrive/dataset_perintah_wav"

# --- Alternative configuration (dataset uploaded directly to the Colab session) ---
# Make sure the 'dataset_perintah_m4a' folder has been uploaded to the root of
# the Colab session before switching to these paths.
#INPUT_ROOT_DIR = "dataset_perintah_m4a"
#OUTPUT_ROOT_DIR = "dataset_perintah" # This folder will be created in the Colab session root

# Target audio format passed to ffmpeg for every converted file.
TARGET_SAMPLE_RATE = 16000  # passed to ffmpeg's -ar option
TARGET_CHANNELS = 1  # passed to ffmpeg's -ac option
TARGET_BIT_DEPTH = "s16le"  # becomes the ffmpeg codec name "pcm_s16le"

# Command names; each one is also the name of a sub-folder under both the
# input and the output root directory.
COMMANDS = ["maju", "mundur", "berhenti", "kiri", "kanan"]

# --- Fungsi Konversi (Sama seperti sebelumnya) ---
def convert_m4a_to_wav(input_m4a_path, output_wav_path, sample_rate, channels, bit_depth):
    """Convert one M4A file to a PCM WAV file by invoking ffmpeg.

    Args:
        input_m4a_path: Path of the source .m4a file.
        output_wav_path: Path of the .wav file to write. An existing file at
            this path is overwritten so the script can be re-run safely.
        sample_rate: Output sample rate, passed to ffmpeg's ``-ar``.
        channels: Output channel count, passed to ffmpeg's ``-ac``.
        bit_depth: PCM sample format suffix (e.g. ``"s16le"``); the codec
            passed to ffmpeg is ``pcm_<bit_depth>``.

    Returns:
        True when ffmpeg exits successfully, False when ffmpeg reports an
        error or the ffmpeg executable cannot be found.
    """
    command = [
        "ffmpeg",
        # Without -y ffmpeg asks "Overwrite? [y/N]" when the output exists and
        # then aborts in a non-interactive session, so every re-run would fail.
        "-y",
        # Never let ffmpeg read from (or block on) the notebook's stdin.
        "-nostdin",
        "-i", input_m4a_path,
        "-acodec", f"pcm_{bit_depth}",
        "-ac", str(channels),
        "-ar", str(sample_rate),
        output_wav_path,
    ]

    try:
        # capture_output keeps ffmpeg's verbose log out of the cell output;
        # stderr is only shown when the conversion fails.
        subprocess.run(command, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        print(f"  Gagal mengkonversi '{os.path.basename(input_m4a_path)}'. Error: {e.stderr.strip()}")
        return False
    except FileNotFoundError:
        # Raised when the ffmpeg executable itself is missing.
        print("  Error: FFmpeg tidak ditemukan. (Ini seharusnya tidak terjadi di Colab)")
        return False

    print(f"  Berhasil mengkonversi: {os.path.basename(input_m4a_path)} -> {os.path.basename(output_wav_path)}")
    return True

# --- Main Program (Sama seperti sebelumnya) ---
if __name__ == "__main__":
    # Walk every command folder, convert each .m4a it contains and keep a
    # running tally of attempted vs. successful conversions.
    print("--- Memulai Konversi M4A ke WAV ---")

    os.makedirs(OUTPUT_ROOT_DIR, exist_ok=True)

    total_files_processed = 0
    total_files_converted = 0

    for command_name in COMMANDS:
        source_dir = os.path.join(INPUT_ROOT_DIR, command_name)
        target_dir = os.path.join(OUTPUT_ROOT_DIR, command_name)

        # The output folder is created even when the input folder turns out
        # to be empty, mirroring the expected dataset layout.
        os.makedirs(target_dir, exist_ok=True)

        print(f"\nMemproses perintah: '{command_name}'...")

        found_files = glob.glob(os.path.join(source_dir, "*.m4a"))

        if len(found_files) == 0:
            print(f"  Tidak ada file .m4a ditemukan di '{source_dir}'. Lewati.")
            continue

        for source_path in found_files:
            total_files_processed += 1

            # Same file name as the source, with the extension swapped to .wav.
            stem = os.path.splitext(os.path.basename(source_path))[0]
            target_path = os.path.join(target_dir, stem + ".wav")

            succeeded = convert_m4a_to_wav(
                source_path,
                target_path,
                TARGET_SAMPLE_RATE,
                TARGET_CHANNELS,
                TARGET_BIT_DEPTH,
            )
            if succeeded:
                total_files_converted += 1

    print("\n--- Konversi Selesai ---")
    print(f"Total file M4A yang ditemukan dan diproses: {total_files_processed}")
    print(f"Total file yang berhasil dikonversi ke WAV: {total_files_converted}")
    print(f"File WAV disimpan di folder: '{OUTPUT_ROOT_DIR}'")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
--- Memulai Konversi M4A ke WAV ---

Memproses perintah: 'maju'...
  Berhasil mengkonversi: maju_02.m4a -> maju_02.wav
  Berhasil mengkonversi: maju_06.m4a -> maju_06.wav
  Berhasil mengkonversi: maju_03.m4a -> maju_03.wav
  Berhasil mengkonversi: maju_10.m4a -> maju_10.wav
  Berhasil mengkonversi: maju_08.m4a -> maju_08.wav
  Berhasil mengkonversi: maju_09.m4a -> maju_09.wav
  Berhasil mengkonversi: maju_07.m4a -> maju_07.wav
  Berhasil mengkonversi: maju_05.m4a -> maju_05.wav
  Berhasil mengkonversi: maju_04.m4a -> maju_04.wav
  Berhasil mengkonversi: maju_01.m4a -> maju_01.wav

Memproses perintah: 'mundur'...
  Berhasil mengkonversi: mundur_08.m4a -> mundur_08.wav
  Berhasil mengkonversi: mundur_09.m4a -> mundur_09.wav
  Berhasil mengkonversi: mundur_10.m4a -> mundur_10.wav
  Berhasil mengkonversi: mundur_05.m4a -> mundur_05.wav
  Berhasil mengkonversi: mu

In [3]:
!pip install wavio

Collecting wavio
  Downloading wavio-0.0.9-py3-none-any.whl.metadata (5.7 kB)
Downloading wavio-0.0.9-py3-none-any.whl (9.5 kB)
Installing collected packages: wavio
Successfully installed wavio-0.0.9


In [33]:
import numpy as np
from scipy.fft import fft, fftfreq
from scipy.signal.windows import hann
import wavio
import os
import glob
import random
from sklearn.metrics import confusion_matrix, classification_report

# Configuration
SAMPLE_RATE = 16000  # expected sample rate (Hz) of every WAV file
FRAME_SIZE = 512  # samples per analysis frame for the FFT
OVERLAP_FACTOR = 0.5  # fraction of a frame shared with the next frame
N_FFT = FRAME_SIZE  # FFT length; equal to the frame size (no zero padding)

# Voice-activity detection: a frame counts as speech when its energy exceeds
# ENERGY_THRESHOLD times the loudest frame's energy, and a speech segment must
# last at least MIN_SPEECH_DURATION samples (0.3 s) to be accepted.
ENERGY_THRESHOLD = 0.01
MIN_SPEECH_DURATION = SAMPLE_RATE * 0.3

DATASET_ROOT_DIR = "/content/drive/MyDrive/dataset_perintah_wav"

TRAIN_RATIO = 0.8  # fraction of each command's files used for training
COMMANDS = ["maju", "mundur", "berhenti", "kiri", "kanan"]

# Trained model: maps a command name to its mean reference spectrum. It is
# filled by train_model_from_split and read by recognize_command; it must
# exist before either runs, otherwise they fail with NameError.
REFERENCE_PATTERNS = {}

# Pre-Processing Data
def normalize_audio(audio_data):
  """Scale audio so that its largest absolute sample becomes 1.0.

  Args:
    audio_data: Array-like of audio samples (integer PCM or float).

  Returns:
    A float64 array in the range [-1, 1]. Silent (all-zero) or empty input
    is returned unscaled as a float64 array.
  """
  # Convert to float first: np.abs on int16 data overflows for -32768 (the
  # result stays -32768), which would yield a wrong peak and samples far
  # outside [-1, 1].
  samples = np.asarray(audio_data, dtype=np.float64)
  if samples.size == 0:
    # np.max raises on empty input; nothing to scale anyway.
    return samples

  peak = np.max(np.abs(samples))
  if peak > 0:
    return samples / peak
  return samples

def voice_activity_detection(audio_data, sr, energy_threshold, min_duration_samples):
  """Return the first sufficiently long speech segment found by frame energy.

  The signal is cut into 20 ms frames every 10 ms. A frame counts as speech
  when its energy exceeds ``energy_threshold`` times the energy of the
  loudest frame. The first run of speech frames spanning at least
  ``min_duration_samples`` samples is returned; shorter runs are skipped.

  Args:
    audio_data: 1-D array-like of audio samples.
    sr: Sample rate in Hz, used to derive the frame and hop lengths.
    energy_threshold: Relative threshold (fraction of the maximum frame
      energy) above which a frame is considered speech.
    min_duration_samples: Minimum length, in samples, of an accepted segment.

  Returns:
    A slice of the input samples containing the detected speech, or an empty
    array when the input is too short, silent, or has no long-enough segment.
  """
  samples = np.asarray(audio_data)
  frame_length = int(sr * 0.02)  # 20 ms frame
  hop_length = int(sr * 0.01)    # 10 ms hop

  # A zero hop would make range() raise; too-short input has no full frame.
  if frame_length <= 0 or hop_length <= 0 or len(samples) < frame_length:
    return np.array([])

  # Energies are computed on a float copy so that squaring integer PCM
  # samples cannot overflow; the returned slice keeps the input's dtype.
  as_float = samples.astype(np.float64)

  # "+ 1" so the last full frame is included (a signal exactly one frame
  # long yields one frame instead of none).
  energies = np.array([
    np.sum(as_float[offset:offset + frame_length] ** 2)
    for offset in range(0, len(as_float) - frame_length + 1, hop_length)
  ])

  max_energy = np.max(energies)
  if max_energy == 0:
    return np.array([])

  speech_frames = energies > energy_threshold * max_energy

  in_speech = False
  start_idx = -1

  for i, is_speech in enumerate(speech_frames):
    if is_speech and not in_speech:
      in_speech = True
      start_idx = i * hop_length
    elif in_speech and not is_speech:
      in_speech = False
      # The segment ends at the end of the first non-speech frame.
      end_idx = i * hop_length + frame_length
      if end_idx - start_idx >= min_duration_samples:
        return samples[start_idx:end_idx]

  # Speech that is still running when the signal ends.
  if in_speech and len(samples) - start_idx >= min_duration_samples:
    return samples[start_idx:]

  return np.array([])

# Ekstraksi Fitur
def extract_fft_spectrum(audio_segment, sr, frame_size, n_fft, overlap_factor):
  """Compute the normalized mean power spectrum of an audio segment.

  The segment is split into overlapping frames, each frame is multiplied by
  a Hann window and transformed with an FFT; the power spectra (first
  ``n_fft // 2`` bins) are averaged over all frames and scaled to sum to 1.

  Args:
    audio_segment: 1-D array-like of audio samples.
    sr: Sample rate in Hz. Not used by the computation; kept so the
      signature stays compatible with existing callers.
    frame_size: Number of samples per frame.
    n_fft: FFT length.
    overlap_factor: Fraction of each frame shared with the next one.

  Returns:
    An array of ``n_fft // 2`` values summing to 1, an all-zero array for a
    silent segment, or an empty array when the segment is shorter than one
    frame.
  """
  audio_segment = np.asarray(audio_segment)
  if len(audio_segment) < frame_size:
    return np.array([])

  # Clamp to at least one sample: an overlap factor of 1.0 would otherwise
  # produce a zero step and make range() raise.
  hop_size = max(1, int(frame_size * (1 - overlap_factor)))
  half_bins = n_fft // 2
  window = hann(frame_size)

  spectra = [
    np.abs(fft(audio_segment[offset:offset + frame_size] * window, n=n_fft)[:half_bins]) ** 2
    for offset in range(0, len(audio_segment) - frame_size + 1, hop_size)
  ]

  if not spectra:
    return np.array([])

  mean_spectrum = np.mean(spectra, axis=0)

  # Normalize to unit sum so that overall loudness does not affect the
  # spectrum; a silent segment is returned unscaled (all zeros).
  total_power = np.sum(mean_spectrum)
  if total_power > 0:
    return mean_spectrum / total_power
  return mean_spectrum

# Data Splitting
def load_and_split_dataset(commands, dataset_root_dir, train_ratio, seed=None):
  """Collect the WAV files of every command and split them into train/test.

  Args:
    commands: Command names; each is a sub-folder of ``dataset_root_dir``.
    dataset_root_dir: Root directory holding one folder per command.
    train_ratio: Fraction of each command's files assigned to training; the
      count is rounded down and the remaining files go to testing.
    seed: Optional seed for the shuffle. When given, the split is
      reproducible across runs; when None, the global ``random`` state is
      used and the split differs from run to run.

  Returns:
    A dict mapping each command to ``{'train': [...], 'test': [...]}`` lists
    of file paths. Commands whose folder is missing or empty keep two empty
    lists.
  """

  print("\nData Splitting")
  print("==============")

  # One generator for all commands so a single seed fixes the whole split.
  rng = random.Random(seed) if seed is not None else random

  dataset_split = {cmd: {'train': [], 'test': []} for cmd in commands}

  for command_name in commands:
    command_path = os.path.join(dataset_root_dir, command_name)
    if not os.path.isdir(command_path):
      print(f"Direktori '{command_path}' tidak ditemukan.")
      continue

    # glob returns files in arbitrary order; sorting first makes the seeded
    # shuffle depend only on the seed and the file names.
    wav_files = sorted(glob.glob(os.path.join(command_path, "*.wav")))
    if not wav_files:
      print(f"Tidak ada file WAV ditemukan di '{command_path}'.")
      continue

    rng.shuffle(wav_files)

    num_train = int(len(wav_files) * train_ratio)
    split = dataset_split[command_name]
    split['train'] = wav_files[:num_train]
    split['test'] = wav_files[num_train:]

    print(f"Perintah '{command_name}' : {len(split['train'])} training, {len(split['test'])} testing.")

  return dataset_split

# Training Model (dari training set)
def train_model_from_split(training_data_paths_by_command):
  """Build one reference spectrum per command from its training files.

  For every command, each WAV file is read, normalized, trimmed to its
  speech segment and turned into a mean power spectrum. The average of all
  valid spectra is stored in REFERENCE_PATTERNS under the command name.
  Files that fail to load or yield no usable speech are skipped.

  Args:
    training_data_paths_by_command: Dict mapping a command name to the list
      of WAV file paths used for training.
  """

  print("\nTraining Model")
  print("==============")

  for command_name, wav_paths in training_data_paths_by_command.items():
    if len(wav_paths) == 0:
      print(f"Tidak ada data pelatihan untuk perintah '{command_name}'.")
      continue

    print(f"Memproses {len(wav_paths)} sampel pelatihan untuk perintah : '{command_name}'...")

    valid_spectra = []
    for wav_file in wav_paths:
      try:
        wav = wavio.read(wav_file)
        samples = wav.data.flatten()

        # A mismatching sample rate is only reported; the file is still used.
        if wav.rate != SAMPLE_RATE:
          print(f"Sample rate file '{wav_file}' ({wav.rate} Hz) tidak cocok.")

        segment = voice_activity_detection(
          normalize_audio(samples), SAMPLE_RATE, ENERGY_THRESHOLD, MIN_SPEECH_DURATION
        )
        if len(segment) != 0:
          spectrum = extract_fft_spectrum(segment, SAMPLE_RATE, FRAME_SIZE, N_FFT, OVERLAP_FACTOR)
          if len(spectrum) != 0:
            valid_spectra.append(spectrum)

      except Exception as e:
        # Any failure on one file must not abort training for the others.
        print(f"Terjadi kesalahan saat memproses '{wav_file}' : {e}. Dilewati.")

    if valid_spectra:
      REFERENCE_PATTERNS[command_name] = np.mean(valid_spectra, axis=0)
      print(f"Pola referensi untuk '{command_name}' telah dibuat dari {len(valid_spectra)} sampel valid.")
    else:
      print(f"Tidak ada sampel yang valid untuk perintah '{command_name}'. Pola referensi tidak dibuat.")

# Recognition
def recognize_command(audio_data_to_recognize):
  """Classify an utterance by its nearest reference spectrum.

  The audio is normalized, trimmed to its speech segment and converted to a
  mean power spectrum, which is compared (Euclidean distance) against every
  entry of REFERENCE_PATTERNS.

  Args:
    audio_data_to_recognize: 1-D array of audio samples.

  Returns:
    A ``(label, distance)`` tuple. ``label`` is the closest command name, or
    a "Tidak dikenali (...)" message when no speech is found, the speech is
    too short, or the best distance exceeds the acceptance threshold.
  """
  print("Menganalisis ucapan...")

  # Maximum accepted distance; tune this based on your own test results.
  DISTANCE_THRESHOLD = 0.5

  normalized = normalize_audio(audio_data_to_recognize)
  speech_segment = voice_activity_detection(normalized, SAMPLE_RATE, ENERGY_THRESHOLD, MIN_SPEECH_DURATION)
  if len(speech_segment) == 0:
    return "Tidak dikenali (tidak ada ucapan)", float('inf')

  input_spectrum = extract_fft_spectrum(speech_segment, SAMPLE_RATE, FRAME_SIZE, N_FFT, OVERLAP_FACTOR)
  if len(input_spectrum) == 0:
    return "Tidak dikenali (ucapan terlalu pendek)", float('inf')

  best_match_command, min_distance = "Tidak dikenali", float('inf')

  for command, ref_spectrum in REFERENCE_PATTERNS.items():
    # Only references with the same (non-zero) number of bins are comparable.
    if len(ref_spectrum) == 0 or len(ref_spectrum) != len(input_spectrum):
      continue

    distance = np.linalg.norm(input_spectrum - ref_spectrum)
    if distance < min_distance:
      best_match_command, min_distance = command, distance

  if min_distance > DISTANCE_THRESHOLD:
    return "Tidak dikenali (jarak terlalu jauh)", min_distance
  return best_match_command, min_distance

# --- Fungsi Evaluasi Model pada Set Pengujian ---
def evaluate_model(testing_data_paths_by_command, commands):
  """Evaluate the trained model on the test files and print the metrics.

  Every test file is read and passed to recognize_command; the prediction is
  compared with the command the file belongs to. Accuracy, a confusion
  matrix and a classification report are printed.

  Args:
    testing_data_paths_by_command: Dict mapping a command name (the true
      label) to the list of WAV file paths used for testing.
    commands: All command names; used as labels of the classification
      report together with any other predicted label.

  Returns:
    The accuracy in percent (0-100). Files that raise during processing
    count as tested and incorrect. Returns 0.0 when there are no test files.
  """

  print("\nEvaluasi Model")
  print("==============")

  error_label = "Processing Error"
  true_labels = []
  predicted_labels = []
  total_correct = 0
  total_tested = 0

  for true_command, wav_paths in testing_data_paths_by_command.items():
    if not wav_paths:
      print(f"Tidak ada data pengujian untuk perintah '{true_command}'.")
      continue

    print(f"Menguji {len(wav_paths)} sampel untuk perintah : '{true_command}'")
    for wav_file in wav_paths:
      total_tested += 1
      try:
        audio_obj = wavio.read(wav_file)
        test_audio = audio_obj.data.flatten()

        # Report a mismatching sample rate the same way training does
        # instead of ignoring it silently; the file is still evaluated.
        if audio_obj.rate != SAMPLE_RATE:
          print(f"Sample rate file '{wav_file}' ({audio_obj.rate} Hz) tidak cocok.")

        recognized_cmd, _ = recognize_command(test_audio)
      except Exception as e:
        # A failing file is recorded as its own class so it still shows up
        # in the confusion matrix and lowers the accuracy.
        print(f"  Terjadi kesalahan saat menguji '{wav_file}': {e}")
        true_labels.append(true_command)
        predicted_labels.append(error_label)
        continue

      true_labels.append(true_command)
      predicted_labels.append(recognized_cmd)

      if recognized_cmd == true_command:
        total_correct += 1
        print(f"  '{os.path.basename(wav_file)}' : BENAR (Dikenali sebagai '{recognized_cmd}')")
      else:
        print(f"  '{os.path.basename(wav_file)}' : SALAH (Asli: '{true_command}', Dikenali: '{recognized_cmd}')")

    print("-----------------------------------------------------------------------")

  if total_tested == 0:
    print("Tidak ada data pengujian yang diproses.")
    return 0.0

  accuracy = (total_correct / total_tested) * 100
  print(f"\nHasil Evaluasi")
  print("==============")
  print(f"Total Sampel yang Diuji: {total_tested}")
  print(f"Jumlah Benar: {total_correct}")
  print(f"Akurasi: {accuracy:.2f}%")

  # Confusion matrix over every label that actually occurred, with the
  # error class listed last.
  print(f"\nConfusion Matrix")
  print("================")
  all_labels = sorted(set(true_labels + predicted_labels))
  if error_label in all_labels:
    all_labels.remove(error_label)
    all_labels.append(error_label)

  cm = confusion_matrix(true_labels, predicted_labels, labels=all_labels)
  print(cm)

  # Classification report over all known commands plus any other predicted
  # label; the error class goes last unless it is itself a command name.
  print(f"\nClassification Report")
  print("=====================")
  report_target_names = sorted(set(commands).union(predicted_labels))
  if error_label in report_target_names and error_label not in commands:
    report_target_names.remove(error_label)
    report_target_names.append(error_label)

  print(classification_report(true_labels, predicted_labels, labels=report_target_names, zero_division=0))

  return accuracy

# Main Program
if __name__ == "__main__":
  # Step 1: load the dataset and split it per command.
  dataset_split = load_and_split_dataset(COMMANDS, DATASET_ROOT_DIR, TRAIN_RATIO)

  # Separate the file paths into a training map and a testing map.
  train_data_paths = {name: parts['train'] for name, parts in dataset_split.items()}
  test_data_paths = {name: parts['test'] for name, parts in dataset_split.items()}

  # Step 2: build the reference patterns from the training files.
  train_model_from_split(train_data_paths)

  if REFERENCE_PATTERNS:
    print("\n=== Pelatihan Selesai. Pola Referensi Dibuat ===")
    print("Pola referensi yang tersedia :", list(REFERENCE_PATTERNS.keys()))

    # Step 3: evaluate the model on the held-out test files.
    evaluate_model(test_data_paths, COMMANDS)
  else:
    # Without any reference pattern there is nothing to evaluate against.
    print("\nTidak ada pola referensi yang berhasil dibuat. Tidak dapat melanjutkan.")


Data Splitting
Perintah 'maju' : 8 training, 2 testing.
Perintah 'mundur' : 8 training, 2 testing.
Perintah 'berhenti' : 8 training, 2 testing.
Perintah 'kiri' : 8 training, 2 testing.
Perintah 'kanan' : 8 training, 2 testing.

Training Model
Memproses 8 sampel pelatihan untuk perintah : 'maju'...
Pola referensi untuk 'maju' telah dibuat dari 8 sampel valid.
Memproses 8 sampel pelatihan untuk perintah : 'mundur'...
Pola referensi untuk 'mundur' telah dibuat dari 8 sampel valid.
Memproses 8 sampel pelatihan untuk perintah : 'berhenti'...
Pola referensi untuk 'berhenti' telah dibuat dari 8 sampel valid.
Memproses 8 sampel pelatihan untuk perintah : 'kiri'...
Pola referensi untuk 'kiri' telah dibuat dari 8 sampel valid.
Memproses 8 sampel pelatihan untuk perintah : 'kanan'...
Pola referensi untuk 'kanan' telah dibuat dari 8 sampel valid.

=== Pelatihan Selesai. Pola Referensi Dibuat ===
Pola referensi yang tersedia : ['maju', 'mundur', 'berhenti', 'kiri', 'kanan']

Evaluasi Model
Menguji