In [1]:
import os
import librosa
import numpy as np
import pandas as pd
from tqdm import tqdm

# Root folder of the AUGMENTED dataset (raw string, so backslashes need no escaping).
augmented_path = r"C:\Dokumen\PSD\dataset\voice_augmented"
# Class labels to process.
categories = ["buka", "tutup"]
# Accumulator for the extracted rows.
all_data = list()

print(f"Memulai ekstraksi fitur MFCC dari {augmented_path}...")

def extract_mfcc(file_path, n_mfcc=40):
    """Load an audio file and return the mean of its MFCCs.

    Args:
        file_path: Path of the audio file, passed to ``librosa.load``.
        n_mfcc: Number of MFCC coefficients to request from
            ``librosa.feature.mfcc``. Defaults to 40, the value this
            function previously hard-coded.

    Returns:
        The mean of the MFCC matrix taken over its last axis (one value per
        coefficient for a mono signal), or ``None`` if loading or feature
        extraction raised an exception.
    """
    try:
        # sr=None asks librosa to keep the file's native sampling rate.
        audio, sr = librosa.load(file_path, sr=None)
        mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc)
        # Collapse the MFCC matrix into a single feature row per file.
        mfccs_mean = np.mean(mfccs.T, axis=0)
        return mfccs_mean
    except Exception as e:
        # Best-effort: report the failing file and let the caller skip it
        # instead of aborting the whole run.
        print(f"Error memproses {file_path}: {e}")
        return None

# Process every .wav file found in each category folder.
for category in categories:
    category_path = os.path.join(augmented_path, category)
    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order of the resulting CSV reproducible between runs.
    files = sorted(f for f in os.listdir(category_path) if f.endswith('.wav'))

    for file_name in tqdm(files, desc=f"Memproses {category}"):
        file_path = os.path.join(category_path, file_name)
        features = extract_mfcc(file_path)

        # extract_mfcc returns None for files it could not process; skip those.
        if features is not None:
            # One row per file: the feature values followed by the label.
            all_data.append([*features, category])

print("\nEkstraksi fitur selesai.")

# Build the final DataFrame: 40 MFCC columns followed by the label column.
feature_cols = [f'mfcc_{i+1}' for i in range(40)]
column_names = feature_cols + ['label']
data = pd.DataFrame(all_data, columns=column_names)

# Save to CSV and report the number of rows actually written; files that
# failed extraction are skipped above, so the count is not assumed.
data.to_csv("voice_features.csv", index=False)
print(f"Berhasil! Data fitur ({len(data)} baris) telah disimpan ke voice_features.csv")

Memulai ekstraksi fitur MFCC dari C:\Dokumen\PSD\dataset\voice_augmented...


Memproses buka: 100%|██████████| 100/100 [00:02<00:00, 36.28it/s]
Memproses tutup: 100%|██████████| 100/100 [00:00<00:00, 144.76it/s]


Ekstraksi fitur selesai.
Berhasil! Data fitur (200 baris) telah disimpan ke voice_features.csv



