In [1]:
# -*- coding: utf-8 -*-
"""MFCC.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1HOJPNGj00VgyuDTV7iKrUkJg2ClwOz0F
"""

import numpy as np
import os
import librosa
import librosa.display
import pandas as pd
import matplotlib.pyplot as plt
from google.colab import drive
import glob
import time

# Mount Google Drive at /content/drive; the dataset and output paths used by
# main() live under this mount point.
drive.mount('/content/drive')

def extract_mfcc(audio_file, plot_dir, n_mfcc=13, n_fft=2048, hop_length=512, n_mels=23, max_length=100):
    """Compute a fixed-width MFCC matrix for one audio file and save its plot.

    Args:
        audio_file: Path to the audio file to analyse.
        plot_dir: Existing directory that receives the ``<label>.png`` plot.
        n_mfcc: Number of MFCC coefficients (rows of the result).
        n_fft: FFT size passed to librosa.
        hop_length: Hop between frames in samples; also used as the window
            length, as in the original implementation.
        n_mels: Number of Mel bands.
        max_length: Number of frames (columns) in the returned matrix.
            Shorter inputs are zero-padded on the right, longer ones are
            truncated.

    Returns:
        Tuple ``(mfcc, label)`` where ``mfcc`` has shape
        ``(n_mfcc, max_length)`` and ``label`` is the file name without its
        extension.
    """
    # sr=None keeps the file's native sampling rate instead of resampling.
    y, sr = librosa.load(audio_file, sr=None)

    # NOTE(review): win_length is set to hop_length, so successive analysis
    # windows do not overlap — confirm this is intended.
    mfcc = librosa.feature.mfcc(
        y=y,
        sr=sr,
        n_mfcc=n_mfcc,
        n_fft=n_fft,
        hop_length=hop_length,
        htk=True,
        n_mels=n_mels,
        win_length=hop_length,
        window='hann',
    )

    # Pad with zeros or truncate so every file yields the same number of frames.
    n_frames = mfcc.shape[1]
    if n_frames < max_length:
        mfcc = np.pad(mfcc, ((0, 0), (0, max_length - n_frames)), mode='constant')
    elif n_frames > max_length:
        mfcc = mfcc[:, :max_length]

    # Label is the file name without its extension. splitext strips only the
    # final suffix, so names containing extra dots are not truncated.
    label = os.path.splitext(os.path.basename(audio_file))[0]

    # Plot the MFCCs. sr and hop_length are passed so the time axis matches
    # the actual audio rather than librosa's default sampling rate.
    fig = plt.figure()
    try:
        librosa.display.specshow(mfcc, x_axis='time', sr=sr, hop_length=hop_length)
        plt.colorbar()
        plt.title(f"{label}")
        plt.xlabel("Time")
        plt.ylabel("MFCC Coefficients")
        plt.savefig(os.path.join(plot_dir, f"{label}.png"))
    finally:
        # Always release the figure, even if plotting or saving fails.
        plt.close(fig)

    return mfcc, label

def main():
    """Plot MFCCs for every dataset .wav file and write a label/plot-path CSV.

    Looks for ``*.wav`` files one subfolder level below the dataset
    directory, calls ``extract_mfcc`` on each (which saves a PNG plot into
    the output directory), then writes ``labels_and_file_paths.csv`` with one
    row per file: the class label (file-name part before the first
    underscore) and the path of its plot. The MFCC matrices themselves are
    not persisted. Prints the elapsed wall-clock time at the end.
    """
    start_time = time.time()
    audio_dir = "/content/drive/My Drive/Semester 5/Pemrosesan Suara/project/new_dataset/*"
    output_dir = "/content/drive/My Drive/Semester 5/Pemrosesan Suara/project/new_mfcc1"  # Output directory

    # Neither savefig nor to_csv creates missing directories.
    os.makedirs(output_dir, exist_ok=True)

    # Match every .wav file in the dataset subfolders; sorted so the CSV row
    # order is reproducible between runs.
    audio_files = sorted(glob.glob(os.path.join(audio_dir, "*.wav")))
    if not audio_files:
        print(f"No .wav files found in {audio_dir}")
        return

    # Only the label is kept; the plot is written by extract_mfcc itself.
    labels = []
    for audio_file in audio_files:
        _, label = extract_mfcc(audio_file, plot_dir=output_dir)
        labels.append(label)

    # Build plot paths from the same label extract_mfcc used to name the PNG,
    # so the CSV always points at the file that was actually written.
    df_data = pd.DataFrame({
        "Label": labels,
        "FilePath": [os.path.join(output_dir, f"{label}.png") for label in labels],
    })

    # Keep only the part of the label before the first underscore.
    df_data["Label"] = df_data["Label"].str.split("_").str[0]

    # Save everything to a single CSV file in the output directory.
    csv_filename = os.path.join(output_dir, "labels_and_file_paths.csv")
    df_data.to_csv(csv_filename, index=False)

    print("Data sudah disimpan di csv")

    # Report total wall-clock execution time.
    execution_time = time.time() - start_time
    print(f"Execution time: {execution_time:.2f} seconds")

# Run the pipeline only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()


Mounted at /content/drive
Data sudah disimpan di csv
Execution time: 1948.39 seconds
