In [None]:
import numpy as np
import os
import scipy.io.wavfile as wav
from scipy.fftpack import dct
import pandas as pd
import matplotlib.pyplot as plt
from google.colab import drive
import glob
import time
# Mount Google Drive at /content/drive (Colab only); the dataset and output
# paths used in main() live under this mount point.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
def erb_bandwidth(f):
    """Return the ERB-style bandwidth ``24.7 * (4.37e-3 * f + 1.0)`` for ``f``.

    Args:
        f: Frequency value (presumably in Hz -- confirm against callers).
            Scalars and NumPy arrays both work because only ``*`` and ``+``
            are applied.

    Returns:
        The bandwidth, with the same shape as ``f``.
    """
    scaled_frequency = 4.37e-3 * f
    return 24.7 * (scaled_frequency + 1.0)

In [None]:
def gammatone_filter(f, f_center, sr, NFFT):
    """Magnitude response of one gammatone filter on the rFFT bin grid.

    A short impulse response ``t**3 * exp(-2*pi*b*t) * cos(2*pi*f*t)`` is
    sampled, with ``b = erb_bandwidth(f_center)``, and the magnitude of its
    ``NFFT``-point real FFT is returned.

    Args:
        f: Frequency of the cosine carrier.
        f_center: Frequency passed to ``erb_bandwidth`` to set the decay rate
            of the envelope.
        sr: Sample rate; sample ``k`` of the impulse response is taken at
            time ``k / sr``.
        NFFT: FFT length; the impulse response is zero-padded to this size.

    Returns:
        1-D array of length ``NFFT // 2 + 1`` with the magnitude response.
    """
    # NOTE(review): the impulse response keeps only NFFT // 2 + 1 samples, as
    # in the original code; confirm whether NFFT samples were intended.
    n_samples = NFFT // 2 + 1

    # Sample instants k / sr. The previous np.linspace(0, 1, sr) included the
    # endpoint, giving a spacing of 1 / (sr - 1) rather than the true sample
    # period, and allocated sr samples only to discard most of them.
    t = np.arange(n_samples) / sr

    envelope = np.power(t, 3) * np.exp(-2 * np.pi * erb_bandwidth(f_center) * t)
    gamma_tone = envelope * np.cos(2 * np.pi * f * t)

    # rfft with n=NFFT already yields exactly NFFT // 2 + 1 bins.
    return np.abs(np.fft.rfft(gamma_tone, NFFT))

In [None]:
def make_gammatone_filterbank(nfilt, NFFT, sr):
    """Build a bank of ``nfilt`` gammatone magnitude responses.

    Centre frequencies are geometrically spaced from 20 up to ``sr / 2``
    (both ends included), and each one is used both as the carrier and as
    the bandwidth-defining frequency of ``gammatone_filter``.

    Args:
        nfilt: Number of filters (rows of the result).
        NFFT: FFT length; each row has ``NFFT // 2 + 1`` bins.
        sr: Sample rate of the audio the bank will be applied to.

    Returns:
        Array of shape ``(nfilt, NFFT // 2 + 1)``.
    """
    lowest, highest = 20, sr / 2
    centers = np.geomspace(lowest, highest, nfilt)

    bank = np.zeros((nfilt, NFFT // 2 + 1))
    # Each `row` is a view into `bank`, so the slice assignment fills it in place.
    for row, center in zip(bank, centers):
        row[:] = gammatone_filter(center, center, sr, NFFT)
    return bank

In [None]:
def _frame_signal(signal, frame_length, frame_step):
    """Slice a 1-D signal into overlapping frames, zero-padding the last one."""
    signal_length = len(signal)
    if signal_length <= frame_length:
        num_frames = 1
    else:
        # "1 +" so the final, partially filled frame is kept. Without it the
        # padded tail was never read and up to one hop of audio was dropped.
        num_frames = 1 + int(np.ceil((signal_length - frame_length) / frame_step))

    pad_signal_length = (num_frames - 1) * frame_step + frame_length
    pad_signal = np.append(signal, np.zeros(pad_signal_length - signal_length))

    # Row i holds samples [i * frame_step, i * frame_step + frame_length).
    starts = frame_step * np.arange(num_frames)
    indices = starts[:, None] + np.arange(frame_length)[None, :]
    return pad_signal[indices]


def _save_gfcc_plot(gfcc, label, plot_dir):
    """Render the GFCC matrix as an image and save it as ``<plot_dir>/<label>.png``."""
    os.makedirs(plot_dir, exist_ok=True)
    fig, ax = plt.subplots()
    try:
        # gfcc is (frames, coefficients); transpose so frames run along the
        # x-axis, matching the axis labels below.
        image = ax.imshow(gfcc.T, cmap='viridis', origin='lower', aspect='auto')
        ax.set_title(f"GFCC for {label}")
        ax.set_xlabel("Frames")
        ax.set_ylabel("GFCC Coefficients")
        fig.colorbar(image, ax=ax)
        plot_filename = os.path.join(plot_dir, f"{label}.png")
        fig.savefig(plot_filename)
    finally:
        # Always release the figure, even if saving fails.
        plt.close(fig)
    return plot_filename


def extract_gfcc(audio_file, plot_dir, n_gfcc=13):
    """Compute GFCC features for one WAV file and save a plot of them.

    Pipeline: read WAV -> pre-emphasis (0.97) -> 25 ms frames with 10 ms hop
    -> Hann window -> power spectrum -> 40-filter gammatone bank -> log ->
    DCT-II, keeping coefficients 1..n_gfcc (coefficient 0 is dropped).

    Args:
        audio_file: Path to a WAV file readable by ``scipy.io.wavfile.read``.
        plot_dir: Directory where ``<label>.png`` is written; created if
            missing.
        n_gfcc: Number of cepstral coefficients to keep per frame.

    Returns:
        Tuple ``(gfcc, label)``: ``gfcc`` is a float array with one row per
        frame and up to ``n_gfcc`` columns; ``label`` is the file name
        without its extension.

    Raises:
        ValueError: If the file contains no audio samples.
    """
    # Load the audio file using scipy
    sr, y = wav.read(audio_file)
    y = np.asarray(y, dtype=np.float64)
    if y.ndim > 1:
        # Multi-channel data is (samples, channels); average to mono so the
        # channels are not interleaved when the signal is flattened below.
        y = y.mean(axis=1)
    if y.size == 0:
        raise ValueError(f"{audio_file} contains no audio samples")

    # Pre-emphasis
    pre_emphasis = 0.97
    emphasized_signal = np.append(y[0], y[1:] - pre_emphasis * y[:-1])

    # Framing
    frame_size = 0.025
    frame_stride = 0.01
    frame_length = int(round(frame_size * sr))
    frame_step = int(round(frame_stride * sr))
    frames = _frame_signal(emphasized_signal, frame_length, frame_step)

    # Apply Hann window
    frames = frames * np.hanning(frame_length)

    # Fourier Transform and Power Spectrum
    # At least 512 points, but never shorter than the frame: rfft silently
    # truncates its input when n < frame_length (which happens for sr > 20480).
    NFFT = max(512, 1 << (frame_length - 1).bit_length())
    mag_frames = np.absolute(np.fft.rfft(frames, NFFT))
    pow_frames = (1.0 / NFFT) * mag_frames ** 2

    # Gammatone Filter Bank
    nfilt = 40
    gt_filters = make_gammatone_filterbank(nfilt, NFFT, sr)
    filter_banks = np.dot(pow_frames, gt_filters.T)
    # Replace exact zeros so the logarithm stays finite.
    filter_banks = np.where(filter_banks == 0, np.finfo(float).eps, filter_banks)
    filter_banks = 20 * np.log10(filter_banks)

    # DCT to get GFCC
    gfcc = dct(filter_banks, type=2, axis=1, norm='ortho')[:, 1:(n_gfcc + 1)]

    # Derive the label from the file name without its extension. splitext
    # keeps any additional dots that are part of the name itself.
    label = os.path.splitext(os.path.basename(audio_file))[0]

    # Save the plot to the requested directory
    _save_gfcc_plot(gfcc, label, plot_dir)

    return gfcc, label

In [None]:
def main():
    """Extract GFCC plots for every WAV file and write a label/path CSV.

    For each ``*.wav`` file matched under the dataset pattern, calls
    ``extract_gfcc`` (which saves ``<label>.png`` into the output directory),
    then writes ``labels_and_file_paths.csv`` with two columns: ``Label``
    (the part of the file label before the first underscore) and
    ``FilePath`` (the saved plot). Prints the elapsed wall-clock time.
    """
    start_time = time.time()

    # Directory pattern containing audio files, and the output directory
    audio_dir = "/content/drive/My Drive/Semester 5/Pemrosesan Suara/project/new_dataset/*"
    output_dir = "/content/drive/My Drive/Semester 5/Pemrosesan Suara/project/bismillah_fix"

    # Match every .wav file; sorted because glob order is unspecified and the
    # CSV row order should be reproducible between runs.
    audio_files = sorted(glob.glob(os.path.join(audio_dir, "*.wav")))
    if not audio_files:
        print(f"No .wav files found matching {os.path.join(audio_dir, '*.wav')}")
        return

    os.makedirs(output_dir, exist_ok=True)

    # Only the labels are needed afterwards; the feature matrices are not
    # accumulated, since nothing downstream in this function uses them.
    labels = []
    for audio_file in audio_files:
        _, label = extract_gfcc(audio_file, n_gfcc=13, plot_dir=output_dir)
        labels.append(label)

    # Build the plot path from the label returned by extract_gfcc, which is
    # the name the plot was actually saved under.
    df_data = pd.DataFrame({
        "Label": labels,
        "FilePath": [os.path.join(output_dir, f"{label}.png") for label in labels],
    })

    # Split the "Label" column on underscores and keep the first part
    df_data["Label"] = df_data["Label"].str.split("_").str[0]

    # Save the DataFrame to a single CSV file
    csv_filename = os.path.join(output_dir, "labels_and_file_paths.csv")
    df_data.to_csv(csv_filename, index=False)

    print("Data sudah disimpan di csv")

    # Report the elapsed execution time
    execution_time = time.time() - start_time
    print(f"Execution time: {execution_time:.2f} seconds")

# Run the full extraction pipeline when this code is executed directly
# (script or notebook cell), but not when it is imported as a module.
if __name__ == "__main__":
    main()


Data sudah disimpan di csv
Execution time: 1829.07 seconds
