In [3]:
import librosa
import librosa.display
import numpy as np
import pandas as pd
import pywt
import os

# Framing parameters used as default arguments by preprocess_audio below.
# Number of MFCC coefficients requested from librosa.feature.mfcc.
N_MFCC = 13
# Analysis window length in samples (also passed as the RMS frame_length);
# 512 samples is 32 ms at the 16 kHz rate the audio is loaded with.
N_FFT = 512
# Step between consecutive frames in samples; half of N_FFT, i.e. 50% overlap.
HOP_LENGTH = 256

# Function for denoising using wavelet
def denoise_wavelet(audio, threshold=1e-6, preserve_threshold=0.05, wavelet='db1', level=6):
    """Denoise a signal by soft-thresholding its wavelet detail coefficients.

    The signal is decomposed with ``pywt.wavedec``. The first coefficient
    array (the approximation band) is kept untouched; every following detail
    band is soft-thresholded, and a band whose remaining absolute sum does not
    exceed ``preserve_threshold`` is replaced by zeros. The signal is then
    rebuilt with ``pywt.waverec``.

    Parameters
    ----------
    audio : array-like
        Signal to denoise; the transform runs along the last axis.
    threshold : float, optional
        Soft-threshold value applied to each detail band.
    preserve_threshold : float, optional
        Minimum sum of absolute thresholded coefficients a detail band must
        exceed to be kept. Note this is a sum, so it scales with band length.
    wavelet : str, optional
        Wavelet name passed to PyWavelets (default ``'db1'``).
    level : int, optional
        Decomposition depth (default ``6``).

    Returns
    -------
    numpy.ndarray
        Denoised signal with the same number of samples as ``audio``.
    """
    n_samples = np.shape(audio)[-1]
    coeffs = pywt.wavedec(audio, wavelet, level=level)

    # coeffs[0] is the approximation band and is always preserved.
    denoised_coeffs = [coeffs[0]]
    for detail in coeffs[1:]:
        shrunk = pywt.threshold(detail, threshold, mode='soft')
        if np.sum(np.abs(shrunk)) > preserve_threshold:
            denoised_coeffs.append(shrunk)
        else:
            # Band carries (almost) no energy after thresholding: drop it.
            denoised_coeffs.append(np.zeros_like(shrunk))

    reconstructed = pywt.waverec(denoised_coeffs, wavelet)

    # The decomposition pads odd-length bands, so the reconstruction can come
    # back longer than the input. Trim so callers get a signal that lines up
    # sample-for-sample with what they passed in.
    return reconstructed[..., :n_samples]

# Function to convert audio to digital values
def audio_to_digital(audio_file):
    """Load an audio file and peak-normalise it to the range [-1023, 1023].

    The file is loaded with ``librosa.load`` at 16 kHz using the
    ``'kaiser_fast'`` resampler, then divided by its absolute peak and
    multiplied by 1023 (presumably to mimic a 10-bit ADC range; the values
    are not rounded or quantised).

    Parameters
    ----------
    audio_file : str or path-like
        Path handed to ``librosa.load``.

    Returns
    -------
    numpy.ndarray
        Scaled samples. A silent or empty recording yields an all-zero (or
        empty) array rather than NaNs or an exception.
    """
    audio, _ = librosa.load(audio_file, sr=16000, res_type='kaiser_fast')

    # np.max raises on an empty array, so treat "no samples" as peak 0.
    peak = np.max(np.abs(audio)) if audio.size else 0.0
    if peak == 0:
        # Dividing by a zero peak would fill the signal with NaN and poison
        # every downstream feature; a silent clip simply stays silent.
        return np.zeros_like(audio)

    return (audio / peak) * 1023

# Function for audio preprocessing
def preprocess_audio(file_path, n_mfcc=N_MFCC, n_fft=N_FFT, hop_length=HOP_LENGTH):
    """Turn one audio file into a fixed-size summary feature set.

    The file is loaded and scaled by ``audio_to_digital``, brought back to
    unit amplitude by dividing by 1023, denoised with ``denoise_wavelet`` and
    then framed to compute MFCCs, RMS energy and spectral centroid. Each
    feature is averaged over all frames.

    Returns
    -------
    tuple
        ``(mean_mfcc, mean_rmse, mean_centroid)`` where ``mean_mfcc`` is the
        per-coefficient mean (one value per MFCC) and the other two are
        scalar means.
    """
    sample_rate = 16000

    # Load -> rescale to unit amplitude -> wavelet denoise.
    signal = denoise_wavelet(audio_to_digital(file_path) / 1023, preserve_threshold=0.05)

    # Frame-level features, all sharing the same window and hop.
    framing = dict(n_fft=n_fft, hop_length=hop_length)
    mfcc_frames = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=n_mfcc, **framing)
    rms_frames = librosa.feature.rms(y=signal, frame_length=n_fft, hop_length=hop_length)
    centroid_frames = librosa.feature.spectral_centroid(y=signal, sr=sample_rate, **framing)

    # Collapse the time axis: one mean per MFCC row, scalar means otherwise.
    return (
        np.mean(mfcc_frames, axis=1),
        np.mean(rms_frames),
        np.mean(centroid_frames),
    )

# Function to process all audio files in a directory
def process_all_files(directory_path, output_csv='feature_train.csv'):
    """Extract features from every WAV file in a directory and save them as CSV.

    Each regular file whose name ends in ``.wav`` (case-insensitive) is run
    through ``preprocess_audio``. A file is labelled ``1`` when its name
    contains "scream" (case-insensitive) and ``0`` otherwise. Anything else
    in the directory is reported and skipped.

    Parameters
    ----------
    directory_path : str
        Directory to scan (not recursive).
    output_csv : str, optional
        Destination CSV path; defaults to ``'feature_train.csv'``.

    The CSV has one row per file with the columns ``file``, ``Label``,
    ``MFCC_0..MFCC_{n-1}``, ``Mean Root-Mean-Square Energy`` and
    ``Mean Spectral Centroid``.
    """
    records = []

    # os.listdir returns entries in arbitrary order; sort so the CSV rows are
    # reproducible from run to run.
    for file_name in sorted(os.listdir(directory_path)):
        file_path = os.path.join(directory_path, file_name)

        # Match the extension case-insensitively, like the label check below.
        if not (os.path.isfile(file_path) and file_name.lower().endswith('.wav')):
            print(f'Skipping non-WAV file: {file_name}')
            continue

        label = 1 if "scream" in file_name.lower() else 0
        mean_mfcc, mean_rmse, mean_centroid = preprocess_audio(file_path)

        records.append({
            'file': file_name,
            'Label': label,
            **{f'MFCC_{j}': value for j, value in enumerate(mean_mfcc)},
            'Mean Root-Mean-Square Energy': mean_rmse,
            'Mean Spectral Centroid': mean_centroid,
        })

    # Build the frame once instead of concatenating inside the loop, which
    # copies all previous rows on every iteration.
    pd.DataFrame(records).to_csv(output_csv, index=False)

# Run feature extraction over the training clips. The path is relative to the
# kernel's working directory; process_all_files writes 'feature_train.csv'.
directory_path = 'suara/data/sound/train'
process_all_files(directory_path)

In [4]:
import librosa
import librosa.display
import numpy as np
import pandas as pd
import pywt
import os

# Framing parameters used as default arguments by preprocess_audio below.
# NOTE(review): same values as the constants defined in the previous cell.
# Number of MFCC coefficients requested from librosa.feature.mfcc.
N_MFCC = 13
# Analysis window length in samples (also passed as the RMS frame_length);
# 512 samples is 32 ms at the 16 kHz rate the audio is loaded with.
N_FFT = 512
# Step between consecutive frames in samples; half of N_FFT, i.e. 50% overlap.
HOP_LENGTH = 256

# Function for denoising using wavelet
def denoise_wavelet(audio, threshold=1e-6, preserve_threshold=0.05, wavelet='db1', level=6):
    """Denoise a signal by soft-thresholding its wavelet detail coefficients.

    The signal is decomposed with ``pywt.wavedec``. The first coefficient
    array (the approximation band) is kept untouched; every following detail
    band is soft-thresholded, and a band whose remaining absolute sum does not
    exceed ``preserve_threshold`` is replaced by zeros. The signal is then
    rebuilt with ``pywt.waverec``.

    Parameters
    ----------
    audio : array-like
        Signal to denoise; the transform runs along the last axis.
    threshold : float, optional
        Soft-threshold value applied to each detail band.
    preserve_threshold : float, optional
        Minimum sum of absolute thresholded coefficients a detail band must
        exceed to be kept. Note this is a sum, so it scales with band length.
    wavelet : str, optional
        Wavelet name passed to PyWavelets (default ``'db1'``).
    level : int, optional
        Decomposition depth (default ``6``).

    Returns
    -------
    numpy.ndarray
        Denoised signal with the same number of samples as ``audio``.
    """
    n_samples = np.shape(audio)[-1]
    coeffs = pywt.wavedec(audio, wavelet, level=level)

    # coeffs[0] is the approximation band and is always preserved.
    denoised_coeffs = [coeffs[0]]
    for detail in coeffs[1:]:
        shrunk = pywt.threshold(detail, threshold, mode='soft')
        if np.sum(np.abs(shrunk)) > preserve_threshold:
            denoised_coeffs.append(shrunk)
        else:
            # Band carries (almost) no energy after thresholding: drop it.
            denoised_coeffs.append(np.zeros_like(shrunk))

    reconstructed = pywt.waverec(denoised_coeffs, wavelet)

    # The decomposition pads odd-length bands, so the reconstruction can come
    # back longer than the input. Trim so callers get a signal that lines up
    # sample-for-sample with what they passed in.
    return reconstructed[..., :n_samples]

# Function to convert audio to digital values
def audio_to_digital(audio_file):
    """Load an audio file and peak-normalise it to the range [-1023, 1023].

    The file is loaded with ``librosa.load`` at 16 kHz using the
    ``'kaiser_fast'`` resampler, then divided by its absolute peak and
    multiplied by 1023 (presumably to mimic a 10-bit ADC range; the values
    are not rounded or quantised).

    Parameters
    ----------
    audio_file : str or path-like
        Path handed to ``librosa.load``.

    Returns
    -------
    numpy.ndarray
        Scaled samples. A silent or empty recording yields an all-zero (or
        empty) array rather than NaNs or an exception.
    """
    audio, _ = librosa.load(audio_file, sr=16000, res_type='kaiser_fast')

    # np.max raises on an empty array, so treat "no samples" as peak 0.
    peak = np.max(np.abs(audio)) if audio.size else 0.0
    if peak == 0:
        # Dividing by a zero peak would fill the signal with NaN and poison
        # every downstream feature; a silent clip simply stays silent.
        return np.zeros_like(audio)

    return (audio / peak) * 1023

# Function for audio preprocessing
def preprocess_audio(file_path, n_mfcc=N_MFCC, n_fft=N_FFT, hop_length=HOP_LENGTH):
    """Turn one audio file into a fixed-size summary feature set.

    The file is loaded and scaled by ``audio_to_digital``, brought back to
    unit amplitude by dividing by 1023, denoised with ``denoise_wavelet`` and
    then framed to compute MFCCs, RMS energy and spectral centroid. Each
    feature is averaged over all frames.

    Returns
    -------
    tuple
        ``(mean_mfcc, mean_rmse, mean_centroid)`` where ``mean_mfcc`` is the
        per-coefficient mean (one value per MFCC) and the other two are
        scalar means.
    """
    sample_rate = 16000

    # Load -> rescale to unit amplitude -> wavelet denoise.
    signal = denoise_wavelet(audio_to_digital(file_path) / 1023, preserve_threshold=0.05)

    # Frame-level features, all sharing the same window and hop.
    framing = dict(n_fft=n_fft, hop_length=hop_length)
    mfcc_frames = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=n_mfcc, **framing)
    rms_frames = librosa.feature.rms(y=signal, frame_length=n_fft, hop_length=hop_length)
    centroid_frames = librosa.feature.spectral_centroid(y=signal, sr=sample_rate, **framing)

    # Collapse the time axis: one mean per MFCC row, scalar means otherwise.
    return (
        np.mean(mfcc_frames, axis=1),
        np.mean(rms_frames),
        np.mean(centroid_frames),
    )

# Function to process all audio files in a directory
def process_all_files(directory_path, output_csv='feature_val.csv'):
    """Extract features from every WAV file in a directory and save them as CSV.

    Each regular file whose name ends in ``.wav`` (case-insensitive) is run
    through ``preprocess_audio``. A file is labelled ``1`` when its name
    contains "scream" (case-insensitive) and ``0`` otherwise. Anything else
    in the directory is reported and skipped.

    Parameters
    ----------
    directory_path : str
        Directory to scan (not recursive).
    output_csv : str, optional
        Destination CSV path; defaults to ``'feature_val.csv'``.

    The CSV has one row per file with the columns ``file``, ``Label``,
    ``MFCC_0..MFCC_{n-1}``, ``Mean Root-Mean-Square Energy`` and
    ``Mean Spectral Centroid``.
    """
    records = []

    # os.listdir returns entries in arbitrary order; sort so the CSV rows are
    # reproducible from run to run.
    for file_name in sorted(os.listdir(directory_path)):
        file_path = os.path.join(directory_path, file_name)

        # Match the extension case-insensitively, like the label check below.
        if not (os.path.isfile(file_path) and file_name.lower().endswith('.wav')):
            print(f'Skipping non-WAV file: {file_name}')
            continue

        label = 1 if "scream" in file_name.lower() else 0
        mean_mfcc, mean_rmse, mean_centroid = preprocess_audio(file_path)

        records.append({
            'file': file_name,
            'Label': label,
            **{f'MFCC_{j}': value for j, value in enumerate(mean_mfcc)},
            'Mean Root-Mean-Square Energy': mean_rmse,
            'Mean Spectral Centroid': mean_centroid,
        })

    # Build the frame once instead of concatenating inside the loop, which
    # copies all previous rows on every iteration.
    pd.DataFrame(records).to_csv(output_csv, index=False)

# Run feature extraction over the validation clips. The path is relative to
# the kernel's working directory; process_all_files writes 'feature_val.csv'.
directory_path = 'suara/data/sound/val'
process_all_files(directory_path)