Library


In [14]:
import math
import joblib
import numpy as np
import scipy.signal
import scipy.fftpack
import librosa
import soundfile as sf
import skfuzzy as fuzz
from skfuzzy import control as ctrl
import matplotlib.pyplot as plt
from scipy.signal import resample_poly
import os
import pandas as pd
import json
import pickle
from sklearn.preprocessing import StandardScaler

Konstanta


In [15]:
# Audio framing parameters. Both durations are expressed in samples, not seconds.
SAMPLE_RATE = 48000                        # target sampling rate in Hz
SEGMENT_DURATION = int(1 * SAMPLE_RATE)    # samples per analysis segment (1 s)
OVERLAP_DURATION = int(0.5 * SAMPLE_RATE)  # samples between segment starts (0.5 s)

# Project root. Defaults to the original author's checkout; set the
# DKA_MAIN_PATH environment variable to run the notebook from another
# location without editing this cell.
MAIN_PATH = os.environ.get(
    "DKA_MAIN_PATH",
    r"C:\Users\Lulay\Documents\GitHub\Dasar-Kecerdasan-Artificial_Tugas-Besar",
)

Dir Folder


In [16]:
# Raw audio source folders under the project root.
# os.path.join picks the separator of the host OS instead of hard-coding the
# Windows backslash; on Windows the resulting strings are unchanged.
dir_data_xc = os.path.join(MAIN_PATH, "Dataset", "xeno-canto")
dir_data_nad = os.path.join(MAIN_PATH, "Dataset", "noise-audio-data")

Dir Dataset


In [17]:
# Location of the labelled-dataset pickle under the project root.
# Built with os.path.join so the separator is not hard-coded to the Windows
# backslash; on Windows the resulting string is unchanged.
dir_dataset = os.path.join(MAIN_PATH, "Dataset", "dataset_labelled.pkl")

# Pra-Pemrosesan


In [18]:
def load_file_audio(path):
    """Read an audio file from disk with ``soundfile``.

    Parameters
    ----------
    path
        Location of the audio file; passed unchanged to ``sf.read``.

    Returns
    -------
    tuple
        ``(samples, sample_rate)``: the data returned by ``sf.read`` wrapped
        in a NumPy array, and the sample rate reported for the file.
    """
    samples, sample_rate = sf.read(path)
    samples = np.array(samples)
    return samples, sample_rate

def prapemrosesan_downmixing(audio):
    """Reduce multi-dimensional audio to one dimension and cast to float32.

    Parameters
    ----------
    audio : np.ndarray
        Audio samples. Arrays with more than one dimension are averaged
        along axis 1; one-dimensional arrays are only cast.

    Returns
    -------
    np.ndarray
        The (possibly averaged) samples as ``float32``.
    """
    mono = audio if audio.ndim <= 1 else np.mean(audio, axis=1)
    return mono.astype(np.float32)

def prapemrosesan_resampling(audio, sr):
    """Resample ``audio`` from ``sr`` Hz to ``SAMPLE_RATE`` Hz.

    The rate change is done with polyphase filtering
    (``scipy.signal.resample_poly``), which low-pass filters the signal as
    part of the conversion. Plain linear interpolation applies no such
    filter, so when downsampling it folds content above the new Nyquist
    frequency back into the band (aliasing).

    Parameters
    ----------
    audio : np.ndarray
        Samples laid out along axis 0.
    sr : int
        Sampling rate of ``audio`` in Hz. Expected to be a whole number; it
        is converted with ``int`` before the rate ratio is reduced.

    Returns
    -------
    tuple
        ``(resampled, SAMPLE_RATE)``. When ``sr`` already equals
        ``SAMPLE_RATE`` an unmodified copy of ``audio`` is returned.
        Otherwise ``resampled`` is float64 and holds
        ``round(len(audio) * SAMPLE_RATE / sr)`` samples; empty input yields
        an empty array instead of raising.
    """
    if sr == SAMPLE_RATE:
        return audio.copy(), SAMPLE_RATE

    ratio = SAMPLE_RATE / sr
    n_samples = int(np.round(len(audio) * ratio))

    # Work in float64 so the result dtype does not depend on the input dtype.
    signal = np.asarray(audio, dtype=np.float64)
    if len(signal) == 0 or n_samples == 0:
        # Nothing to filter; return an empty result of the right shape.
        empty = np.zeros((n_samples,) + signal.shape[1:], dtype=np.float64)
        return empty, SAMPLE_RATE

    # Reduce the rate ratio to the smallest integer up/down factors.
    source_rate = int(sr)
    divisor = math.gcd(SAMPLE_RATE, source_rate)
    up = SAMPLE_RATE // divisor
    down = source_rate // divisor
    resampled = resample_poly(signal, up, down, axis=0)

    # resample_poly rounds the output length up; trim (or, defensively, pad)
    # so the length is exactly n_samples.
    if len(resampled) > n_samples:
        resampled = resampled[:n_samples]
    elif len(resampled) < n_samples:
        pad_width = [(0, n_samples - len(resampled))]
        pad_width += [(0, 0)] * (resampled.ndim - 1)
        resampled = np.pad(resampled, pad_width)

    return resampled, SAMPLE_RATE

def prapemrosesan_padding(audio):
    """Pad ``audio`` at the end up to a whole number of segments.

    Parameters
    ----------
    audio : np.ndarray
        Audio samples; the length is taken from ``audio.shape[0]``.

    Returns
    -------
    np.ndarray
        ``audio`` itself when its length is already a multiple of
        ``SEGMENT_DURATION``; otherwise a padded array (``np.pad`` with its
        default mode) whose length is the next multiple.
    """
    remainder = audio.shape[0] % SEGMENT_DURATION
    if remainder == 0:
        return audio
    return np.pad(audio, (0, SEGMENT_DURATION - remainder))

def prapemrosesan_splitting(audio):
    """Cut ``audio`` into fixed-length, overlapping segments.

    Segments are ``SEGMENT_DURATION`` samples long and start every
    ``OVERLAP_DURATION`` samples. A slice that comes out shorter than a full
    segment is zero-padded at the end.

    Parameters
    ----------
    audio : np.ndarray
        Audio samples to split.

    Returns
    -------
    np.ndarray
        The segments stacked with ``np.array``; an empty array when ``audio``
        is too short to yield any segment.
    """
    hop = OVERLAP_DURATION
    width = SEGMENT_DURATION
    count = int(np.floor((len(audio) - width) / hop)) + 1

    def _window(k):
        # Slice segment k and pad it if the slice ran past the end.
        begin = int(k * hop)
        piece = audio[begin:int(begin + width)]
        shortfall = width - len(piece)
        if shortfall > 0:
            piece = np.pad(piece, (0, shortfall), mode='constant')
        return piece

    return np.array([_window(k) for k in range(count)])

# Ekstraksi Fitur


In [19]:
def get_rms(segment):
    """Return the root-mean-square value of ``segment``.

    Parameters
    ----------
    segment : np.ndarray
        Samples of one segment.

    Returns
    -------
    The square root of the mean of the squared samples.
    """
    squared = segment ** 2
    mean_power = np.mean(squared)
    return np.sqrt(mean_power)

def get_zcr(segment):
    """Return the number of sign changes per second in ``segment``.

    A sign change is counted wherever ``np.signbit`` differs between two
    neighbouring samples. The count is divided by the segment length in
    seconds, ``len(segment) / SAMPLE_RATE``.

    Parameters
    ----------
    segment : np.ndarray
        Samples of one segment.

    Returns
    -------
    Sign changes divided by the segment duration in seconds.
    """
    negative = np.signbit(segment)
    flips = negative[..., 1:] != negative[..., :-1]
    seconds = len(segment) / SAMPLE_RATE
    return np.sum(flips) / seconds

def get_lms(segment):
    """Compute the mel spectrogram of ``segment`` with librosa.

    Calls ``librosa.feature.melspectrogram`` with ``sr=SAMPLE_RATE`` and
    otherwise default settings, and returns its result unchanged.

    NOTE(review): if "lms" stands for log-mel spectrogram, be aware that no
    log/dB conversion is applied here -- confirm whether callers expect one.

    Parameters
    ----------
    segment : np.ndarray
        Samples of one segment.
    """
    mel = librosa.feature.melspectrogram(y=segment, sr=SAMPLE_RATE)
    return mel

# Visualize


In [20]:
def load_and_plot(path, list_result, final_time):
    """Plot a file's waveform and shade the given spans in red.

    Parameters
    ----------
    path
        Audio file to load; it is read with ``librosa.load`` at
        ``SAMPLE_RATE`` and its base name appears in the title.
    list_result
        Iterable of spans; element 0 and element 1 of each entry are used as
        the left and right x positions of a shaded region.
    final_time
        Upper limit of the x axis; ticks are drawn every 0.5 up to it.

    Returns
    -------
    None. The figure is displayed with ``plt.show()``.
    """
    title_name = os.path.basename(path)
    samples, rate = librosa.load(path, sr=SAMPLE_RATE)
    n_samples = len(samples)
    seconds = np.linspace(0, n_samples / rate, n_samples)

    plt.figure(figsize=(20, 4))
    plt.plot(seconds, samples, label="Amplitude")
    plt.title(f"Audio waveform with loud segments marked - {title_name}")
    plt.xlabel("Time (s)")
    plt.ylabel("Amplitude")

    # Shade every span on top of the waveform.
    for span in list_result:
        plt.axvspan(span[0], span[1], color='red', alpha=0.3)

    # Fixed axes: x from 0 to final_time, y from -1 to 1.
    tick_positions = np.arange(0, final_time, 0.5)
    plt.xticks(tick_positions)
    plt.grid(which='both', alpha=0.5)
    plt.xlim(0, final_time)
    plt.ylim(-1, 1)
    plt.tight_layout()
    plt.legend()
    plt.show()

# Load Dataset


In [21]:
# Load the labelled records from the project's Dataset folder.
# NOTE(review): pickle.load can execute code embedded in the file -- only
# open pickles produced by this project.
path_labelled = f"{MAIN_PATH}/Dataset/dataset_labelled.pkl"
with open(path_labelled, "rb") as f:
    dataset_labelled = pickle.load(f)

# Ekstraksi Fitur pada Dataset


In [22]:
# Preview the first three records; each one is unpacked further down as
# (audio_file, timestamp, label), where timestamp is a two-element list
# (presumably start/end in seconds -- confirm).
dataset_labelled[:3]

[('C:\\Users\\Lulay\\Documents\\GitHub\\Dasar-Kecerdasan-Artificial_Tugas-Besar\\Dataset\\xeno-canto\\19655.mp3',
  [0.0, 1.0],
  2),
 ('C:\\Users\\Lulay\\Documents\\GitHub\\Dasar-Kecerdasan-Artificial_Tugas-Besar\\Dataset\\xeno-canto\\19655.mp3',
  [0.5, 1.5],
  2),
 ('C:\\Users\\Lulay\\Documents\\GitHub\\Dasar-Kecerdasan-Artificial_Tugas-Besar\\Dataset\\xeno-canto\\19655.mp3',
  [1.0, 2.0],
  2)]

In [10]:
# Build one feature row per labelled record:
# [audio_file, timestamp, label, rms, zcr, mel spectrogram].
# NOTE(review): a later cell assigns `dataset_ef` again without these
# features, so on a top-to-bottom run that result replaces this one --
# confirm which version is meant to be saved.
dataset_ef = []
audio_cache = ""  # path of the file whose segments are currently loaded

# Seconds between consecutive segment starts, derived from the framing
# constants so the lookup below stays correct if they are changed.
hop_seconds = OVERLAP_DURATION / SAMPLE_RATE

for audio_file, timestamp, label in dataset_labelled:
    if audio_file != audio_cache:
        # New file: run the preprocessing chain once and reuse its segments
        # for the following records of the same file.
        audio_cache = audio_file
        audio, sr = load_file_audio(audio_file)
        audio = prapemrosesan_downmixing(audio)
        audio, sr = prapemrosesan_resampling(audio, sr)
        audio = prapemrosesan_padding(audio)
        segments = prapemrosesan_splitting(audio)

    # timestamp[0] is the record's start time; convert it to a segment
    # number. Rounding keeps float error from selecting the previous segment.
    index = int(round(timestamp[0] / hop_seconds))
    segment = segments[index]

    rms = get_rms(segment)
    zcr = get_zcr(segment)
    lms = get_lms(segment)

    dataset_ef.append([audio_file, timestamp, label, rms, zcr, lms])

In [23]:
# Rebuild `dataset_ef` as [audio_file, timestamp, label, segments] rows.
# NOTE(review): every row receives the file's complete `segments` array, not
# the single segment addressed by `timestamp`, and this assignment replaces
# any `dataset_ef` built earlier in the notebook -- confirm both are intended.
dataset_ef = []
audio_cache = ""  # path of the file whose segments are currently loaded

for audio_file, timestamp, label in dataset_labelled:
    if audio_cache != audio_file:
        # New file: run the whole preprocessing chain once and keep the
        # result for the following records of the same file.
        audio, sr = load_file_audio(audio_file)
        audio, sr = prapemrosesan_resampling(prapemrosesan_downmixing(audio), sr)
        audio = prapemrosesan_padding(audio)
        segments = prapemrosesan_splitting(audio)
        audio_cache = audio_file

    row = [audio_file, timestamp, label, segments]
    dataset_ef.append(row)

# Save


In [11]:
# Persist the current `dataset_ef` list next to the labelled dataset.
path_preprocessed = f"{MAIN_PATH}/Dataset/dataset_preprocessed.pkl"
with open(path_preprocessed, "wb") as f:
    pickle.dump(dataset_ef, f)