Library


In [68]:
import math
import joblib
import numpy as np
import scipy.signal
import scipy.fftpack
import librosa
import soundfile as sf
import skfuzzy as fuzz
from skfuzzy import control as ctrl
import matplotlib.pyplot as plt
from scipy.signal import resample_poly

Konstanta


In [69]:
SAMPLE_RATE = 48000
SEGMENT_DURATION = int(1 * SAMPLE_RATE)
OVERLAP_DURATION = int(0.5 * SAMPLE_RATE)

# Pra-Pemrosesan


In [70]:
def load_file_audio(path):
    audio, sr = sf.read(path)
    return np.array(audio), sr

def prapemrosesan_downmixing(audio):
    if audio.ndim > 1:
        audio = np.mean(audio, axis=1)
    return audio.astype(np.float32)

def prapemrosesan_resampling(audio, sr):
    if sr == SAMPLE_RATE:
        return audio.copy(), SAMPLE_RATE
    
    ratio = SAMPLE_RATE / sr
    n_samples = int(np.round(len(audio) * ratio))
    
    x_old = np.linspace(0, 1, len(audio))
    x_new = np.linspace(0, 1, n_samples)
    return np.interp(x_new, x_old, audio), SAMPLE_RATE

def prapemrosesan_padding(audio):
    if np.mod(audio.shape[0], SEGMENT_DURATION) != 0:
        padding = SEGMENT_DURATION - (audio.shape[0] % SEGMENT_DURATION)
        audio = np.pad(audio, (0, padding))
    return audio

def prapemrosesan_splitting(audio):
    num_segments = int(np.floor((len(audio) - SEGMENT_DURATION) / OVERLAP_DURATION)) + 1
    segments = []

    for i in range(num_segments):
        start = int(i * OVERLAP_DURATION)
        end = int(start + SEGMENT_DURATION)
        segment = audio[start:end]
        if len(segment) < SEGMENT_DURATION:
            segment = np.pad(segment, (0, SEGMENT_DURATION - len(segment)), mode='constant')
        segments.append(segment)

    return np.array(segments)

# Ekstraksi Fitur


In [71]:
def get_rms(segment):
    return np.sqrt(np.mean(segment ** 2))

def get_zcr(segment):
    return np.sum(np.abs(np.diff(np.signbit(segment)))) / SAMPLE_RATE

def get_lms(segment):
    return librosa.feature.melspectrogram(y=segment, sr=SAMPLE_RATE)

# Ekstraksi Data


In [135]:
list_segments_xc = [
    ["19655.mp3", [1.5, 2.5], [2.0, 3.0], [2.5, 3.5], [3.0, 4.0], [3.5, 4.5], [5.0, 6.0], [8.5, 9.5], [9.0, 10.0], [9.5, 10.5], [10.0, 11.0], [10.5, 11.5], [14.0, 15.0], [14.5, 15.5], [15.0, 16.0], [15.5, 16.5], [16.0, 17.0], [16.5, 17.5]],
    ["90809.mp3", [0.0, 1.0], [0.5, 1.5], [1.0, 2.0], [1.5, 2.5], [2.0, 3.0], [2.5, 3.5], [3.0, 4.0], [3.5, 4.5], [4.0, 5.0], [4.5, 5.5], [5.0, 6.0], [5.5, 6.5], [6.0, 7.0], [6.5, 7.5], [7.0, 8.0], [7.5, 8.5], [8.0, 9.0], [8.5, 9.5], [9.0, 10.0], [9.5, 10.5], [10.0, 11.0], [10.5, 11.5], [11.0, 12.0], [11.5, 12.5], [12.0, 13.0], [12.5, 13.5], [13.0, 14.0], [13.5, 14.5], [14.0, 15.0], [14.5, 15.5], [15.0, 16.0], [15.5, 16.5], [16.0, 17.0], [16.5, 17.5], [17.0, 18.0], [17.5, 18.5], [18.0, 19.0], [18.5, 19.5], [19.0, 20.0], [19.5, 20.5], [20.0, 21.0], [20.5, 21.5], [21.0, 22.0], [21.5, 22.5]],
    ["151761.mp3", [0.5, 1.5], [1.0, 2.0], [1.5, 2.5], [11.0, 12.0], [11.5, 12.5], [12.0, 13.0], [12.5, 13.5], [13.0, 14.0], [13.5, 14.5], [21.0, 22.0], [21.5, 22.5], [22.0, 23.0]],
    ["193835.mp3", [1.5, 2.5], [3.0, 4.0], [4.5, 5.5], [7.5, 8.5], [9.5, 10.5], [13.5, 14.5], [14.0, 15.0], [14.5, 15.5], [18.0, 19.0], [19.5, 20.5], [21.0, 22.0]],
    ["279210.mp3", [0.0, 1.0], [0.5, 1.5], [1.0, 2.0], [1.5, 2.5], [2.0, 3.0], [2.5, 3.5], [15.0, 16.0], [15.5, 16.5], [16.0, 17.0], [16.5, 17.5], [17.0, 18.0]],
    ["282469.mp3", [0.5, 1.5], [1.0, 2.0], [1.5, 2.5], [2.0, 3.0], [2.5, 3.5], [4.5, 5.5], [8.0, 9.0], [9.5, 10.5], [10.0, 11.0], [10.5, 11.5], [11.0, 12.0], [11.5, 12.5], [12.0, 13.0], [12.5, 13.5], [13.0, 14.0], [14.5, 15.5], [15.0, 16.0], [15.5, 16.5], [16.0, 17.0], [16.5, 17.5], [17.0, 18.0], [17.5, 18.5], [18.0, 19.0], [18.5, 19.5], [19.0, 20.0], [20.5, 21.5], [21.0, 22.0], [21.5, 22.5], [22.0, 23.0]],
    ["359523.mp3", [0.5, 1.5], [1.0, 2.0], [1.5, 2.5], [2.0, 3.0], [5.0, 6.0], [5.5, 6.5], [10.0, 11.0], [10.5, 11.5], [11.0, 12.0], [11.5, 12.5], [15.0, 16.0], [16.5, 17.5]],
    ["359552.mp3", [0.5, 1.5], [1.0, 2.0], [4.0, 5.0], [4.5, 5.5], [6.0, 7.0], [6.5, 7.5], [8.5, 9.5], [12.0, 13.0], [14.0, 15.0]],
    ["498380.mp3", [0.0, 1.0], [0.5, 1.5], [1.0, 2.0], [1.5, 2.5], [2.0, 3.0], [3.5, 4.5], [4.0, 5.0], [4.5, 5.5], [5.0, 6.0], [6.5, 7.5], [7.0, 8.0], [7.5, 8.5], [8.0, 9.0], [8.5, 9.5], [10.0, 11.0], [10.5, 11.5], [11.0, 12.0], [11.5, 12.5], [12.0, 13.0], [14.5, 15.5], [15.0, 16.0], [15.5, 16.5], [17.0, 18.0], [17.5, 18.5], [18.0, 19.0], [18.5, 19.5], [19.0, 20.0], [19.5, 20.5], [20.0, 21.0], [20.5, 21.5]],
    ["505545.mp3", [2.5, 3.5], [4.0, 5.0], [10.5, 11.5], [11.0, 12.0], [11.5, 12.5], [18.5, 19.5], [19.0, 20.0], [19.5, 20.5], [20.0, 21.0], [20.5, 21.5], [22.0, 23.0], [22.5, 23.5]],
    ["531042.mp3", [0.5, 1.5], [1.0, 2.0], [1.5, 2.5], [2.0, 3.0], [2.5, 3.5], [3.0, 4.0], [3.5, 4.5], [4.0, 5.0], [4.5, 5.5], [5.0, 6.0], [5.5, 6.5], [6.0, 7.0], [6.5, 7.5], [7.0, 8.0], [7.5, 8.5], [8.0, 9.0], [8.5, 9.5], [9.0, 10.0], [9.5, 10.5], [11.0, 12.0], [11.5, 12.5], [12.0, 13.0], [13.5, 14.5], [14.0, 15.0], [14.5, 15.5], [15.0, 16.0], [15.5, 16.5], [16.0, 17.0], [16.5, 17.5], [17.0, 18.0], [17.5, 18.5], [18.0, 19.0], [18.5, 19.5], [19.0, 20.0], [19.5, 20.5], [20.0, 21.0]],
]

list_segments_nad = [
    ["1-977-A-39.wav", [[0.0, 1.0]]],
    ["1-1791-A-26.wav", [[0.0, 1.0], [0.5, 1.5], [1.0, 2.0]]],
    ["1-7974-A-49.wav", [[0.0, 1.0], [0.5, 1.5], [1.0, 2.0], [1.5, 2.5], [2.0, 3.0], [3.5, 4.5], [4.0, 5.0]]],
    ["1-7974-B-49.wav", [[0.0, 1.0], [0.5, 1.5], [1.0, 2.0]]]
]

In [152]:
def extract_segments_data(list_time, folder_path):
    list_data = []
    
    for data_time in list_time:
        audio_name = data_time[0]
        audio_file, sr = load_file_audio(f"{folder_path}/{audio_name}")
        audio_file = prapemrosesan_downmixing(audio_file)
        audio_file, sr = prapemrosesan_resampling(audio_file, sr)
        
        time_segments = data_time[1:]
        for time_segment in time_segments:
            print(time_segments)
            audio_segment = audio_file[int(time_segment[0]*sr):int(time_segment[1]*sr)]
            rms = get_rms(audio_segment)
            zcr = get_zcr(audio_segment)
            lms = np.array(get_lms(audio_segment))
            list_data.append([{"rms":rms, "zcr":zcr, "lms":lms}])

    return list_data

# Main


In [148]:
print(list_segments_xc[0][1:])

[[1.5, 2.5], [2.0, 3.0], [2.5, 3.5], [3.0, 4.0], [3.5, 4.5], [5.0, 6.0], [8.5, 9.5], [9.0, 10.0], [9.5, 10.5], [10.0, 11.0], [10.5, 11.5], [14.0, 15.0], [14.5, 15.5], [15.0, 16.0], [15.5, 16.5], [16.0, 17.0], [16.5, 17.5]]


In [153]:
dir_data_xc = r"C:\Users\Lulay\Documents\GitHub\Dasar-Kecerdasan-Artificial_Tugas-Besar\Dataset\xeno-canto"
dir_data_nad = r"C:\Users\Lulay\Documents\GitHub\Dasar-Kecerdasan-Artificial_Tugas-Besar\Dataset\noise-audio-data"

data_xc = extract_segments_data(list_segments_xc, dir_data_xc)
data_nad = extract_segments_data(list_segments_nad, dir_data_nad)

[[1.5, 2.5], [2.0, 3.0], [2.5, 3.5], [3.0, 4.0], [3.5, 4.5], [5.0, 6.0], [8.5, 9.5], [9.0, 10.0], [9.5, 10.5], [10.0, 11.0], [10.5, 11.5], [14.0, 15.0], [14.5, 15.5], [15.0, 16.0], [15.5, 16.5], [16.0, 17.0], [16.5, 17.5]]
[[1.5, 2.5], [2.0, 3.0], [2.5, 3.5], [3.0, 4.0], [3.5, 4.5], [5.0, 6.0], [8.5, 9.5], [9.0, 10.0], [9.5, 10.5], [10.0, 11.0], [10.5, 11.5], [14.0, 15.0], [14.5, 15.5], [15.0, 16.0], [15.5, 16.5], [16.0, 17.0], [16.5, 17.5]]
[[1.5, 2.5], [2.0, 3.0], [2.5, 3.5], [3.0, 4.0], [3.5, 4.5], [5.0, 6.0], [8.5, 9.5], [9.0, 10.0], [9.5, 10.5], [10.0, 11.0], [10.5, 11.5], [14.0, 15.0], [14.5, 15.5], [15.0, 16.0], [15.5, 16.5], [16.0, 17.0], [16.5, 17.5]]
[[1.5, 2.5], [2.0, 3.0], [2.5, 3.5], [3.0, 4.0], [3.5, 4.5], [5.0, 6.0], [8.5, 9.5], [9.0, 10.0], [9.5, 10.5], [10.0, 11.0], [10.5, 11.5], [14.0, 15.0], [14.5, 15.5], [15.0, 16.0], [15.5, 16.5], [16.0, 17.0], [16.5, 17.5]]
[[1.5, 2.5], [2.0, 3.0], [2.5, 3.5], [3.0, 4.0], [3.5, 4.5], [5.0, 6.0], [8.5, 9.5], [9.0, 10.0], [9.5, 10.5

TypeError: int() argument must be a string, a bytes-like object or a real number, not 'list'

In [None]:
print(len(data_xc), (data_xc[0]))

223 1


In [144]:
list_rms_xc = []
list_zcr_xc = []
list_lms_xc = []

for data in data_xc:
    list_rms_xc.append(data[0]['rms'])
    list_zcr_xc.append(data[0]['zcr'])
    list_lms_xc.append(data[0]['lms'])
    
list_rms_nad = []
list_zcr_nad = []
list_lms_nad = []

for data in data_nad:
    list_rms_nad.append(data[0]['rms'])
    list_zcr_nad.append(data[0]['zcr'])
    list_lms_nad.append(data[0]['lms'])

In [145]:
list_rms_xc, list_rms_nad

([0.060411089271234626,
  0.060411089271234626,
  0.060411089271234626,
  0.060411089271234626,
  0.060411089271234626,
  0.060411089271234626,
  0.060411089271234626,
  0.060411089271234626,
  0.060411089271234626,
  0.060411089271234626,
  0.060411089271234626,
  0.060411089271234626,
  0.060411089271234626,
  0.060411089271234626,
  0.060411089271234626,
  0.060411089271234626,
  0.060411089271234626,
  0.06916548815857243,
  0.06916548815857243,
  0.06916548815857243,
  0.06916548815857243,
  0.06916548815857243,
  0.06916548815857243,
  0.06916548815857243,
  0.06916548815857243,
  0.06916548815857243,
  0.06916548815857243,
  0.06916548815857243,
  0.06916548815857243,
  0.06916548815857243,
  0.06916548815857243,
  0.06916548815857243,
  0.06916548815857243,
  0.06916548815857243,
  0.06916548815857243,
  0.06916548815857243,
  0.06916548815857243,
  0.06916548815857243,
  0.06916548815857243,
  0.06916548815857243,
  0.06916548815857243,
  0.06916548815857243,
  0.0691654881585

In [None]:
list_zcr_xc, list_zcr_nad

([4.789291666666666,
  4.746020833333334,
  5.0068125,
  5.541625,
  1.2939166666666666,
  3.4892083333333335,
  2.8875,
  2.6810833333333335,
  5.828541666666666,
  4.3989375,
  3.2639375],
 [0.364875, 0.07522916666666667, 0.2436875, 0.09058333333333334])