Library


In [75]:
import math
import pickle
import joblib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.signal
import scipy.fftpack
import librosa
import soundfile as sf
from scipy.signal import resample_poly
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
import lightgbm as lgb
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
import skfuzzy as fuzz
from skfuzzy import control as ctrl
from sklearn.tree import DecisionTreeClassifier, export_text
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

Main Path


In [76]:
# MAIN_PATH = r"C:\Users\Lulay\Documents\GitHub\Dasar-Kecerdasan-Artificial_Tugas-Besar\Dataset"
MAIN_PATH = r"D:\Kuliah\Matkul\Semester 4\DASAR KECERDASAN ARTIFICIAL (DKA)\[2] Tugas\[3] Tugas Besar\Dasar-Kecerdasan-Artificial_Tugas-Besar\Dataset"

Konstanta


In [77]:
SAMPLE_RATE = 48000
BATCH_SIZE = 32
SEGMENT_DURATION = int(1 * SAMPLE_RATE)
OVERLAP_DURATION = int(0.5 * SAMPLE_RATE)
RANDOM_STATE = 21
BUFFER_SIZE = 1000

Random Seed


In [78]:
RANDOM_SEED = 21

try:
    from sklearn.utils import check_random_state
    random_state = check_random_state(RANDOM_SEED)
except ImportError:
    pass

# Prapemrosesan


In [79]:
def load_file_audio(path):
    audio, sr = sf.read(path)
    return np.array(audio), sr

def prapemrosesan_downmixing(audio):
    if audio.ndim > 1:
        audio = np.mean(audio, axis=1)
    return audio.astype(np.float32)

def prapemrosesan_resampling(audio, sr):
    if sr == SAMPLE_RATE:
        return audio.copy(), SAMPLE_RATE
    
    ratio = SAMPLE_RATE / sr
    n_samples = int(np.round(len(audio) * ratio))
    
    x_old = np.linspace(0, 1, len(audio))
    x_new = np.linspace(0, 1, n_samples)
    return np.interp(x_new, x_old, audio), SAMPLE_RATE

def prapemrosesan_padding(audio):
    if np.mod(audio.shape[0], SEGMENT_DURATION) != 0:
        padding = SEGMENT_DURATION - (audio.shape[0] % SEGMENT_DURATION)
        audio = np.pad(audio, (0, padding))
    return audio

def prapemrosesan_splitting(audio):
    num_segments = int(np.floor((len(audio) - SEGMENT_DURATION) / OVERLAP_DURATION)) + 1
    segments = []

    for i in range(num_segments):
        start = int(i * OVERLAP_DURATION)
        end = int(start + SEGMENT_DURATION)
        segment = audio[start:end]
        if len(segment) < SEGMENT_DURATION:
            segment = np.pad(segment, (0, SEGMENT_DURATION - len(segment)), mode='constant')
        segments.append(segment)

    return np.array(segments)

# Ekstraksi Fitur


In [80]:
def get_rms(segment):
    return np.sqrt(np.mean(segment ** 2))

def get_zcr(segment):
    return np.sum(np.abs(np.diff(np.signbit(segment)))) / (len(segment) / SAMPLE_RATE)

def get_lms(segment):
    mel_spec = librosa.feature.melspectrogram(y=segment, sr=SAMPLE_RATE)
    return np.mean(mel_spec)

# Dataset


Raw


In [81]:
with open(f"{MAIN_PATH}/dataset_preprocessed.pkl", "rb") as f:
    dataset = pickle.load(f)

In [82]:
len(dataset)

793

Normalisasi, Split, dan Batching


In [83]:
dataset[0][1:]

[[0.0, 1.0],
 2,
 0.012409585523878887,
 11094.0,
 array([[2.16197470e-13, 1.03119000e-09, 1.56593188e-07, ...,
         7.38014642e-07, 4.86190822e-07, 3.69650615e-05],
        [1.71594322e-13, 1.73647630e-09, 6.84359839e-08, ...,
         1.20641453e-06, 1.28787660e-06, 3.60378650e-05],
        [2.33699181e-13, 1.29767993e-09, 1.06911272e-08, ...,
         1.49087633e-06, 7.68526166e-06, 5.72646908e-05],
        ...,
        [3.98201299e-13, 4.03694204e-11, 6.71124721e-10, ...,
         2.79048998e-07, 2.46822598e-07, 8.22193067e-07],
        [5.72516160e-13, 3.12001303e-11, 4.63515168e-10, ...,
         3.61625092e-07, 2.90140389e-07, 8.36865461e-07],
        [8.76847812e-13, 2.16880748e-11, 3.50852229e-10, ...,
         2.80869441e-07, 2.98575333e-07, 8.50766193e-07]])]

In [84]:
X = [data[5] for data in dataset]
original_labels = [int(data[2]) for data in dataset]
y = [1 if label in (0, 1) else 2 for label in original_labels]
X = np.array(X)

In [85]:
X[0].shape

(128, 94)

In [86]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=RANDOM_STATE, stratify=y)
X_test, X_val, y_test, y_val = train_test_split(X_test, y_test, test_size=0.5, random_state=RANDOM_STATE, stratify=y_test)

In [87]:
X_train_reshaped = X_train.reshape(X_train.shape[0], -1)  
scl = StandardScaler()
X_train_scaled = scl.fit_transform(X_train_reshaped)
X_test_scaled = scl.transform(X_test.reshape(X_test.shape[0], -1))
X_val_scaled = scl.transform(X_val.reshape(X_val.shape[0], -1))


In [88]:
display(
    X_train_scaled.shape,
    X_test_scaled.shape,
    X_val_scaled.shape
)

(634, 12032)

(79, 12032)

(80, 12032)

# LGBM


Inisialisasi


In [89]:
lgbm_model = LGBMClassifier(random_state=RANDOM_STATE, verbose=-1)

training


In [90]:
lgbm_model.fit(X_train_scaled, y_train)

# Decision Tree


Inisialisasi


In [91]:
# dt_model = DecisionTreeClassifier(random_state=RANDOM_STATE)

training


In [92]:
# dt_model.fit(X_train_scaled, y_train)

# CNN


Inisialisasi


In [93]:
class CNN(nn.Module):
    def __init__(self, num_classes=10):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 64, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(64)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(128)
        self.conv3 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(256)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.gap = nn.AdaptiveAvgPool2d(1)
        self.fc1 = nn.Linear(256, 256)
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(256, num_classes)

    def forward(self, x):
        x = self.pool(F.relu(self.bn1(self.conv1(x))))
        x = self.pool(F.relu(self.bn2(self.conv2(x))))
        x = self.pool(F.relu(self.bn3(self.conv3(x))))
        x = self.gap(x)
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

training


In [94]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = CNN(num_classes=10).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

X_train_scaled_tensor = torch.tensor(X_train_scaled, dtype=torch.float32).reshape(X_train_scaled.shape[0], 1, 128, 94).to(device)
X_val_scaled_tensor = torch.tensor(X_val_scaled, dtype=torch.float32).reshape(X_val_scaled.shape[0], 1, 128, 94).to(device)

num_epochs = 500
batch_size = 128

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    y_train_preds = []
    y_train_true = []
    
    for i in range(0, len(X_train_scaled_tensor), batch_size):
        inputs = X_train_scaled_tensor[i:i+batch_size]
        targets = torch.tensor(y_train[i:i+batch_size], dtype=torch.long).to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        
        loss = criterion(outputs, targets)
        
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        _, predicted = torch.max(outputs, 1)
        y_train_preds.extend(predicted.cpu().numpy())
        y_train_true.extend(targets.cpu().numpy())

    train_accuracy = accuracy_score(y_train_true, y_train_preds)
    train_f1_score = f1_score(y_train_true, y_train_preds, average='weighted')

    model.eval()
    y_val_preds = []
    y_val_true = []
    
    with torch.no_grad():
        for i in range(0, len(X_val_scaled_tensor), batch_size):
            inputs = X_val_scaled_tensor[i:i+batch_size]
            targets = torch.tensor(y_val[i:i+batch_size], dtype=torch.long).to(device)

            outputs = model(inputs)

            _, predicted = torch.max(outputs, 1)
            y_val_preds.extend(predicted.cpu().numpy())
            y_val_true.extend(targets.cpu().numpy())

    val_accuracy = accuracy_score(y_val_true, y_val_preds)
    val_f1_score = f1_score(y_val_true, y_val_preds, average='weighted')

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(X_train_scaled_tensor):.4f}, "
          f"Train Accuracy: {train_accuracy:.4f}, Train F1 Score: {train_f1_score:.4f}, "
          f"Val Accuracy: {val_accuracy:.4f}, Val F1 Score: {val_f1_score:.4f}")


Epoch [1/500], Loss: 0.0149, Train Accuracy: 0.5710, Train F1 Score: 0.6545, Val Accuracy: 0.8375, Val F1 Score: 0.8352
Epoch [2/500], Loss: 0.0086, Train Accuracy: 0.7634, Train F1 Score: 0.7647, Val Accuracy: 0.7250, Val F1 Score: 0.6858
Epoch [3/500], Loss: 0.0046, Train Accuracy: 0.7271, Train F1 Score: 0.7246, Val Accuracy: 0.6375, Val F1 Score: 0.5462
Epoch [4/500], Loss: 0.0040, Train Accuracy: 0.7539, Train F1 Score: 0.7462, Val Accuracy: 0.7375, Val F1 Score: 0.7032
Epoch [5/500], Loss: 0.0039, Train Accuracy: 0.7603, Train F1 Score: 0.7561, Val Accuracy: 0.7750, Val F1 Score: 0.7529
Epoch [6/500], Loss: 0.0039, Train Accuracy: 0.7713, Train F1 Score: 0.7643, Val Accuracy: 0.7750, Val F1 Score: 0.7571
Epoch [7/500], Loss: 0.0037, Train Accuracy: 0.7666, Train F1 Score: 0.7613, Val Accuracy: 0.7875, Val F1 Score: 0.7723
Epoch [8/500], Loss: 0.0036, Train Accuracy: 0.7934, Train F1 Score: 0.7883, Val Accuracy: 0.7875, Val F1 Score: 0.7783
Epoch [9/500], Loss: 0.0036, Train Accur

# Conformer


Inisialisasi


In [95]:
# config = ConformerConfig(
#     hidden_size=144,
#     num_attention_heads=4,
#     intermediate_size=576,
#     conv_kernel_size=32,
#     num_hidden_layers=8,
#     input_feat_per_channel=n_mels,
#     input_channels=1,
#     max_position_embeddings=time_steps
# )

# conformer_model = TFConformerModel(config)

# input_layer = tf.keras.layers.Input(shape=(n_mels, time_steps))
# expand_dim = tf.keras.layers.Reshape((1, n_mels, time_steps))(input_layer)
# conv_proj = tf.keras.layers.Conv2D(
#     filters=config.hidden_size,
#     kernel_size=(3, 3),
#     padding='same',
#     activation='relu'
# )(expand_dim)
# squeeze_dim = tf.keras.layers.Reshape((n_mels, time_steps, config.hidden_size))(conv_proj)
# conformer_output = conformer_model(squeeze_dim).last_hidden_state
# gap = tf.keras.layers.GlobalAveragePooling1D()(conformer_output)
# output = tf.keras.layers.Dense(num_classes, activation='softmax')(gap)

# conformer_model = tf.keras.Model(inputs=input_layer, outputs=output)

training


# Model Lain


inisialisasi


In [96]:
models = {
    "rf": RandomForestClassifier(random_state=RANDOM_STATE),
    "xg": XGBClassifier(random_state=RANDOM_STATE),
    "cat": CatBoostClassifier(random_state=RANDOM_STATE, verbose=0),
    "gb": GradientBoostingClassifier(random_state=RANDOM_STATE),
    "lr": LogisticRegression(random_state=RANDOM_STATE),
    "knn": KNeighborsClassifier(),
    "dt": DecisionTreeClassifier(random_state=RANDOM_STATE),
    "svm": SVC(random_state=RANDOM_STATE)
}

training


In [97]:
results = {}

for model_name, model in models.items():
    model.fit(X_train_input, y_train)
    y_pred = model.predict(X_test_input)
    
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)
    
    results[model_name] = {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1_score': f1,
        'confusion_matrix': cm.tolist()
    }

NameError: name 'X_train_input' is not defined

# Evaluasi


LGBM


In [None]:
y_pred = lgbm_model.predict(X_val_scaled)

print("Validasi")
print(f"Accuracy: {accuracy_score(y_val, y_pred)}")
print(f"Precision: {precision_score(y_val, y_pred)}")
print(f"Recall: {recall_score(y_val, y_pred)}")
print(f"F1 Score: {f1_score(y_val, y_pred)}")
print(f"Confusion Matrix:\n {confusion_matrix(y_val, y_pred)}")

y_pred = lgbm_model.predict(X_test_scaled)

print("\n")
print("Test")
print(f"Accuracy: {accuracy_score(y_test, y_pred)}")
print(f"Precision: {precision_score(y_test, y_pred)}")
print(f"Recall: {recall_score(y_test, y_pred)}")
print(f"F1 Score: {f1_score(y_test, y_pred)}")
print(f"Confusion Matrix:\n {confusion_matrix(y_test, y_pred)}")

CNN


Conformer


Descision Tree


In [None]:
y_pred = dt_model.predict(X_val_scaled)

print("Validasi")
print(f"Accuracy: {accuracy_score(y_val, y_pred)}")
print(f"Precision: {precision_score(y_val, y_pred)}")
print(f"Recall: {recall_score(y_val, y_pred)}")
print(f"F1 Score: {f1_score(y_val, y_pred)}")
print(f"Confusion Matrix:\n {confusion_matrix(y_val, y_pred)}")

y_pred = dt_model.predict(X_test_scaled)

print("\n")
print("Test")
print(f"Accuracy: {accuracy_score(y_test, y_pred)}")
print(f"Precision: {precision_score(y_test, y_pred)}")
print(f"Recall: {recall_score(y_test, y_pred)}")
print(f"F1 Score: {f1_score(y_test, y_pred)}")
print(f"Confusion Matrix:\n {confusion_matrix(y_test, y_pred)}")

In [None]:
feature_names = ['RMS', 'ZCR']
tree_rules = export_text(dt_model, feature_names=feature_names)
print("Extracted Decision Rules:")
print(tree_rules)

Model Lain


In [None]:
for model_name, model in models.items():
    print(f"{model_name} Model:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print(f"Confusion Matrix:\n{cm}")
    print("-" * 40)

# Visualisasi Hasil

In [None]:
def load_and_plot(path, list_result, final_time):
    file_name = os.path.basename(path)
    audio, sr = librosa.load(path, sr=SAMPLE_RATE)
    time_axis = np.linspace(0, len(audio) / sr, len(audio))

    plt.figure(figsize=(20, 4))
    plt.plot(time_axis, audio, label="Amplitude")
    plt.title(f"Audio waveform with loud segments marked - {file_name}") 
    plt.xlabel("Time (s)")
    plt.ylabel("Amplitude")

    for i in enumerate(list_result):
        start_time = i[1][0]
        end_time = i[1][1]
        plt.axvspan(start_time, end_time, color='red', alpha=0.3)

    plt.xticks(np.arange(0, final_time, 0.5))
    plt.grid(which='both', alpha=0.5)
    plt.xlim(0, final_time)
    plt.ylim(-1, 1)
    plt.tight_layout()
    plt.legend()
    plt.show()

### LGBM

In [None]:
def single_inference(path):
    audio, sr = load_file_audio(path)
    audio = prapemrosesan_downmixing(audio)
    audio, sr = prapemrosesan_resampling(audio, sr)
    audio = prapemrosesan_padding(audio)
    segments = prapemrosesan_splitting(audio)
    
    list_result = []
    for index, segment in enumerate(segments):
        rms = get_rms(segment)
        zcr = get_zcr(segment)
        X = np.array([rms, zcr])
        X = scl.transform(X.reshape(1, -1))

        y_pred = lgbm_model.predict(X)
        if y_pred == 0:
            list_result.append(([index*0.5, index*0.5 + 1]))
    return list_result

In [None]:
# for file in os.listdir(f"{MAIN_PATH}/noise-audio-data"):
#     list_result = single_inference(f"{MAIN_PATH}/noise-audio-data/{file}")
#     print(list_result)
#     load_and_plot(f"{MAIN_PATH}/noise-audio-data/{file}", list_result, 5)  

In [None]:
# for file in os.listdir(f"{MAIN_PATH}/xeno-canto"):
#     list_result = single_inference(f"{MAIN_PATH}/xeno-canto/{file}")
#     print(list_result)
#     load_and_plot(f"{MAIN_PATH}/xeno-canto/{file}", list_result, 25)  