Library


In [1]:
import math
import pickle
import joblib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.signal
import scipy.fftpack
import librosa
import soundfile as sf
from scipy.signal import resample_poly
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
import lightgbm as lgb
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
import skfuzzy as fuzz
from skfuzzy import control as ctrl

Main Path


In [2]:
MAIN_PATH = r"C:\Users\Lulay\Documents\GitHub\Dasar-Kecerdasan-Artificial_Tugas-Besar"

Konstanta


In [3]:
SAMPLE_RATE = 48000
BATCH_SIZE = 32
SEGMENT_DURATION = int(1 * SAMPLE_RATE)
OVERLAP_DURATION = int(0.5 * SAMPLE_RATE)
RANDOM_STATE = 21
BUFFER_SIZE = 1000

Random Seed


In [4]:
RANDOM_SEED = 21

try:
    from sklearn.utils import check_random_state
    random_state = check_random_state(RANDOM_SEED)
except ImportError:
    pass

# Prapemrosesan


In [5]:
def load_file_audio(path):
    audio, sr = sf.read(path)
    return np.array(audio), sr

def prapemrosesan_downmixing(audio):
    if audio.ndim > 1:
        audio = np.mean(audio, axis=1)
    return audio.astype(np.float32)

def prapemrosesan_resampling(audio, sr):
    if sr == SAMPLE_RATE:
        return audio.copy(), SAMPLE_RATE
    
    ratio = SAMPLE_RATE / sr
    n_samples = int(np.round(len(audio) * ratio))
    
    x_old = np.linspace(0, 1, len(audio))
    x_new = np.linspace(0, 1, n_samples)
    return np.interp(x_new, x_old, audio), SAMPLE_RATE

def prapemrosesan_padding(audio):
    if np.mod(audio.shape[0], SEGMENT_DURATION) != 0:
        padding = SEGMENT_DURATION - (audio.shape[0] % SEGMENT_DURATION)
        audio = np.pad(audio, (0, padding))
    return audio

def prapemrosesan_splitting(audio):
    num_segments = int(np.floor((len(audio) - SEGMENT_DURATION) / OVERLAP_DURATION)) + 1
    segments = []

    for i in range(num_segments):
        start = int(i * OVERLAP_DURATION)
        end = int(start + SEGMENT_DURATION)
        segment = audio[start:end]
        if len(segment) < SEGMENT_DURATION:
            segment = np.pad(segment, (0, SEGMENT_DURATION - len(segment)), mode='constant')
        segments.append(segment)

    return np.array(segments)

# Ekstraksi Fitur


In [6]:
def get_rms(segment):
    return np.sqrt(np.mean(segment ** 2))

def get_zcr(segment):
    return np.sum(np.abs(np.diff(np.signbit(segment)))) / (len(segment) / SAMPLE_RATE)

def get_lms(segment):
    mel_spec = librosa.feature.melspectrogram(y=segment, sr=SAMPLE_RATE)
    return np.mean(mel_spec)

# Dataset


Raw


In [12]:
with open(r"D:\Kuliah\Matkul\Semester 4\DASAR KECERDASAN ARTIFICIAL (DKA)\[2] Tugas\[3] Tugas Besar\Dasar-Kecerdasan-Artificial_Tugas-Besar\Dataset\dataset_labelled.pkl", "rb") as f:
    dataset = pickle.load(f)

In [13]:
len(dataset)

793

Normalisasi, Split, dan Batching


In [14]:
X = [[data[3], data[4]] for data in dataset if data[2] != '2']
y = [int(data[2]) for data in dataset if data[2] != '2']

In [15]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=RANDOM_STATE)
X_test, X_val, y_test, y_val = train_test_split(X_test, y_test, test_size=0.5, random_state=RANDOM_STATE)

In [16]:
X_train = np.array(X_train)
X_test = np.array(X_test)
X_val = np.array(X_val)

X_train_reshaped = X_train.reshape(X_train.shape[0], -1)
X_test_reshaped = X_test.reshape(X_test.shape[0], -1)
X_val_reshaped = X_val.reshape(X_val.shape[0], -1)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_reshaped)
X_test_scaled = scaler.transform(X_test_reshaped)
X_val_scaled = scaler.transform(X_val_reshaped)

X_train_scaled = X_train_scaled.reshape(X_train.shape)
X_test_scaled = X_test_scaled.reshape(X_test.shape)
X_val_scaled = X_val_scaled.reshape(X_val.shape)

X_train_input = X_train_scaled.reshape(X_train_scaled.shape[0], -1)
X_test_input = X_test_scaled.reshape(X_test_scaled.shape[0], -1)
X_val_input = X_val_scaled.reshape(X_val_scaled.shape[0], -1)

In [17]:
display(
    X_train_input.shape,
    X_test_input.shape,
    X_val_input.shape
)

(437, 2)

(55, 2)

(55, 2)

# LGBM


Inisialisasi


In [18]:
lgbm_model = LGBMClassifier(random_state=RANDOM_STATE)

training


In [19]:
lgbm_model.fit(X_train_input, y_train)

[LightGBM] [Info] Number of positive: 172, number of negative: 265
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000446 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 292
[LightGBM] [Info] Number of data points in the train set: 437, number of used features: 2
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.393593 -> initscore=-0.432235
[LightGBM] [Info] Start training from score -0.432235


In [20]:
models = {
    "rf": RandomForestClassifier(random_state=21),
    "xg": XGBClassifier(random_state=21),
    "cat": CatBoostClassifier(random_state=21, verbose=0),
    "gb": GradientBoostingClassifier(random_state=21),
    "lr": LogisticRegression(random_state=21),
    "knn": KNeighborsClassifier(),
    "dt": DecisionTreeClassifier(random_state=21),
    "svm": SVC(random_state=21)
}

In [21]:
results = {}

for model_name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)
    
    results[model_name] = {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1_score': f1,
        'confusion_matrix': cm.tolist()
    }
    
    print(f"{model_name} Model:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print(f"Confusion Matrix:\n{cm}")
    print("-" * 40)


rf Model:
Accuracy: 0.9091
Precision: 0.9444
Recall: 0.8095
F1 Score: 0.8718
Confusion Matrix:
[[33  1]
 [ 4 17]]
----------------------------------------
xg Model:
Accuracy: 0.8727
Precision: 0.8889
Recall: 0.7619
F1 Score: 0.8205
Confusion Matrix:
[[32  2]
 [ 5 16]]
----------------------------------------
cat Model:
Accuracy: 0.9455
Precision: 1.0000
Recall: 0.8571
F1 Score: 0.9231
Confusion Matrix:
[[34  0]
 [ 3 18]]
----------------------------------------
gb Model:
Accuracy: 0.8909
Precision: 0.9412
Recall: 0.7619
F1 Score: 0.8421
Confusion Matrix:
[[33  1]
 [ 5 16]]
----------------------------------------
lr Model:
Accuracy: 0.9273
Precision: 0.9474
Recall: 0.8571
F1 Score: 0.9000
Confusion Matrix:
[[33  1]
 [ 3 18]]
----------------------------------------
knn Model:
Accuracy: 0.7818
Precision: 0.9091
Recall: 0.4762
F1 Score: 0.6250
Confusion Matrix:
[[33  1]
 [11 10]]
----------------------------------------
dt Model:
Accuracy: 0.8727
Precision: 0.8889
Recall: 0.7619
F1 Score

# CNN


Inisialisasi


In [22]:
# class CNN(nn.Module):
#     def __init__(self, num_classes=10):
#         super(CNN, self).__init__()
#         test_input = np.random.rand(SAMPLE_RATE * SEGMENT_DURATION)
#         lms = get_lms(test_input)
#         in_channels = 1
#         self.conv1 = nn.Conv2d(in_channels, 64, kernel_size=3, padding=1)
#         self.bn1 = nn.BatchNorm2d(64)
#         self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
#         self.bn2 = nn.BatchNorm2d(128)
#         self.conv3 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
#         self.bn3 = nn.BatchNorm2d(256)
#         self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
#         self.gap = nn.AdaptiveAvgPool2d(1)
#         self.fc1 = nn.Linear(256, 256)
#         self.dropout = nn.Dropout(0.5)
#         self.fc2 = nn.Linear(256, num_classes)
#     def forward(self, x):
#         x = self.pool(F.relu(self.bn1(self.conv1(x))))
#         x = self.pool(F.relu(self.bn2(self.conv2(x))))
#         x = self.pool(F.relu(self.bn3(self.conv3(x))))
#         x = self.gap(x)
#         x = x.view(x.size(0), -1)
#         x = F.relu(self.fc1(x))
#         x = self.dropout(x)
#         x = self.fc2(x)
#         return x

training


# Conformer


Inisialisasi


In [23]:
# config = ConformerConfig(
#     hidden_size=144,
#     num_attention_heads=4,
#     intermediate_size=576,
#     conv_kernel_size=32,
#     num_hidden_layers=8,
#     input_feat_per_channel=n_mels,
#     input_channels=1,
#     max_position_embeddings=time_steps
# )

# conformer_model = TFConformerModel(config)

# input_layer = tf.keras.layers.Input(shape=(n_mels, time_steps))
# expand_dim = tf.keras.layers.Reshape((1, n_mels, time_steps))(input_layer)
# conv_proj = tf.keras.layers.Conv2D(
#     filters=config.hidden_size,
#     kernel_size=(3, 3),
#     padding='same',
#     activation='relu'
# )(expand_dim)
# squeeze_dim = tf.keras.layers.Reshape((n_mels, time_steps, config.hidden_size))(conv_proj)
# conformer_output = conformer_model(squeeze_dim).last_hidden_state
# gap = tf.keras.layers.GlobalAveragePooling1D()(conformer_output)
# output = tf.keras.layers.Dense(num_classes, activation='softmax')(gap)

# conformer_model = tf.keras.Model(inputs=input_layer, outputs=output)

training


# Evaluasi


LGBM


In [24]:
y_pred = lgbm_model.predict(X_val_input)

print(f"Accuracy: {accuracy_score(y_val, y_pred)}")
print(f"Precision: {precision_score(y_val, y_pred)}")
print(f"Recall: {recall_score(y_val, y_pred)}")
print(f"F1 Score: {f1_score(y_val, y_pred)}")
print(f"Confusion Matrix:\n {confusion_matrix(y_val, y_pred)}")

y_pred = lgbm_model.predict(X_test_input)

print(f"Accuracy: {accuracy_score(y_test, y_pred)}")
print(f"Precision: {precision_score(y_test, y_pred)}")
print(f"Recall: {recall_score(y_test, y_pred)}")
print(f"F1 Score: {f1_score(y_test, y_pred)}")
print(f"Confusion Matrix:\n {confusion_matrix(y_test, y_pred)}")

Accuracy: 0.9090909090909091
Precision: 0.9047619047619048
Recall: 0.8636363636363636
F1 Score: 0.8837209302325582
Confusion Matrix:
 [[31  2]
 [ 3 19]]
Accuracy: 0.8909090909090909
Precision: 0.9411764705882353
Recall: 0.7619047619047619
F1 Score: 0.8421052631578947
Confusion Matrix:
 [[33  1]
 [ 5 16]]


CNN


Conformer
