# **Import Libraries**

In [None]:
!pip install -U noisereduce tensorflowjs --quiet

In [None]:
# Common
import os
import gdown
import random
import librosa
import numpy as np
import soundfile as sf
import tensorflow as tf
import noisereduce as nr
import IPython.display as ipd
import matplotlib.pyplot as plt

# Datasets
from datasets import Audio, load_from_disk, Dataset, concatenate_datasets, ClassLabel
from collections import Counter, defaultdict

# Modelling
import tensorflowjs as tfjs
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report
from tensorflow.keras import models, layers, callbacks, optimizers
from sklearn.utils.class_weight import compute_class_weight

In [None]:
# Secret
# The dataset folder URLs are read from Kaggle user secrets instead of being
# hard-coded in the notebook.
from kaggle_secrets import UserSecretsClient

user_secrets = UserSecretsClient()
# One URL per pipeline stage; each is passed to gdown.download_folder further down.
BirdSet_RAW_URL = user_secrets.get_secret("BirdSet_RAW_URL")
BirdSet_10_16Khz_URL = user_secrets.get_secret("BirdSet_10_16Khz_URL")
BirdSet_10_16Khz_Features_URL = user_secrets.get_secret("BirdSet_10_16Khz_Features_URL")
BirdSet_10_16Khz_Balanced_URL = user_secrets.get_secret("BirdSet_10_16Khz_Balanced_URL")

# **Load Dataset**

In [None]:
# Download the raw dataset folder to ../temp/BirdSet_RAW (quiet=True silences gdown output).
gdown.download_folder(BirdSet_RAW_URL, output="../temp/BirdSet_RAW", quiet=True)

In [None]:
# Load the downloaded folder with datasets.load_from_disk and display it.
dataset = load_from_disk("../temp/BirdSet_RAW")
dataset

In [None]:
# Work on the "train" split only.
train_ds = dataset["train"]
train_ds

# **Exploratory Data Analysis**

In [None]:
# Class names of the "ebird_code" label feature; indexed by label id below.
ebird_code_names = train_ds.features["ebird_code"].names
ebird_code_names

In [None]:
label_counter = Counter()

# Count by using ebird_code as the key.
# Kept so existing `.map(count_label_ids)` calls keep working.
def count_label_ids(example):
    """Add one occurrence of ``example["ebird_code"]`` to ``label_counter``."""
    label_id = example["ebird_code"]
    label_counter.update([label_id])

# Count straight from the label column rather than `train_ds.map(count_label_ids)`:
# `map` visits every row with all of its columns just to read a single integer.
label_counter.update(train_ds["ebird_code"])

# Show the 10 most frequent classes
for label_id, count in label_counter.most_common(10):
    label_name = ebird_code_names[label_id]
    print(f"{label_name}: {count}")

# **Preprocessing Dataset**

## **Select Top 10 Labels**

In [None]:
# Download the raw dataset folder again so this section can be run on its own.
gdown.download_folder(BirdSet_RAW_URL, output="../temp/BirdSet_RAW", quiet=True)

In [None]:
dataset = load_from_disk("../temp/BirdSet_RAW")
# Declare the "audio" column as 16 kHz audio.
dataset = dataset.cast_column("audio", Audio(sampling_rate=16_000))
dataset

In [None]:
train_ds = dataset["train"]
train_ds

In [None]:
# NOTE(review): label_counter must already hold the counts computed in the
# Exploratory Data Analysis section — confirm that cell was run first.
# The dict maps label id -> count; only its keys are used for the membership test.
top_10_labels = dict(label_counter.most_common(10))
train_ds = train_ds.filter(lambda x: x["ebird_code"] in top_10_labels)
train_ds

In [None]:
# Build the mapping: old label id -> new label id (0-9)
ebird_code_names = train_ds.features["ebird_code"].names
ebird_codes = set(train_ds["ebird_code"])
# NOTE(review): new ids follow the iteration order of the set, which is not
# guaranteed to be sorted; the same set is iterated again below for the names,
# so ids and names stay aligned with each other.
label2idx = {label: idx for idx, label in enumerate(ebird_codes)}

def remap_labels(example):
    """Replace the example's old ``ebird_code`` id with its new id from ``label2idx``."""
    example["ebird_code"] = label2idx[example["ebird_code"]]
    return example

# Apply to the whole dataset
train_ds = train_ds.map(remap_labels)

# Create the new ClassLabel object (names listed in the same order as the new ids)
new_class_label = ClassLabel(num_classes=len(ebird_codes), names=[ebird_code_names[ebird_code] for ebird_code in ebird_codes])

# Re-cast the label column so its metadata changes as well
train_ds = train_ds.cast_column("ebird_code", new_class_label)

In [None]:
# Persist the filtered, relabelled dataset, using one writer process per CPU core.
train_ds.save_to_disk("../temp/BirdSet_10_16Khz", num_proc=os.cpu_count())

In [None]:
# Pack the saved folder into a gzip-compressed tarball in the working directory.
!tar -czvf "./BirdSet_10_16Khz.tar.gz" "../temp/BirdSet_10_16Khz"

## **Event Based Audio**

In [None]:
# Download the top-10 / 16 kHz dataset folder so this section can start from it.
gdown.download_folder(BirdSet_10_16Khz_URL, output="../temp/BirdSet_10_16Khz", quiet=True)

In [None]:
# Load the downloaded folder and display it.
train_ds = load_from_disk("../temp/BirdSet_10_16Khz")
train_ds

In [None]:
def cut_events(audio_array, sampling_rate, detected_events, min_len, max_len):
    """Extract the detected events from a waveform and join them end to end.

    Each event shorter than ``min_len`` seconds is widened symmetrically. If
    the widened window would cross the start or end of the recording it is
    shifted back inside, so the event still gets ``min_len`` seconds whenever
    the recording is long enough. Windows longer than ``max_len`` seconds are
    cut at ``start + max_len``.

    Args:
        audio_array: 1-D sequence of samples.
        sampling_rate: Number of samples per second.
        detected_events: Iterable of ``(start, end)`` pairs in seconds.
        min_len: Minimum window length in seconds.
        max_len: Maximum window length in seconds.

    Returns:
        The concatenated event samples, or ``audio_array`` itself when
        ``detected_events`` is empty.
    """
    total_duration = len(audio_array) / sampling_rate
    segments = []

    for start, end in detected_events:
        shortfall = min_len - (end - start)

        if shortfall > 0:
            start -= shortfall / 2
            end += shortfall / 2

            # Plain clamping would shrink the window below min_len near the
            # edges, so move the overshoot to the opposite side instead.
            if start < 0:
                end = min(total_duration, end - start)
                start = 0
            elif end > total_duration:
                start = max(0, start - (end - total_duration))
                end = total_duration

        if end - start > max_len:
            end = start + max_len

        start_idx = int(start * sampling_rate)
        end_idx = int(end * sampling_rate)
        segments.append(audio_array[start_idx:end_idx])

    if segments:
        return np.concatenate(segments)
    return audio_array  # fallback when there are no events


def cut_time_range(audio_array, sampling_rate, start, end):
    """Return the samples between ``start`` and ``end`` (both in seconds).

    The second offsets are converted to sample indices by multiplying with
    ``sampling_rate`` and truncating to ``int``.
    """
    window = slice(int(start * sampling_rate), int(end * sampling_rate))
    return audio_array[window]


def denoise_audio(audio_array, sampling_rate):
    """Run ``noisereduce.reduce_noise`` on the waveform with its default settings."""
    cleaned = nr.reduce_noise(y=audio_array, sr=sampling_rate)
    return cleaned


def pad_or_trim(audio_array, sampling_rate, max_len):
    """Force the waveform to exactly ``max_len`` seconds.

    Shorter input is zero-padded at the end (``np.pad`` default mode);
    input that is already long enough is cut to the first
    ``int(max_len * sampling_rate)`` samples.
    """
    target_len = int(max_len * sampling_rate)
    missing = target_len - len(audio_array)

    if missing <= 0:
        return audio_array[:target_len]

    return np.pad(audio_array, (0, missing))

def load_audio_all_events(sample, min_len, max_len):
    """Turn one dataset row into a fixed-length, denoised waveform record.

    The waveform is cut to the row's detected events when there are any,
    otherwise to its ``start_time``/``end_time`` range when both are set,
    otherwise left whole. It is then denoised and padded or trimmed to
    ``max_len`` seconds.

    Args:
        sample: Row with ``ebird_code``, ``audio``, ``detected_events``,
            ``start_time`` and ``end_time`` entries.
        min_len: Minimum event window length in seconds (for ``cut_events``).
        max_len: Maximum event window length and final clip length, in seconds.

    Returns:
        Dict with ``ebird_code``, ``sampling_rate`` and ``features`` (the waveform).
    """
    ebird_code = sample["ebird_code"]

    # Read the audio fields by key: positional unpacking of `.values()` breaks
    # as soon as the entry's field order or container type changes.
    audio = sample["audio"]
    audio_array = audio["array"]
    sampling_rate = audio["sampling_rate"]

    # Cut by detected events, falling back to the annotated time range.
    detected_events = sample["detected_events"]
    if detected_events is not None and len(detected_events) > 0:
        audio_array = cut_events(audio_array, sampling_rate, detected_events, min_len, max_len)
    elif sample["start_time"] is not None and sample["end_time"] is not None:
        audio_array = cut_time_range(audio_array, sampling_rate, sample["start_time"], sample["end_time"])

    # Denoising + pad/trim
    audio_array = denoise_audio(audio_array, sampling_rate)
    audio_array = pad_or_trim(audio_array, sampling_rate, max_len)

    return {
        "ebird_code": ebird_code,
        "sampling_rate": sampling_rate,
        "features": audio_array
    }

In [None]:
# Convert every row to a 5-second clip (min_len = max_len = 5) and drop all
# original columns, keeping only what load_audio_all_events returns.
feature_ds = train_ds.map(
    lambda x: load_audio_all_events(x, min_len=5, max_len=5),
    remove_columns=train_ds.column_names,
    desc="Select Audio by Event..."
)
feature_ds

In [None]:
# Persist the waveform dataset, using one writer process per CPU core.
feature_ds.save_to_disk("../temp/BirdSet_10_16Khz_Features", num_proc=os.cpu_count())

In [None]:
# Pack the saved folder into a gzip-compressed tarball in the working directory.
!tar -czvf "./BirdSet_10_16Khz_Features.tar.gz" "../temp/BirdSet_10_16Khz_Features"

## **Augmentation**

In [None]:
# Download the waveform-feature dataset folder so this section can start from it.
gdown.download_folder(BirdSet_10_16Khz_Features_URL, output="../temp/BirdSet_10_16Khz_Features", quiet=True)

In [None]:
# Load the downloaded folder and display it.
feature_ds = load_from_disk("../temp/BirdSet_10_16Khz_Features")
feature_ds

In [None]:
label_counter = Counter()

# Count by using ebird_code as the key.
# Kept so existing `.map(count_label_ids)` calls keep working.
def count_label_ids(example):
    """Add one occurrence of ``example["ebird_code"]`` to ``label_counter``."""
    label_id = example["ebird_code"]
    label_counter.update([label_id])

# Count straight from the label column rather than `feature_ds.map(count_label_ids)`:
# `map` visits every row with all of its columns just to read a single integer.
label_counter.update(feature_ds["ebird_code"])

ebird_code_names = feature_ds.features["ebird_code"].names

# Show the 10 most frequent classes
for label_id, count in label_counter.most_common(10):
    label_name = ebird_code_names[label_id]
    print(f"{label_name} (id: {label_id}): {count}")

In [None]:
# === Augmentation functions ===
# Add random noise
def add_noise(audio_array, noise_factor=0.005):
    """Return the waveform plus standard-normal noise scaled by ``noise_factor``."""
    gaussian = np.random.randn(len(audio_array))
    scaled_noise = noise_factor * gaussian
    return audio_array + scaled_noise

# Shift the amplitude (volume)
def change_volume(audio_array, gain_db_range=(-6, 6)):
    """Scale the waveform by a random gain drawn uniformly from ``gain_db_range``.

    Args:
        audio_array: Waveform as a NumPy array or any array-like (e.g. a list).
        gain_db_range: ``(low, high)`` bounds of the gain in decibels.

    Returns:
        NumPy array with every sample multiplied by ``10 ** (gain / 20)``.
    """
    gain = np.random.uniform(*gain_db_range)
    factor = 10.0 ** (gain / 20.0)
    # np.asarray: a plain Python list cannot be multiplied by a float.
    return np.asarray(audio_array) * factor

# Change the pitch without changing the length
def apply_pitch_shift(audio_array, sampling_rate, steps_range=(-2, 2)):
    """Pitch-shift the waveform by a random step count drawn uniformly from ``steps_range``.

    The input is converted with ``np.array`` before being handed to
    ``librosa.effects.pitch_shift``.
    """
    shift_steps = np.random.uniform(*steps_range)
    waveform = np.array(audio_array)
    return librosa.effects.pitch_shift(y=waveform, sr=sampling_rate, n_steps=shift_steps)

# Invert the signal (like a mirror reflection)
def invert_waveform(audio_array):
    """Return the waveform with the sign of every sample flipped.

    Accepts a NumPy array or any array-like; a plain Python list does not
    support unary minus, so the input is converted with ``np.asarray`` first.
    """
    return -np.asarray(audio_array)

# Clip the signal (limit the amplitude)
def clip_audio(audio_array, clip_factor=0.8):
    """Clip the waveform to ``clip_factor`` times its largest absolute sample."""
    peak = np.max(np.abs(audio_array))
    ceiling = peak * clip_factor
    return np.clip(audio_array, -ceiling, ceiling)

# Augment a single sample
def safe_augment_sample(example):
    """Apply a random subset of the augmentations to one example.

    Each augmentation is applied independently: noise, volume change and
    pitch shift with probability 0.5, inversion and clipping with
    probability 0.3.

    Args:
        example: Dict with ``sampling_rate`` and ``features`` (the waveform).

    Returns:
        The same dict, with ``features`` replaced by the augmented waveform.
    """
    # Read the fields by key instead of relying on the dict's value order.
    sampling_rate = example["sampling_rate"]
    # Rows read from a dataset deliver the waveform as a Python list; the
    # arithmetic in the helpers below needs a NumPy array.
    features = np.asarray(example["features"])

    if random.random() < 0.5:
        features = add_noise(features)

    if random.random() < 0.5:
        features = change_volume(features)

    if random.random() < 0.5:
        features = apply_pitch_shift(features, sampling_rate)

    if random.random() < 0.3:
        features = invert_waveform(features)

    if random.random() < 0.3:
        features = clip_audio(features)

    example["features"] = features
    return example

# Compute the initial class distribution
def get_class_counts(ds):
    """Return a ``Counter`` of the values in the ``ebird_code`` column of ``ds``."""
    tally = Counter()
    tally.update(ds["ebird_code"])
    return tally

# Augment until the classes are balanced
def balance_dataset(dataset, target_per_class):
    """Top up every under-represented class with augmented copies.

    For each class with fewer than ``target_per_class`` rows, rows of that
    class are drawn at random (with replacement), augmented with
    ``safe_augment_sample`` and collected until the class reaches the target.
    Classes already at or above the target are left untouched.

    Args:
        dataset: Dataset with ``ebird_code``, ``sampling_rate`` and ``features`` columns.
        target_per_class: Desired minimum number of rows per class.

    Returns:
        The original dataset concatenated with the augmented rows, or the
        original dataset unchanged when no class is below the target.
    """
    all_augmented = []

    class_counts = get_class_counts(dataset)
    label_info = dataset.features["ebird_code"]
    under_classes = [cls for cls, count in class_counts.items() if count < target_per_class]

    for cls in under_classes:
        # Take every sample of this class
        samples = dataset.filter(lambda x: x["ebird_code"] == cls)
        current_count = len(samples)
        needed = target_per_class - current_count

        augmented_examples = []

        while len(augmented_examples) < needed:
            sample = samples[random.randint(0, current_count - 1)]
            # The augmentation function defined in this notebook is
            # `safe_augment_sample`; `augment_sample` does not exist.
            augmented = safe_augment_sample(sample)

            augmented_examples.append(augmented)

        # Keep at most `needed` examples
        augmented_dataset = Dataset.from_list(augmented_examples[:needed])
        # Re-attach the ClassLabel feature so the new rows match the original schema.
        augmented_dataset = augmented_dataset.cast_column("ebird_code", label_info)
        all_augmented.append(augmented_dataset)

        print(f"Augmented {cls} from {current_count} → {target_per_class} samples.")

    # Merge all augmented rows with the original dataset
    if all_augmented:
        dataset = concatenate_datasets([dataset] + all_augmented)

    return dataset

In [None]:
# Bring every class up to at least 500 rows via augmentation.
balanced_ds = balance_dataset(feature_ds, target_per_class=500)
balanced_ds

In [None]:
label_counter = Counter()

# Count by using ebird_code as the key.
# Kept so existing `.map(count_label_ids)` calls keep working.
def count_label_ids(example):
    """Add one occurrence of ``example["ebird_code"]`` to ``label_counter``."""
    label_id = example["ebird_code"]
    label_counter.update([label_id])

# Count straight from the label column rather than `balanced_ds.map(count_label_ids)`:
# `map` visits every row with all of its columns just to read a single integer.
label_counter.update(balanced_ds["ebird_code"])

ebird_code_names = balanced_ds.features["ebird_code"].names

# Show the 10 most frequent classes
for label_id, count in label_counter.most_common(10):
    label_name = ebird_code_names[label_id]
    print(f"{label_name} (id: {label_id}): {count}")

In [None]:
# Persist the balanced dataset, using one writer process per CPU core.
balanced_ds.save_to_disk("../temp/BirdSet_10_16Khz_Balanced", num_proc=os.cpu_count())

In [None]:
# Pack the saved folder into a gzip-compressed tarball in the working directory.
!tar -czvf "./BirdSet_10_16Khz_Balanced.tar.gz" "../temp/BirdSet_10_16Khz_Balanced"

# **Extracting Features**

In [None]:
# Download the balanced dataset folder.
gdown.download_folder(BirdSet_10_16Khz_Balanced_URL, output="../temp/BirdSet_10_16Khz_Balanced", quiet=True)

In [None]:
# Load the downloaded folder and display it.
balanced_ds = load_from_disk("../temp/BirdSet_10_16Khz_Balanced")
balanced_ds

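**Note:** the augmentation results are not used! The feature extraction below reads from `feature_ds` (the un-augmented waveforms), not from `balanced_ds`.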

In [None]:
def to_melspectrogram(sample):
    """Convert one waveform record into a min-max normalised mel-spectrogram.

    The waveform is turned into a 40-band mel spectrogram, converted to
    decibels relative to its maximum, and rescaled to the range [0, 1].

    Args:
        sample: Dict with ``ebird_code``, ``sampling_rate`` and ``features``
            (the waveform).

    Returns:
        Dict with the same ``ebird_code`` and ``sampling_rate`` and the
        normalised spectrogram under ``features``.
    """
    ebird_code = sample["ebird_code"]
    sampling_rate = sample["sampling_rate"]
    audio_array = np.array(sample["features"])

    mel_spec = librosa.feature.melspectrogram(y=audio_array, sr=sampling_rate, n_mels=40)
    mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)

    # A constant spectrogram (e.g. a silent clip) has zero range; dividing by
    # it would fill the features with NaN, so map that case to all zeros.
    lowest = mel_spec_db.min()
    value_range = mel_spec_db.max() - lowest
    if value_range > 0:
        mel_spec_db = (mel_spec_db - lowest) / value_range
    else:
        mel_spec_db = np.zeros_like(mel_spec_db)

    return {
        "ebird_code": ebird_code,
        "sampling_rate": sampling_rate,
        "features": mel_spec_db
    }

def to_mfcc(sample):
    """Convert one waveform record into 40 MFCC coefficients per frame.

    Returns a dict with the record's ``ebird_code`` and ``sampling_rate``
    and the MFCC matrix under ``features``.
    """
    record = {
        "ebird_code": sample["ebird_code"],
        "sampling_rate": sample["sampling_rate"],
    }
    waveform = np.array(sample["features"])

    record["features"] = librosa.feature.mfcc(y=waveform, sr=record["sampling_rate"], n_mfcc=40)
    return record

## **Mel-Spectrogram Dataset**

In [None]:
# Extract mel-spectrograms from feature_ds (not balanced_ds), replacing all
# original columns with the output of to_melspectrogram; one worker per CPU core.
melspectro_ds = feature_ds.map(
    to_melspectrogram,
    remove_columns=feature_ds.column_names,
    num_proc=os.cpu_count(),
    desc="Extracting Mel-Spectrogram Features..."
)
melspectro_ds

In [None]:
# Persist the mel-spectrogram dataset.
melspectro_ds.save_to_disk("../temp/BirdSet_10_16Khz_Spectrogram", num_proc=os.cpu_count())

In [None]:
# Pack the saved folder into a gzip-compressed tarball in the working directory.
!tar -czvf "./BirdSet_10_16Khz_Spectrogram.tar.gz" "../temp/BirdSet_10_16Khz_Spectrogram"

## **MFCC Dataset**

In [None]:
# Extract MFCCs from feature_ds (not balanced_ds), replacing all original
# columns with the output of to_mfcc; one worker per CPU core.
mfcc_ds = feature_ds.map(
    to_mfcc,
    remove_columns=feature_ds.column_names,
    num_proc=os.cpu_count(),
    desc="Extracting MFCC Features..."
)
mfcc_ds

In [None]:
# Persist the MFCC dataset.
mfcc_ds.save_to_disk("../temp/BirdSet_10_16Khz_MFCC", num_proc=os.cpu_count())

In [None]:
# Pack the saved folder into a gzip-compressed tarball in the working directory.
!tar -czvf "./BirdSet_10_16Khz_MFCC.tar.gz" "../temp/BirdSet_10_16Khz_MFCC"

# **Quality Check**

In [None]:
# Listen to the first preprocessed waveform at 16 kHz.
ipd.Audio(feature_ds[0]["features"], rate=16_000)

In [None]:
# Reconstruct audio from the first mel-spectrogram to judge how much is preserved.
# NOTE(review): to_melspectrogram stores min-max normalised dB values, while
# mel_to_audio presumably expects a power mel spectrogram — confirm that this
# reconstruction is meaningful.
audio_data = librosa.feature.inverse.mel_to_audio(
    np.array(melspectro_ds[0]["features"]).squeeze(), sr=16_000
)

ipd.Audio(audio_data, rate=16_000)

In [None]:
# Reconstruct audio from the first MFCC matrix to judge how much is preserved.
audio_data = librosa.feature.inverse.mfcc_to_audio(
    np.array(mfcc_ds[0]["features"]).squeeze(), sr=16_000
)

ipd.Audio(audio_data, rate=16_000)

# **Modelling (MFCC)**

In [None]:
# Download the waveform-feature dataset folder.
gdown.download_folder(BirdSet_10_16Khz_Features_URL, output="../temp/BirdSet_10_16Khz_Features", quiet=True)

In [None]:
# NOTE(review): the split below reads mfcc_ds, not this feature_ds, so mfcc_ds
# must already exist in the session — confirm the intended entry point.
feature_ds = load_from_disk("../temp/BirdSet_10_16Khz_Features")
feature_ds

## **Train-Test Split**

In [None]:
# First split: 80% train and 20% held out, then halve the held-out part into
# validation and test (80/10/10 overall). Fixed seeds keep the split repeatable.
split = mfcc_ds.train_test_split(test_size=0.2, seed=42)
temp_split = split["test"].train_test_split(test_size=0.5, seed=42)

# Name the three resulting splits
train_ds = split["train"]
val_ds = temp_split["train"]
test_ds = temp_split["test"]

# Convert to TensorFlow datasets
train_tfds = train_ds.to_tf_dataset(columns="features", label_cols="ebird_code", batch_size=32, shuffle=True)
val_tfds = val_ds.to_tf_dataset(columns="features", label_cols="ebird_code", batch_size=128)
test_tfds  = test_ds.to_tf_dataset(columns="features", label_cols="ebird_code", batch_size=128)

# Append a trailing channel axis for the Conv2D layers, then cache and prefetch.
# The extra shuffle(1000) on the training set operates on already-batched elements.
train_tfds = train_tfds.map(lambda x, y: (tf.expand_dims(x, -1), y)).cache().shuffle(1000).prefetch(tf.data.AUTOTUNE)
val_tfds   = val_tfds.map(lambda x, y: (tf.expand_dims(x, -1), y)).cache().prefetch(tf.data.AUTOTUNE)
test_tfds  = test_tfds.map(lambda x, y: (tf.expand_dims(x, -1), y)).cache().prefetch(tf.data.AUTOTUNE)

## **Architecture**

In [None]:
# Exponential-decay learning-rate schedule starting at 1e-3
# (decay_rate 0.9, decay_steps 1000).
lr_schedule = optimizers.schedules.ExponentialDecay(
    initial_learning_rate=1e-3,
    decay_steps=1000,
    decay_rate=0.9)
# Module-level optimizer instance; create_cnn_model compiles the model with it.
optimizer = optimizers.AdamW(learning_rate=lr_schedule, weight_decay=1e-5)
def create_cnn_model(input_shape, num_classes):
    """Build and compile the CNN classifier used for the MFCC features.

    Three Conv2D/BatchNormalization/MaxPooling2D stages (32, 64, 128 filters)
    are followed by dropout, global average pooling and a dense head that ends
    in a ``num_classes``-way softmax. The model is compiled with the
    module-level ``optimizer``, sparse categorical cross-entropy and accuracy.
    """
    conv_stages = []
    for n_filters in (32, 64, 128):
        conv_stages.extend([
            layers.Conv2D(n_filters, (3, 3), activation="relu", padding="same"),
            layers.BatchNormalization(),
            layers.MaxPooling2D((2, 2)),
        ])

    classifier_head = [
        layers.Dropout(0.5),
        layers.GlobalAveragePooling2D(),
        layers.Dense(64, activation="relu"),
        layers.Dropout(0.25),
        layers.Dense(32, activation="relu"),
        layers.Dropout(0.3),
        layers.Dense(num_classes, activation="softmax"),
    ]

    network = models.Sequential(
        [layers.Input(shape=input_shape), *conv_stages, *classifier_head]
    )
    network.compile(
        optimizer=optimizer,
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    return network

# **Training**

In [None]:
# Sanity check: print the feature and label shapes of one training batch.
for batch in train_tfds.take(1):
    print(batch[0].shape, batch[1].shape)

In [None]:
# Read the label column directly; iterating whole rows would also load every
# feature matrix just to pick out one integer per row.
labels = list(mfcc_ds["ebird_code"])
classes = np.unique(labels)

# "balanced" weights: rarer classes receive larger weights.
class_weights = compute_class_weight(
    class_weight="balanced",
    classes=classes,
    y=labels
)
# Key each weight by its actual class id rather than by its position, so the
# mapping stays correct even if the ids are not exactly 0..n-1.
class_weights = {int(class_id): float(weight) for class_id, weight in zip(classes, class_weights)}

In [None]:
# (coefficients, frames, channels). 40 matches n_mfcc in to_mfcc;
# presumably 157 is the frame count of a 5-second clip — TODO confirm against
# the batch shape printed above.
input_shape = (40, 157, 1)
num_classes = len(classes)

# Stop once val_loss has not improved for 5 epochs and restore the best weights.
early_stop = callbacks.EarlyStopping(
    monitor="val_loss",
    patience=5,
    restore_best_weights=True,
    verbose=True
)

mfcc_model = create_cnn_model(input_shape, num_classes)

# epochs=200 is an upper bound; early stopping normally ends training sooner.
history = mfcc_model.fit(
    train_tfds,
    validation_data=val_tfds,
    class_weight=class_weights,
    callbacks=[early_stop],
    epochs=200,
)

In [None]:
# Predict the labels of the test set
y_true = []
y_pred = []

# Collect true and predicted labels batch by batch.
for batch in test_tfds:
    X_batch, y_batch = batch
    preds = mfcc_model.predict(X_batch)
    # The predicted class is the index of the highest softmax probability.
    pred_labels = np.argmax(preds, axis=1)
    y_true.extend(y_batch.numpy())
    y_pred.extend(pred_labels)

y_true = np.array(y_true)
y_pred = np.array(y_pred)

# Build the confusion matrix
cm = confusion_matrix(y_true, y_pred)
disp = ConfusionMatrixDisplay(confusion_matrix=cm)
disp.plot(cmap=plt.cm.Blues)

plt.title("Confusion Matrix Test Set")
plt.show()

# **Modelling (Mel-Spectrogram)**

In [None]:
# Download the waveform-feature dataset folder.
gdown.download_folder(BirdSet_10_16Khz_Features_URL, output="../temp/BirdSet_10_16Khz_Features", quiet=True)

In [None]:
# NOTE(review): the split below reads melspectro_ds, not this feature_ds, so
# melspectro_ds must already exist in the session — confirm the intended entry point.
feature_ds = load_from_disk("../temp/BirdSet_10_16Khz_Features")
feature_ds

## **Train-Test Split**

In [None]:
# First split: 80% train and 20% held out, then halve the held-out part into
# validation and test (80/10/10 overall). Fixed seeds keep the split repeatable.
split = melspectro_ds.train_test_split(test_size=0.2, seed=42)
temp_split = split["test"].train_test_split(test_size=0.5, seed=42)

# Name the three resulting splits
train_ds = split["train"]
val_ds = temp_split["train"]
test_ds = temp_split["test"]

# Convert to TensorFlow datasets
train_tfds = train_ds.to_tf_dataset(columns="features", label_cols="ebird_code", batch_size=32, shuffle=True)
val_tfds = val_ds.to_tf_dataset(columns="features", label_cols="ebird_code", batch_size=128)
test_tfds  = test_ds.to_tf_dataset(columns="features", label_cols="ebird_code", batch_size=128)

# Append a trailing channel axis for the Conv2D layers, then cache and prefetch.
# The extra shuffle(1000) on the training set operates on already-batched elements.
train_tfds = train_tfds.map(lambda x, y: (tf.expand_dims(x, -1), y)).cache().shuffle(1000).prefetch(tf.data.AUTOTUNE)
val_tfds   = val_tfds.map(lambda x, y: (tf.expand_dims(x, -1), y)).cache().prefetch(tf.data.AUTOTUNE)
test_tfds  = test_tfds.map(lambda x, y: (tf.expand_dims(x, -1), y)).cache().prefetch(tf.data.AUTOTUNE)

## **Architecture**

In [None]:
# Exponential-decay learning-rate schedule starting at 1e-3
# (decay_rate 0.9, decay_steps 1000).
lr_schedule = optimizers.schedules.ExponentialDecay(
    initial_learning_rate=1e-3,
    decay_steps=1000,
    decay_rate=0.9)
# Fresh module-level optimizer instance; create_cnn_model compiles the model with it.
optimizer = optimizers.AdamW(learning_rate=lr_schedule, weight_decay=1e-5)

def create_cnn_model(input_shape, num_classes):
    """Build and compile the CNN classifier used for the mel-spectrogram features.

    Four Conv2D/BatchNormalization/MaxPooling2D stages (32, 64, 128, 256
    filters) are followed by dropout, flattening and a dense head that ends in
    a ``num_classes``-way softmax. The model is compiled with the module-level
    ``optimizer``, sparse categorical cross-entropy and accuracy.
    """
    conv_stages = []
    for n_filters in (32, 64, 128, 256):
        conv_stages.extend([
            layers.Conv2D(n_filters, (3, 3), activation="relu", padding="same"),
            layers.BatchNormalization(),
            layers.MaxPooling2D((2, 2)),
        ])

    classifier_head = [
        layers.Dropout(0.5),
        layers.Flatten(),
        layers.Dense(128, activation="relu"),
        layers.Dropout(0.25),
        layers.Dense(32, activation="relu"),
        layers.Dropout(0.3),
        layers.Dense(num_classes, activation="softmax"),
    ]

    network = models.Sequential(
        [layers.Input(shape=input_shape), *conv_stages, *classifier_head]
    )
    network.compile(
        optimizer=optimizer,
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    return network

# **Training**

In [None]:
# Sanity check: print the feature and label shapes of one training batch.
for batch in train_tfds.take(1):
    print(batch[0].shape, batch[1].shape)

In [None]:
# Read the label column directly; iterating whole rows would also load every
# spectrogram just to pick out one integer per row.
labels = list(melspectro_ds["ebird_code"])
classes = np.unique(labels)

# "balanced" weights: rarer classes receive larger weights.
class_weights = compute_class_weight(
    class_weight="balanced",
    classes=classes,
    y=labels
)
# Key each weight by its actual class id rather than by its position, so the
# mapping stays correct even if the ids are not exactly 0..n-1.
class_weights = {int(class_id): float(weight) for class_id, weight in zip(classes, class_weights)}

In [None]:
# Stop once val_loss has not improved for 5 epochs and restore the best weights.
early_stop = callbacks.EarlyStopping(
    monitor="val_loss",
    patience=5,
    restore_best_weights=True,
    verbose=True
)

# (mel bands, frames, channels). 40 matches n_mels in to_melspectrogram;
# presumably 157 is the frame count of a 5-second clip — TODO confirm against
# the batch shape printed above.
input_shape = (40, 157, 1)
num_classes = len(classes)

melspectro_model = create_cnn_model(input_shape, num_classes)
# epochs=200 is an upper bound; early stopping normally ends training sooner.
history = melspectro_model.fit(
    train_tfds,
    validation_data=val_tfds,
    class_weight=class_weights,
    callbacks=[early_stop],
    epochs=200,
)

In [None]:
# Predict the labels of the test set
y_true = []
y_pred = []

# Collect true and predicted labels batch by batch.
for batch in test_tfds:
    X_batch, y_batch = batch
    preds = melspectro_model.predict(X_batch)
    # The predicted class is the index of the highest softmax probability.
    pred_labels = np.argmax(preds, axis=1)
    y_true.extend(y_batch.numpy())
    y_pred.extend(pred_labels)

y_true = np.array(y_true)
y_pred = np.array(y_pred)

# Build the confusion matrix
cm = confusion_matrix(y_true, y_pred)
disp = ConfusionMatrixDisplay(confusion_matrix=cm)
disp.plot(cmap=plt.cm.Blues)

plt.title("Confusion Matrix Test Set")
plt.show()

# **Uji Prediksi**

# **Export Model**

In [None]:
# <model>.export("SavedModel")
# !tensorflowjs_converter --input_format=tf_saved_model --output_format=tfjs_graph_model SavedModel TFJS
# !tar -czvf "TFJS.tar.gz" "TFJS"