In [1]:
# Setup
import os
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping

# Load Data
DATA_DIR = '../data/spectogram/'
X = []
y = []

def add_noise(spectrogram, noise_level=0.02):
    """
    Add random Gaussian noise to a spectrogram.

    Args:
        spectrogram (np.ndarray): Normalized spectrogram [0,1]
        noise_level (float): Standard deviation of Gaussian noise

    Returns:
        np.ndarray: Noisy spectrogram, clipped to [0,1]
    """
    noise = np.random.normal(0, noise_level, spectrogram.shape)
    return np.clip(spectrogram + noise, 0, 1)

# Load spectrograms from both class folders
for label, folder in enumerate(['nonhuman', 'human']):
    folder_path = os.path.join(DATA_DIR, folder)
    for file in os.listdir(folder_path):
        if file.endswith('.npy'):
            spectrogram = np.load(os.path.join(folder_path, file))
            
            # Skip invalid or empty spectrograms
            if np.max(spectrogram) > -100 and not np.isnan(spectrogram).any():
                # Normalize from dB range to [0, 1]
                spectrogram = (spectrogram + 80) / 80  
                X.append(spectrogram)
                y.append(label)

                noisy_spec = add_noise(spectrogram, noise_level=0.03)
                X.append(noisy_spec)
                y.append(label)

# Convert to NumPy arrays
X = np.array(X)
y = np.array(y)

# Reshape for CNN input format: (samples, height, width, channels)
X = X.reshape(-1, 64, 64, 1)

# Split into train and test sets (stratified)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

print(f"Train: {X_train.shape}, Test: {X_test.shape}")

Train: (9604, 64, 64, 1), Test: (2402, 64, 64, 1)


In [2]:
import numpy as np

print("Train label distribusi:")
unique, counts = np.unique(y_train, return_counts=True)
print(dict(zip(unique, counts)))

print("Test label distribusi:")
unique, counts = np.unique(y_test, return_counts=True)
print(dict(zip(unique, counts)))

print(np.unique(y_train, return_counts=True))
print(np.unique(y_test, return_counts=True))

print(f"Train min: {X_train.min()}, max: {X_train.max()}")
print(f"Test min: {X_test.min()}, max: {X_test.max()}")

print(np.bincount(y_train))
print(np.bincount(y_test))

Train label distribusi:
{0: 3597, 1: 6007}
Test label distribusi:
{0: 899, 1: 1503}
(array([0, 1]), array([3597, 6007], dtype=int64))
(array([0, 1]), array([ 899, 1503], dtype=int64))
Train min: 0.0, max: 1.0
Test min: 0.0, max: 1.0
[3597 6007]
[ 899 1503]


In [21]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.utils import class_weight
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os
from glob import glob

# Define CNN model
model = models.Sequential([
    layers.SeparableConv2D(16, (3,3), activation='relu', input_shape=(64, 64, 1)),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2,2)),

    layers.SeparableConv2D(32, (3,3), activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2,2)),

    layers.SeparableConv2D(64, (3,3), activation='relu'),
    layers.BatchNormalization(),

    layers.GlobalAveragePooling2D(),
    layers.Dense(32, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])

# Compute class weights
class_weights = class_weight.compute_class_weight('balanced',
                                                  classes=np.unique(y_train),
                                                  y=y_train.flatten())
class_weights = dict(enumerate(class_weights))

# Compile
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005),
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Callbacks
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6, verbose=1)

# Augmentation
datagen = ImageDataGenerator(zoom_range=0.05, horizontal_flip=False)
datagen.fit(X_train)

# Train
history = model.fit(
    datagen.flow(X_train, y_train, batch_size=64),
    epochs=50,
    validation_data=(X_test, y_test),
    callbacks=[early_stop, reduce_lr],
    class_weight=class_weights,
    verbose=1
)

# Save model
model.save('../models/model.h5')

# Evaluate
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)
print(f"Test Accuracy: {test_acc:.4f}")

  super().__init__(
  self._warn_if_super_not_called()


Epoch 1/50
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 136ms/step - accuracy: 0.5385 - loss: 0.6687 - val_accuracy: 0.3743 - val_loss: 0.7173 - learning_rate: 5.0000e-04
Epoch 2/50
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 125ms/step - accuracy: 0.6743 - loss: 0.5981 - val_accuracy: 0.3743 - val_loss: 0.7678 - learning_rate: 5.0000e-04
Epoch 3/50
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 126ms/step - accuracy: 0.7288 - loss: 0.5442 - val_accuracy: 0.3743 - val_loss: 0.8732 - learning_rate: 5.0000e-04
Epoch 4/50
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 124ms/step - accuracy: 0.7549 - loss: 0.5004 - val_accuracy: 0.5920 - val_loss: 0.6547 - learning_rate: 5.0000e-04
Epoch 5/50
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 121ms/step - accuracy: 0.7701 - loss: 0.4577 - val_accuracy: 0.7698 - val_loss: 0.4814 - learning_rate: 5.0000e-04
Epoch 6/50
[1m151/151[0m [32m━━━



76/76 - 1s - 15ms/step - accuracy: 0.8714 - loss: 0.2924
Test Accuracy: 0.8714


In [24]:
from sklearn.metrics import classification_report, confusion_matrix

y_pred_prob = model.predict(X_test)

y_pred = (y_pred_prob > 0.5).astype(int)

print(classification_report(y_test, y_pred, digits=4))

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step
              precision    recall  f1-score   support

           0     0.8278    0.8287    0.8282       899
           1     0.8975    0.8969    0.8972      1503

    accuracy                         0.8714      2402
   macro avg     0.8626    0.8628    0.8627      2402
weighted avg     0.8714    0.8714    0.8714      2402

Confusion Matrix:
[[ 745  154]
 [ 155 1348]]


In [25]:
def representative_data_gen():
    folder = "../data/quantization_samples/"
    files = glob(os.path.join(folder, "*.npy"))

    for path in files[:100]:
        spect = np.load(path).astype(np.float32)
        if spect.ndim == 2:
            spect = np.expand_dims(spect, axis=-1)
        spect = np.expand_dims(spect, axis=0)
        yield [spect]

model = tf.keras.models.load_model('../models/model.h5')

converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_data_gen
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.int8
converter.inference_output_type = tf.int8

tflite_model = converter.convert()

with open('../models/model_quantized.tflite', 'wb') as f:
    f.write(tflite_model)

print("Model quantized saved.")



INFO:tensorflow:Assets written to: C:\Users\Asus\AppData\Local\Temp\tmpzonyezdl\assets


INFO:tensorflow:Assets written to: C:\Users\Asus\AppData\Local\Temp\tmpzonyezdl\assets


Saved artifact at 'C:\Users\Asus\AppData\Local\Temp\tmpzonyezdl'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 64, 64, 1), dtype=tf.float32, name='input_layer_5')
Output Type:
  TensorSpec(shape=(None, 1), dtype=tf.float32, name=None)
Captures:
  1839637517072: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1839637518800: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1839637518992: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1839637517648: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1839637519184: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1839637517840: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1839637517264: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1839637519568: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1839637522832: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1839637521680: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1839637

