In [1]:
# Setup
import os
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping

# Load Data
DATA_DIR = '../data/spectogram/'
X = []
y = []

def add_noise(spectrogram, noise_level=0.02):
    """
    Add random Gaussian noise to a spectrogram.

    Args:
        spectrogram (np.ndarray): Normalized spectrogram [0,1]
        noise_level (float): Standard deviation of Gaussian noise

    Returns:
        np.ndarray: Noisy spectrogram, clipped to [0,1]
    """
    noise = np.random.normal(0, noise_level, spectrogram.shape)
    return np.clip(spectrogram + noise, 0, 1)

# Load spectrograms from both class folders
for label, folder in enumerate(['nonhuman', 'human']):
    folder_path = os.path.join(DATA_DIR, folder)
    for file in os.listdir(folder_path):
        if file.endswith('.npy'):
            spectrogram = np.load(os.path.join(folder_path, file))
            
            # Skip invalid or empty spectrograms
            if np.max(spectrogram) > -100 and not np.isnan(spectrogram).any():
                # Normalize from dB range to [0, 1]
                spectrogram = (spectrogram + 80) / 80  
                X.append(spectrogram)
                y.append(label)

                # Data augmentation: Add noisy version for human class
                if label == 1:
                    noisy_spec = add_noise(spectrogram, noise_level=0.03)
                    X.append(noisy_spec)
                    y.append(label)

# Convert to NumPy arrays
X = np.array(X)
y = np.array(y)

# Reshape for CNN input format: (samples, height, width, channels)
X = X.reshape(-1, 64, 64, 1)

# Split into train and test sets (stratified)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

print(f"Train: {X_train.shape}, Test: {X_test.shape}")

Train: (7806, 64, 64, 1), Test: (1952, 64, 64, 1)


In [60]:
import numpy as np

print("Train label distribusi:")
unique, counts = np.unique(y_train, return_counts=True)
print(dict(zip(unique, counts)))

print("Test label distribusi:")
unique, counts = np.unique(y_test, return_counts=True)
print(dict(zip(unique, counts)))

print(np.unique(y_train, return_counts=True))
print(np.unique(y_test, return_counts=True))

print(f"Train min: {X_train.min()}, max: {X_train.max()}")
print(f"Test min: {X_test.min()}, max: {X_test.max()}")

print(np.bincount(y_train))
print(np.bincount(y_test))



Train label distribusi:
{np.int64(0): np.int64(1797), np.int64(1): np.int64(3003)}
Test label distribusi:
{np.int64(0): np.int64(449), np.int64(1): np.int64(752)}
(array([0, 1]), array([1797, 3003]))
(array([0, 1]), array([449, 752]))
Train min: 0.0, max: 0.9999987483024597
Test min: 0.0, max: 0.9999936819076538
[1797 3003]
[449 752]


In [2]:
from sklearn.utils import class_weight
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Define CNN model (Input: 64x64x1, Output: binary classification)
model = models.Sequential([
    layers.Conv2D(32, (3,3), activation='relu', input_shape=(64, 64, 1)),
    layers.BatchNormalization(),
    layers.Conv2D(32, (3,3), activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2,2)),

    layers.Conv2D(64, (3,3), activation='relu'),
    layers.BatchNormalization(),
    layers.Conv2D(64, (3,3), activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2,2)),

    layers.Conv2D(128, (3,3), activation='relu'),
    layers.BatchNormalization(),
    layers.Conv2D(128, (3,3), activation='relu'),
    layers.BatchNormalization(),

    layers.GlobalAveragePooling2D(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid')  # Binary classification (human vs nonhuman)
])

# Compute class weights to handle class imbalance
class_weights = class_weight.compute_class_weight('balanced',
                                                  classes=np.unique(y_train),
                                                  y=y_train.flatten())

class_weights = dict(enumerate(class_weights))

# Compile model with Adam optimizer and binary crossentropy loss
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005)

model.compile(optimizer=optimizer,
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Early stopping to avoid overfitting
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Data augmentation (only basic zoom, no flips)
datagen = ImageDataGenerator(
    zoom_range=0.05,
    horizontal_flip=False
)

datagen.fit(X_train)

# Learning rate scheduler to reduce LR on plateau
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=5,
    min_lr=1e-6,
    verbose=1
)

# Train model
history = model.fit(
    datagen.flow(X_train, y_train, batch_size=64),
    epochs=50,
    validation_data=(X_test, y_test),
    callbacks=[early_stop, reduce_lr],
    class_weight=class_weights,
    verbose=1
)

# Evaluate on test set
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)
print(f"Test Accuracy: {test_acc:.4f}")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  self._warn_if_super_not_called()


Epoch 1/50
[1m122/122[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m73s[0m 500ms/step - accuracy: 0.6578 - loss: 0.5929 - val_accuracy: 0.2305 - val_loss: 1.0781 - learning_rate: 5.0000e-04
Epoch 2/50
[1m122/122[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 504ms/step - accuracy: 0.7810 - loss: 0.4183 - val_accuracy: 0.2305 - val_loss: 1.7377 - learning_rate: 5.0000e-04
Epoch 3/50
[1m122/122[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 538ms/step - accuracy: 0.8112 - loss: 0.3436 - val_accuracy: 0.2305 - val_loss: 1.1116 - learning_rate: 5.0000e-04
Epoch 4/50
[1m122/122[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 536ms/step - accuracy: 0.8229 - loss: 0.3066 - val_accuracy: 0.2485 - val_loss: 1.5147 - learning_rate: 5.0000e-04
Epoch 5/50
[1m122/122[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 526ms/step - accuracy: 0.8519 - loss: 0.2748 - val_accuracy: 0.7715 - val_loss: 0.4177 - learning_rate: 5.0000e-04
Epoch 6/50
[1m122/122[0m [32m━━━

In [3]:
from sklearn.metrics import classification_report, confusion_matrix

y_pred_prob = model.predict(X_test)

y_pred = (y_pred_prob > 0.5).astype(int)

print(classification_report(y_test, y_pred, digits=4))

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 26ms/step
              precision    recall  f1-score   support

           0     0.8299    0.8022    0.8158       450
           1     0.9413    0.9507    0.9460      1502

    accuracy                         0.9165      1952
   macro avg     0.8856    0.8765    0.8809      1952
weighted avg     0.9156    0.9165    0.9160      1952

Confusion Matrix:
[[ 361   89]
 [  74 1428]]


In [4]:
# Save model
model.save('../models/model_cnn_64x64.h5')



In [5]:
# Convert to tflite
model = tf.keras.models.load_model('../models/model_cnn_64x64.h5')

converter = tf.lite.TFLiteConverter.from_keras_model(model)

converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model = converter.convert()

with open('../models/model_cnn_64x64.tflite', 'wb') as f:
    f.write(tflite_model)

print("Model saved")



INFO:tensorflow:Assets written to: C:\Users\Asus\AppData\Local\Temp\tmpb009viwb\assets


INFO:tensorflow:Assets written to: C:\Users\Asus\AppData\Local\Temp\tmpb009viwb\assets


Saved artifact at 'C:\Users\Asus\AppData\Local\Temp\tmpb009viwb'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 64, 64, 1), dtype=tf.float32, name='input_layer')
Output Type:
  TensorSpec(shape=(None, 1), dtype=tf.float32, name=None)
Captures:
  2973359502736: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2973359501968: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2973359503888: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2973359503312: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2973359503696: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2973359489104: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2973359505232: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2973359497360: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2973359501392: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2973366413136: TensorSpec(shape=(), dtype=tf.resource, name=None)
  297335948