In [1]:
# Setup
import os
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping

# Load Data
DATA_DIR = '../data/spectogram/'
X = []
y = []

def add_noise(spectrogram, noise_level=0.02):
    """
    Add random Gaussian noise to a spectrogram.

    Args:
        spectrogram (np.ndarray): Normalized spectrogram [0,1]
        noise_level (float): Standard deviation of Gaussian noise

    Returns:
        np.ndarray: Noisy spectrogram, clipped to [0,1]
    """
    noise = np.random.normal(0, noise_level, spectrogram.shape)
    return np.clip(spectrogram + noise, 0, 1)

# Load spectrograms from both class folders
for label, folder in enumerate(['nonhuman', 'human']):
    folder_path = os.path.join(DATA_DIR, folder)
    for file in os.listdir(folder_path):
        if file.endswith('.npy'):
            spectrogram = np.load(os.path.join(folder_path, file))
            
            # Skip invalid or empty spectrograms
            if np.max(spectrogram) > -100 and not np.isnan(spectrogram).any():
                # Normalize from dB range to [0, 1]
                spectrogram = (spectrogram + 80) / 80  
                X.append(spectrogram)
                y.append(label)

                noisy_spec = add_noise(spectrogram, noise_level=0.03)
                X.append(noisy_spec)
                y.append(label)

# Convert to NumPy arrays
X = np.array(X)
y = np.array(y)

# Reshape for CNN input format: (samples, height, width, channels)
X = X.reshape(-1, 64, 64, 1)

# Split into train and test sets (stratified)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

print(f"Train: {X_train.shape}, Test: {X_test.shape}")

Train: (9614, 64, 64, 1), Test: (2404, 64, 64, 1)


In [2]:
import numpy as np

print("Train label distribusi:")
unique, counts = np.unique(y_train, return_counts=True)
print(dict(zip(unique, counts)))

print("Test label distribusi:")
unique, counts = np.unique(y_test, return_counts=True)
print(dict(zip(unique, counts)))

print(np.unique(y_train, return_counts=True))
print(np.unique(y_test, return_counts=True))

print(f"Train min: {X_train.min()}, max: {X_train.max()}")
print(f"Test min: {X_test.min()}, max: {X_test.max()}")

print(np.bincount(y_train))
print(np.bincount(y_test))

Train label distribusi:
{0: 3598, 1: 6016}
Test label distribusi:
{0: 900, 1: 1504}
(array([0, 1]), array([3598, 6016], dtype=int64))
(array([0, 1]), array([ 900, 1504], dtype=int64))
Train min: 0.0, max: 1.0
Test min: 0.0, max: 1.0
[3598 6016]
[ 900 1504]


In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.utils import class_weight
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os
from glob import glob

# Define CNN model (Input: 64x64x1, Output: binary classification)
model = models.Sequential([
    layers.Conv2D(32, (3,3), activation='relu', input_shape=(64, 64, 1)),
    layers.BatchNormalization(),
    layers.Conv2D(32, (3,3), activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2,2)),

    layers.Conv2D(64, (3,3), activation='relu'),
    layers.BatchNormalization(),
    layers.Conv2D(64, (3,3), activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2,2)),

    layers.Conv2D(128, (3,3), activation='relu'),
    layers.BatchNormalization(),
    layers.Conv2D(128, (3,3), activation='relu'),
    layers.BatchNormalization(),

    layers.GlobalAveragePooling2D(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid')  # Binary classification (human vs nonhuman)
])

# Compute class weights to handle class imbalance
class_weights = class_weight.compute_class_weight('balanced',
                                                  classes=np.unique(y_train),
                                                  y=y_train.flatten())

class_weights = dict(enumerate(class_weights))

# Compile model with Adam optimizer and binary crossentropy loss
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005)

model.compile(optimizer=optimizer,
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Early stopping to avoid overfitting
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Data augmentation (only basic zoom, no flips)
datagen = ImageDataGenerator(
    zoom_range=0.05,
    horizontal_flip=False
)

datagen.fit(X_train)

# Learning rate scheduler to reduce LR on plateau
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=5,
    min_lr=1e-6,
    verbose=1
)

# Train model
history = model.fit(
    datagen.flow(X_train, y_train, batch_size=64),
    epochs=50,
    validation_data=(X_test, y_test),
    callbacks=[early_stop, reduce_lr],
    class_weight=class_weights,
    verbose=1
)

# Evaluate on test set
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)
print(f"Test Accuracy: {test_acc:.4f}")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  self._warn_if_super_not_called()


Epoch 1/50


In [4]:
from sklearn.metrics import classification_report, confusion_matrix

y_pred_prob = model.predict(X_test)

y_pred = (y_pred_prob > 0.5).astype(int)

print(classification_report(y_test, y_pred, digits=4))

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 28ms/step
              precision    recall  f1-score   support

           0     0.9346    0.9367    0.9356       900
           1     0.9621    0.9608    0.9614      1504

    accuracy                         0.9517      2404
   macro avg     0.9483    0.9487    0.9485      2404
weighted avg     0.9518    0.9517    0.9518      2404

Confusion Matrix:
[[ 843   57]
 [  59 1445]]


In [6]:
import tensorflow as tf

model = tf.keras.models.load_model('../models/model.h5')

converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

with open('../models/model.tflite', 'wb') as f:
    f.write(tflite_model)

print("✅ Model TFLite saved to '../models/model.tflite'")



INFO:tensorflow:Assets written to: C:\Users\Asus\AppData\Local\Temp\tmp6mmy01_x\assets


INFO:tensorflow:Assets written to: C:\Users\Asus\AppData\Local\Temp\tmp6mmy01_x\assets


Saved artifact at 'C:\Users\Asus\AppData\Local\Temp\tmp6mmy01_x'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 64, 64, 1), dtype=tf.float32, name='input_layer_5')
Output Type:
  TensorSpec(shape=(None, 1), dtype=tf.float32, name=None)
Captures:
  2021703740560: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2023375977936: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2023375976208: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2023375977168: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2023375976400: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2023375977744: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2023375971984: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2023375975824: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2023375974288: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2023375974096: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2023375