In [1]:
# Import libraries
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Input, BatchNormalization, Add
from tqdm import tqdm
# Configure memory growth BEFORE anything queries the GPU runtime: the setting
# has to be applied before the GPUs are initialized, and asking for the device
# name (tf.test.gpu_device_name) may initialize them as a side effect.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print("GPU memory growth enabled.")
    except RuntimeError as e:
        # Raised when the devices were already initialized; report it and carry on.
        print(e)

# Report what TensorFlow can see only after the configuration step above.
print("Num GPUs Available:", len(gpus))
print("TensorFlow is using:", tf.test.gpu_device_name())

Num GPUs Available: 0
TensorFlow is using: 


In [2]:
# Data Preparation
def load_data(spectrogram_dir, label_dir):
    """Load paired spectrogram/label array files and stack them into two arrays.

    Files from the two directories are paired by position after sorting their
    names, so both directories must hold the same number of entries and the
    sorted orders must line up.

    Each label array is transposed after loading and then zero-padded or
    truncated along axis 0 so its length equals the first dimension of the
    matching spectrogram array.

    Args:
        spectrogram_dir: Directory containing the spectrogram arrays.
        label_dir: Directory containing the label arrays.

    Returns:
        Tuple ``(X, Y)``: every spectrogram array stacked along axis 0 and
        every aligned label array stacked along axis 0.

    Raises:
        ValueError: If the two directories contain a different number of
            files, or if they contain no files at all.
    """
    spectrogram_files = sorted(os.listdir(spectrogram_dir))
    label_files = sorted(os.listdir(label_dir))

    # zip() would silently drop the surplus files (and could mispair the
    # rest), so refuse to continue when the listings differ in length.
    if len(spectrogram_files) != len(label_files):
        raise ValueError(
            f"File count mismatch: {len(spectrogram_files)} spectrogram file(s) in "
            f"{spectrogram_dir!r} vs {len(label_files)} label file(s) in {label_dir!r}"
        )
    # np.vstack cannot stack an empty list; fail early with a clearer message.
    if not spectrogram_files:
        raise ValueError(f"No files found in {spectrogram_dir!r}")

    spectrograms = []
    labels = []

    for spec_file, label_file in tqdm(zip(spectrogram_files, label_files),
                                      total=len(spectrogram_files),
                                      desc="Loading Data",
                                      unit="file"):

        spectrogram = np.load(os.path.join(spectrogram_dir, spec_file))
        label = np.load(os.path.join(label_dir, label_file)).T

        # Align the label length (axis 0) with the spectrogram length (axis 0).
        T_spec = spectrogram.shape[0]
        T_label = label.shape[0]
        if T_label < T_spec:
            # Too short: append all-zero rows at the end.
            label = np.pad(label, ((0, T_spec - T_label), (0, 0)), mode='constant')
        elif T_label > T_spec:
            # Too long: drop the trailing rows.
            label = label[:T_spec]

        spectrograms.append(spectrogram)
        labels.append(label)

    # Concatenate the per-file arrays along the first axis.
    X = np.vstack(spectrograms)
    Y = np.vstack(labels)

    return X, Y

In [None]:
# ========= TRAINING DATA: THE VALIDATION SET IS USED BECAUSE IT IS SMALLER ==========

X_train, Y_train = load_data(
    spectrogram_dir="spectrograms_validation",
    label_dir="primary_labels_validation",
)
# Append a trailing singleton axis (the channel dimension the model input expects).
X_train = X_train[..., np.newaxis]
print("X_train:", X_train.shape, "Y_train:", Y_train.shape)

Loading Data: 100%|██████████| 270/270 [00:01<00:00, 217.78file/s]


X_train: (65912, 128, 87, 1) Y_train: (65912, 129)


In [4]:
# Define CNN model: four Conv2D + MaxPooling2D stages, then a dense classifier head.
cnn_layers = [Input(shape=(128, 87, 1))]  # Shape of each input spectrogram

# Convolutional stages; the number of filters doubles at every stage.
for n_filters in (32, 64, 128, 256):
    cnn_layers.append(Conv2D(n_filters, (3, 3), activation='relu', padding='same'))
    cnn_layers.append(MaxPooling2D((2, 2)))

# Fully connected head with dropout after each hidden layer.
cnn_layers.append(Flatten())
for n_units in (512, 256):
    cnn_layers.append(Dense(n_units, activation='relu'))
    cnn_layers.append(Dropout(0.5))

# One sigmoid output per label (129 outputs).
cnn_layers.append(Dense(129, activation='sigmoid'))

model = Sequential(cnn_layers)

# Binary cross-entropy loss with binary accuracy as the tracked metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['binary_accuracy'],
)

I0000 00:00:1739002793.197008    1175 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 5564 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3070, pci bus id: 0000:06:00.0, compute capability: 8.6


In [5]:
# Fit the network on the training arrays (no validation data is passed to fit).
history = model.fit(
    x=X_train,
    y=Y_train,
    batch_size=64,
    epochs=20,
    verbose=1,
)

# Loss and accuracy logged for the last epoch.
final_loss, final_accuracy = (
    history.history[key][-1] for key in ('loss', 'binary_accuracy')
)
print(f"Final Loss: {final_loss:.4f}, Final Accuracy: {final_accuracy:.4f}")

2025-02-08 00:19:56.936896: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 2935984128 exceeds 10% of free system memory.
2025-02-08 00:19:58.485076: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 2935984128 exceeds 10% of free system memory.


Epoch 1/20


I0000 00:00:1739002800.517272    1602 service.cc:148] XLA service 0x7f914800ef00 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1739002800.517330    1602 service.cc:156]   StreamExecutor device (0): NVIDIA GeForce RTX 3070, Compute Capability 8.6
2025-02-08 00:20:00.566275: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1739002800.840929    1602 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m   6/1030[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m25s[0m 25ms/step - binary_accuracy: 0.6051 - loss: 0.8345 

I0000 00:00:1739002805.890111    1602 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m1030/1030[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 30ms/step - binary_accuracy: 0.9470 - loss: 0.1512
Epoch 2/20
[1m1030/1030[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 25ms/step - binary_accuracy: 0.9659 - loss: 0.0994
Epoch 3/20
[1m1030/1030[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 24ms/step - binary_accuracy: 0.9694 - loss: 0.0859
Epoch 4/20
[1m1030/1030[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 25ms/step - binary_accuracy: 0.9728 - loss: 0.0746
Epoch 5/20
[1m1030/1030[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 25ms/step - binary_accuracy: 0.9756 - loss: 0.0666
Epoch 6/20
[1m1030/1030[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 25ms/step - binary_accuracy: 0.9778 - loss: 0.0602
Epoch 7/20
[1m1030/1030[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 25ms/step - binary_accuracy: 0.9793 - loss: 0.0559
Epoch 8/20
[1m1030/1030[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 24ms/step - bi

In [6]:
# Metric functions used by the evaluation below.
from sklearn.metrics import precision_score, recall_score, f1_score

# Model outputs (sigmoid activations) for the training data.
Y_pred = model.predict(X_train)

# Binarize: a label counts as predicted when its output exceeds the threshold.
threshold = 0.5
Y_pred_binary = (Y_pred > threshold).astype(int)

# Sample-averaged metrics; zero_division=0 scores undefined cases as 0.
precision, recall, f1 = (
    metric(Y_train, Y_pred_binary, average='samples', zero_division=0)
    for metric in (precision_score, recall_score, f1_score)
)

# Report the three scores.
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")

2025-02-08 00:28:46.864360: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 2935984128 exceeds 10% of free system memory.
2025-02-08 00:28:48.572777: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 2935984128 exceeds 10% of free system memory.


[1m2056/2060[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 5ms/step




[1m2060/2060[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 6ms/step
Precision: 0.8948
Recall: 0.8541
F1-Score: 0.8631


In [None]:

# Evaluate the trained model on the separate test files.
X_test, Y_test = load_data(
    spectrogram_dir="spectrograms_test",
    label_dir="primary_labels_test",
)
# Append the trailing singleton channel axis, as done for the training data.
X_test = X_test[..., np.newaxis]

# Predict, then binarize the outputs at 0.5.
Y_pred_test = model.predict(X_test)
Y_pred_test_binary = (Y_pred_test > 0.5).astype(int)

# Sample-averaged metrics; zero_division=0 scores undefined cases as 0.
precision_test, recall_test, f1_test = (
    metric(Y_test, Y_pred_test_binary, average='samples', zero_division=0)
    for metric in (precision_score, recall_score, f1_score)
)

print(f"Test Precision: {precision_test:.4f}")
print(f"Test Recall: {recall_test:.4f}")
print(f"Test F1-Score: {f1_test:.4f}")

# The F1-score on this separate set is poor, which likely indicates overfitting.

Loading Data: 100%|██████████| 151/151 [00:01<00:00, 90.89file/s]


[1m1234/1234[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 6ms/step
Test Precision: 0.5462
Test Recall: 0.3136
Test F1-Score: 0.3787
